{ "best_global_step": 240500, "best_metric": 0.9813696191263432, "best_model_checkpoint": "/workspace/output/lora_r4/checkpoint-240500", "epoch": 68.69145614533069, "eval_steps": 500, "global_step": 242000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002838489923360772, "grad_norm": 7.826807975769043, "learning_rate": 9.999744535906897e-05, "loss": 7.1339569091796875, "step": 10 }, { "epoch": 0.005676979846721544, "grad_norm": 7.7576518058776855, "learning_rate": 9.999460686914563e-05, "loss": 6.710009765625, "step": 20 }, { "epoch": 0.008515469770082317, "grad_norm": 7.841175556182861, "learning_rate": 9.999176837922227e-05, "loss": 6.468508911132813, "step": 30 }, { "epoch": 0.011353959693443088, "grad_norm": 7.927770137786865, "learning_rate": 9.99889298892989e-05, "loss": 6.272714233398437, "step": 40 }, { "epoch": 0.01419244961680386, "grad_norm": 8.131056785583496, "learning_rate": 9.998609139937554e-05, "loss": 6.034750366210938, "step": 50 }, { "epoch": 0.017030939540164634, "grad_norm": 7.635435104370117, "learning_rate": 9.998325290945218e-05, "loss": 5.990374755859375, "step": 60 }, { "epoch": 0.019869429463525403, "grad_norm": 8.140981674194336, "learning_rate": 9.99804144195288e-05, "loss": 5.84742431640625, "step": 70 }, { "epoch": 0.022707919386886176, "grad_norm": 8.634374618530273, "learning_rate": 9.997757592960545e-05, "loss": 5.69959716796875, "step": 80 }, { "epoch": 0.02554640931024695, "grad_norm": 8.20428466796875, "learning_rate": 9.99747374396821e-05, "loss": 5.6543731689453125, "step": 90 }, { "epoch": 0.02838489923360772, "grad_norm": 8.128388404846191, "learning_rate": 9.997189894975873e-05, "loss": 5.422869873046875, "step": 100 }, { "epoch": 0.031223389156968494, "grad_norm": 8.217612266540527, "learning_rate": 9.996906045983537e-05, "loss": 5.347100830078125, "step": 110 }, { "epoch": 0.03406187908032927, "grad_norm": 8.654301643371582, "learning_rate": 9.996622196991201e-05, "loss": 5.253237915039063, "step": 120 }, { "epoch": 0.03690036900369004, "grad_norm": 8.540105819702148, "learning_rate": 9.996338347998865e-05, "loss": 5.096551513671875, "step": 130 }, { "epoch": 0.039738858927050806, "grad_norm": 8.838942527770996, "learning_rate": 9.996054499006528e-05, "loss": 4.921728515625, "step": 140 }, { "epoch": 0.04257734885041158, "grad_norm": 8.903046607971191, "learning_rate": 9.995770650014194e-05, "loss": 4.8088134765625, "step": 150 }, { "epoch": 0.04541583877377235, "grad_norm": 9.810368537902832, "learning_rate": 9.995486801021858e-05, "loss": 4.75050048828125, "step": 160 }, { "epoch": 0.04825432869713313, "grad_norm": 10.207559585571289, "learning_rate": 9.99520295202952e-05, "loss": 4.630392456054688, "step": 170 }, { "epoch": 0.0510928186204939, "grad_norm": 10.042340278625488, "learning_rate": 9.994919103037185e-05, "loss": 4.4931793212890625, "step": 180 }, { "epoch": 0.05393130854385467, "grad_norm": 10.114358901977539, "learning_rate": 9.994635254044849e-05, "loss": 4.399362182617187, "step": 190 }, { "epoch": 0.05676979846721544, "grad_norm": 10.164721488952637, "learning_rate": 9.994351405052512e-05, "loss": 4.324098205566406, "step": 200 }, { "epoch": 0.05960828839057621, "grad_norm": 11.823846817016602, "learning_rate": 9.994067556060176e-05, "loss": 4.284576416015625, "step": 210 }, { "epoch": 0.06244677831393699, "grad_norm": 11.347098350524902, "learning_rate": 9.993783707067841e-05, "loss": 4.103669738769531, "step": 220 }, { "epoch": 0.06528526823729776, "grad_norm": 11.463887214660645, "learning_rate": 9.993499858075504e-05, "loss": 4.066629028320312, "step": 230 }, { "epoch": 0.06812375816065853, "grad_norm": 12.289628982543945, "learning_rate": 9.993216009083168e-05, "loss": 4.048051452636718, "step": 240 }, { "epoch": 0.0709622480840193, "grad_norm": 12.22239875793457, "learning_rate": 9.992932160090832e-05, "loss": 3.9947723388671874, "step": 250 }, { "epoch": 0.07380073800738007, "grad_norm": 12.375640869140625, "learning_rate": 9.992648311098496e-05, "loss": 3.9365478515625, "step": 260 }, { "epoch": 0.07663922793074085, "grad_norm": 12.353590965270996, "learning_rate": 9.992364462106159e-05, "loss": 3.8360382080078126, "step": 270 }, { "epoch": 0.07947771785410161, "grad_norm": 11.50862979888916, "learning_rate": 9.992080613113823e-05, "loss": 3.845257568359375, "step": 280 }, { "epoch": 0.08231620777746239, "grad_norm": 12.36604118347168, "learning_rate": 9.991796764121489e-05, "loss": 3.7559188842773437, "step": 290 }, { "epoch": 0.08515469770082316, "grad_norm": 13.271099090576172, "learning_rate": 9.991512915129152e-05, "loss": 3.75750732421875, "step": 300 }, { "epoch": 0.08799318762418393, "grad_norm": 12.769731521606445, "learning_rate": 9.991229066136816e-05, "loss": 3.6090957641601564, "step": 310 }, { "epoch": 0.0908316775475447, "grad_norm": 12.775890350341797, "learning_rate": 9.99094521714448e-05, "loss": 3.6455108642578127, "step": 320 }, { "epoch": 0.09367016747090548, "grad_norm": 12.35387897491455, "learning_rate": 9.990661368152143e-05, "loss": 3.5637794494628907, "step": 330 }, { "epoch": 0.09650865739426626, "grad_norm": 13.14075756072998, "learning_rate": 9.990377519159807e-05, "loss": 3.536479187011719, "step": 340 }, { "epoch": 0.09934714731762702, "grad_norm": 13.470285415649414, "learning_rate": 9.990093670167472e-05, "loss": 3.44134521484375, "step": 350 }, { "epoch": 0.1021856372409878, "grad_norm": 13.043403625488281, "learning_rate": 9.989809821175135e-05, "loss": 3.4576507568359376, "step": 360 }, { "epoch": 0.10502412716434857, "grad_norm": 13.769856452941895, "learning_rate": 9.989525972182799e-05, "loss": 3.3593673706054688, "step": 370 }, { "epoch": 0.10786261708770933, "grad_norm": 12.712862014770508, "learning_rate": 9.989242123190463e-05, "loss": 3.2312248229980467, "step": 380 }, { "epoch": 0.11070110701107011, "grad_norm": 13.926446914672852, "learning_rate": 9.988958274198128e-05, "loss": 3.353959655761719, "step": 390 }, { "epoch": 0.11353959693443089, "grad_norm": 13.616968154907227, "learning_rate": 9.98867442520579e-05, "loss": 3.261731719970703, "step": 400 }, { "epoch": 0.11637808685779165, "grad_norm": 14.255325317382812, "learning_rate": 9.988390576213454e-05, "loss": 3.2977920532226563, "step": 410 }, { "epoch": 0.11921657678115243, "grad_norm": 14.241050720214844, "learning_rate": 9.988106727221119e-05, "loss": 3.2018287658691404, "step": 420 }, { "epoch": 0.1220550667045132, "grad_norm": 16.304302215576172, "learning_rate": 9.987822878228783e-05, "loss": 3.2613784790039064, "step": 430 }, { "epoch": 0.12489355662787398, "grad_norm": 13.858403205871582, "learning_rate": 9.987539029236447e-05, "loss": 3.149138641357422, "step": 440 }, { "epoch": 0.12773204655123474, "grad_norm": 13.387377738952637, "learning_rate": 9.987283565143344e-05, "loss": 3.088349151611328, "step": 450 }, { "epoch": 0.13057053647459552, "grad_norm": 13.450538635253906, "learning_rate": 9.986999716151008e-05, "loss": 3.0058448791503904, "step": 460 }, { "epoch": 0.1334090263979563, "grad_norm": 13.723711013793945, "learning_rate": 9.986715867158672e-05, "loss": 2.9626235961914062, "step": 470 }, { "epoch": 0.13624751632131707, "grad_norm": 14.08718204498291, "learning_rate": 9.986432018166336e-05, "loss": 3.0352642059326174, "step": 480 }, { "epoch": 0.13908600624467782, "grad_norm": 13.817708969116211, "learning_rate": 9.986148169174e-05, "loss": 2.928153228759766, "step": 490 }, { "epoch": 0.1419244961680386, "grad_norm": 14.170774459838867, "learning_rate": 9.985864320181664e-05, "loss": 2.9139144897460936, "step": 500 }, { "epoch": 0.1419244961680386, "eval_accuracy": 0.18929229986647167, "eval_loss": 3.110288143157959, "eval_runtime": 31.3673, "eval_samples_per_second": 501.382, "eval_steps_per_second": 7.843, "step": 500 }, { "epoch": 0.14476298609139937, "grad_norm": 15.852224349975586, "learning_rate": 9.985580471189327e-05, "loss": 2.839629364013672, "step": 510 }, { "epoch": 0.14760147601476015, "grad_norm": 13.73141860961914, "learning_rate": 9.985296622196991e-05, "loss": 2.8279544830322267, "step": 520 }, { "epoch": 0.15043996593812092, "grad_norm": 16.905445098876953, "learning_rate": 9.985012773204657e-05, "loss": 2.8459964752197267, "step": 530 }, { "epoch": 0.1532784558614817, "grad_norm": 15.00737190246582, "learning_rate": 9.98472892421232e-05, "loss": 2.8309253692626952, "step": 540 }, { "epoch": 0.15611694578484248, "grad_norm": 15.849299430847168, "learning_rate": 9.984445075219984e-05, "loss": 2.8491188049316407, "step": 550 }, { "epoch": 0.15895543570820322, "grad_norm": 13.277036666870117, "learning_rate": 9.984161226227648e-05, "loss": 2.750569152832031, "step": 560 }, { "epoch": 0.161793925631564, "grad_norm": 19.458072662353516, "learning_rate": 9.98387737723531e-05, "loss": 2.669076156616211, "step": 570 }, { "epoch": 0.16463241555492478, "grad_norm": 15.88986587524414, "learning_rate": 9.983593528242975e-05, "loss": 2.7419994354248045, "step": 580 }, { "epoch": 0.16747090547828555, "grad_norm": 17.070947647094727, "learning_rate": 9.983309679250639e-05, "loss": 2.667735481262207, "step": 590 }, { "epoch": 0.17030939540164633, "grad_norm": 17.319679260253906, "learning_rate": 9.983025830258303e-05, "loss": 2.7211326599121093, "step": 600 }, { "epoch": 0.1731478853250071, "grad_norm": 16.1612606048584, "learning_rate": 9.982741981265967e-05, "loss": 2.620807075500488, "step": 610 }, { "epoch": 0.17598637524836785, "grad_norm": 17.112060546875, "learning_rate": 9.982458132273631e-05, "loss": 2.6665264129638673, "step": 620 }, { "epoch": 0.17882486517172863, "grad_norm": 14.721344947814941, "learning_rate": 9.982174283281295e-05, "loss": 2.5853302001953127, "step": 630 }, { "epoch": 0.1816633550950894, "grad_norm": 14.741107940673828, "learning_rate": 9.981890434288958e-05, "loss": 2.572748565673828, "step": 640 }, { "epoch": 0.18450184501845018, "grad_norm": 15.590767860412598, "learning_rate": 9.981606585296622e-05, "loss": 2.5285110473632812, "step": 650 }, { "epoch": 0.18734033494181096, "grad_norm": 18.87065315246582, "learning_rate": 9.981322736304286e-05, "loss": 2.5050926208496094, "step": 660 }, { "epoch": 0.19017882486517174, "grad_norm": 16.421432495117188, "learning_rate": 9.98103888731195e-05, "loss": 2.427301597595215, "step": 670 }, { "epoch": 0.1930173147885325, "grad_norm": 18.1635684967041, "learning_rate": 9.980755038319615e-05, "loss": 2.3481977462768553, "step": 680 }, { "epoch": 0.19585580471189326, "grad_norm": 18.698192596435547, "learning_rate": 9.980471189327279e-05, "loss": 2.39544734954834, "step": 690 }, { "epoch": 0.19869429463525404, "grad_norm": 16.736194610595703, "learning_rate": 9.980187340334942e-05, "loss": 2.4873001098632814, "step": 700 }, { "epoch": 0.2015327845586148, "grad_norm": 19.857229232788086, "learning_rate": 9.979903491342606e-05, "loss": 2.3658916473388674, "step": 710 }, { "epoch": 0.2043712744819756, "grad_norm": 17.635812759399414, "learning_rate": 9.97961964235027e-05, "loss": 2.336787796020508, "step": 720 }, { "epoch": 0.20720976440533637, "grad_norm": 16.26897430419922, "learning_rate": 9.979335793357934e-05, "loss": 2.271711540222168, "step": 730 }, { "epoch": 0.21004825432869714, "grad_norm": 16.782176971435547, "learning_rate": 9.979051944365598e-05, "loss": 2.321268653869629, "step": 740 }, { "epoch": 0.21288674425205792, "grad_norm": 23.24675178527832, "learning_rate": 9.978768095373262e-05, "loss": 2.2826637268066405, "step": 750 }, { "epoch": 0.21572523417541867, "grad_norm": 16.791034698486328, "learning_rate": 9.978484246380926e-05, "loss": 2.306294631958008, "step": 760 }, { "epoch": 0.21856372409877944, "grad_norm": 26.815982818603516, "learning_rate": 9.978200397388589e-05, "loss": 2.3874790191650392, "step": 770 }, { "epoch": 0.22140221402214022, "grad_norm": 14.967060089111328, "learning_rate": 9.977916548396253e-05, "loss": 2.256601905822754, "step": 780 }, { "epoch": 0.224240703945501, "grad_norm": 15.23949146270752, "learning_rate": 9.977632699403917e-05, "loss": 2.1650556564331054, "step": 790 }, { "epoch": 0.22707919386886177, "grad_norm": 17.264827728271484, "learning_rate": 9.977348850411582e-05, "loss": 2.195321273803711, "step": 800 }, { "epoch": 0.22991768379222255, "grad_norm": 20.194473266601562, "learning_rate": 9.977065001419246e-05, "loss": 2.245586967468262, "step": 810 }, { "epoch": 0.2327561737155833, "grad_norm": 17.323143005371094, "learning_rate": 9.97678115242691e-05, "loss": 2.259768486022949, "step": 820 }, { "epoch": 0.23559466363894407, "grad_norm": 15.63845443725586, "learning_rate": 9.976497303434573e-05, "loss": 2.2193319320678713, "step": 830 }, { "epoch": 0.23843315356230485, "grad_norm": 17.313743591308594, "learning_rate": 9.976213454442237e-05, "loss": 2.1672977447509765, "step": 840 }, { "epoch": 0.24127164348566563, "grad_norm": 17.40256690979004, "learning_rate": 9.975929605449901e-05, "loss": 2.0945472717285156, "step": 850 }, { "epoch": 0.2441101334090264, "grad_norm": 21.776893615722656, "learning_rate": 9.975645756457565e-05, "loss": 2.078630256652832, "step": 860 }, { "epoch": 0.24694862333238718, "grad_norm": 19.48147964477539, "learning_rate": 9.975361907465229e-05, "loss": 2.1304529190063475, "step": 870 }, { "epoch": 0.24978711325574796, "grad_norm": 16.93062973022461, "learning_rate": 9.975078058472893e-05, "loss": 2.069942092895508, "step": 880 }, { "epoch": 0.2526256031791087, "grad_norm": 17.80629539489746, "learning_rate": 9.974794209480557e-05, "loss": 2.0870670318603515, "step": 890 }, { "epoch": 0.2554640931024695, "grad_norm": 20.66566276550293, "learning_rate": 9.97451036048822e-05, "loss": 2.1093101501464844, "step": 900 }, { "epoch": 0.25830258302583026, "grad_norm": 16.249364852905273, "learning_rate": 9.974226511495884e-05, "loss": 2.1144399642944336, "step": 910 }, { "epoch": 0.26114107294919103, "grad_norm": 18.46486473083496, "learning_rate": 9.973942662503549e-05, "loss": 2.0278759002685547, "step": 920 }, { "epoch": 0.2639795628725518, "grad_norm": 17.295791625976562, "learning_rate": 9.973658813511213e-05, "loss": 1.9899866104125976, "step": 930 }, { "epoch": 0.2668180527959126, "grad_norm": 16.32284164428711, "learning_rate": 9.973374964518877e-05, "loss": 1.9527853012084961, "step": 940 }, { "epoch": 0.26965654271927336, "grad_norm": 18.139652252197266, "learning_rate": 9.973091115526541e-05, "loss": 1.9534086227416991, "step": 950 }, { "epoch": 0.27249503264263414, "grad_norm": 16.14771842956543, "learning_rate": 9.972807266534204e-05, "loss": 1.9304651260375976, "step": 960 }, { "epoch": 0.2753335225659949, "grad_norm": 18.864286422729492, "learning_rate": 9.972523417541868e-05, "loss": 2.0156524658203123, "step": 970 }, { "epoch": 0.27817201248935564, "grad_norm": 15.858384132385254, "learning_rate": 9.972239568549532e-05, "loss": 1.974644088745117, "step": 980 }, { "epoch": 0.2810105024127164, "grad_norm": 28.33545684814453, "learning_rate": 9.971955719557196e-05, "loss": 1.913672637939453, "step": 990 }, { "epoch": 0.2838489923360772, "grad_norm": 20.52104377746582, "learning_rate": 9.971700255464094e-05, "loss": 1.8821027755737305, "step": 1000 }, { "epoch": 0.2838489923360772, "eval_accuracy": 0.3853246010046417, "eval_loss": 2.1993799209594727, "eval_runtime": 30.911, "eval_samples_per_second": 508.784, "eval_steps_per_second": 7.958, "step": 1000 }, { "epoch": 0.28668748225943796, "grad_norm": 17.233631134033203, "learning_rate": 9.971416406471757e-05, "loss": 1.888556671142578, "step": 1010 }, { "epoch": 0.28952597218279874, "grad_norm": 18.930858612060547, "learning_rate": 9.971132557479421e-05, "loss": 1.9341995239257812, "step": 1020 }, { "epoch": 0.2923644621061595, "grad_norm": 23.888870239257812, "learning_rate": 9.970848708487085e-05, "loss": 1.8232383728027344, "step": 1030 }, { "epoch": 0.2952029520295203, "grad_norm": 21.316509246826172, "learning_rate": 9.970564859494749e-05, "loss": 1.9536394119262694, "step": 1040 }, { "epoch": 0.29804144195288107, "grad_norm": 21.5119571685791, "learning_rate": 9.970281010502413e-05, "loss": 1.856839370727539, "step": 1050 }, { "epoch": 0.30087993187624185, "grad_norm": 18.16112518310547, "learning_rate": 9.969997161510078e-05, "loss": 1.801273727416992, "step": 1060 }, { "epoch": 0.3037184217996026, "grad_norm": 14.466642379760742, "learning_rate": 9.969713312517742e-05, "loss": 1.7231252670288086, "step": 1070 }, { "epoch": 0.3065569117229634, "grad_norm": 31.817644119262695, "learning_rate": 9.969429463525405e-05, "loss": 1.9028480529785157, "step": 1080 }, { "epoch": 0.3093954016463242, "grad_norm": 20.963695526123047, "learning_rate": 9.969145614533069e-05, "loss": 1.851097869873047, "step": 1090 }, { "epoch": 0.31223389156968495, "grad_norm": 27.92727279663086, "learning_rate": 9.968861765540733e-05, "loss": 1.7934017181396484, "step": 1100 }, { "epoch": 0.3150723814930457, "grad_norm": 22.669452667236328, "learning_rate": 9.968577916548397e-05, "loss": 1.7833684921264648, "step": 1110 }, { "epoch": 0.31791087141640645, "grad_norm": 20.252046585083008, "learning_rate": 9.968294067556061e-05, "loss": 1.7792678833007813, "step": 1120 }, { "epoch": 0.3207493613397672, "grad_norm": 18.623905181884766, "learning_rate": 9.968010218563725e-05, "loss": 1.8188812255859375, "step": 1130 }, { "epoch": 0.323587851263128, "grad_norm": 19.032197952270508, "learning_rate": 9.967726369571388e-05, "loss": 1.6491582870483399, "step": 1140 }, { "epoch": 0.3264263411864888, "grad_norm": 22.769596099853516, "learning_rate": 9.967442520579052e-05, "loss": 1.7024646759033204, "step": 1150 }, { "epoch": 0.32926483110984955, "grad_norm": 17.219308853149414, "learning_rate": 9.967158671586716e-05, "loss": 1.701007080078125, "step": 1160 }, { "epoch": 0.33210332103321033, "grad_norm": 18.769567489624023, "learning_rate": 9.96687482259438e-05, "loss": 1.747934341430664, "step": 1170 }, { "epoch": 0.3349418109565711, "grad_norm": 16.897687911987305, "learning_rate": 9.966590973602045e-05, "loss": 1.6520572662353517, "step": 1180 }, { "epoch": 0.3377803008799319, "grad_norm": 18.1732234954834, "learning_rate": 9.966307124609709e-05, "loss": 1.717509651184082, "step": 1190 }, { "epoch": 0.34061879080329266, "grad_norm": 18.14131736755371, "learning_rate": 9.966023275617373e-05, "loss": 1.6966127395629882, "step": 1200 }, { "epoch": 0.34345728072665344, "grad_norm": 24.596132278442383, "learning_rate": 9.965739426625036e-05, "loss": 1.6875707626342773, "step": 1210 }, { "epoch": 0.3462957706500142, "grad_norm": 20.04149055480957, "learning_rate": 9.9654555776327e-05, "loss": 1.6078899383544922, "step": 1220 }, { "epoch": 0.349134260573375, "grad_norm": 18.574588775634766, "learning_rate": 9.965171728640364e-05, "loss": 1.682082748413086, "step": 1230 }, { "epoch": 0.3519727504967357, "grad_norm": 20.25678253173828, "learning_rate": 9.964887879648028e-05, "loss": 1.7114227294921875, "step": 1240 }, { "epoch": 0.3548112404200965, "grad_norm": 20.133079528808594, "learning_rate": 9.964604030655692e-05, "loss": 1.6490610122680665, "step": 1250 }, { "epoch": 0.35764973034345726, "grad_norm": 22.176372528076172, "learning_rate": 9.964320181663356e-05, "loss": 1.676508903503418, "step": 1260 }, { "epoch": 0.36048822026681804, "grad_norm": 19.151033401489258, "learning_rate": 9.964036332671019e-05, "loss": 1.6343936920166016, "step": 1270 }, { "epoch": 0.3633267101901788, "grad_norm": 18.87334442138672, "learning_rate": 9.963752483678683e-05, "loss": 1.5801480293273926, "step": 1280 }, { "epoch": 0.3661652001135396, "grad_norm": 18.33944320678711, "learning_rate": 9.963468634686347e-05, "loss": 1.6038869857788085, "step": 1290 }, { "epoch": 0.36900369003690037, "grad_norm": 26.26329231262207, "learning_rate": 9.963184785694011e-05, "loss": 1.5226101875305176, "step": 1300 }, { "epoch": 0.37184217996026114, "grad_norm": 23.27513885498047, "learning_rate": 9.962900936701676e-05, "loss": 1.6262874603271484, "step": 1310 }, { "epoch": 0.3746806698836219, "grad_norm": 20.425983428955078, "learning_rate": 9.96261708770934e-05, "loss": 1.5267268180847169, "step": 1320 }, { "epoch": 0.3775191598069827, "grad_norm": 31.24871063232422, "learning_rate": 9.962333238717003e-05, "loss": 1.5792694091796875, "step": 1330 }, { "epoch": 0.3803576497303435, "grad_norm": 19.59308433532715, "learning_rate": 9.962049389724667e-05, "loss": 1.5731708526611328, "step": 1340 }, { "epoch": 0.38319613965370425, "grad_norm": 23.49835205078125, "learning_rate": 9.961765540732331e-05, "loss": 1.5430625915527343, "step": 1350 }, { "epoch": 0.386034629577065, "grad_norm": 18.125337600708008, "learning_rate": 9.961481691739995e-05, "loss": 1.6195716857910156, "step": 1360 }, { "epoch": 0.3888731195004258, "grad_norm": 20.855470657348633, "learning_rate": 9.961197842747658e-05, "loss": 1.5137943267822265, "step": 1370 }, { "epoch": 0.3917116094237865, "grad_norm": 19.750150680541992, "learning_rate": 9.960913993755323e-05, "loss": 1.5584377288818358, "step": 1380 }, { "epoch": 0.3945500993471473, "grad_norm": 17.54871940612793, "learning_rate": 9.960630144762987e-05, "loss": 1.509945011138916, "step": 1390 }, { "epoch": 0.3973885892705081, "grad_norm": 19.041837692260742, "learning_rate": 9.96034629577065e-05, "loss": 1.4547015190124513, "step": 1400 }, { "epoch": 0.40022707919386885, "grad_norm": 23.58037757873535, "learning_rate": 9.960062446778314e-05, "loss": 1.4935945510864257, "step": 1410 }, { "epoch": 0.4030655691172296, "grad_norm": 21.330665588378906, "learning_rate": 9.959778597785978e-05, "loss": 1.5234458923339844, "step": 1420 }, { "epoch": 0.4059040590405904, "grad_norm": 27.210187911987305, "learning_rate": 9.959494748793641e-05, "loss": 1.4157642364501952, "step": 1430 }, { "epoch": 0.4087425489639512, "grad_norm": 24.996427536010742, "learning_rate": 9.959210899801307e-05, "loss": 1.4804776191711426, "step": 1440 }, { "epoch": 0.41158103888731196, "grad_norm": 20.157520294189453, "learning_rate": 9.958927050808971e-05, "loss": 1.5209645271301269, "step": 1450 }, { "epoch": 0.41441952881067273, "grad_norm": 24.016756057739258, "learning_rate": 9.958643201816634e-05, "loss": 1.5178532600402832, "step": 1460 }, { "epoch": 0.4172580187340335, "grad_norm": 24.519412994384766, "learning_rate": 9.958359352824298e-05, "loss": 1.512025737762451, "step": 1470 }, { "epoch": 0.4200965086573943, "grad_norm": 26.331186294555664, "learning_rate": 9.958075503831962e-05, "loss": 1.4657453536987304, "step": 1480 }, { "epoch": 0.42293499858075506, "grad_norm": 22.380062103271484, "learning_rate": 9.957791654839626e-05, "loss": 1.468503761291504, "step": 1490 }, { "epoch": 0.42577348850411584, "grad_norm": 20.46269989013672, "learning_rate": 9.957507805847289e-05, "loss": 1.493769645690918, "step": 1500 }, { "epoch": 0.42577348850411584, "eval_accuracy": 0.4980606600114453, "eval_loss": 1.7268073558807373, "eval_runtime": 31.0286, "eval_samples_per_second": 506.854, "eval_steps_per_second": 7.928, "step": 1500 }, { "epoch": 0.42861197842747656, "grad_norm": 31.702930450439453, "learning_rate": 9.957223956854954e-05, "loss": 1.5163578033447265, "step": 1510 }, { "epoch": 0.43145046835083734, "grad_norm": 18.17645263671875, "learning_rate": 9.956940107862618e-05, "loss": 1.4007353782653809, "step": 1520 }, { "epoch": 0.4342889582741981, "grad_norm": 18.51492691040039, "learning_rate": 9.956656258870281e-05, "loss": 1.3827913284301758, "step": 1530 }, { "epoch": 0.4371274481975589, "grad_norm": 19.405197143554688, "learning_rate": 9.956372409877945e-05, "loss": 1.3922388076782226, "step": 1540 }, { "epoch": 0.43996593812091966, "grad_norm": 31.843347549438477, "learning_rate": 9.95608856088561e-05, "loss": 1.4234270095825194, "step": 1550 }, { "epoch": 0.44280442804428044, "grad_norm": 35.88179016113281, "learning_rate": 9.955804711893272e-05, "loss": 1.4969772338867187, "step": 1560 }, { "epoch": 0.4456429179676412, "grad_norm": 20.38347816467285, "learning_rate": 9.955520862900936e-05, "loss": 1.4426648139953613, "step": 1570 }, { "epoch": 0.448481407891002, "grad_norm": 19.19594955444336, "learning_rate": 9.955237013908602e-05, "loss": 1.3540478706359864, "step": 1580 }, { "epoch": 0.45131989781436277, "grad_norm": 20.872447967529297, "learning_rate": 9.954953164916265e-05, "loss": 1.392230224609375, "step": 1590 }, { "epoch": 0.45415838773772355, "grad_norm": 25.477401733398438, "learning_rate": 9.954669315923929e-05, "loss": 1.3302290916442872, "step": 1600 }, { "epoch": 0.4569968776610843, "grad_norm": 19.187055587768555, "learning_rate": 9.954385466931593e-05, "loss": 1.3069095611572266, "step": 1610 }, { "epoch": 0.4598353675844451, "grad_norm": 25.863494873046875, "learning_rate": 9.954101617939257e-05, "loss": 1.321756935119629, "step": 1620 }, { "epoch": 0.4626738575078059, "grad_norm": 18.916994094848633, "learning_rate": 9.95381776894692e-05, "loss": 1.322645378112793, "step": 1630 }, { "epoch": 0.4655123474311666, "grad_norm": 21.816265106201172, "learning_rate": 9.953533919954585e-05, "loss": 1.330703067779541, "step": 1640 }, { "epoch": 0.46835083735452737, "grad_norm": 20.472036361694336, "learning_rate": 9.95325007096225e-05, "loss": 1.334905242919922, "step": 1650 }, { "epoch": 0.47118932727788815, "grad_norm": 24.386188507080078, "learning_rate": 9.952966221969912e-05, "loss": 1.3683245658874512, "step": 1660 }, { "epoch": 0.4740278172012489, "grad_norm": 28.326887130737305, "learning_rate": 9.952682372977576e-05, "loss": 1.2968414306640625, "step": 1670 }, { "epoch": 0.4768663071246097, "grad_norm": 17.494138717651367, "learning_rate": 9.95239852398524e-05, "loss": 1.3585668563842774, "step": 1680 }, { "epoch": 0.4797047970479705, "grad_norm": 19.921358108520508, "learning_rate": 9.952114674992903e-05, "loss": 1.3288690567016601, "step": 1690 }, { "epoch": 0.48254328697133125, "grad_norm": 20.302352905273438, "learning_rate": 9.951830826000567e-05, "loss": 1.3096282958984375, "step": 1700 }, { "epoch": 0.48538177689469203, "grad_norm": 28.344350814819336, "learning_rate": 9.951546977008233e-05, "loss": 1.3355764389038085, "step": 1710 }, { "epoch": 0.4882202668180528, "grad_norm": 18.56201171875, "learning_rate": 9.951263128015896e-05, "loss": 1.3571157455444336, "step": 1720 }, { "epoch": 0.4910587567414136, "grad_norm": 20.75018310546875, "learning_rate": 9.95097927902356e-05, "loss": 1.295008087158203, "step": 1730 }, { "epoch": 0.49389724666477436, "grad_norm": 21.700300216674805, "learning_rate": 9.950695430031224e-05, "loss": 1.3051130294799804, "step": 1740 }, { "epoch": 0.49673573658813513, "grad_norm": 24.367721557617188, "learning_rate": 9.950411581038888e-05, "loss": 1.2838464736938477, "step": 1750 }, { "epoch": 0.4995742265114959, "grad_norm": 18.05948257446289, "learning_rate": 9.950127732046551e-05, "loss": 1.2749036788940429, "step": 1760 }, { "epoch": 0.5024127164348566, "grad_norm": 26.424760818481445, "learning_rate": 9.949843883054215e-05, "loss": 1.1802459716796876, "step": 1770 }, { "epoch": 0.5052512063582174, "grad_norm": 23.55957794189453, "learning_rate": 9.94956003406188e-05, "loss": 1.279608917236328, "step": 1780 }, { "epoch": 0.5080896962815782, "grad_norm": 24.43779754638672, "learning_rate": 9.949276185069543e-05, "loss": 1.3498580932617188, "step": 1790 }, { "epoch": 0.510928186204939, "grad_norm": 26.3443660736084, "learning_rate": 9.948992336077207e-05, "loss": 1.3418119430541993, "step": 1800 }, { "epoch": 0.5137666761282997, "grad_norm": 22.686662673950195, "learning_rate": 9.948708487084872e-05, "loss": 1.2427737236022949, "step": 1810 }, { "epoch": 0.5166051660516605, "grad_norm": 27.369094848632812, "learning_rate": 9.948424638092534e-05, "loss": 1.3421775817871093, "step": 1820 }, { "epoch": 0.5194436559750213, "grad_norm": 20.854188919067383, "learning_rate": 9.948140789100199e-05, "loss": 1.2650754928588868, "step": 1830 }, { "epoch": 0.5222821458983821, "grad_norm": 19.906824111938477, "learning_rate": 9.947856940107864e-05, "loss": 1.1981738090515137, "step": 1840 }, { "epoch": 0.5251206358217428, "grad_norm": 18.435914993286133, "learning_rate": 9.947573091115527e-05, "loss": 1.2321277618408204, "step": 1850 }, { "epoch": 0.5279591257451036, "grad_norm": 23.23478126525879, "learning_rate": 9.947289242123191e-05, "loss": 1.2562955856323241, "step": 1860 }, { "epoch": 0.5307976156684644, "grad_norm": 21.96221160888672, "learning_rate": 9.947005393130855e-05, "loss": 1.2214305877685547, "step": 1870 }, { "epoch": 0.5336361055918252, "grad_norm": 22.680828094482422, "learning_rate": 9.946721544138519e-05, "loss": 1.2809284210205079, "step": 1880 }, { "epoch": 0.536474595515186, "grad_norm": 19.553720474243164, "learning_rate": 9.946437695146182e-05, "loss": 1.15775146484375, "step": 1890 }, { "epoch": 0.5393130854385467, "grad_norm": 21.628679275512695, "learning_rate": 9.946153846153846e-05, "loss": 1.1703840255737306, "step": 1900 }, { "epoch": 0.5421515753619075, "grad_norm": 27.79534912109375, "learning_rate": 9.945869997161512e-05, "loss": 1.1535178184509278, "step": 1910 }, { "epoch": 0.5449900652852683, "grad_norm": 23.678956985473633, "learning_rate": 9.945586148169174e-05, "loss": 1.2113995552062988, "step": 1920 }, { "epoch": 0.547828555208629, "grad_norm": 19.417715072631836, "learning_rate": 9.945302299176839e-05, "loss": 1.2194194793701172, "step": 1930 }, { "epoch": 0.5506670451319898, "grad_norm": 17.17325210571289, "learning_rate": 9.945018450184503e-05, "loss": 1.1678752899169922, "step": 1940 }, { "epoch": 0.5535055350553506, "grad_norm": 20.972352981567383, "learning_rate": 9.944734601192165e-05, "loss": 1.1621131896972656, "step": 1950 }, { "epoch": 0.5563440249787113, "grad_norm": 19.941749572753906, "learning_rate": 9.94445075219983e-05, "loss": 1.1394742965698241, "step": 1960 }, { "epoch": 0.559182514902072, "grad_norm": 21.390018463134766, "learning_rate": 9.944166903207494e-05, "loss": 1.1680900573730468, "step": 1970 }, { "epoch": 0.5620210048254328, "grad_norm": 24.322654724121094, "learning_rate": 9.943883054215158e-05, "loss": 1.0931670188903808, "step": 1980 }, { "epoch": 0.5648594947487936, "grad_norm": 18.85932731628418, "learning_rate": 9.943599205222822e-05, "loss": 1.1821538925170898, "step": 1990 }, { "epoch": 0.5676979846721544, "grad_norm": 21.754899978637695, "learning_rate": 9.943315356230486e-05, "loss": 1.163304901123047, "step": 2000 }, { "epoch": 0.5676979846721544, "eval_accuracy": 0.5731544477649901, "eval_loss": 1.4467488527297974, "eval_runtime": 31.4133, "eval_samples_per_second": 500.647, "eval_steps_per_second": 7.831, "step": 2000 }, { "epoch": 0.5705364745955152, "grad_norm": 22.230655670166016, "learning_rate": 9.94303150723815e-05, "loss": 1.1267522811889648, "step": 2010 }, { "epoch": 0.5733749645188759, "grad_norm": 27.563358306884766, "learning_rate": 9.942747658245813e-05, "loss": 1.1471932411193848, "step": 2020 }, { "epoch": 0.5762134544422367, "grad_norm": 23.25899887084961, "learning_rate": 9.942463809253477e-05, "loss": 1.12424898147583, "step": 2030 }, { "epoch": 0.5790519443655975, "grad_norm": 24.0449161529541, "learning_rate": 9.942179960261143e-05, "loss": 1.0815184593200684, "step": 2040 }, { "epoch": 0.5818904342889583, "grad_norm": 33.145957946777344, "learning_rate": 9.941896111268806e-05, "loss": 1.100113582611084, "step": 2050 }, { "epoch": 0.584728924212319, "grad_norm": 18.245145797729492, "learning_rate": 9.94161226227647e-05, "loss": 1.14227352142334, "step": 2060 }, { "epoch": 0.5875674141356798, "grad_norm": 24.452871322631836, "learning_rate": 9.941328413284134e-05, "loss": 1.0578697204589844, "step": 2070 }, { "epoch": 0.5904059040590406, "grad_norm": 21.05449867248535, "learning_rate": 9.941044564291797e-05, "loss": 1.0928358078002929, "step": 2080 }, { "epoch": 0.5932443939824014, "grad_norm": 19.354047775268555, "learning_rate": 9.940760715299461e-05, "loss": 1.088101291656494, "step": 2090 }, { "epoch": 0.5960828839057621, "grad_norm": 23.127643585205078, "learning_rate": 9.940476866307125e-05, "loss": 1.1032953262329102, "step": 2100 }, { "epoch": 0.5989213738291229, "grad_norm": 23.332597732543945, "learning_rate": 9.940193017314789e-05, "loss": 1.1219491958618164, "step": 2110 }, { "epoch": 0.6017598637524837, "grad_norm": 27.56525421142578, "learning_rate": 9.939909168322453e-05, "loss": 1.1679291725158691, "step": 2120 }, { "epoch": 0.6045983536758445, "grad_norm": 25.26095962524414, "learning_rate": 9.939625319330117e-05, "loss": 1.0028508186340332, "step": 2130 }, { "epoch": 0.6074368435992052, "grad_norm": 22.253829956054688, "learning_rate": 9.939341470337781e-05, "loss": 1.0808586120605468, "step": 2140 }, { "epoch": 0.610275333522566, "grad_norm": 24.0926456451416, "learning_rate": 9.939057621345444e-05, "loss": 1.0530101776123046, "step": 2150 }, { "epoch": 0.6131138234459268, "grad_norm": 24.591312408447266, "learning_rate": 9.938773772353108e-05, "loss": 1.057276725769043, "step": 2160 }, { "epoch": 0.6159523133692876, "grad_norm": 32.76468276977539, "learning_rate": 9.938489923360772e-05, "loss": 1.0916509628295898, "step": 2170 }, { "epoch": 0.6187908032926484, "grad_norm": 37.99615478515625, "learning_rate": 9.938206074368437e-05, "loss": 1.0856529235839845, "step": 2180 }, { "epoch": 0.6216292932160091, "grad_norm": 29.176586151123047, "learning_rate": 9.937922225376101e-05, "loss": 1.0997905731201172, "step": 2190 }, { "epoch": 0.6244677831393699, "grad_norm": 28.28167152404785, "learning_rate": 9.937638376383765e-05, "loss": 1.0920446395874024, "step": 2200 }, { "epoch": 0.6273062730627307, "grad_norm": 19.030431747436523, "learning_rate": 9.937354527391428e-05, "loss": 1.0603958129882813, "step": 2210 }, { "epoch": 0.6301447629860913, "grad_norm": 18.90097427368164, "learning_rate": 9.937070678399092e-05, "loss": 1.085594654083252, "step": 2220 }, { "epoch": 0.6329832529094521, "grad_norm": 41.02661895751953, "learning_rate": 9.936786829406756e-05, "loss": 1.0644072532653808, "step": 2230 }, { "epoch": 0.6358217428328129, "grad_norm": 28.411516189575195, "learning_rate": 9.93650298041442e-05, "loss": 1.0713820457458496, "step": 2240 }, { "epoch": 0.6386602327561737, "grad_norm": 22.9285945892334, "learning_rate": 9.936219131422084e-05, "loss": 1.0817390441894532, "step": 2250 }, { "epoch": 0.6414987226795344, "grad_norm": 28.542499542236328, "learning_rate": 9.935935282429748e-05, "loss": 1.1385957717895507, "step": 2260 }, { "epoch": 0.6443372126028952, "grad_norm": 21.064485549926758, "learning_rate": 9.935651433437411e-05, "loss": 1.0123428344726562, "step": 2270 }, { "epoch": 0.647175702526256, "grad_norm": 22.37677001953125, "learning_rate": 9.935367584445075e-05, "loss": 1.0572824478149414, "step": 2280 }, { "epoch": 0.6500141924496168, "grad_norm": 20.43739128112793, "learning_rate": 9.93508373545274e-05, "loss": 1.0530651092529297, "step": 2290 }, { "epoch": 0.6528526823729776, "grad_norm": 30.628801345825195, "learning_rate": 9.934799886460404e-05, "loss": 1.0473257064819337, "step": 2300 }, { "epoch": 0.6556911722963383, "grad_norm": 31.006044387817383, "learning_rate": 9.934516037468068e-05, "loss": 1.0243597030639648, "step": 2310 }, { "epoch": 0.6585296622196991, "grad_norm": 24.01661491394043, "learning_rate": 9.934232188475732e-05, "loss": 1.0115116119384766, "step": 2320 }, { "epoch": 0.6613681521430599, "grad_norm": 20.445096969604492, "learning_rate": 9.933948339483396e-05, "loss": 1.0294639587402343, "step": 2330 }, { "epoch": 0.6642066420664207, "grad_norm": 20.086233139038086, "learning_rate": 9.933664490491059e-05, "loss": 1.017643356323242, "step": 2340 }, { "epoch": 0.6670451319897814, "grad_norm": 23.536048889160156, "learning_rate": 9.933380641498723e-05, "loss": 1.0868244171142578, "step": 2350 }, { "epoch": 0.6698836219131422, "grad_norm": 26.774625778198242, "learning_rate": 9.933096792506387e-05, "loss": 1.0430272102355957, "step": 2360 }, { "epoch": 0.672722111836503, "grad_norm": 32.199214935302734, "learning_rate": 9.93281294351405e-05, "loss": 0.9647135734558105, "step": 2370 }, { "epoch": 0.6755606017598638, "grad_norm": 28.380857467651367, "learning_rate": 9.932529094521715e-05, "loss": 0.9824605941772461, "step": 2380 }, { "epoch": 0.6783990916832245, "grad_norm": 26.344202041625977, "learning_rate": 9.93224524552938e-05, "loss": 0.9529353141784668, "step": 2390 }, { "epoch": 0.6812375816065853, "grad_norm": 19.980390548706055, "learning_rate": 9.931961396537042e-05, "loss": 1.0057503700256347, "step": 2400 }, { "epoch": 0.6840760715299461, "grad_norm": 24.75773811340332, "learning_rate": 9.931677547544706e-05, "loss": 1.0897193908691407, "step": 2410 }, { "epoch": 0.6869145614533069, "grad_norm": 31.166263580322266, "learning_rate": 9.93139369855237e-05, "loss": 0.9823177337646485, "step": 2420 }, { "epoch": 0.6897530513766676, "grad_norm": 14.208596229553223, "learning_rate": 9.931109849560035e-05, "loss": 0.998142147064209, "step": 2430 }, { "epoch": 0.6925915413000284, "grad_norm": 22.746000289916992, "learning_rate": 9.930826000567699e-05, "loss": 1.0114707946777344, "step": 2440 }, { "epoch": 0.6954300312233892, "grad_norm": 25.27461051940918, "learning_rate": 9.930542151575363e-05, "loss": 0.9773696899414063, "step": 2450 }, { "epoch": 0.69826852114675, "grad_norm": 22.146265029907227, "learning_rate": 9.930258302583027e-05, "loss": 1.0271379470825195, "step": 2460 }, { "epoch": 0.7011070110701108, "grad_norm": 19.95235824584961, "learning_rate": 9.92997445359069e-05, "loss": 0.9382213592529297, "step": 2470 }, { "epoch": 0.7039455009934714, "grad_norm": 16.78559684753418, "learning_rate": 9.929690604598354e-05, "loss": 0.9645964622497558, "step": 2480 }, { "epoch": 0.7067839909168322, "grad_norm": 18.311519622802734, "learning_rate": 9.929406755606018e-05, "loss": 0.9811866760253907, "step": 2490 }, { "epoch": 0.709622480840193, "grad_norm": 34.024078369140625, "learning_rate": 9.929122906613681e-05, "loss": 0.9456801414489746, "step": 2500 }, { "epoch": 0.709622480840193, "eval_accuracy": 0.6204616265021937, "eval_loss": 1.2559458017349243, "eval_runtime": 30.558, "eval_samples_per_second": 514.661, "eval_steps_per_second": 8.05, "step": 2500 }, { "epoch": 0.7124609707635537, "grad_norm": 24.287723541259766, "learning_rate": 9.928839057621346e-05, "loss": 0.9708761215209961, "step": 2510 }, { "epoch": 0.7152994606869145, "grad_norm": 22.299070358276367, "learning_rate": 9.92855520862901e-05, "loss": 1.0146733283996583, "step": 2520 }, { "epoch": 0.7181379506102753, "grad_norm": 23.246183395385742, "learning_rate": 9.928271359636673e-05, "loss": 1.0145591735839843, "step": 2530 }, { "epoch": 0.7209764405336361, "grad_norm": 30.1379337310791, "learning_rate": 9.927987510644337e-05, "loss": 0.9871024131774903, "step": 2540 }, { "epoch": 0.7238149304569969, "grad_norm": 28.686357498168945, "learning_rate": 9.927703661652002e-05, "loss": 0.962646770477295, "step": 2550 }, { "epoch": 0.7266534203803576, "grad_norm": 22.367624282836914, "learning_rate": 9.927419812659666e-05, "loss": 0.9451247215270996, "step": 2560 }, { "epoch": 0.7294919103037184, "grad_norm": 23.129642486572266, "learning_rate": 9.92713596366733e-05, "loss": 0.9488322257995605, "step": 2570 }, { "epoch": 0.7323304002270792, "grad_norm": 19.723764419555664, "learning_rate": 9.926852114674994e-05, "loss": 0.9426379203796387, "step": 2580 }, { "epoch": 0.73516889015044, "grad_norm": 24.903884887695312, "learning_rate": 9.926568265682658e-05, "loss": 0.9138444900512696, "step": 2590 }, { "epoch": 0.7380073800738007, "grad_norm": 34.61288070678711, "learning_rate": 9.926284416690321e-05, "loss": 0.95067138671875, "step": 2600 }, { "epoch": 0.7408458699971615, "grad_norm": 23.43537139892578, "learning_rate": 9.926000567697985e-05, "loss": 0.9514213562011719, "step": 2610 }, { "epoch": 0.7436843599205223, "grad_norm": 20.223928451538086, "learning_rate": 9.925716718705649e-05, "loss": 0.9066153526306152, "step": 2620 }, { "epoch": 0.7465228498438831, "grad_norm": 30.943580627441406, "learning_rate": 9.925432869713312e-05, "loss": 0.9603883743286132, "step": 2630 }, { "epoch": 0.7493613397672438, "grad_norm": 26.97696876525879, "learning_rate": 9.925149020720977e-05, "loss": 0.8883302688598633, "step": 2640 }, { "epoch": 0.7521998296906046, "grad_norm": 28.247447967529297, "learning_rate": 9.924865171728642e-05, "loss": 0.9468140602111816, "step": 2650 }, { "epoch": 0.7550383196139654, "grad_norm": 33.24482727050781, "learning_rate": 9.924581322736304e-05, "loss": 0.8988624572753906, "step": 2660 }, { "epoch": 0.7578768095373262, "grad_norm": 21.37324333190918, "learning_rate": 9.924297473743968e-05, "loss": 0.9730520248413086, "step": 2670 }, { "epoch": 0.760715299460687, "grad_norm": 21.114492416381836, "learning_rate": 9.924013624751633e-05, "loss": 0.9491323471069336, "step": 2680 }, { "epoch": 0.7635537893840477, "grad_norm": 16.801727294921875, "learning_rate": 9.923729775759297e-05, "loss": 0.8683090209960938, "step": 2690 }, { "epoch": 0.7663922793074085, "grad_norm": 29.216243743896484, "learning_rate": 9.92344592676696e-05, "loss": 0.9094600677490234, "step": 2700 }, { "epoch": 0.7692307692307693, "grad_norm": 35.45439910888672, "learning_rate": 9.923162077774625e-05, "loss": 0.9245884895324707, "step": 2710 }, { "epoch": 0.77206925915413, "grad_norm": 23.179960250854492, "learning_rate": 9.922878228782289e-05, "loss": 0.9042709350585938, "step": 2720 }, { "epoch": 0.7749077490774908, "grad_norm": 16.718400955200195, "learning_rate": 9.922594379789952e-05, "loss": 0.8826107978820801, "step": 2730 }, { "epoch": 0.7777462390008516, "grad_norm": 24.61229705810547, "learning_rate": 9.922310530797616e-05, "loss": 0.9001324653625489, "step": 2740 }, { "epoch": 0.7805847289242123, "grad_norm": 30.58627700805664, "learning_rate": 9.92202668180528e-05, "loss": 0.8385726928710937, "step": 2750 }, { "epoch": 0.783423218847573, "grad_norm": 16.988853454589844, "learning_rate": 9.921742832812943e-05, "loss": 0.8284055709838867, "step": 2760 }, { "epoch": 0.7862617087709338, "grad_norm": 28.70233917236328, "learning_rate": 9.921458983820609e-05, "loss": 0.8698087692260742, "step": 2770 }, { "epoch": 0.7891001986942946, "grad_norm": 19.30978012084961, "learning_rate": 9.921175134828273e-05, "loss": 0.8799326896667481, "step": 2780 }, { "epoch": 0.7919386886176554, "grad_norm": 20.468929290771484, "learning_rate": 9.920891285835935e-05, "loss": 0.8119211196899414, "step": 2790 }, { "epoch": 0.7947771785410161, "grad_norm": 33.029483795166016, "learning_rate": 9.9206074368436e-05, "loss": 0.8412540435791016, "step": 2800 }, { "epoch": 0.7976156684643769, "grad_norm": 24.97670555114746, "learning_rate": 9.920323587851264e-05, "loss": 0.8865632057189942, "step": 2810 }, { "epoch": 0.8004541583877377, "grad_norm": 28.665922164916992, "learning_rate": 9.920039738858928e-05, "loss": 0.9044490814208984, "step": 2820 }, { "epoch": 0.8032926483110985, "grad_norm": 21.4758358001709, "learning_rate": 9.91975588986659e-05, "loss": 0.8875882148742675, "step": 2830 }, { "epoch": 0.8061311382344593, "grad_norm": 26.156322479248047, "learning_rate": 9.919472040874256e-05, "loss": 0.8137760162353516, "step": 2840 }, { "epoch": 0.80896962815782, "grad_norm": 17.85251808166504, "learning_rate": 9.91918819188192e-05, "loss": 0.9045653343200684, "step": 2850 }, { "epoch": 0.8118081180811808, "grad_norm": 24.476266860961914, "learning_rate": 9.918904342889583e-05, "loss": 0.8285739898681641, "step": 2860 }, { "epoch": 0.8146466080045416, "grad_norm": 19.197765350341797, "learning_rate": 9.918620493897247e-05, "loss": 0.9241982460021972, "step": 2870 }, { "epoch": 0.8174850979279024, "grad_norm": 22.9984188079834, "learning_rate": 9.918336644904911e-05, "loss": 0.8890705108642578, "step": 2880 }, { "epoch": 0.8203235878512631, "grad_norm": 23.804393768310547, "learning_rate": 9.918052795912574e-05, "loss": 0.8551264762878418, "step": 2890 }, { "epoch": 0.8231620777746239, "grad_norm": 27.835195541381836, "learning_rate": 9.917768946920238e-05, "loss": 0.8502161026000976, "step": 2900 }, { "epoch": 0.8260005676979847, "grad_norm": 21.51314353942871, "learning_rate": 9.917485097927904e-05, "loss": 0.7782979011535645, "step": 2910 }, { "epoch": 0.8288390576213455, "grad_norm": 27.626466751098633, "learning_rate": 9.917201248935566e-05, "loss": 0.8109992027282715, "step": 2920 }, { "epoch": 0.8316775475447062, "grad_norm": 21.60063362121582, "learning_rate": 9.91691739994323e-05, "loss": 0.8673000335693359, "step": 2930 }, { "epoch": 0.834516037468067, "grad_norm": 19.806446075439453, "learning_rate": 9.916633550950895e-05, "loss": 0.8503800392150879, "step": 2940 }, { "epoch": 0.8373545273914278, "grad_norm": 19.74971580505371, "learning_rate": 9.916349701958559e-05, "loss": 0.8043561935424804, "step": 2950 }, { "epoch": 0.8401930173147886, "grad_norm": 29.61273956298828, "learning_rate": 9.916065852966222e-05, "loss": 0.7918411731719971, "step": 2960 }, { "epoch": 0.8430315072381493, "grad_norm": 16.50292205810547, "learning_rate": 9.915782003973887e-05, "loss": 0.7851666450500489, "step": 2970 }, { "epoch": 0.8458699971615101, "grad_norm": 21.183700561523438, "learning_rate": 9.915498154981551e-05, "loss": 0.810921573638916, "step": 2980 }, { "epoch": 0.8487084870848709, "grad_norm": 26.505327224731445, "learning_rate": 9.915214305989214e-05, "loss": 0.791579294204712, "step": 2990 }, { "epoch": 0.8515469770082317, "grad_norm": Infinity, "learning_rate": 9.914930456996878e-05, "loss": 0.7944314002990722, "step": 3000 }, { "epoch": 0.8515469770082317, "eval_accuracy": 0.6640172950976029, "eval_loss": 1.1031973361968994, "eval_runtime": 30.454, "eval_samples_per_second": 516.418, "eval_steps_per_second": 8.078, "step": 3000 }, { "epoch": 0.8543854669315923, "grad_norm": 20.32843017578125, "learning_rate": 9.914674992903775e-05, "loss": 0.7646556377410889, "step": 3010 }, { "epoch": 0.8572239568549531, "grad_norm": 22.286470413208008, "learning_rate": 9.91439114391144e-05, "loss": 0.792286729812622, "step": 3020 }, { "epoch": 0.8600624467783139, "grad_norm": 19.36547088623047, "learning_rate": 9.914107294919105e-05, "loss": 0.8225823402404785, "step": 3030 }, { "epoch": 0.8629009367016747, "grad_norm": 21.49129867553711, "learning_rate": 9.913823445926767e-05, "loss": 0.8195921897888183, "step": 3040 }, { "epoch": 0.8657394266250354, "grad_norm": 25.84406852722168, "learning_rate": 9.913539596934431e-05, "loss": 0.8258531570434571, "step": 3050 }, { "epoch": 0.8685779165483962, "grad_norm": 21.42317771911621, "learning_rate": 9.913255747942096e-05, "loss": 0.7712396621704102, "step": 3060 }, { "epoch": 0.871416406471757, "grad_norm": 20.756650924682617, "learning_rate": 9.912971898949758e-05, "loss": 0.8155352592468261, "step": 3070 }, { "epoch": 0.8742548963951178, "grad_norm": 24.09754753112793, "learning_rate": 9.912688049957422e-05, "loss": 0.7634074211120605, "step": 3080 }, { "epoch": 0.8770933863184786, "grad_norm": 28.720779418945312, "learning_rate": 9.912404200965088e-05, "loss": 0.7250977039337159, "step": 3090 }, { "epoch": 0.8799318762418393, "grad_norm": 22.041954040527344, "learning_rate": 9.912120351972751e-05, "loss": 0.7625031471252441, "step": 3100 }, { "epoch": 0.8827703661652001, "grad_norm": 21.66750144958496, "learning_rate": 9.911836502980415e-05, "loss": 0.7681513786315918, "step": 3110 }, { "epoch": 0.8856088560885609, "grad_norm": 23.560832977294922, "learning_rate": 9.911552653988079e-05, "loss": 0.7657046318054199, "step": 3120 }, { "epoch": 0.8884473460119217, "grad_norm": 28.501028060913086, "learning_rate": 9.911268804995743e-05, "loss": 0.782001543045044, "step": 3130 }, { "epoch": 0.8912858359352824, "grad_norm": 19.86785888671875, "learning_rate": 9.910984956003406e-05, "loss": 0.8034740447998047, "step": 3140 }, { "epoch": 0.8941243258586432, "grad_norm": 23.600494384765625, "learning_rate": 9.910701107011071e-05, "loss": 0.8363421440124512, "step": 3150 }, { "epoch": 0.896962815782004, "grad_norm": 18.663354873657227, "learning_rate": 9.910417258018734e-05, "loss": 0.7630080223083496, "step": 3160 }, { "epoch": 0.8998013057053648, "grad_norm": 29.64305305480957, "learning_rate": 9.910133409026398e-05, "loss": 0.7915078639984131, "step": 3170 }, { "epoch": 0.9026397956287255, "grad_norm": 32.460205078125, "learning_rate": 9.909849560034063e-05, "loss": 0.8089962959289551, "step": 3180 }, { "epoch": 0.9054782855520863, "grad_norm": 20.414539337158203, "learning_rate": 9.909565711041727e-05, "loss": 0.7740297794342041, "step": 3190 }, { "epoch": 0.9083167754754471, "grad_norm": 33.839813232421875, "learning_rate": 9.90928186204939e-05, "loss": 0.752277946472168, "step": 3200 }, { "epoch": 0.9111552653988079, "grad_norm": 18.867141723632812, "learning_rate": 9.909026397956287e-05, "loss": 0.7238951683044433, "step": 3210 }, { "epoch": 0.9139937553221686, "grad_norm": 22.304485321044922, "learning_rate": 9.908742548963952e-05, "loss": 0.7545382976531982, "step": 3220 }, { "epoch": 0.9168322452455294, "grad_norm": 33.62503433227539, "learning_rate": 9.908458699971616e-05, "loss": 0.8270857810974122, "step": 3230 }, { "epoch": 0.9196707351688902, "grad_norm": 18.09872817993164, "learning_rate": 9.90817485097928e-05, "loss": 0.7963118553161621, "step": 3240 }, { "epoch": 0.922509225092251, "grad_norm": 22.346670150756836, "learning_rate": 9.907891001986943e-05, "loss": 0.7598023414611816, "step": 3250 }, { "epoch": 0.9253477150156117, "grad_norm": 24.197389602661133, "learning_rate": 9.907607152994607e-05, "loss": 0.7657143115997315, "step": 3260 }, { "epoch": 0.9281862049389724, "grad_norm": 26.21255111694336, "learning_rate": 9.907323304002272e-05, "loss": 0.7388171195983887, "step": 3270 }, { "epoch": 0.9310246948623332, "grad_norm": 28.20396614074707, "learning_rate": 9.907039455009935e-05, "loss": 0.7233112812042236, "step": 3280 }, { "epoch": 0.933863184785694, "grad_norm": 23.623817443847656, "learning_rate": 9.906755606017599e-05, "loss": 0.7306665897369384, "step": 3290 }, { "epoch": 0.9367016747090547, "grad_norm": 23.831466674804688, "learning_rate": 9.906471757025263e-05, "loss": 0.8213624000549317, "step": 3300 }, { "epoch": 0.9395401646324155, "grad_norm": 22.057376861572266, "learning_rate": 9.906187908032926e-05, "loss": 0.7966175556182862, "step": 3310 }, { "epoch": 0.9423786545557763, "grad_norm": 20.871597290039062, "learning_rate": 9.90590405904059e-05, "loss": 0.7152708053588868, "step": 3320 }, { "epoch": 0.9452171444791371, "grad_norm": 18.533130645751953, "learning_rate": 9.905620210048256e-05, "loss": 0.7805844783782959, "step": 3330 }, { "epoch": 0.9480556344024978, "grad_norm": 26.149999618530273, "learning_rate": 9.905336361055918e-05, "loss": 0.7450548648834229, "step": 3340 }, { "epoch": 0.9508941243258586, "grad_norm": 24.809926986694336, "learning_rate": 9.905052512063583e-05, "loss": 0.7699018955230713, "step": 3350 }, { "epoch": 0.9537326142492194, "grad_norm": 34.5396728515625, "learning_rate": 9.904768663071247e-05, "loss": 0.779564380645752, "step": 3360 }, { "epoch": 0.9565711041725802, "grad_norm": 23.306936264038086, "learning_rate": 9.904484814078911e-05, "loss": 0.7437909126281739, "step": 3370 }, { "epoch": 0.959409594095941, "grad_norm": 21.1256103515625, "learning_rate": 9.904200965086574e-05, "loss": 0.7724534034729004, "step": 3380 }, { "epoch": 0.9622480840193017, "grad_norm": 34.26607894897461, "learning_rate": 9.903917116094238e-05, "loss": 0.7829182147979736, "step": 3390 }, { "epoch": 0.9650865739426625, "grad_norm": 25.866369247436523, "learning_rate": 9.903633267101903e-05, "loss": 0.7695268630981446, "step": 3400 }, { "epoch": 0.9679250638660233, "grad_norm": 23.50071907043457, "learning_rate": 9.903349418109566e-05, "loss": 0.7211739540100097, "step": 3410 }, { "epoch": 0.9707635537893841, "grad_norm": 27.50710105895996, "learning_rate": 9.90306556911723e-05, "loss": 0.7612006187438964, "step": 3420 }, { "epoch": 0.9736020437127448, "grad_norm": 27.45734405517578, "learning_rate": 9.902781720124894e-05, "loss": 0.6757657527923584, "step": 3430 }, { "epoch": 0.9764405336361056, "grad_norm": 29.132966995239258, "learning_rate": 9.902497871132557e-05, "loss": 0.7574927806854248, "step": 3440 }, { "epoch": 0.9792790235594664, "grad_norm": 29.643259048461914, "learning_rate": 9.902214022140221e-05, "loss": 0.7482147693634034, "step": 3450 }, { "epoch": 0.9821175134828272, "grad_norm": 21.631839752197266, "learning_rate": 9.901930173147885e-05, "loss": 0.7287088871002197, "step": 3460 }, { "epoch": 0.9849560034061879, "grad_norm": 19.11096954345703, "learning_rate": 9.90164632415555e-05, "loss": 0.6649835586547852, "step": 3470 }, { "epoch": 0.9877944933295487, "grad_norm": 22.88902473449707, "learning_rate": 9.901362475163214e-05, "loss": 0.7077507972717285, "step": 3480 }, { "epoch": 0.9906329832529095, "grad_norm": 21.8216609954834, "learning_rate": 9.901078626170878e-05, "loss": 0.7006841182708741, "step": 3490 }, { "epoch": 0.9934714731762703, "grad_norm": 23.871532440185547, "learning_rate": 9.900794777178542e-05, "loss": 0.705707597732544, "step": 3500 }, { "epoch": 0.9934714731762703, "eval_accuracy": 0.6953010745851084, "eval_loss": 0.980442464351654, "eval_runtime": 30.9652, "eval_samples_per_second": 507.892, "eval_steps_per_second": 7.944, "step": 3500 }, { "epoch": 0.996309963099631, "grad_norm": 19.31228256225586, "learning_rate": 9.900510928186205e-05, "loss": 0.6550156593322753, "step": 3510 }, { "epoch": 0.9991484530229918, "grad_norm": 19.09356689453125, "learning_rate": 9.900227079193869e-05, "loss": 0.6604233264923096, "step": 3520 }, { "epoch": 1.0019869429463526, "grad_norm": 28.049551010131836, "learning_rate": 9.899943230201534e-05, "loss": 0.6727694511413574, "step": 3530 }, { "epoch": 1.0048254328697133, "grad_norm": 26.254486083984375, "learning_rate": 9.899659381209197e-05, "loss": 0.6348094940185547, "step": 3540 }, { "epoch": 1.0076639227930742, "grad_norm": 25.290700912475586, "learning_rate": 9.899375532216861e-05, "loss": 0.651258897781372, "step": 3550 }, { "epoch": 1.0105024127164348, "grad_norm": 20.58706283569336, "learning_rate": 9.899091683224525e-05, "loss": 0.6630977630615235, "step": 3560 }, { "epoch": 1.0133409026397957, "grad_norm": 33.84420394897461, "learning_rate": 9.898807834232188e-05, "loss": 0.6907260417938232, "step": 3570 }, { "epoch": 1.0161793925631564, "grad_norm": 22.32529640197754, "learning_rate": 9.898523985239852e-05, "loss": 0.6630112648010253, "step": 3580 }, { "epoch": 1.0190178824865173, "grad_norm": 31.985055923461914, "learning_rate": 9.898240136247517e-05, "loss": 0.7523481369018554, "step": 3590 }, { "epoch": 1.021856372409878, "grad_norm": 26.934823989868164, "learning_rate": 9.89795628725518e-05, "loss": 0.693488073348999, "step": 3600 }, { "epoch": 1.0246948623332388, "grad_norm": 30.164888381958008, "learning_rate": 9.897672438262845e-05, "loss": 0.6415844917297363, "step": 3610 }, { "epoch": 1.0275333522565995, "grad_norm": 18.834300994873047, "learning_rate": 9.897388589270509e-05, "loss": 0.6850387573242187, "step": 3620 }, { "epoch": 1.0303718421799604, "grad_norm": 21.231327056884766, "learning_rate": 9.897104740278173e-05, "loss": 0.6446299076080322, "step": 3630 }, { "epoch": 1.033210332103321, "grad_norm": 23.240516662597656, "learning_rate": 9.896820891285836e-05, "loss": 0.6083054065704345, "step": 3640 }, { "epoch": 1.036048822026682, "grad_norm": 17.034574508666992, "learning_rate": 9.8965370422935e-05, "loss": 0.6667216300964356, "step": 3650 }, { "epoch": 1.0388873119500426, "grad_norm": 21.844274520874023, "learning_rate": 9.896253193301164e-05, "loss": 0.663185977935791, "step": 3660 }, { "epoch": 1.0417258018734032, "grad_norm": 27.365386962890625, "learning_rate": 9.895969344308828e-05, "loss": 0.6416357517242431, "step": 3670 }, { "epoch": 1.0445642917967641, "grad_norm": 19.69205093383789, "learning_rate": 9.895685495316492e-05, "loss": 0.6922329425811767, "step": 3680 }, { "epoch": 1.0474027817201248, "grad_norm": 18.737306594848633, "learning_rate": 9.895401646324157e-05, "loss": 0.5935844898223877, "step": 3690 }, { "epoch": 1.0502412716434857, "grad_norm": 26.93574333190918, "learning_rate": 9.895117797331819e-05, "loss": 0.713580322265625, "step": 3700 }, { "epoch": 1.0530797615668464, "grad_norm": 21.484880447387695, "learning_rate": 9.894833948339483e-05, "loss": 0.6020592212677002, "step": 3710 }, { "epoch": 1.0559182514902072, "grad_norm": 19.4040584564209, "learning_rate": 9.894550099347148e-05, "loss": 0.6169495582580566, "step": 3720 }, { "epoch": 1.058756741413568, "grad_norm": 23.88226318359375, "learning_rate": 9.894266250354812e-05, "loss": 0.6956416606903076, "step": 3730 }, { "epoch": 1.0615952313369288, "grad_norm": 16.01617431640625, "learning_rate": 9.893982401362476e-05, "loss": 0.6439888954162598, "step": 3740 }, { "epoch": 1.0644337212602895, "grad_norm": 19.912168502807617, "learning_rate": 9.89369855237014e-05, "loss": 0.6417697429656982, "step": 3750 }, { "epoch": 1.0672722111836503, "grad_norm": 19.506757736206055, "learning_rate": 9.893414703377804e-05, "loss": 0.6494669914245605, "step": 3760 }, { "epoch": 1.070110701107011, "grad_norm": 24.823179244995117, "learning_rate": 9.893130854385467e-05, "loss": 0.6445348739624024, "step": 3770 }, { "epoch": 1.072949191030372, "grad_norm": 15.19336986541748, "learning_rate": 9.892847005393131e-05, "loss": 0.6461805820465087, "step": 3780 }, { "epoch": 1.0757876809537326, "grad_norm": 27.75702476501465, "learning_rate": 9.892563156400795e-05, "loss": 0.5833687305450439, "step": 3790 }, { "epoch": 1.0786261708770934, "grad_norm": 24.98112678527832, "learning_rate": 9.89227930740846e-05, "loss": 0.5608679294586182, "step": 3800 }, { "epoch": 1.0814646608004541, "grad_norm": 27.189119338989258, "learning_rate": 9.891995458416123e-05, "loss": 0.6453812122344971, "step": 3810 }, { "epoch": 1.084303150723815, "grad_norm": 21.3582706451416, "learning_rate": 9.891711609423788e-05, "loss": 0.613520622253418, "step": 3820 }, { "epoch": 1.0871416406471757, "grad_norm": 21.198776245117188, "learning_rate": 9.89142776043145e-05, "loss": 0.6817501068115235, "step": 3830 }, { "epoch": 1.0899801305705366, "grad_norm": 23.9219970703125, "learning_rate": 9.891143911439115e-05, "loss": 0.6310290813446044, "step": 3840 }, { "epoch": 1.0928186204938972, "grad_norm": 15.712692260742188, "learning_rate": 9.890860062446779e-05, "loss": 0.6560925960540771, "step": 3850 }, { "epoch": 1.095657110417258, "grad_norm": 27.00271224975586, "learning_rate": 9.890576213454443e-05, "loss": 0.5857605457305908, "step": 3860 }, { "epoch": 1.0984956003406188, "grad_norm": 22.93274688720703, "learning_rate": 9.890292364462107e-05, "loss": 0.609201717376709, "step": 3870 }, { "epoch": 1.1013340902639797, "grad_norm": 17.351171493530273, "learning_rate": 9.890008515469771e-05, "loss": 0.6610034465789795, "step": 3880 }, { "epoch": 1.1041725801873403, "grad_norm": 29.707874298095703, "learning_rate": 9.889724666477435e-05, "loss": 0.666200828552246, "step": 3890 }, { "epoch": 1.1070110701107012, "grad_norm": 20.44675636291504, "learning_rate": 9.889440817485098e-05, "loss": 0.6511158466339111, "step": 3900 }, { "epoch": 1.1098495600340619, "grad_norm": 18.375123977661133, "learning_rate": 9.889156968492762e-05, "loss": 0.5879981994628907, "step": 3910 }, { "epoch": 1.1126880499574225, "grad_norm": 38.74342346191406, "learning_rate": 9.888873119500426e-05, "loss": 0.6463605403900147, "step": 3920 }, { "epoch": 1.1155265398807834, "grad_norm": 35.702579498291016, "learning_rate": 9.88858927050809e-05, "loss": 0.7014100551605225, "step": 3930 }, { "epoch": 1.118365029804144, "grad_norm": 20.338956832885742, "learning_rate": 9.888305421515755e-05, "loss": 0.6385445117950439, "step": 3940 }, { "epoch": 1.121203519727505, "grad_norm": 30.120508193969727, "learning_rate": 9.888021572523419e-05, "loss": 0.6235272884368896, "step": 3950 }, { "epoch": 1.1240420096508656, "grad_norm": 26.171878814697266, "learning_rate": 9.887737723531081e-05, "loss": 0.6421665191650391, "step": 3960 }, { "epoch": 1.1268804995742265, "grad_norm": 21.68423080444336, "learning_rate": 9.887453874538746e-05, "loss": 0.6641988277435302, "step": 3970 }, { "epoch": 1.1297189894975872, "grad_norm": 28.949514389038086, "learning_rate": 9.88717002554641e-05, "loss": 0.5635973930358886, "step": 3980 }, { "epoch": 1.132557479420948, "grad_norm": 19.748165130615234, "learning_rate": 9.886886176554074e-05, "loss": 0.5637505054473877, "step": 3990 }, { "epoch": 1.1353959693443088, "grad_norm": 17.690265655517578, "learning_rate": 9.886602327561738e-05, "loss": 0.5907973766326904, "step": 4000 }, { "epoch": 1.1353959693443088, "eval_accuracy": 0.7279201373434221, "eval_loss": 0.8885079622268677, "eval_runtime": 31.2115, "eval_samples_per_second": 503.885, "eval_steps_per_second": 7.882, "step": 4000 }, { "epoch": 1.1382344592676696, "grad_norm": 34.16864776611328, "learning_rate": 9.886318478569402e-05, "loss": 0.6118478298187255, "step": 4010 }, { "epoch": 1.1410729491910303, "grad_norm": 19.992345809936523, "learning_rate": 9.886034629577065e-05, "loss": 0.707521390914917, "step": 4020 }, { "epoch": 1.1439114391143912, "grad_norm": 27.760913848876953, "learning_rate": 9.885750780584729e-05, "loss": 0.5689629554748535, "step": 4030 }, { "epoch": 1.1467499290377519, "grad_norm": 30.910898208618164, "learning_rate": 9.885466931592393e-05, "loss": 0.6241819381713867, "step": 4040 }, { "epoch": 1.1495884189611127, "grad_norm": 21.5301513671875, "learning_rate": 9.885183082600057e-05, "loss": 0.5930325508117675, "step": 4050 }, { "epoch": 1.1524269088844734, "grad_norm": 37.531471252441406, "learning_rate": 9.88489923360772e-05, "loss": 0.6504276275634766, "step": 4060 }, { "epoch": 1.1552653988078343, "grad_norm": 16.46526336669922, "learning_rate": 9.884615384615386e-05, "loss": 0.6098124980926514, "step": 4070 }, { "epoch": 1.158103888731195, "grad_norm": 30.237173080444336, "learning_rate": 9.88433153562305e-05, "loss": 0.641084861755371, "step": 4080 }, { "epoch": 1.1609423786545559, "grad_norm": 26.651351928710938, "learning_rate": 9.884047686630713e-05, "loss": 0.6080304622650147, "step": 4090 }, { "epoch": 1.1637808685779165, "grad_norm": 25.55205726623535, "learning_rate": 9.883763837638377e-05, "loss": 0.5625734806060791, "step": 4100 }, { "epoch": 1.1666193585012774, "grad_norm": 26.443571090698242, "learning_rate": 9.883479988646041e-05, "loss": 0.5978858470916748, "step": 4110 }, { "epoch": 1.169457848424638, "grad_norm": 20.061460494995117, "learning_rate": 9.883196139653704e-05, "loss": 0.5961314678192139, "step": 4120 }, { "epoch": 1.172296338347999, "grad_norm": 17.195859909057617, "learning_rate": 9.882912290661369e-05, "loss": 0.5361915588378906, "step": 4130 }, { "epoch": 1.1751348282713596, "grad_norm": 32.27775955200195, "learning_rate": 9.882628441669033e-05, "loss": 0.5996068000793457, "step": 4140 }, { "epoch": 1.1779733181947205, "grad_norm": 24.170778274536133, "learning_rate": 9.882344592676696e-05, "loss": 0.5598326206207276, "step": 4150 }, { "epoch": 1.1808118081180812, "grad_norm": 20.161243438720703, "learning_rate": 9.88206074368436e-05, "loss": 0.5899753093719482, "step": 4160 }, { "epoch": 1.1836502980414418, "grad_norm": 25.8084659576416, "learning_rate": 9.881776894692024e-05, "loss": 0.648932933807373, "step": 4170 }, { "epoch": 1.1864887879648027, "grad_norm": 21.893795013427734, "learning_rate": 9.881493045699688e-05, "loss": 0.582167673110962, "step": 4180 }, { "epoch": 1.1893272778881636, "grad_norm": 24.662321090698242, "learning_rate": 9.881209196707351e-05, "loss": 0.6225980281829834, "step": 4190 }, { "epoch": 1.1921657678115243, "grad_norm": 27.636730194091797, "learning_rate": 9.880925347715017e-05, "loss": 0.6037321567535401, "step": 4200 }, { "epoch": 1.195004257734885, "grad_norm": 22.676822662353516, "learning_rate": 9.880641498722681e-05, "loss": 0.6260679721832275, "step": 4210 }, { "epoch": 1.1978427476582458, "grad_norm": 29.929536819458008, "learning_rate": 9.880357649730344e-05, "loss": 0.5525911331176758, "step": 4220 }, { "epoch": 1.2006812375816065, "grad_norm": 28.129732131958008, "learning_rate": 9.880073800738008e-05, "loss": 0.6073970317840576, "step": 4230 }, { "epoch": 1.2035197275049674, "grad_norm": 19.525175094604492, "learning_rate": 9.879789951745672e-05, "loss": 0.5686793327331543, "step": 4240 }, { "epoch": 1.206358217428328, "grad_norm": 17.232412338256836, "learning_rate": 9.879506102753335e-05, "loss": 0.5829248905181885, "step": 4250 }, { "epoch": 1.209196707351689, "grad_norm": 27.632844924926758, "learning_rate": 9.879222253760999e-05, "loss": 0.5319347858428956, "step": 4260 }, { "epoch": 1.2120351972750496, "grad_norm": 24.55426025390625, "learning_rate": 9.878938404768664e-05, "loss": 0.5863132476806641, "step": 4270 }, { "epoch": 1.2148736871984105, "grad_norm": 23.3425350189209, "learning_rate": 9.878654555776327e-05, "loss": 0.6243361949920654, "step": 4280 }, { "epoch": 1.2177121771217712, "grad_norm": 30.333187103271484, "learning_rate": 9.878370706783991e-05, "loss": 0.5781556129455566, "step": 4290 }, { "epoch": 1.220550667045132, "grad_norm": 27.743261337280273, "learning_rate": 9.878086857791655e-05, "loss": 0.6214333057403565, "step": 4300 }, { "epoch": 1.2233891569684927, "grad_norm": 21.51885223388672, "learning_rate": 9.87780300879932e-05, "loss": 0.623208475112915, "step": 4310 }, { "epoch": 1.2262276468918536, "grad_norm": 19.256988525390625, "learning_rate": 9.877519159806982e-05, "loss": 0.5246468544006347, "step": 4320 }, { "epoch": 1.2290661368152143, "grad_norm": 29.382707595825195, "learning_rate": 9.877235310814648e-05, "loss": 0.622248363494873, "step": 4330 }, { "epoch": 1.2319046267385751, "grad_norm": 25.908470153808594, "learning_rate": 9.876951461822312e-05, "loss": 0.5938609600067138, "step": 4340 }, { "epoch": 1.2347431166619358, "grad_norm": 19.781829833984375, "learning_rate": 9.876667612829975e-05, "loss": 0.5751468658447265, "step": 4350 }, { "epoch": 1.2375816065852967, "grad_norm": 22.52198028564453, "learning_rate": 9.876383763837639e-05, "loss": 0.6422913074493408, "step": 4360 }, { "epoch": 1.2404200965086574, "grad_norm": 23.624773025512695, "learning_rate": 9.876099914845303e-05, "loss": 0.5497620105743408, "step": 4370 }, { "epoch": 1.2432585864320183, "grad_norm": 24.279569625854492, "learning_rate": 9.875816065852966e-05, "loss": 0.5888115406036377, "step": 4380 }, { "epoch": 1.246097076355379, "grad_norm": 20.661331176757812, "learning_rate": 9.87553221686063e-05, "loss": 0.5441700935363769, "step": 4390 }, { "epoch": 1.2489355662787398, "grad_norm": 23.45940589904785, "learning_rate": 9.875248367868295e-05, "loss": 0.5755167007446289, "step": 4400 }, { "epoch": 1.2517740562021005, "grad_norm": 36.26948928833008, "learning_rate": 9.874964518875958e-05, "loss": 0.6581007480621338, "step": 4410 }, { "epoch": 1.2546125461254611, "grad_norm": 28.99075698852539, "learning_rate": 9.874680669883622e-05, "loss": 0.5576618194580079, "step": 4420 }, { "epoch": 1.257451036048822, "grad_norm": 15.188714981079102, "learning_rate": 9.874396820891286e-05, "loss": 0.5354149818420411, "step": 4430 }, { "epoch": 1.260289525972183, "grad_norm": 33.135074615478516, "learning_rate": 9.87411297189895e-05, "loss": 0.6364504814147949, "step": 4440 }, { "epoch": 1.2631280158955436, "grad_norm": 21.79518699645996, "learning_rate": 9.873829122906613e-05, "loss": 0.6258384227752686, "step": 4450 }, { "epoch": 1.2659665058189042, "grad_norm": 23.883333206176758, "learning_rate": 9.873545273914278e-05, "loss": 0.549041748046875, "step": 4460 }, { "epoch": 1.2688049957422651, "grad_norm": 17.414018630981445, "learning_rate": 9.873261424921943e-05, "loss": 0.5412386417388916, "step": 4470 }, { "epoch": 1.271643485665626, "grad_norm": 37.17933654785156, "learning_rate": 9.872977575929606e-05, "loss": 0.5415035247802734, "step": 4480 }, { "epoch": 1.2744819755889867, "grad_norm": 20.74306869506836, "learning_rate": 9.87269372693727e-05, "loss": 0.5566246986389161, "step": 4490 }, { "epoch": 1.2773204655123473, "grad_norm": 18.906362533569336, "learning_rate": 9.872409877944934e-05, "loss": 0.5103561401367187, "step": 4500 }, { "epoch": 1.2773204655123473, "eval_accuracy": 0.7405735359572709, "eval_loss": 0.8171529769897461, "eval_runtime": 31.2672, "eval_samples_per_second": 502.988, "eval_steps_per_second": 7.868, "step": 4500 }, { "epoch": 1.2801589554357082, "grad_norm": 20.318050384521484, "learning_rate": 9.872126028952597e-05, "loss": 0.56712646484375, "step": 4510 }, { "epoch": 1.282997445359069, "grad_norm": 22.702953338623047, "learning_rate": 9.871842179960261e-05, "loss": 0.5166520595550537, "step": 4520 }, { "epoch": 1.2858359352824298, "grad_norm": 17.20403289794922, "learning_rate": 9.871558330967926e-05, "loss": 0.5455490112304687, "step": 4530 }, { "epoch": 1.2886744252057905, "grad_norm": 23.980810165405273, "learning_rate": 9.871274481975589e-05, "loss": 0.545859432220459, "step": 4540 }, { "epoch": 1.2915129151291513, "grad_norm": 22.90290069580078, "learning_rate": 9.870990632983253e-05, "loss": 0.5312858581542969, "step": 4550 }, { "epoch": 1.294351405052512, "grad_norm": 15.267108917236328, "learning_rate": 9.870706783990918e-05, "loss": 0.4967654705047607, "step": 4560 }, { "epoch": 1.297189894975873, "grad_norm": 23.086891174316406, "learning_rate": 9.870422934998582e-05, "loss": 0.4933491230010986, "step": 4570 }, { "epoch": 1.3000283848992336, "grad_norm": 30.994117736816406, "learning_rate": 9.870139086006244e-05, "loss": 0.5735067844390869, "step": 4580 }, { "epoch": 1.3028668748225944, "grad_norm": 21.239145278930664, "learning_rate": 9.869855237013909e-05, "loss": 0.540119743347168, "step": 4590 }, { "epoch": 1.305705364745955, "grad_norm": 22.86792755126953, "learning_rate": 9.869571388021574e-05, "loss": 0.5780026912689209, "step": 4600 }, { "epoch": 1.308543854669316, "grad_norm": 30.0904598236084, "learning_rate": 9.869287539029237e-05, "loss": 0.5557725429534912, "step": 4610 }, { "epoch": 1.3113823445926767, "grad_norm": 19.40277671813965, "learning_rate": 9.869003690036901e-05, "loss": 0.5452414989471436, "step": 4620 }, { "epoch": 1.3142208345160376, "grad_norm": 20.77096176147461, "learning_rate": 9.868719841044565e-05, "loss": 0.610050630569458, "step": 4630 }, { "epoch": 1.3170593244393982, "grad_norm": 22.80849266052246, "learning_rate": 9.868435992052228e-05, "loss": 0.5045920848846436, "step": 4640 }, { "epoch": 1.319897814362759, "grad_norm": 23.759414672851562, "learning_rate": 9.868152143059892e-05, "loss": 0.5491971492767334, "step": 4650 }, { "epoch": 1.3227363042861198, "grad_norm": 24.919057846069336, "learning_rate": 9.867868294067558e-05, "loss": 0.5136069297790528, "step": 4660 }, { "epoch": 1.3255747942094804, "grad_norm": 23.633241653442383, "learning_rate": 9.86758444507522e-05, "loss": 0.5374538421630859, "step": 4670 }, { "epoch": 1.3284132841328413, "grad_norm": 19.171987533569336, "learning_rate": 9.867300596082884e-05, "loss": 0.523595142364502, "step": 4680 }, { "epoch": 1.3312517740562022, "grad_norm": 39.36833572387695, "learning_rate": 9.867016747090549e-05, "loss": 0.6079312324523926, "step": 4690 }, { "epoch": 1.3340902639795629, "grad_norm": 22.341962814331055, "learning_rate": 9.866732898098213e-05, "loss": 0.5197252750396728, "step": 4700 }, { "epoch": 1.3369287539029235, "grad_norm": 18.308134078979492, "learning_rate": 9.866449049105876e-05, "loss": 0.5464882850646973, "step": 4710 }, { "epoch": 1.3397672438262844, "grad_norm": 17.265478134155273, "learning_rate": 9.86616520011354e-05, "loss": 0.5506086349487305, "step": 4720 }, { "epoch": 1.3426057337496453, "grad_norm": 23.074758529663086, "learning_rate": 9.865881351121205e-05, "loss": 0.5192683696746826, "step": 4730 }, { "epoch": 1.345444223673006, "grad_norm": 20.067684173583984, "learning_rate": 9.865597502128868e-05, "loss": 0.5306649208068848, "step": 4740 }, { "epoch": 1.3482827135963666, "grad_norm": 20.553401947021484, "learning_rate": 9.865313653136532e-05, "loss": 0.49111275672912597, "step": 4750 }, { "epoch": 1.3511212035197275, "grad_norm": 31.436376571655273, "learning_rate": 9.865029804144196e-05, "loss": 0.5791452884674072, "step": 4760 }, { "epoch": 1.3539596934430882, "grad_norm": 36.756351470947266, "learning_rate": 9.864745955151859e-05, "loss": 0.5307989597320557, "step": 4770 }, { "epoch": 1.356798183366449, "grad_norm": 26.145814895629883, "learning_rate": 9.864462106159523e-05, "loss": 0.48682713508605957, "step": 4780 }, { "epoch": 1.3596366732898097, "grad_norm": 22.17993927001953, "learning_rate": 9.864178257167187e-05, "loss": 0.5243121147155761, "step": 4790 }, { "epoch": 1.3624751632131706, "grad_norm": 23.06700897216797, "learning_rate": 9.863894408174851e-05, "loss": 0.594422721862793, "step": 4800 }, { "epoch": 1.3653136531365313, "grad_norm": 17.529457092285156, "learning_rate": 9.863610559182516e-05, "loss": 0.5081527709960938, "step": 4810 }, { "epoch": 1.3681521430598922, "grad_norm": 19.19485092163086, "learning_rate": 9.86332671019018e-05, "loss": 0.5057069301605225, "step": 4820 }, { "epoch": 1.3709906329832529, "grad_norm": 24.305240631103516, "learning_rate": 9.863042861197844e-05, "loss": 0.5302314758300781, "step": 4830 }, { "epoch": 1.3738291229066137, "grad_norm": 32.79861831665039, "learning_rate": 9.862759012205507e-05, "loss": 0.5537318706512451, "step": 4840 }, { "epoch": 1.3766676128299744, "grad_norm": 21.89700698852539, "learning_rate": 9.862475163213171e-05, "loss": 0.5480470657348633, "step": 4850 }, { "epoch": 1.3795061027533353, "grad_norm": 20.300912857055664, "learning_rate": 9.862191314220835e-05, "loss": 0.505549144744873, "step": 4860 }, { "epoch": 1.382344592676696, "grad_norm": 20.352535247802734, "learning_rate": 9.861907465228499e-05, "loss": 0.5526429176330566, "step": 4870 }, { "epoch": 1.3851830826000568, "grad_norm": 23.988351821899414, "learning_rate": 9.861623616236163e-05, "loss": 0.5067482948303222, "step": 4880 }, { "epoch": 1.3880215725234175, "grad_norm": 30.3094425201416, "learning_rate": 9.861339767243827e-05, "loss": 0.5444604396820069, "step": 4890 }, { "epoch": 1.3908600624467784, "grad_norm": 15.551344871520996, "learning_rate": 9.86105591825149e-05, "loss": 0.5170631885528565, "step": 4900 }, { "epoch": 1.393698552370139, "grad_norm": 22.54707145690918, "learning_rate": 9.860772069259154e-05, "loss": 0.4765817165374756, "step": 4910 }, { "epoch": 1.3965370422934997, "grad_norm": 21.261871337890625, "learning_rate": 9.860488220266818e-05, "loss": 0.4708756446838379, "step": 4920 }, { "epoch": 1.3993755322168606, "grad_norm": 22.6719913482666, "learning_rate": 9.860204371274482e-05, "loss": 0.5953768730163574, "step": 4930 }, { "epoch": 1.4022140221402215, "grad_norm": 25.462444305419922, "learning_rate": 9.859920522282147e-05, "loss": 0.5883163452148438, "step": 4940 }, { "epoch": 1.4050525120635822, "grad_norm": 14.67077350616455, "learning_rate": 9.859636673289811e-05, "loss": 0.487822437286377, "step": 4950 }, { "epoch": 1.4078910019869428, "grad_norm": 23.763290405273438, "learning_rate": 9.859352824297474e-05, "loss": 0.5172186851501465, "step": 4960 }, { "epoch": 1.4107294919103037, "grad_norm": 20.53472900390625, "learning_rate": 9.859068975305138e-05, "loss": 0.5040953159332275, "step": 4970 }, { "epoch": 1.4135679818336646, "grad_norm": 29.127592086791992, "learning_rate": 9.858785126312802e-05, "loss": 0.5164206504821778, "step": 4980 }, { "epoch": 1.4164064717570253, "grad_norm": 21.441164016723633, "learning_rate": 9.858501277320466e-05, "loss": 0.5650917053222656, "step": 4990 }, { "epoch": 1.419244961680386, "grad_norm": 24.15547752380371, "learning_rate": 9.85821742832813e-05, "loss": 0.5443023204803467, "step": 5000 }, { "epoch": 1.419244961680386, "eval_accuracy": 0.7549437273478731, "eval_loss": 0.7658727169036865, "eval_runtime": 30.6986, "eval_samples_per_second": 512.304, "eval_steps_per_second": 8.013, "step": 5000 }, { "epoch": 1.4220834516037468, "grad_norm": 22.526084899902344, "learning_rate": 9.857933579335794e-05, "loss": 0.4786521434783936, "step": 5010 }, { "epoch": 1.4249219415271077, "grad_norm": 15.852435111999512, "learning_rate": 9.857649730343458e-05, "loss": 0.5006365299224853, "step": 5020 }, { "epoch": 1.4277604314504684, "grad_norm": 15.36372184753418, "learning_rate": 9.857365881351121e-05, "loss": 0.44577822685241697, "step": 5030 }, { "epoch": 1.430598921373829, "grad_norm": 15.919493675231934, "learning_rate": 9.857082032358785e-05, "loss": 0.5354157447814941, "step": 5040 }, { "epoch": 1.43343741129719, "grad_norm": 18.767841339111328, "learning_rate": 9.85679818336645e-05, "loss": 0.5587405204772949, "step": 5050 }, { "epoch": 1.4362759012205506, "grad_norm": 25.292882919311523, "learning_rate": 9.856514334374114e-05, "loss": 0.5098119735717773, "step": 5060 }, { "epoch": 1.4391143911439115, "grad_norm": 22.182270050048828, "learning_rate": 9.856230485381778e-05, "loss": 0.5585176467895507, "step": 5070 }, { "epoch": 1.4419528810672722, "grad_norm": 28.664527893066406, "learning_rate": 9.855946636389442e-05, "loss": 0.5147800922393799, "step": 5080 }, { "epoch": 1.444791370990633, "grad_norm": 18.47371482849121, "learning_rate": 9.855662787397105e-05, "loss": 0.536253547668457, "step": 5090 }, { "epoch": 1.4476298609139937, "grad_norm": 25.276735305786133, "learning_rate": 9.855378938404769e-05, "loss": 0.534224271774292, "step": 5100 }, { "epoch": 1.4504683508373546, "grad_norm": 19.44446563720703, "learning_rate": 9.855095089412433e-05, "loss": 0.5034564971923828, "step": 5110 }, { "epoch": 1.4533068407607153, "grad_norm": 23.57341957092285, "learning_rate": 9.854811240420097e-05, "loss": 0.5047745704650879, "step": 5120 }, { "epoch": 1.4561453306840761, "grad_norm": 30.271663665771484, "learning_rate": 9.854527391427761e-05, "loss": 0.5482126235961914, "step": 5130 }, { "epoch": 1.4589838206074368, "grad_norm": 28.177982330322266, "learning_rate": 9.854243542435425e-05, "loss": 0.5251502513885498, "step": 5140 }, { "epoch": 1.4618223105307977, "grad_norm": 33.12307357788086, "learning_rate": 9.85395969344309e-05, "loss": 0.49888076782226565, "step": 5150 }, { "epoch": 1.4646608004541584, "grad_norm": 25.201351165771484, "learning_rate": 9.853675844450752e-05, "loss": 0.5624466896057129, "step": 5160 }, { "epoch": 1.467499290377519, "grad_norm": 16.79703140258789, "learning_rate": 9.853391995458416e-05, "loss": 0.5105113029479981, "step": 5170 }, { "epoch": 1.47033778030088, "grad_norm": 20.61594009399414, "learning_rate": 9.85310814646608e-05, "loss": 0.5088314533233642, "step": 5180 }, { "epoch": 1.4731762702242408, "grad_norm": 22.573232650756836, "learning_rate": 9.852824297473743e-05, "loss": 0.47755999565124513, "step": 5190 }, { "epoch": 1.4760147601476015, "grad_norm": 18.0330753326416, "learning_rate": 9.852540448481409e-05, "loss": 0.46817960739135744, "step": 5200 }, { "epoch": 1.4788532500709621, "grad_norm": 26.57672119140625, "learning_rate": 9.852256599489073e-05, "loss": 0.44509220123291016, "step": 5210 }, { "epoch": 1.481691739994323, "grad_norm": 27.457799911499023, "learning_rate": 9.851972750496736e-05, "loss": 0.49938220977783204, "step": 5220 }, { "epoch": 1.484530229917684, "grad_norm": 29.482223510742188, "learning_rate": 9.8516889015044e-05, "loss": 0.48114356994628904, "step": 5230 }, { "epoch": 1.4873687198410446, "grad_norm": 19.489933013916016, "learning_rate": 9.851405052512064e-05, "loss": 0.5131722450256347, "step": 5240 }, { "epoch": 1.4902072097644052, "grad_norm": 25.34461784362793, "learning_rate": 9.851149588418962e-05, "loss": 0.47336368560791015, "step": 5250 }, { "epoch": 1.4930456996877661, "grad_norm": 22.344881057739258, "learning_rate": 9.850865739426626e-05, "loss": 0.423797082901001, "step": 5260 }, { "epoch": 1.495884189611127, "grad_norm": 21.193986892700195, "learning_rate": 9.850581890434289e-05, "loss": 0.509946632385254, "step": 5270 }, { "epoch": 1.4987226795344877, "grad_norm": 25.462278366088867, "learning_rate": 9.850298041441953e-05, "loss": 0.5746853828430176, "step": 5280 }, { "epoch": 1.5015611694578483, "grad_norm": 21.391799926757812, "learning_rate": 9.850014192449617e-05, "loss": 0.49782500267028806, "step": 5290 }, { "epoch": 1.5043996593812092, "grad_norm": 30.367361068725586, "learning_rate": 9.849730343457281e-05, "loss": 0.4601006031036377, "step": 5300 }, { "epoch": 1.5072381493045701, "grad_norm": 18.497825622558594, "learning_rate": 9.849446494464945e-05, "loss": 0.47089128494262694, "step": 5310 }, { "epoch": 1.5100766392279308, "grad_norm": 24.439403533935547, "learning_rate": 9.84916264547261e-05, "loss": 0.4955033779144287, "step": 5320 }, { "epoch": 1.5129151291512914, "grad_norm": 18.78842544555664, "learning_rate": 9.848878796480274e-05, "loss": 0.4948451042175293, "step": 5330 }, { "epoch": 1.5157536190746523, "grad_norm": 21.88657569885254, "learning_rate": 9.848594947487936e-05, "loss": 0.4650012969970703, "step": 5340 }, { "epoch": 1.518592108998013, "grad_norm": 21.758548736572266, "learning_rate": 9.8483110984956e-05, "loss": 0.4517825603485107, "step": 5350 }, { "epoch": 1.521430598921374, "grad_norm": 22.636329650878906, "learning_rate": 9.848027249503265e-05, "loss": 0.5213921546936036, "step": 5360 }, { "epoch": 1.5242690888447346, "grad_norm": 32.18223571777344, "learning_rate": 9.847743400510928e-05, "loss": 0.5041645526885986, "step": 5370 }, { "epoch": 1.5271075787680952, "grad_norm": 18.59054183959961, "learning_rate": 9.847459551518593e-05, "loss": 0.5283419609069824, "step": 5380 }, { "epoch": 1.529946068691456, "grad_norm": 26.19089698791504, "learning_rate": 9.847175702526257e-05, "loss": 0.49493966102600095, "step": 5390 }, { "epoch": 1.532784558614817, "grad_norm": 19.25995635986328, "learning_rate": 9.84689185353392e-05, "loss": 0.46999635696411135, "step": 5400 }, { "epoch": 1.5356230485381777, "grad_norm": 21.66840171813965, "learning_rate": 9.846608004541584e-05, "loss": 0.46753621101379395, "step": 5410 }, { "epoch": 1.5384615384615383, "grad_norm": 21.555118560791016, "learning_rate": 9.846324155549248e-05, "loss": 0.4348593235015869, "step": 5420 }, { "epoch": 1.5413000283848992, "grad_norm": 20.662364959716797, "learning_rate": 9.846040306556912e-05, "loss": 0.47795305252075193, "step": 5430 }, { "epoch": 1.54413851830826, "grad_norm": 26.63555908203125, "learning_rate": 9.845756457564577e-05, "loss": 0.5018423080444336, "step": 5440 }, { "epoch": 1.5469770082316208, "grad_norm": 17.335065841674805, "learning_rate": 9.84547260857224e-05, "loss": 0.45438499450683595, "step": 5450 }, { "epoch": 1.5498154981549814, "grad_norm": 27.396347045898438, "learning_rate": 9.845188759579905e-05, "loss": 0.5069549560546875, "step": 5460 }, { "epoch": 1.5526539880783423, "grad_norm": 28.821380615234375, "learning_rate": 9.844904910587568e-05, "loss": 0.4687957763671875, "step": 5470 }, { "epoch": 1.5554924780017032, "grad_norm": 32.41615676879883, "learning_rate": 9.844621061595232e-05, "loss": 0.4600421905517578, "step": 5480 }, { "epoch": 1.5583309679250639, "grad_norm": 17.59092903137207, "learning_rate": 9.844337212602896e-05, "loss": 0.4343866348266602, "step": 5490 }, { "epoch": 1.5611694578484245, "grad_norm": 25.264320373535156, "learning_rate": 9.844053363610559e-05, "loss": 0.47960448265075684, "step": 5500 }, { "epoch": 1.5611694578484245, "eval_accuracy": 0.7705220321739683, "eval_loss": 0.7081770300865173, "eval_runtime": 30.734, "eval_samples_per_second": 511.714, "eval_steps_per_second": 8.004, "step": 5500 }, { "epoch": 1.5640079477717854, "grad_norm": 18.138931274414062, "learning_rate": 9.843769514618224e-05, "loss": 0.43840909004211426, "step": 5510 }, { "epoch": 1.5668464376951463, "grad_norm": 31.31865119934082, "learning_rate": 9.843485665625888e-05, "loss": 0.5786248207092285, "step": 5520 }, { "epoch": 1.569684927618507, "grad_norm": 17.037433624267578, "learning_rate": 9.843201816633551e-05, "loss": 0.4403118133544922, "step": 5530 }, { "epoch": 1.5725234175418676, "grad_norm": 21.156137466430664, "learning_rate": 9.842917967641215e-05, "loss": 0.45325593948364257, "step": 5540 }, { "epoch": 1.5753619074652285, "grad_norm": 20.449691772460938, "learning_rate": 9.842634118648879e-05, "loss": 0.5135906219482422, "step": 5550 }, { "epoch": 1.5782003973885894, "grad_norm": 25.842815399169922, "learning_rate": 9.842350269656543e-05, "loss": 0.44756927490234377, "step": 5560 }, { "epoch": 1.58103888731195, "grad_norm": 24.721166610717773, "learning_rate": 9.842066420664206e-05, "loss": 0.4527876853942871, "step": 5570 }, { "epoch": 1.5838773772353107, "grad_norm": 29.005586624145508, "learning_rate": 9.841782571671872e-05, "loss": 0.4674509048461914, "step": 5580 }, { "epoch": 1.5867158671586716, "grad_norm": 26.988758087158203, "learning_rate": 9.841498722679536e-05, "loss": 0.4187464237213135, "step": 5590 }, { "epoch": 1.5895543570820325, "grad_norm": 21.200258255004883, "learning_rate": 9.841214873687199e-05, "loss": 0.4542283058166504, "step": 5600 }, { "epoch": 1.5923928470053932, "grad_norm": 21.527809143066406, "learning_rate": 9.840931024694863e-05, "loss": 0.45181856155395506, "step": 5610 }, { "epoch": 1.5952313369287539, "grad_norm": 28.10869789123535, "learning_rate": 9.840647175702527e-05, "loss": 0.5212711334228516, "step": 5620 }, { "epoch": 1.5980698268521145, "grad_norm": 22.373567581176758, "learning_rate": 9.84036332671019e-05, "loss": 0.441100549697876, "step": 5630 }, { "epoch": 1.6009083167754754, "grad_norm": 20.653213500976562, "learning_rate": 9.840079477717855e-05, "loss": 0.4398199558258057, "step": 5640 }, { "epoch": 1.6037468066988363, "grad_norm": 18.533262252807617, "learning_rate": 9.83979562872552e-05, "loss": 0.40874290466308594, "step": 5650 }, { "epoch": 1.606585296622197, "grad_norm": 17.033687591552734, "learning_rate": 9.839511779733182e-05, "loss": 0.43529257774353025, "step": 5660 }, { "epoch": 1.6094237865455576, "grad_norm": 25.372634887695312, "learning_rate": 9.839227930740846e-05, "loss": 0.5042860984802247, "step": 5670 }, { "epoch": 1.6122622764689185, "grad_norm": 26.601974487304688, "learning_rate": 9.83894408174851e-05, "loss": 0.4922661781311035, "step": 5680 }, { "epoch": 1.6151007663922794, "grad_norm": 20.631139755249023, "learning_rate": 9.838660232756175e-05, "loss": 0.4670083999633789, "step": 5690 }, { "epoch": 1.61793925631564, "grad_norm": 15.214738845825195, "learning_rate": 9.838376383763837e-05, "loss": 0.5103687286376953, "step": 5700 }, { "epoch": 1.6207777462390007, "grad_norm": 22.460304260253906, "learning_rate": 9.838092534771503e-05, "loss": 0.4493797779083252, "step": 5710 }, { "epoch": 1.6236162361623616, "grad_norm": 20.383995056152344, "learning_rate": 9.837808685779167e-05, "loss": 0.5087112426757813, "step": 5720 }, { "epoch": 1.6264547260857225, "grad_norm": 31.7694091796875, "learning_rate": 9.83752483678683e-05, "loss": 0.48621907234191897, "step": 5730 }, { "epoch": 1.6292932160090832, "grad_norm": 24.267576217651367, "learning_rate": 9.837240987794494e-05, "loss": 0.43189225196838377, "step": 5740 }, { "epoch": 1.6321317059324438, "grad_norm": 17.485614776611328, "learning_rate": 9.836957138802158e-05, "loss": 0.4363883018493652, "step": 5750 }, { "epoch": 1.6349701958558047, "grad_norm": 17.831676483154297, "learning_rate": 9.836673289809821e-05, "loss": 0.36375472545623777, "step": 5760 }, { "epoch": 1.6378086857791656, "grad_norm": 38.249237060546875, "learning_rate": 9.836389440817485e-05, "loss": 0.487896203994751, "step": 5770 }, { "epoch": 1.6406471757025263, "grad_norm": 21.259401321411133, "learning_rate": 9.83610559182515e-05, "loss": 0.4958378314971924, "step": 5780 }, { "epoch": 1.643485665625887, "grad_norm": 18.802688598632812, "learning_rate": 9.835821742832813e-05, "loss": 0.4471120834350586, "step": 5790 }, { "epoch": 1.6463241555492478, "grad_norm": 18.47874641418457, "learning_rate": 9.835537893840477e-05, "loss": 0.46596393585205076, "step": 5800 }, { "epoch": 1.6491626454726087, "grad_norm": 22.69192123413086, "learning_rate": 9.835254044848141e-05, "loss": 0.5249762535095215, "step": 5810 }, { "epoch": 1.6520011353959694, "grad_norm": 23.77510643005371, "learning_rate": 9.834970195855806e-05, "loss": 0.49897050857543945, "step": 5820 }, { "epoch": 1.65483962531933, "grad_norm": 20.74818992614746, "learning_rate": 9.834686346863468e-05, "loss": 0.43282279968261717, "step": 5830 }, { "epoch": 1.657678115242691, "grad_norm": 27.07232093811035, "learning_rate": 9.834402497871134e-05, "loss": 0.452546501159668, "step": 5840 }, { "epoch": 1.6605166051660518, "grad_norm": 18.120189666748047, "learning_rate": 9.834118648878797e-05, "loss": 0.45517935752868655, "step": 5850 }, { "epoch": 1.6633550950894125, "grad_norm": 15.141501426696777, "learning_rate": 9.833834799886461e-05, "loss": 0.4280442237854004, "step": 5860 }, { "epoch": 1.6661935850127731, "grad_norm": 29.13498306274414, "learning_rate": 9.833550950894125e-05, "loss": 0.45500760078430175, "step": 5870 }, { "epoch": 1.6690320749361338, "grad_norm": 17.46603012084961, "learning_rate": 9.833267101901789e-05, "loss": 0.5003047943115234, "step": 5880 }, { "epoch": 1.6718705648594947, "grad_norm": 47.59585952758789, "learning_rate": 9.832983252909452e-05, "loss": 0.4896832466125488, "step": 5890 }, { "epoch": 1.6747090547828556, "grad_norm": 24.156326293945312, "learning_rate": 9.832699403917116e-05, "loss": 0.48364977836608886, "step": 5900 }, { "epoch": 1.6775475447062163, "grad_norm": 27.248226165771484, "learning_rate": 9.832415554924781e-05, "loss": 0.45184640884399413, "step": 5910 }, { "epoch": 1.680386034629577, "grad_norm": 21.402156829833984, "learning_rate": 9.832131705932444e-05, "loss": 0.4230981349945068, "step": 5920 }, { "epoch": 1.6832245245529378, "grad_norm": 21.467369079589844, "learning_rate": 9.831847856940108e-05, "loss": 0.39625232219696044, "step": 5930 }, { "epoch": 1.6860630144762987, "grad_norm": 17.363279342651367, "learning_rate": 9.831564007947773e-05, "loss": 0.4277812957763672, "step": 5940 }, { "epoch": 1.6889015043996594, "grad_norm": 22.998035430908203, "learning_rate": 9.831280158955435e-05, "loss": 0.3986526012420654, "step": 5950 }, { "epoch": 1.69173999432302, "grad_norm": 26.156076431274414, "learning_rate": 9.8309963099631e-05, "loss": 0.4157834053039551, "step": 5960 }, { "epoch": 1.694578484246381, "grad_norm": 27.324661254882812, "learning_rate": 9.830712460970764e-05, "loss": 0.40967526435852053, "step": 5970 }, { "epoch": 1.6974169741697418, "grad_norm": 31.61458396911621, "learning_rate": 9.830428611978428e-05, "loss": 0.49207186698913574, "step": 5980 }, { "epoch": 1.7002554640931025, "grad_norm": 15.488746643066406, "learning_rate": 9.830144762986092e-05, "loss": 0.4254718780517578, "step": 5990 }, { "epoch": 1.7030939540164631, "grad_norm": 19.080184936523438, "learning_rate": 9.829860913993756e-05, "loss": 0.3942666530609131, "step": 6000 }, { "epoch": 1.7030939540164631, "eval_accuracy": 0.7946843008838304, "eval_loss": 0.639552891254425, "eval_runtime": 31.3826, "eval_samples_per_second": 501.138, "eval_steps_per_second": 7.839, "step": 6000 }, { "epoch": 1.705932443939824, "grad_norm": 17.96277618408203, "learning_rate": 9.82957706500142e-05, "loss": 0.45555100440979, "step": 6010 }, { "epoch": 1.708770933863185, "grad_norm": 25.49015998840332, "learning_rate": 9.829293216009083e-05, "loss": 0.4165348052978516, "step": 6020 }, { "epoch": 1.7116094237865456, "grad_norm": 18.687419891357422, "learning_rate": 9.829009367016747e-05, "loss": 0.42314891815185546, "step": 6030 }, { "epoch": 1.7144479137099062, "grad_norm": 28.25301170349121, "learning_rate": 9.828725518024413e-05, "loss": 0.46349000930786133, "step": 6040 }, { "epoch": 1.7172864036332671, "grad_norm": 18.832815170288086, "learning_rate": 9.828441669032075e-05, "loss": 0.3894632816314697, "step": 6050 }, { "epoch": 1.720124893556628, "grad_norm": 18.941354751586914, "learning_rate": 9.82815782003974e-05, "loss": 0.424459171295166, "step": 6060 }, { "epoch": 1.7229633834799887, "grad_norm": 21.745758056640625, "learning_rate": 9.827873971047404e-05, "loss": 0.4320863723754883, "step": 6070 }, { "epoch": 1.7258018734033493, "grad_norm": 26.7862491607666, "learning_rate": 9.827590122055066e-05, "loss": 0.4254425525665283, "step": 6080 }, { "epoch": 1.7286403633267102, "grad_norm": 29.282751083374023, "learning_rate": 9.82730627306273e-05, "loss": 0.4424506664276123, "step": 6090 }, { "epoch": 1.7314788532500711, "grad_norm": 22.056499481201172, "learning_rate": 9.827022424070395e-05, "loss": 0.40218181610107423, "step": 6100 }, { "epoch": 1.7343173431734318, "grad_norm": 18.969566345214844, "learning_rate": 9.826738575078059e-05, "loss": 0.4059330463409424, "step": 6110 }, { "epoch": 1.7371558330967924, "grad_norm": 26.770959854125977, "learning_rate": 9.826454726085723e-05, "loss": 0.4434517383575439, "step": 6120 }, { "epoch": 1.7399943230201533, "grad_norm": 16.149120330810547, "learning_rate": 9.826170877093387e-05, "loss": 0.41785407066345215, "step": 6130 }, { "epoch": 1.742832812943514, "grad_norm": 17.30535316467285, "learning_rate": 9.825887028101051e-05, "loss": 0.4103550434112549, "step": 6140 }, { "epoch": 1.7456713028668749, "grad_norm": 18.982412338256836, "learning_rate": 9.825603179108714e-05, "loss": 0.41153688430786134, "step": 6150 }, { "epoch": 1.7485097927902356, "grad_norm": 35.373897552490234, "learning_rate": 9.825319330116378e-05, "loss": 0.44315595626831056, "step": 6160 }, { "epoch": 1.7513482827135962, "grad_norm": 19.156211853027344, "learning_rate": 9.825035481124042e-05, "loss": 0.404082727432251, "step": 6170 }, { "epoch": 1.754186772636957, "grad_norm": 26.301593780517578, "learning_rate": 9.824751632131706e-05, "loss": 0.4820054054260254, "step": 6180 }, { "epoch": 1.757025262560318, "grad_norm": 23.461977005004883, "learning_rate": 9.82446778313937e-05, "loss": 0.41650118827819826, "step": 6190 }, { "epoch": 1.7598637524836787, "grad_norm": 18.080190658569336, "learning_rate": 9.824183934147035e-05, "loss": 0.4413625240325928, "step": 6200 }, { "epoch": 1.7627022424070393, "grad_norm": 29.39219093322754, "learning_rate": 9.823900085154697e-05, "loss": 0.4654662609100342, "step": 6210 }, { "epoch": 1.7655407323304002, "grad_norm": 24.986982345581055, "learning_rate": 9.823616236162362e-05, "loss": 0.3866588592529297, "step": 6220 }, { "epoch": 1.768379222253761, "grad_norm": 22.976842880249023, "learning_rate": 9.823332387170026e-05, "loss": 0.39845123291015627, "step": 6230 }, { "epoch": 1.7712177121771218, "grad_norm": 22.65195083618164, "learning_rate": 9.82304853817769e-05, "loss": 0.35651321411132814, "step": 6240 }, { "epoch": 1.7740562021004824, "grad_norm": 26.74050521850586, "learning_rate": 9.822764689185354e-05, "loss": 0.34719023704528806, "step": 6250 }, { "epoch": 1.7768946920238433, "grad_norm": 17.39774513244629, "learning_rate": 9.822480840193018e-05, "loss": 0.4214783668518066, "step": 6260 }, { "epoch": 1.7797331819472042, "grad_norm": 16.551368713378906, "learning_rate": 9.822196991200682e-05, "loss": 0.436419153213501, "step": 6270 }, { "epoch": 1.7825716718705649, "grad_norm": 29.02804946899414, "learning_rate": 9.821913142208345e-05, "loss": 0.4335481643676758, "step": 6280 }, { "epoch": 1.7854101617939255, "grad_norm": 22.586355209350586, "learning_rate": 9.821629293216009e-05, "loss": 0.35754120349884033, "step": 6290 }, { "epoch": 1.7882486517172864, "grad_norm": 21.191234588623047, "learning_rate": 9.821345444223673e-05, "loss": 0.4479619026184082, "step": 6300 }, { "epoch": 1.7910871416406473, "grad_norm": 29.112369537353516, "learning_rate": 9.821061595231337e-05, "loss": 0.4265907287597656, "step": 6310 }, { "epoch": 1.793925631564008, "grad_norm": 20.455699920654297, "learning_rate": 9.820777746239002e-05, "loss": 0.3897282600402832, "step": 6320 }, { "epoch": 1.7967641214873686, "grad_norm": 18.384050369262695, "learning_rate": 9.820493897246666e-05, "loss": 0.37078032493591306, "step": 6330 }, { "epoch": 1.7996026114107295, "grad_norm": 17.571855545043945, "learning_rate": 9.820210048254329e-05, "loss": 0.43453564643859866, "step": 6340 }, { "epoch": 1.8024411013340904, "grad_norm": 19.029672622680664, "learning_rate": 9.819926199261993e-05, "loss": 0.4427356719970703, "step": 6350 }, { "epoch": 1.805279591257451, "grad_norm": 17.716753005981445, "learning_rate": 9.819642350269657e-05, "loss": 0.4016223430633545, "step": 6360 }, { "epoch": 1.8081180811808117, "grad_norm": 21.720796585083008, "learning_rate": 9.819358501277321e-05, "loss": 0.41228551864624025, "step": 6370 }, { "epoch": 1.8109565711041726, "grad_norm": 17.8159122467041, "learning_rate": 9.819074652284985e-05, "loss": 0.41730575561523436, "step": 6380 }, { "epoch": 1.8137950610275335, "grad_norm": 28.501855850219727, "learning_rate": 9.818790803292649e-05, "loss": 0.43465280532836914, "step": 6390 }, { "epoch": 1.8166335509508942, "grad_norm": 17.53424644470215, "learning_rate": 9.818506954300313e-05, "loss": 0.43177337646484376, "step": 6400 }, { "epoch": 1.8194720408742548, "grad_norm": 17.43556785583496, "learning_rate": 9.818223105307976e-05, "loss": 0.3786790370941162, "step": 6410 }, { "epoch": 1.8223105307976155, "grad_norm": 24.341060638427734, "learning_rate": 9.81793925631564e-05, "loss": 0.4329082489013672, "step": 6420 }, { "epoch": 1.8251490207209764, "grad_norm": 18.160860061645508, "learning_rate": 9.817655407323304e-05, "loss": 0.33026275634765623, "step": 6430 }, { "epoch": 1.8279875106443373, "grad_norm": 29.488039016723633, "learning_rate": 9.817371558330969e-05, "loss": 0.36414299011230467, "step": 6440 }, { "epoch": 1.830826000567698, "grad_norm": 24.25476837158203, "learning_rate": 9.817087709338633e-05, "loss": 0.36670989990234376, "step": 6450 }, { "epoch": 1.8336644904910586, "grad_norm": 22.606950759887695, "learning_rate": 9.816803860346297e-05, "loss": 0.4424624443054199, "step": 6460 }, { "epoch": 1.8365029804144195, "grad_norm": 21.106725692749023, "learning_rate": 9.81652001135396e-05, "loss": 0.42706780433654784, "step": 6470 }, { "epoch": 1.8393414703377804, "grad_norm": 22.923452377319336, "learning_rate": 9.816236162361624e-05, "loss": 0.3846797704696655, "step": 6480 }, { "epoch": 1.842179960261141, "grad_norm": 16.407604217529297, "learning_rate": 9.815952313369288e-05, "loss": 0.4001880168914795, "step": 6490 }, { "epoch": 1.8450184501845017, "grad_norm": 22.22679901123047, "learning_rate": 9.815668464376952e-05, "loss": 0.381205153465271, "step": 6500 }, { "epoch": 1.8450184501845017, "eval_accuracy": 0.7997075093787753, "eval_loss": 0.6205105781555176, "eval_runtime": 30.6759, "eval_samples_per_second": 512.683, "eval_steps_per_second": 8.019, "step": 6500 }, { "epoch": 1.8478569401078626, "grad_norm": 23.107955932617188, "learning_rate": 9.815384615384616e-05, "loss": 0.3948862314224243, "step": 6510 }, { "epoch": 1.8506954300312235, "grad_norm": 15.464545249938965, "learning_rate": 9.81510076639228e-05, "loss": 0.3841205835342407, "step": 6520 }, { "epoch": 1.8535339199545842, "grad_norm": 24.38384437561035, "learning_rate": 9.814816917399944e-05, "loss": 0.34784417152404784, "step": 6530 }, { "epoch": 1.8563724098779448, "grad_norm": 32.742393493652344, "learning_rate": 9.814533068407607e-05, "loss": 0.40121960639953613, "step": 6540 }, { "epoch": 1.8592108998013057, "grad_norm": 30.2054386138916, "learning_rate": 9.814249219415271e-05, "loss": 0.46240997314453125, "step": 6550 }, { "epoch": 1.8620493897246666, "grad_norm": 25.12553596496582, "learning_rate": 9.813965370422936e-05, "loss": 0.4601171016693115, "step": 6560 }, { "epoch": 1.8648878796480273, "grad_norm": 23.124380111694336, "learning_rate": 9.8136815214306e-05, "loss": 0.38168768882751464, "step": 6570 }, { "epoch": 1.867726369571388, "grad_norm": 26.014198303222656, "learning_rate": 9.813397672438264e-05, "loss": 0.4082041263580322, "step": 6580 }, { "epoch": 1.8705648594947488, "grad_norm": 23.301454544067383, "learning_rate": 9.813113823445928e-05, "loss": 0.45189976692199707, "step": 6590 }, { "epoch": 1.8734033494181097, "grad_norm": 26.73699188232422, "learning_rate": 9.812829974453591e-05, "loss": 0.41971330642700194, "step": 6600 }, { "epoch": 1.8762418393414704, "grad_norm": 19.089204788208008, "learning_rate": 9.812546125461255e-05, "loss": 0.4108613967895508, "step": 6610 }, { "epoch": 1.879080329264831, "grad_norm": 30.80247688293457, "learning_rate": 9.812262276468919e-05, "loss": 0.3896260499954224, "step": 6620 }, { "epoch": 1.881918819188192, "grad_norm": 16.476621627807617, "learning_rate": 9.811978427476583e-05, "loss": 0.4086155891418457, "step": 6630 }, { "epoch": 1.8847573091115528, "grad_norm": 30.7562198638916, "learning_rate": 9.811694578484247e-05, "loss": 0.3979541301727295, "step": 6640 }, { "epoch": 1.8875957990349135, "grad_norm": 24.587360382080078, "learning_rate": 9.811410729491911e-05, "loss": 0.4108292102813721, "step": 6650 }, { "epoch": 1.8904342889582741, "grad_norm": 29.429893493652344, "learning_rate": 9.811126880499576e-05, "loss": 0.4692255973815918, "step": 6660 }, { "epoch": 1.8932727788816348, "grad_norm": 26.78128433227539, "learning_rate": 9.810843031507238e-05, "loss": 0.4545595645904541, "step": 6670 }, { "epoch": 1.8961112688049957, "grad_norm": 32.98216247558594, "learning_rate": 9.810559182514902e-05, "loss": 0.39421920776367186, "step": 6680 }, { "epoch": 1.8989497587283566, "grad_norm": 20.42596435546875, "learning_rate": 9.810275333522567e-05, "loss": 0.3842628479003906, "step": 6690 }, { "epoch": 1.9017882486517173, "grad_norm": 15.698198318481445, "learning_rate": 9.80999148453023e-05, "loss": 0.3461360692977905, "step": 6700 }, { "epoch": 1.904626738575078, "grad_norm": 29.326900482177734, "learning_rate": 9.809707635537895e-05, "loss": 0.3522496223449707, "step": 6710 }, { "epoch": 1.9074652284984388, "grad_norm": 14.160161972045898, "learning_rate": 9.809423786545559e-05, "loss": 0.36911160945892335, "step": 6720 }, { "epoch": 1.9103037184217997, "grad_norm": 25.620317459106445, "learning_rate": 9.809139937553222e-05, "loss": 0.4214602470397949, "step": 6730 }, { "epoch": 1.9131422083451604, "grad_norm": 18.86240005493164, "learning_rate": 9.808856088560886e-05, "loss": 0.3971108913421631, "step": 6740 }, { "epoch": 1.915980698268521, "grad_norm": 26.27129364013672, "learning_rate": 9.80857223956855e-05, "loss": 0.4118006229400635, "step": 6750 }, { "epoch": 1.918819188191882, "grad_norm": 23.562463760375977, "learning_rate": 9.808288390576214e-05, "loss": 0.4610177516937256, "step": 6760 }, { "epoch": 1.9216576781152428, "grad_norm": 16.37548065185547, "learning_rate": 9.808004541583878e-05, "loss": 0.3787146806716919, "step": 6770 }, { "epoch": 1.9244961680386035, "grad_norm": 16.402164459228516, "learning_rate": 9.807720692591542e-05, "loss": 0.314204478263855, "step": 6780 }, { "epoch": 1.9273346579619641, "grad_norm": 23.63580322265625, "learning_rate": 9.807436843599205e-05, "loss": 0.4527307033538818, "step": 6790 }, { "epoch": 1.930173147885325, "grad_norm": 16.004045486450195, "learning_rate": 9.80715299460687e-05, "loss": 0.39499173164367674, "step": 6800 }, { "epoch": 1.933011637808686, "grad_norm": 25.094459533691406, "learning_rate": 9.806869145614534e-05, "loss": 0.37181546688079836, "step": 6810 }, { "epoch": 1.9358501277320466, "grad_norm": 19.275381088256836, "learning_rate": 9.806585296622198e-05, "loss": 0.3720922708511353, "step": 6820 }, { "epoch": 1.9386886176554072, "grad_norm": 24.782304763793945, "learning_rate": 9.80630144762986e-05, "loss": 0.4120591640472412, "step": 6830 }, { "epoch": 1.9415271075787681, "grad_norm": 22.285507202148438, "learning_rate": 9.806017598637526e-05, "loss": 0.32892069816589353, "step": 6840 }, { "epoch": 1.944365597502129, "grad_norm": 17.815025329589844, "learning_rate": 9.80573374964519e-05, "loss": 0.37518939971923826, "step": 6850 }, { "epoch": 1.9472040874254897, "grad_norm": 18.14682388305664, "learning_rate": 9.805449900652853e-05, "loss": 0.3778093338012695, "step": 6860 }, { "epoch": 1.9500425773488503, "grad_norm": 21.517192840576172, "learning_rate": 9.805166051660517e-05, "loss": 0.3278337001800537, "step": 6870 }, { "epoch": 1.9528810672722112, "grad_norm": 13.659611701965332, "learning_rate": 9.804882202668181e-05, "loss": 0.3915505647659302, "step": 6880 }, { "epoch": 1.9557195571955721, "grad_norm": 14.353165626525879, "learning_rate": 9.804598353675844e-05, "loss": 0.37442266941070557, "step": 6890 }, { "epoch": 1.9585580471189328, "grad_norm": 22.526288986206055, "learning_rate": 9.804314504683508e-05, "loss": 0.3907177209854126, "step": 6900 }, { "epoch": 1.9613965370422934, "grad_norm": 21.90485191345215, "learning_rate": 9.804030655691174e-05, "loss": 0.3444492816925049, "step": 6910 }, { "epoch": 1.964235026965654, "grad_norm": 14.914915084838867, "learning_rate": 9.803746806698836e-05, "loss": 0.43480501174926756, "step": 6920 }, { "epoch": 1.967073516889015, "grad_norm": 23.14503288269043, "learning_rate": 9.8034629577065e-05, "loss": 0.4471869945526123, "step": 6930 }, { "epoch": 1.9699120068123759, "grad_norm": 23.77007484436035, "learning_rate": 9.803179108714165e-05, "loss": 0.3919124364852905, "step": 6940 }, { "epoch": 1.9727504967357365, "grad_norm": 26.774288177490234, "learning_rate": 9.802895259721829e-05, "loss": 0.39920334815979003, "step": 6950 }, { "epoch": 1.9755889866590972, "grad_norm": 22.82596778869629, "learning_rate": 9.802611410729492e-05, "loss": 0.3753786087036133, "step": 6960 }, { "epoch": 1.978427476582458, "grad_norm": 17.98094940185547, "learning_rate": 9.802327561737157e-05, "loss": 0.3512239933013916, "step": 6970 }, { "epoch": 1.981265966505819, "grad_norm": 18.104963302612305, "learning_rate": 9.802043712744821e-05, "loss": 0.3635380268096924, "step": 6980 }, { "epoch": 1.9841044564291797, "grad_norm": 22.595094680786133, "learning_rate": 9.801759863752484e-05, "loss": 0.3666935920715332, "step": 6990 }, { "epoch": 1.9869429463525403, "grad_norm": 29.48496437072754, "learning_rate": 9.801476014760148e-05, "loss": 0.45305633544921875, "step": 7000 }, { "epoch": 1.9869429463525403, "eval_accuracy": 0.789979016977173, "eval_loss": 0.6328722834587097, "eval_runtime": 30.9315, "eval_samples_per_second": 508.445, "eval_steps_per_second": 7.953, "step": 7000 }, { "epoch": 1.9897814362759012, "grad_norm": 22.97522735595703, "learning_rate": 9.801192165767812e-05, "loss": 0.36184446811676024, "step": 7010 }, { "epoch": 1.992619926199262, "grad_norm": 27.06800651550293, "learning_rate": 9.800908316775475e-05, "loss": 0.37371642589569093, "step": 7020 }, { "epoch": 1.9954584161226228, "grad_norm": 22.68690299987793, "learning_rate": 9.800624467783139e-05, "loss": 0.4028749942779541, "step": 7030 }, { "epoch": 1.9982969060459834, "grad_norm": 28.094987869262695, "learning_rate": 9.800340618790805e-05, "loss": 0.4097541332244873, "step": 7040 }, { "epoch": 2.001135395969344, "grad_norm": 25.623306274414062, "learning_rate": 9.800056769798467e-05, "loss": 0.3332174777984619, "step": 7050 }, { "epoch": 2.003973885892705, "grad_norm": 26.121572494506836, "learning_rate": 9.799772920806132e-05, "loss": 0.3804765224456787, "step": 7060 }, { "epoch": 2.006812375816066, "grad_norm": 18.207355499267578, "learning_rate": 9.799489071813796e-05, "loss": 0.36211156845092773, "step": 7070 }, { "epoch": 2.0096508657394265, "grad_norm": 26.486698150634766, "learning_rate": 9.79920522282146e-05, "loss": 0.37827110290527344, "step": 7080 }, { "epoch": 2.012489355662787, "grad_norm": 27.40904998779297, "learning_rate": 9.798921373829123e-05, "loss": 0.3477623701095581, "step": 7090 }, { "epoch": 2.0153278455861483, "grad_norm": 26.22162628173828, "learning_rate": 9.798637524836787e-05, "loss": 0.4224153995513916, "step": 7100 }, { "epoch": 2.018166335509509, "grad_norm": 11.713774681091309, "learning_rate": 9.798353675844452e-05, "loss": 0.38394105434417725, "step": 7110 }, { "epoch": 2.0210048254328696, "grad_norm": 16.71647071838379, "learning_rate": 9.798069826852115e-05, "loss": 0.3284349203109741, "step": 7120 }, { "epoch": 2.0238433153562303, "grad_norm": 20.081064224243164, "learning_rate": 9.797785977859779e-05, "loss": 0.35011241436004636, "step": 7130 }, { "epoch": 2.0266818052795914, "grad_norm": 33.82632064819336, "learning_rate": 9.797502128867443e-05, "loss": 0.3723596096038818, "step": 7140 }, { "epoch": 2.029520295202952, "grad_norm": 24.423551559448242, "learning_rate": 9.797218279875106e-05, "loss": 0.34326722621917727, "step": 7150 }, { "epoch": 2.0323587851263127, "grad_norm": 15.357254981994629, "learning_rate": 9.79693443088277e-05, "loss": 0.3487834453582764, "step": 7160 }, { "epoch": 2.0351972750496734, "grad_norm": 18.3219051361084, "learning_rate": 9.796650581890436e-05, "loss": 0.3497222900390625, "step": 7170 }, { "epoch": 2.0380357649730345, "grad_norm": 17.77417755126953, "learning_rate": 9.796366732898098e-05, "loss": 0.3414299488067627, "step": 7180 }, { "epoch": 2.040874254896395, "grad_norm": 19.74038314819336, "learning_rate": 9.796082883905763e-05, "loss": 0.34571359157562254, "step": 7190 }, { "epoch": 2.043712744819756, "grad_norm": 29.782472610473633, "learning_rate": 9.795799034913427e-05, "loss": 0.36627726554870604, "step": 7200 }, { "epoch": 2.0465512347431165, "grad_norm": 18.892847061157227, "learning_rate": 9.795515185921091e-05, "loss": 0.4110989570617676, "step": 7210 }, { "epoch": 2.0493897246664776, "grad_norm": 18.120851516723633, "learning_rate": 9.795231336928754e-05, "loss": 0.3553095817565918, "step": 7220 }, { "epoch": 2.0522282145898383, "grad_norm": 18.801067352294922, "learning_rate": 9.794947487936418e-05, "loss": 0.331646466255188, "step": 7230 }, { "epoch": 2.055066704513199, "grad_norm": 21.328035354614258, "learning_rate": 9.794663638944083e-05, "loss": 0.3528684854507446, "step": 7240 }, { "epoch": 2.0579051944365596, "grad_norm": 22.32634735107422, "learning_rate": 9.794379789951746e-05, "loss": 0.2940554618835449, "step": 7250 }, { "epoch": 2.0607436843599207, "grad_norm": 22.976261138916016, "learning_rate": 9.79409594095941e-05, "loss": 0.36672842502593994, "step": 7260 }, { "epoch": 2.0635821742832814, "grad_norm": 18.52858543395996, "learning_rate": 9.793812091967074e-05, "loss": 0.32463598251342773, "step": 7270 }, { "epoch": 2.066420664206642, "grad_norm": 28.223352432250977, "learning_rate": 9.793528242974737e-05, "loss": 0.34141054153442385, "step": 7280 }, { "epoch": 2.0692591541300027, "grad_norm": 18.35302734375, "learning_rate": 9.793244393982401e-05, "loss": 0.3978186845779419, "step": 7290 }, { "epoch": 2.072097644053364, "grad_norm": 24.369665145874023, "learning_rate": 9.792960544990065e-05, "loss": 0.3742377519607544, "step": 7300 }, { "epoch": 2.0749361339767245, "grad_norm": 24.92734146118164, "learning_rate": 9.79267669599773e-05, "loss": 0.3147447109222412, "step": 7310 }, { "epoch": 2.077774623900085, "grad_norm": 26.140825271606445, "learning_rate": 9.792392847005394e-05, "loss": 0.3315492868423462, "step": 7320 }, { "epoch": 2.080613113823446, "grad_norm": 20.15946388244629, "learning_rate": 9.792108998013058e-05, "loss": 0.37865581512451174, "step": 7330 }, { "epoch": 2.0834516037468065, "grad_norm": 29.028505325317383, "learning_rate": 9.791825149020722e-05, "loss": 0.3692589998245239, "step": 7340 }, { "epoch": 2.0862900936701676, "grad_norm": 18.025115966796875, "learning_rate": 9.791541300028385e-05, "loss": 0.34498021602630613, "step": 7350 }, { "epoch": 2.0891285835935283, "grad_norm": 19.393327713012695, "learning_rate": 9.791257451036049e-05, "loss": 0.3126228332519531, "step": 7360 }, { "epoch": 2.091967073516889, "grad_norm": 25.120500564575195, "learning_rate": 9.790973602043714e-05, "loss": 0.299014687538147, "step": 7370 }, { "epoch": 2.0948055634402496, "grad_norm": 16.622690200805664, "learning_rate": 9.790689753051377e-05, "loss": 0.33715310096740725, "step": 7380 }, { "epoch": 2.0976440533636107, "grad_norm": 19.6770076751709, "learning_rate": 9.790405904059041e-05, "loss": 0.3389828443527222, "step": 7390 }, { "epoch": 2.1004825432869714, "grad_norm": 14.121631622314453, "learning_rate": 9.790122055066705e-05, "loss": 0.3276146173477173, "step": 7400 }, { "epoch": 2.103321033210332, "grad_norm": 19.206398010253906, "learning_rate": 9.789838206074368e-05, "loss": 0.33128836154937746, "step": 7410 }, { "epoch": 2.1061595231336927, "grad_norm": 24.142192840576172, "learning_rate": 9.789554357082032e-05, "loss": 0.3608201503753662, "step": 7420 }, { "epoch": 2.108998013057054, "grad_norm": 19.778400421142578, "learning_rate": 9.789270508089696e-05, "loss": 0.3426323890686035, "step": 7430 }, { "epoch": 2.1118365029804145, "grad_norm": 24.99077033996582, "learning_rate": 9.78898665909736e-05, "loss": 0.3042409420013428, "step": 7440 }, { "epoch": 2.114674992903775, "grad_norm": 22.850847244262695, "learning_rate": 9.788702810105025e-05, "loss": 0.3346649885177612, "step": 7450 }, { "epoch": 2.117513482827136, "grad_norm": 17.953664779663086, "learning_rate": 9.788418961112689e-05, "loss": 0.35388185977935793, "step": 7460 }, { "epoch": 2.120351972750497, "grad_norm": 25.097293853759766, "learning_rate": 9.788163497019586e-05, "loss": 0.3761943578720093, "step": 7470 }, { "epoch": 2.1231904626738576, "grad_norm": 17.763038635253906, "learning_rate": 9.78787964802725e-05, "loss": 0.32521159648895265, "step": 7480 }, { "epoch": 2.1260289525972182, "grad_norm": 14.84680461883545, "learning_rate": 9.787595799034914e-05, "loss": 0.29300243854522706, "step": 7490 }, { "epoch": 2.128867442520579, "grad_norm": 22.55581283569336, "learning_rate": 9.787311950042578e-05, "loss": 0.2999121189117432, "step": 7500 }, { "epoch": 2.128867442520579, "eval_accuracy": 0.8193552489349526, "eval_loss": 0.5586711764335632, "eval_runtime": 30.6876, "eval_samples_per_second": 512.486, "eval_steps_per_second": 8.016, "step": 7500 }, { "epoch": 2.13170593244394, "grad_norm": 22.64993667602539, "learning_rate": 9.787028101050242e-05, "loss": 0.33211965560913087, "step": 7510 }, { "epoch": 2.1345444223673007, "grad_norm": 29.761520385742188, "learning_rate": 9.786744252057906e-05, "loss": 0.3662957429885864, "step": 7520 }, { "epoch": 2.1373829122906614, "grad_norm": 21.352767944335938, "learning_rate": 9.786460403065569e-05, "loss": 0.34453125, "step": 7530 }, { "epoch": 2.140221402214022, "grad_norm": 18.434667587280273, "learning_rate": 9.786176554073233e-05, "loss": 0.3489013671875, "step": 7540 }, { "epoch": 2.1430598921373827, "grad_norm": 12.389979362487793, "learning_rate": 9.785892705080897e-05, "loss": 0.3259838581085205, "step": 7550 }, { "epoch": 2.145898382060744, "grad_norm": 26.911739349365234, "learning_rate": 9.785608856088561e-05, "loss": 0.3636974096298218, "step": 7560 }, { "epoch": 2.1487368719841045, "grad_norm": 17.13495445251465, "learning_rate": 9.785325007096226e-05, "loss": 0.3096443176269531, "step": 7570 }, { "epoch": 2.151575361907465, "grad_norm": 13.735611915588379, "learning_rate": 9.78504115810389e-05, "loss": 0.28812298774719236, "step": 7580 }, { "epoch": 2.1544138518308262, "grad_norm": 13.61890983581543, "learning_rate": 9.784757309111552e-05, "loss": 0.27205934524536135, "step": 7590 }, { "epoch": 2.157252341754187, "grad_norm": 17.72872543334961, "learning_rate": 9.784473460119217e-05, "loss": 0.36398863792419434, "step": 7600 }, { "epoch": 2.1600908316775476, "grad_norm": 18.36585235595703, "learning_rate": 9.784189611126881e-05, "loss": 0.32454309463500974, "step": 7610 }, { "epoch": 2.1629293216009082, "grad_norm": 28.443626403808594, "learning_rate": 9.783905762134545e-05, "loss": 0.30087525844573976, "step": 7620 }, { "epoch": 2.165767811524269, "grad_norm": 16.3806209564209, "learning_rate": 9.783621913142209e-05, "loss": 0.33084516525268554, "step": 7630 }, { "epoch": 2.16860630144763, "grad_norm": 20.08734130859375, "learning_rate": 9.783338064149873e-05, "loss": 0.35816125869750975, "step": 7640 }, { "epoch": 2.1714447913709907, "grad_norm": 16.4975643157959, "learning_rate": 9.783054215157536e-05, "loss": 0.29642434120178224, "step": 7650 }, { "epoch": 2.1742832812943513, "grad_norm": 25.36775016784668, "learning_rate": 9.7827703661652e-05, "loss": 0.31282219886779783, "step": 7660 }, { "epoch": 2.177121771217712, "grad_norm": 11.673345565795898, "learning_rate": 9.782486517172864e-05, "loss": 0.32711076736450195, "step": 7670 }, { "epoch": 2.179960261141073, "grad_norm": 23.321889877319336, "learning_rate": 9.782202668180528e-05, "loss": 0.3195405721664429, "step": 7680 }, { "epoch": 2.1827987510644338, "grad_norm": 29.165145874023438, "learning_rate": 9.781918819188193e-05, "loss": 0.3221855163574219, "step": 7690 }, { "epoch": 2.1856372409877944, "grad_norm": 19.81364631652832, "learning_rate": 9.781634970195857e-05, "loss": 0.3239097833633423, "step": 7700 }, { "epoch": 2.188475730911155, "grad_norm": 15.657233238220215, "learning_rate": 9.781351121203521e-05, "loss": 0.29824137687683105, "step": 7710 }, { "epoch": 2.191314220834516, "grad_norm": 17.727561950683594, "learning_rate": 9.781067272211184e-05, "loss": 0.2912928581237793, "step": 7720 }, { "epoch": 2.194152710757877, "grad_norm": 16.810564041137695, "learning_rate": 9.780783423218848e-05, "loss": 0.3414383172988892, "step": 7730 }, { "epoch": 2.1969912006812375, "grad_norm": 15.82119083404541, "learning_rate": 9.780499574226512e-05, "loss": 0.3083316802978516, "step": 7740 }, { "epoch": 2.199829690604598, "grad_norm": 15.556926727294922, "learning_rate": 9.780215725234176e-05, "loss": 0.3164150476455688, "step": 7750 }, { "epoch": 2.2026681805279593, "grad_norm": 21.46554946899414, "learning_rate": 9.77993187624184e-05, "loss": 0.3076106309890747, "step": 7760 }, { "epoch": 2.20550667045132, "grad_norm": 13.634368896484375, "learning_rate": 9.779648027249504e-05, "loss": 0.319075345993042, "step": 7770 }, { "epoch": 2.2083451603746806, "grad_norm": 15.295550346374512, "learning_rate": 9.779364178257167e-05, "loss": 0.29931752681732177, "step": 7780 }, { "epoch": 2.2111836502980413, "grad_norm": 24.052366256713867, "learning_rate": 9.779080329264831e-05, "loss": 0.3561910390853882, "step": 7790 }, { "epoch": 2.2140221402214024, "grad_norm": 17.625972747802734, "learning_rate": 9.778796480272495e-05, "loss": 0.330292272567749, "step": 7800 }, { "epoch": 2.216860630144763, "grad_norm": 22.240184783935547, "learning_rate": 9.77851263128016e-05, "loss": 0.3710068941116333, "step": 7810 }, { "epoch": 2.2196991200681238, "grad_norm": 16.99149513244629, "learning_rate": 9.778228782287824e-05, "loss": 0.3270531892776489, "step": 7820 }, { "epoch": 2.2225376099914844, "grad_norm": 26.887069702148438, "learning_rate": 9.777944933295488e-05, "loss": 0.28733437061309813, "step": 7830 }, { "epoch": 2.225376099914845, "grad_norm": 23.501020431518555, "learning_rate": 9.777661084303152e-05, "loss": 0.3077794075012207, "step": 7840 }, { "epoch": 2.228214589838206, "grad_norm": 14.051553726196289, "learning_rate": 9.777377235310815e-05, "loss": 0.3143048048019409, "step": 7850 }, { "epoch": 2.231053079761567, "grad_norm": 16.144062042236328, "learning_rate": 9.777093386318479e-05, "loss": 0.3136625051498413, "step": 7860 }, { "epoch": 2.2338915696849275, "grad_norm": 27.20977783203125, "learning_rate": 9.776809537326143e-05, "loss": 0.34062178134918214, "step": 7870 }, { "epoch": 2.236730059608288, "grad_norm": 12.354023933410645, "learning_rate": 9.776525688333806e-05, "loss": 0.3248645544052124, "step": 7880 }, { "epoch": 2.2395685495316493, "grad_norm": 21.091035842895508, "learning_rate": 9.776241839341471e-05, "loss": 0.32216098308563235, "step": 7890 }, { "epoch": 2.24240703945501, "grad_norm": 17.950258255004883, "learning_rate": 9.775957990349135e-05, "loss": 0.3246042013168335, "step": 7900 }, { "epoch": 2.2452455293783706, "grad_norm": 28.928653717041016, "learning_rate": 9.775674141356798e-05, "loss": 0.3273427724838257, "step": 7910 }, { "epoch": 2.2480840193017313, "grad_norm": 22.439340591430664, "learning_rate": 9.775390292364462e-05, "loss": 0.3079803228378296, "step": 7920 }, { "epoch": 2.2509225092250924, "grad_norm": 15.47249698638916, "learning_rate": 9.775106443372126e-05, "loss": 0.29904541969299314, "step": 7930 }, { "epoch": 2.253760999148453, "grad_norm": 18.008270263671875, "learning_rate": 9.77482259437979e-05, "loss": 0.3077368259429932, "step": 7940 }, { "epoch": 2.2565994890718137, "grad_norm": 19.825355529785156, "learning_rate": 9.774538745387455e-05, "loss": 0.32225110530853274, "step": 7950 }, { "epoch": 2.2594379789951744, "grad_norm": 14.30937671661377, "learning_rate": 9.774254896395119e-05, "loss": 0.31217069625854493, "step": 7960 }, { "epoch": 2.2622764689185355, "grad_norm": 23.95633888244629, "learning_rate": 9.773971047402783e-05, "loss": 0.2829510450363159, "step": 7970 }, { "epoch": 2.265114958841896, "grad_norm": 21.46575927734375, "learning_rate": 9.773687198410446e-05, "loss": 0.30298519134521484, "step": 7980 }, { "epoch": 2.267953448765257, "grad_norm": 16.935693740844727, "learning_rate": 9.77340334941811e-05, "loss": 0.2906353235244751, "step": 7990 }, { "epoch": 2.2707919386886175, "grad_norm": 14.386497497558594, "learning_rate": 9.773119500425774e-05, "loss": 0.24933280944824218, "step": 8000 }, { "epoch": 2.2707919386886175, "eval_accuracy": 0.8336618554078973, "eval_loss": 0.510450005531311, "eval_runtime": 30.7075, "eval_samples_per_second": 512.154, "eval_steps_per_second": 8.011, "step": 8000 }, { "epoch": 2.2736304286119786, "grad_norm": 15.910746574401855, "learning_rate": 9.772835651433437e-05, "loss": 0.33203461170196535, "step": 8010 }, { "epoch": 2.2764689185353393, "grad_norm": 20.63860511779785, "learning_rate": 9.772551802441102e-05, "loss": 0.3112318992614746, "step": 8020 }, { "epoch": 2.2793074084587, "grad_norm": 12.859118461608887, "learning_rate": 9.772267953448766e-05, "loss": 0.30755631923675536, "step": 8030 }, { "epoch": 2.2821458983820606, "grad_norm": 22.16994285583496, "learning_rate": 9.771984104456429e-05, "loss": 0.32435686588287355, "step": 8040 }, { "epoch": 2.2849843883054213, "grad_norm": 24.568498611450195, "learning_rate": 9.771700255464093e-05, "loss": 0.28578126430511475, "step": 8050 }, { "epoch": 2.2878228782287824, "grad_norm": 16.076248168945312, "learning_rate": 9.771416406471757e-05, "loss": 0.29748618602752686, "step": 8060 }, { "epoch": 2.290661368152143, "grad_norm": 28.567386627197266, "learning_rate": 9.771132557479422e-05, "loss": 0.2868338584899902, "step": 8070 }, { "epoch": 2.2934998580755037, "grad_norm": 25.460519790649414, "learning_rate": 9.770848708487086e-05, "loss": 0.30639047622680665, "step": 8080 }, { "epoch": 2.296338347998865, "grad_norm": 12.130402565002441, "learning_rate": 9.77056485949475e-05, "loss": 0.34181952476501465, "step": 8090 }, { "epoch": 2.2991768379222255, "grad_norm": 17.020689010620117, "learning_rate": 9.770281010502414e-05, "loss": 0.3766122579574585, "step": 8100 }, { "epoch": 2.302015327845586, "grad_norm": 19.27068519592285, "learning_rate": 9.769997161510077e-05, "loss": 0.32407164573669434, "step": 8110 }, { "epoch": 2.304853817768947, "grad_norm": 22.321697235107422, "learning_rate": 9.769713312517741e-05, "loss": 0.30633792877197263, "step": 8120 }, { "epoch": 2.3076923076923075, "grad_norm": 27.439123153686523, "learning_rate": 9.769429463525405e-05, "loss": 0.3041942358016968, "step": 8130 }, { "epoch": 2.3105307976156686, "grad_norm": 18.074853897094727, "learning_rate": 9.769145614533068e-05, "loss": 0.29930384159088136, "step": 8140 }, { "epoch": 2.3133692875390293, "grad_norm": 17.662498474121094, "learning_rate": 9.768861765540733e-05, "loss": 0.31606063842773435, "step": 8150 }, { "epoch": 2.31620777746239, "grad_norm": 21.386085510253906, "learning_rate": 9.768577916548397e-05, "loss": 0.33449127674102785, "step": 8160 }, { "epoch": 2.3190462673857506, "grad_norm": 23.739849090576172, "learning_rate": 9.76829406755606e-05, "loss": 0.4126762866973877, "step": 8170 }, { "epoch": 2.3218847573091117, "grad_norm": 23.911813735961914, "learning_rate": 9.768010218563724e-05, "loss": 0.3007689952850342, "step": 8180 }, { "epoch": 2.3247232472324724, "grad_norm": 18.675647735595703, "learning_rate": 9.767726369571389e-05, "loss": 0.3121593475341797, "step": 8190 }, { "epoch": 2.327561737155833, "grad_norm": 25.354106903076172, "learning_rate": 9.767442520579053e-05, "loss": 0.36575496196746826, "step": 8200 }, { "epoch": 2.3304002270791937, "grad_norm": 30.7576904296875, "learning_rate": 9.767158671586715e-05, "loss": 0.32697985172271726, "step": 8210 }, { "epoch": 2.333238717002555, "grad_norm": 20.862083435058594, "learning_rate": 9.766874822594381e-05, "loss": 0.3296887636184692, "step": 8220 }, { "epoch": 2.3360772069259155, "grad_norm": 12.933531761169434, "learning_rate": 9.766590973602045e-05, "loss": 0.2782109260559082, "step": 8230 }, { "epoch": 2.338915696849276, "grad_norm": 19.33204460144043, "learning_rate": 9.766307124609708e-05, "loss": 0.36731810569763185, "step": 8240 }, { "epoch": 2.341754186772637, "grad_norm": 33.66826629638672, "learning_rate": 9.766023275617372e-05, "loss": 0.31624741554260255, "step": 8250 }, { "epoch": 2.344592676695998, "grad_norm": 29.914854049682617, "learning_rate": 9.765739426625036e-05, "loss": 0.3358884334564209, "step": 8260 }, { "epoch": 2.3474311666193586, "grad_norm": 19.980573654174805, "learning_rate": 9.765455577632699e-05, "loss": 0.3604869365692139, "step": 8270 }, { "epoch": 2.3502696565427192, "grad_norm": 15.825258255004883, "learning_rate": 9.765171728640364e-05, "loss": 0.29133899211883546, "step": 8280 }, { "epoch": 2.35310814646608, "grad_norm": 20.521116256713867, "learning_rate": 9.764887879648029e-05, "loss": 0.3142831325531006, "step": 8290 }, { "epoch": 2.355946636389441, "grad_norm": 16.169811248779297, "learning_rate": 9.764604030655691e-05, "loss": 0.2787348985671997, "step": 8300 }, { "epoch": 2.3587851263128017, "grad_norm": 31.150718688964844, "learning_rate": 9.764320181663355e-05, "loss": 0.37035021781921384, "step": 8310 }, { "epoch": 2.3616236162361623, "grad_norm": 17.649940490722656, "learning_rate": 9.76403633267102e-05, "loss": 0.3348374843597412, "step": 8320 }, { "epoch": 2.364462106159523, "grad_norm": 13.433549880981445, "learning_rate": 9.763752483678684e-05, "loss": 0.2467961311340332, "step": 8330 }, { "epoch": 2.3673005960828837, "grad_norm": 21.340105056762695, "learning_rate": 9.763468634686347e-05, "loss": 0.31401829719543456, "step": 8340 }, { "epoch": 2.370139086006245, "grad_norm": 17.576339721679688, "learning_rate": 9.763184785694012e-05, "loss": 0.28417556285858153, "step": 8350 }, { "epoch": 2.3729775759296055, "grad_norm": 24.587268829345703, "learning_rate": 9.762900936701676e-05, "loss": 0.32737605571746825, "step": 8360 }, { "epoch": 2.375816065852966, "grad_norm": 19.446542739868164, "learning_rate": 9.762617087709339e-05, "loss": 0.29132063388824464, "step": 8370 }, { "epoch": 2.3786545557763272, "grad_norm": 11.685669898986816, "learning_rate": 9.762333238717003e-05, "loss": 0.3012071132659912, "step": 8380 }, { "epoch": 2.381493045699688, "grad_norm": 15.930485725402832, "learning_rate": 9.762049389724667e-05, "loss": 0.27760627269744875, "step": 8390 }, { "epoch": 2.3843315356230486, "grad_norm": 25.309608459472656, "learning_rate": 9.76176554073233e-05, "loss": 0.33383493423461913, "step": 8400 }, { "epoch": 2.3871700255464092, "grad_norm": 10.296014785766602, "learning_rate": 9.761481691739994e-05, "loss": 0.3022672176361084, "step": 8410 }, { "epoch": 2.39000851546977, "grad_norm": 19.012435913085938, "learning_rate": 9.76119784274766e-05, "loss": 0.3082521200180054, "step": 8420 }, { "epoch": 2.392847005393131, "grad_norm": 14.833815574645996, "learning_rate": 9.760913993755322e-05, "loss": 0.3190229177474976, "step": 8430 }, { "epoch": 2.3956854953164917, "grad_norm": 20.57181167602539, "learning_rate": 9.760630144762987e-05, "loss": 0.30741300582885744, "step": 8440 }, { "epoch": 2.3985239852398523, "grad_norm": 17.030780792236328, "learning_rate": 9.760346295770651e-05, "loss": 0.3428426027297974, "step": 8450 }, { "epoch": 2.401362475163213, "grad_norm": 22.705467224121094, "learning_rate": 9.760062446778315e-05, "loss": 0.30942912101745607, "step": 8460 }, { "epoch": 2.404200965086574, "grad_norm": 20.66131019592285, "learning_rate": 9.759778597785978e-05, "loss": 0.3165341854095459, "step": 8470 }, { "epoch": 2.4070394550099348, "grad_norm": 21.866134643554688, "learning_rate": 9.759494748793643e-05, "loss": 0.31316912174224854, "step": 8480 }, { "epoch": 2.4098779449332954, "grad_norm": 20.54009246826172, "learning_rate": 9.759210899801306e-05, "loss": 0.3653756618499756, "step": 8490 }, { "epoch": 2.412716434856656, "grad_norm": 18.931283950805664, "learning_rate": 9.75892705080897e-05, "loss": 0.3053940773010254, "step": 8500 }, { "epoch": 2.412716434856656, "eval_accuracy": 0.839130158326445, "eval_loss": 0.48315316438674927, "eval_runtime": 31.3929, "eval_samples_per_second": 500.973, "eval_steps_per_second": 7.836, "step": 8500 }, { "epoch": 2.415554924780017, "grad_norm": 32.2748908996582, "learning_rate": 9.758643201816634e-05, "loss": 0.3398677587509155, "step": 8510 }, { "epoch": 2.418393414703378, "grad_norm": 15.286317825317383, "learning_rate": 9.758359352824298e-05, "loss": 0.3296861410140991, "step": 8520 }, { "epoch": 2.4212319046267385, "grad_norm": 19.433284759521484, "learning_rate": 9.758075503831961e-05, "loss": 0.35013091564178467, "step": 8530 }, { "epoch": 2.424070394550099, "grad_norm": 21.255708694458008, "learning_rate": 9.757791654839625e-05, "loss": 0.3604008436203003, "step": 8540 }, { "epoch": 2.42690888447346, "grad_norm": 28.56644058227539, "learning_rate": 9.757507805847291e-05, "loss": 0.32774953842163085, "step": 8550 }, { "epoch": 2.429747374396821, "grad_norm": 20.850976943969727, "learning_rate": 9.757223956854953e-05, "loss": 0.2773128986358643, "step": 8560 }, { "epoch": 2.4325858643201816, "grad_norm": 18.52234649658203, "learning_rate": 9.756940107862618e-05, "loss": 0.3332803249359131, "step": 8570 }, { "epoch": 2.4354243542435423, "grad_norm": 19.95413589477539, "learning_rate": 9.756656258870282e-05, "loss": 0.3028581619262695, "step": 8580 }, { "epoch": 2.4382628441669034, "grad_norm": 41.320655822753906, "learning_rate": 9.756372409877945e-05, "loss": 0.3280078172683716, "step": 8590 }, { "epoch": 2.441101334090264, "grad_norm": 13.023110389709473, "learning_rate": 9.756088560885609e-05, "loss": 0.29077088832855225, "step": 8600 }, { "epoch": 2.4439398240136248, "grad_norm": 18.225536346435547, "learning_rate": 9.755804711893273e-05, "loss": 0.30920071601867677, "step": 8610 }, { "epoch": 2.4467783139369854, "grad_norm": 24.94522476196289, "learning_rate": 9.755520862900937e-05, "loss": 0.2795328378677368, "step": 8620 }, { "epoch": 2.449616803860346, "grad_norm": 13.974796295166016, "learning_rate": 9.755237013908601e-05, "loss": 0.316956615447998, "step": 8630 }, { "epoch": 2.452455293783707, "grad_norm": 17.750377655029297, "learning_rate": 9.754953164916265e-05, "loss": 0.30039684772491454, "step": 8640 }, { "epoch": 2.455293783707068, "grad_norm": 16.818546295166016, "learning_rate": 9.75466931592393e-05, "loss": 0.3117759943008423, "step": 8650 }, { "epoch": 2.4581322736304285, "grad_norm": 19.495594024658203, "learning_rate": 9.754385466931592e-05, "loss": 0.25922765731811526, "step": 8660 }, { "epoch": 2.4609707635537896, "grad_norm": 25.982568740844727, "learning_rate": 9.754101617939256e-05, "loss": 0.299273681640625, "step": 8670 }, { "epoch": 2.4638092534771503, "grad_norm": 22.021488189697266, "learning_rate": 9.753817768946922e-05, "loss": 0.3233955383300781, "step": 8680 }, { "epoch": 2.466647743400511, "grad_norm": 18.47892189025879, "learning_rate": 9.753533919954585e-05, "loss": 0.2856843709945679, "step": 8690 }, { "epoch": 2.4694862333238716, "grad_norm": 14.465981483459473, "learning_rate": 9.753250070962249e-05, "loss": 0.29565207958221434, "step": 8700 }, { "epoch": 2.4723247232472323, "grad_norm": 20.1898136138916, "learning_rate": 9.752966221969913e-05, "loss": 0.3390942096710205, "step": 8710 }, { "epoch": 2.4751632131705934, "grad_norm": 17.767154693603516, "learning_rate": 9.752682372977576e-05, "loss": 0.3217710733413696, "step": 8720 }, { "epoch": 2.478001703093954, "grad_norm": 30.87446403503418, "learning_rate": 9.75239852398524e-05, "loss": 0.2890691041946411, "step": 8730 }, { "epoch": 2.4808401930173147, "grad_norm": 20.463594436645508, "learning_rate": 9.752114674992904e-05, "loss": 0.30511558055877686, "step": 8740 }, { "epoch": 2.4836786829406754, "grad_norm": 23.91307258605957, "learning_rate": 9.751830826000568e-05, "loss": 0.3336294174194336, "step": 8750 }, { "epoch": 2.4865171728640365, "grad_norm": 16.377883911132812, "learning_rate": 9.751546977008232e-05, "loss": 0.28200268745422363, "step": 8760 }, { "epoch": 2.489355662787397, "grad_norm": 16.278593063354492, "learning_rate": 9.751263128015896e-05, "loss": 0.2989437818527222, "step": 8770 }, { "epoch": 2.492194152710758, "grad_norm": 22.372589111328125, "learning_rate": 9.75097927902356e-05, "loss": 0.298288631439209, "step": 8780 }, { "epoch": 2.4950326426341185, "grad_norm": 15.453804016113281, "learning_rate": 9.750695430031223e-05, "loss": 0.26072652339935304, "step": 8790 }, { "epoch": 2.4978711325574796, "grad_norm": 19.527374267578125, "learning_rate": 9.750411581038887e-05, "loss": 0.26543455123901366, "step": 8800 }, { "epoch": 2.5007096224808403, "grad_norm": 19.62587547302246, "learning_rate": 9.750127732046552e-05, "loss": 0.37260947227478025, "step": 8810 }, { "epoch": 2.503548112404201, "grad_norm": 19.925113677978516, "learning_rate": 9.749843883054216e-05, "loss": 0.33139777183532715, "step": 8820 }, { "epoch": 2.5063866023275616, "grad_norm": 17.96927261352539, "learning_rate": 9.74956003406188e-05, "loss": 0.27111947536468506, "step": 8830 }, { "epoch": 2.5092250922509223, "grad_norm": 13.09363079071045, "learning_rate": 9.749276185069544e-05, "loss": 0.2507824420928955, "step": 8840 }, { "epoch": 2.5120635821742834, "grad_norm": 10.441879272460938, "learning_rate": 9.748992336077207e-05, "loss": 0.2787137985229492, "step": 8850 }, { "epoch": 2.514902072097644, "grad_norm": 18.93663215637207, "learning_rate": 9.748708487084871e-05, "loss": 0.28047478199005127, "step": 8860 }, { "epoch": 2.5177405620210047, "grad_norm": 17.196422576904297, "learning_rate": 9.748424638092535e-05, "loss": 0.2845618486404419, "step": 8870 }, { "epoch": 2.520579051944366, "grad_norm": 22.02267837524414, "learning_rate": 9.748140789100199e-05, "loss": 0.25998194217681886, "step": 8880 }, { "epoch": 2.5234175418677265, "grad_norm": 12.58385181427002, "learning_rate": 9.747856940107863e-05, "loss": 0.24599096775054932, "step": 8890 }, { "epoch": 2.526256031791087, "grad_norm": 18.140384674072266, "learning_rate": 9.747573091115527e-05, "loss": 0.2813450336456299, "step": 8900 }, { "epoch": 2.529094521714448, "grad_norm": 19.379297256469727, "learning_rate": 9.747289242123192e-05, "loss": 0.2888960361480713, "step": 8910 }, { "epoch": 2.5319330116378085, "grad_norm": 19.3806209564209, "learning_rate": 9.747005393130854e-05, "loss": 0.31046576499938966, "step": 8920 }, { "epoch": 2.5347715015611696, "grad_norm": 19.743083953857422, "learning_rate": 9.746721544138518e-05, "loss": 0.3509793519973755, "step": 8930 }, { "epoch": 2.5376099914845303, "grad_norm": 25.64822769165039, "learning_rate": 9.746437695146183e-05, "loss": 0.32240374088287355, "step": 8940 }, { "epoch": 2.540448481407891, "grad_norm": 19.924394607543945, "learning_rate": 9.746153846153847e-05, "loss": 0.3332366466522217, "step": 8950 }, { "epoch": 2.543286971331252, "grad_norm": 14.92116641998291, "learning_rate": 9.745869997161511e-05, "loss": 0.2333456516265869, "step": 8960 }, { "epoch": 2.5461254612546127, "grad_norm": 12.108552932739258, "learning_rate": 9.745586148169175e-05, "loss": 0.286391282081604, "step": 8970 }, { "epoch": 2.5489639511779734, "grad_norm": 29.60675811767578, "learning_rate": 9.745302299176838e-05, "loss": 0.30680859088897705, "step": 8980 }, { "epoch": 2.551802441101334, "grad_norm": 22.1567440032959, "learning_rate": 9.745018450184502e-05, "loss": 0.2720050573348999, "step": 8990 }, { "epoch": 2.5546409310246947, "grad_norm": 14.802998542785645, "learning_rate": 9.744734601192166e-05, "loss": 0.271009635925293, "step": 9000 }, { "epoch": 2.5546409310246947, "eval_accuracy": 0.8457429897628282, "eval_loss": 0.47256723046302795, "eval_runtime": 31.5425, "eval_samples_per_second": 498.597, "eval_steps_per_second": 7.799, "step": 9000 }, { "epoch": 2.557479420948056, "grad_norm": 14.7639799118042, "learning_rate": 9.74445075219983e-05, "loss": 0.257672119140625, "step": 9010 }, { "epoch": 2.5603179108714165, "grad_norm": 18.669347763061523, "learning_rate": 9.744166903207494e-05, "loss": 0.28345017433166503, "step": 9020 }, { "epoch": 2.563156400794777, "grad_norm": 19.739337921142578, "learning_rate": 9.743883054215158e-05, "loss": 0.2709732294082642, "step": 9030 }, { "epoch": 2.565994890718138, "grad_norm": 16.077856063842773, "learning_rate": 9.743599205222823e-05, "loss": 0.2761526584625244, "step": 9040 }, { "epoch": 2.5688333806414985, "grad_norm": 26.328725814819336, "learning_rate": 9.743315356230485e-05, "loss": 0.2954942941665649, "step": 9050 }, { "epoch": 2.5716718705648596, "grad_norm": 17.859054565429688, "learning_rate": 9.74303150723815e-05, "loss": 0.28468611240386965, "step": 9060 }, { "epoch": 2.5745103604882202, "grad_norm": 12.289284706115723, "learning_rate": 9.742747658245814e-05, "loss": 0.24026684761047362, "step": 9070 }, { "epoch": 2.577348850411581, "grad_norm": 17.859779357910156, "learning_rate": 9.742463809253478e-05, "loss": 0.2966356992721558, "step": 9080 }, { "epoch": 2.580187340334942, "grad_norm": 17.773258209228516, "learning_rate": 9.742179960261142e-05, "loss": 0.2913994312286377, "step": 9090 }, { "epoch": 2.5830258302583027, "grad_norm": 25.365699768066406, "learning_rate": 9.741896111268806e-05, "loss": 0.2949223518371582, "step": 9100 }, { "epoch": 2.5858643201816633, "grad_norm": 17.04157066345215, "learning_rate": 9.741612262276469e-05, "loss": 0.28518497943878174, "step": 9110 }, { "epoch": 2.588702810105024, "grad_norm": 14.898686408996582, "learning_rate": 9.741328413284133e-05, "loss": 0.24409422874450684, "step": 9120 }, { "epoch": 2.5915413000283847, "grad_norm": 19.66142463684082, "learning_rate": 9.741044564291797e-05, "loss": 0.280489444732666, "step": 9130 }, { "epoch": 2.594379789951746, "grad_norm": 13.848917007446289, "learning_rate": 9.740760715299461e-05, "loss": 0.25444056987762453, "step": 9140 }, { "epoch": 2.5972182798751065, "grad_norm": 12.390437126159668, "learning_rate": 9.740476866307125e-05, "loss": 0.3165001392364502, "step": 9150 }, { "epoch": 2.600056769798467, "grad_norm": 9.805212020874023, "learning_rate": 9.74019301731479e-05, "loss": 0.2544929027557373, "step": 9160 }, { "epoch": 2.6028952597218282, "grad_norm": 21.45377540588379, "learning_rate": 9.739909168322454e-05, "loss": 0.2922818183898926, "step": 9170 }, { "epoch": 2.605733749645189, "grad_norm": 12.3560209274292, "learning_rate": 9.739625319330116e-05, "loss": 0.2575647830963135, "step": 9180 }, { "epoch": 2.6085722395685496, "grad_norm": 19.43159294128418, "learning_rate": 9.73934147033778e-05, "loss": 0.3042638301849365, "step": 9190 }, { "epoch": 2.61141072949191, "grad_norm": 19.24188995361328, "learning_rate": 9.739057621345445e-05, "loss": 0.2710337400436401, "step": 9200 }, { "epoch": 2.614249219415271, "grad_norm": 17.602937698364258, "learning_rate": 9.738773772353108e-05, "loss": 0.35931103229522704, "step": 9210 }, { "epoch": 2.617087709338632, "grad_norm": 25.234039306640625, "learning_rate": 9.738489923360773e-05, "loss": 0.3196626424789429, "step": 9220 }, { "epoch": 2.6199261992619927, "grad_norm": 21.392139434814453, "learning_rate": 9.738206074368437e-05, "loss": 0.2411559820175171, "step": 9230 }, { "epoch": 2.6227646891853533, "grad_norm": 14.251041412353516, "learning_rate": 9.7379222253761e-05, "loss": 0.27556719779968264, "step": 9240 }, { "epoch": 2.6256031791087144, "grad_norm": 16.886171340942383, "learning_rate": 9.737638376383764e-05, "loss": 0.26039795875549315, "step": 9250 }, { "epoch": 2.628441669032075, "grad_norm": 15.684998512268066, "learning_rate": 9.737354527391428e-05, "loss": 0.2410115957260132, "step": 9260 }, { "epoch": 2.6312801589554358, "grad_norm": 13.421048164367676, "learning_rate": 9.737070678399092e-05, "loss": 0.2873006105422974, "step": 9270 }, { "epoch": 2.6341186488787964, "grad_norm": 23.026832580566406, "learning_rate": 9.736786829406756e-05, "loss": 0.30839715003967283, "step": 9280 }, { "epoch": 2.636957138802157, "grad_norm": 13.106962203979492, "learning_rate": 9.73650298041442e-05, "loss": 0.31653358936309817, "step": 9290 }, { "epoch": 2.639795628725518, "grad_norm": 17.663724899291992, "learning_rate": 9.736219131422085e-05, "loss": 0.3148358821868896, "step": 9300 }, { "epoch": 2.642634118648879, "grad_norm": 13.780986785888672, "learning_rate": 9.735935282429748e-05, "loss": 0.2812326669692993, "step": 9310 }, { "epoch": 2.6454726085722395, "grad_norm": 15.959388732910156, "learning_rate": 9.735651433437412e-05, "loss": 0.27190275192260743, "step": 9320 }, { "epoch": 2.6483110984956, "grad_norm": 19.790674209594727, "learning_rate": 9.735367584445076e-05, "loss": 0.28694896697998046, "step": 9330 }, { "epoch": 2.651149588418961, "grad_norm": 24.940673828125, "learning_rate": 9.735083735452739e-05, "loss": 0.2751932621002197, "step": 9340 }, { "epoch": 2.653988078342322, "grad_norm": 14.492531776428223, "learning_rate": 9.734799886460404e-05, "loss": 0.2863935947418213, "step": 9350 }, { "epoch": 2.6568265682656826, "grad_norm": 20.169912338256836, "learning_rate": 9.734516037468068e-05, "loss": 0.3258250951766968, "step": 9360 }, { "epoch": 2.6596650581890433, "grad_norm": 25.907611846923828, "learning_rate": 9.734232188475731e-05, "loss": 0.313360857963562, "step": 9370 }, { "epoch": 2.6625035481124044, "grad_norm": 17.57925796508789, "learning_rate": 9.733948339483395e-05, "loss": 0.2980458974838257, "step": 9380 }, { "epoch": 2.665342038035765, "grad_norm": 16.746706008911133, "learning_rate": 9.733664490491059e-05, "loss": 0.28972275257110597, "step": 9390 }, { "epoch": 2.6681805279591257, "grad_norm": 30.41115951538086, "learning_rate": 9.733380641498723e-05, "loss": 0.2900726079940796, "step": 9400 }, { "epoch": 2.6710190178824864, "grad_norm": 13.418630599975586, "learning_rate": 9.733096792506388e-05, "loss": 0.2452016830444336, "step": 9410 }, { "epoch": 2.673857507805847, "grad_norm": 20.504331588745117, "learning_rate": 9.732812943514052e-05, "loss": 0.24994661808013915, "step": 9420 }, { "epoch": 2.676695997729208, "grad_norm": 29.031526565551758, "learning_rate": 9.732529094521714e-05, "loss": 0.31679112911224366, "step": 9430 }, { "epoch": 2.679534487652569, "grad_norm": 23.531126022338867, "learning_rate": 9.732245245529379e-05, "loss": 0.2530877351760864, "step": 9440 }, { "epoch": 2.6823729775759295, "grad_norm": 11.991886138916016, "learning_rate": 9.731961396537043e-05, "loss": 0.24304654598236083, "step": 9450 }, { "epoch": 2.6852114674992906, "grad_norm": 12.916497230529785, "learning_rate": 9.731677547544707e-05, "loss": 0.2319650650024414, "step": 9460 }, { "epoch": 2.6880499574226513, "grad_norm": 17.324609756469727, "learning_rate": 9.73139369855237e-05, "loss": 0.23163559436798095, "step": 9470 }, { "epoch": 2.690888447346012, "grad_norm": 19.111806869506836, "learning_rate": 9.731109849560035e-05, "loss": 0.2528048515319824, "step": 9480 }, { "epoch": 2.6937269372693726, "grad_norm": 17.495214462280273, "learning_rate": 9.730826000567699e-05, "loss": 0.30721681118011473, "step": 9490 }, { "epoch": 2.6965654271927333, "grad_norm": 21.38582992553711, "learning_rate": 9.730542151575362e-05, "loss": 0.26047730445861816, "step": 9500 }, { "epoch": 2.6965654271927333, "eval_accuracy": 0.8493673300693075, "eval_loss": 0.44921496510505676, "eval_runtime": 31.0649, "eval_samples_per_second": 506.263, "eval_steps_per_second": 7.919, "step": 9500 }, { "epoch": 2.6994039171160944, "grad_norm": 20.519363403320312, "learning_rate": 9.730258302583026e-05, "loss": 0.29499478340148927, "step": 9510 }, { "epoch": 2.702242407039455, "grad_norm": 19.01187515258789, "learning_rate": 9.72997445359069e-05, "loss": 0.2947841644287109, "step": 9520 }, { "epoch": 2.7050808969628157, "grad_norm": 21.62796401977539, "learning_rate": 9.729690604598353e-05, "loss": 0.3381195545196533, "step": 9530 }, { "epoch": 2.7079193868861764, "grad_norm": 18.938907623291016, "learning_rate": 9.729406755606017e-05, "loss": 0.2477346420288086, "step": 9540 }, { "epoch": 2.710757876809537, "grad_norm": 22.521770477294922, "learning_rate": 9.729122906613683e-05, "loss": 0.2575953960418701, "step": 9550 }, { "epoch": 2.713596366732898, "grad_norm": 22.870479583740234, "learning_rate": 9.728839057621346e-05, "loss": 0.2541611909866333, "step": 9560 }, { "epoch": 2.716434856656259, "grad_norm": 21.431659698486328, "learning_rate": 9.72855520862901e-05, "loss": 0.2780029773712158, "step": 9570 }, { "epoch": 2.7192733465796195, "grad_norm": 24.88163948059082, "learning_rate": 9.728271359636674e-05, "loss": 0.29173128604888915, "step": 9580 }, { "epoch": 2.7221118365029806, "grad_norm": 25.26793098449707, "learning_rate": 9.727987510644338e-05, "loss": 0.31362454891204833, "step": 9590 }, { "epoch": 2.7249503264263413, "grad_norm": 18.78021812438965, "learning_rate": 9.727703661652001e-05, "loss": 0.2751483917236328, "step": 9600 }, { "epoch": 2.727788816349702, "grad_norm": 29.22690200805664, "learning_rate": 9.727419812659666e-05, "loss": 0.29752316474914553, "step": 9610 }, { "epoch": 2.7306273062730626, "grad_norm": 23.88349151611328, "learning_rate": 9.72713596366733e-05, "loss": 0.25617666244506837, "step": 9620 }, { "epoch": 2.7334657961964233, "grad_norm": 12.977919578552246, "learning_rate": 9.726852114674993e-05, "loss": 0.23914566040039062, "step": 9630 }, { "epoch": 2.7363042861197844, "grad_norm": 18.041250228881836, "learning_rate": 9.726568265682657e-05, "loss": 0.26929564476013185, "step": 9640 }, { "epoch": 2.739142776043145, "grad_norm": 13.003925323486328, "learning_rate": 9.726284416690321e-05, "loss": 0.28539423942565917, "step": 9650 }, { "epoch": 2.7419812659665057, "grad_norm": 17.521793365478516, "learning_rate": 9.726000567697984e-05, "loss": 0.24657120704650878, "step": 9660 }, { "epoch": 2.744819755889867, "grad_norm": 13.071157455444336, "learning_rate": 9.725716718705648e-05, "loss": 0.2956104040145874, "step": 9670 }, { "epoch": 2.7476582458132275, "grad_norm": 15.85848331451416, "learning_rate": 9.725432869713314e-05, "loss": 0.28966264724731444, "step": 9680 }, { "epoch": 2.750496735736588, "grad_norm": 23.094085693359375, "learning_rate": 9.725149020720977e-05, "loss": 0.33190135955810546, "step": 9690 }, { "epoch": 2.753335225659949, "grad_norm": 13.309473037719727, "learning_rate": 9.724865171728641e-05, "loss": 0.2656923770904541, "step": 9700 }, { "epoch": 2.7561737155833095, "grad_norm": 15.193010330200195, "learning_rate": 9.724581322736305e-05, "loss": 0.3423954725265503, "step": 9710 }, { "epoch": 2.7590122055066706, "grad_norm": 13.571406364440918, "learning_rate": 9.724297473743969e-05, "loss": 0.2532841920852661, "step": 9720 }, { "epoch": 2.7618506954300313, "grad_norm": 11.178678512573242, "learning_rate": 9.724013624751632e-05, "loss": 0.25420119762420657, "step": 9730 }, { "epoch": 2.764689185353392, "grad_norm": 19.80664825439453, "learning_rate": 9.723729775759296e-05, "loss": 0.2993446350097656, "step": 9740 }, { "epoch": 2.767527675276753, "grad_norm": 17.400503158569336, "learning_rate": 9.723445926766961e-05, "loss": 0.26490628719329834, "step": 9750 }, { "epoch": 2.7703661652001137, "grad_norm": 17.312856674194336, "learning_rate": 9.723162077774624e-05, "loss": 0.25854156017303465, "step": 9760 }, { "epoch": 2.7732046551234744, "grad_norm": 15.450798988342285, "learning_rate": 9.722878228782288e-05, "loss": 0.222100830078125, "step": 9770 }, { "epoch": 2.776043145046835, "grad_norm": 27.243274688720703, "learning_rate": 9.722594379789953e-05, "loss": 0.26625804901123046, "step": 9780 }, { "epoch": 2.7788816349701957, "grad_norm": 14.516337394714355, "learning_rate": 9.722310530797615e-05, "loss": 0.2396000862121582, "step": 9790 }, { "epoch": 2.781720124893557, "grad_norm": 22.565673828125, "learning_rate": 9.72202668180528e-05, "loss": 0.2102586030960083, "step": 9800 }, { "epoch": 2.7845586148169175, "grad_norm": 16.52597999572754, "learning_rate": 9.721742832812945e-05, "loss": 0.263578200340271, "step": 9810 }, { "epoch": 2.787397104740278, "grad_norm": 7.076500415802002, "learning_rate": 9.721458983820608e-05, "loss": 0.2602548360824585, "step": 9820 }, { "epoch": 2.790235594663639, "grad_norm": 11.145809173583984, "learning_rate": 9.721175134828272e-05, "loss": 0.2652809858322144, "step": 9830 }, { "epoch": 2.7930740845869995, "grad_norm": 20.258697509765625, "learning_rate": 9.720891285835936e-05, "loss": 0.2715002536773682, "step": 9840 }, { "epoch": 2.7959125745103606, "grad_norm": 16.852272033691406, "learning_rate": 9.7206074368436e-05, "loss": 0.22555181980133057, "step": 9850 }, { "epoch": 2.7987510644337212, "grad_norm": 17.305248260498047, "learning_rate": 9.720323587851263e-05, "loss": 0.21825485229492186, "step": 9860 }, { "epoch": 2.801589554357082, "grad_norm": 13.663861274719238, "learning_rate": 9.720039738858927e-05, "loss": 0.24384195804595948, "step": 9870 }, { "epoch": 2.804428044280443, "grad_norm": 21.773473739624023, "learning_rate": 9.719755889866593e-05, "loss": 0.31784915924072266, "step": 9880 }, { "epoch": 2.8072665342038037, "grad_norm": 23.478778839111328, "learning_rate": 9.719472040874255e-05, "loss": 0.2890221357345581, "step": 9890 }, { "epoch": 2.8101050241271643, "grad_norm": 15.169078826904297, "learning_rate": 9.71918819188192e-05, "loss": 0.27158963680267334, "step": 9900 }, { "epoch": 2.812943514050525, "grad_norm": 21.22194480895996, "learning_rate": 9.718904342889584e-05, "loss": 0.2151625633239746, "step": 9910 }, { "epoch": 2.8157820039738857, "grad_norm": 23.666257858276367, "learning_rate": 9.718620493897246e-05, "loss": 0.2504837989807129, "step": 9920 }, { "epoch": 2.818620493897247, "grad_norm": 19.463533401489258, "learning_rate": 9.71833664490491e-05, "loss": 0.2768238544464111, "step": 9930 }, { "epoch": 2.8214589838206074, "grad_norm": 13.129846572875977, "learning_rate": 9.718052795912575e-05, "loss": 0.3096976041793823, "step": 9940 }, { "epoch": 2.824297473743968, "grad_norm": 16.599721908569336, "learning_rate": 9.717768946920239e-05, "loss": 0.27189714908599855, "step": 9950 }, { "epoch": 2.827135963667329, "grad_norm": 21.78156089782715, "learning_rate": 9.717485097927903e-05, "loss": 0.25347445011138914, "step": 9960 }, { "epoch": 2.82997445359069, "grad_norm": 14.555994987487793, "learning_rate": 9.717201248935567e-05, "loss": 0.3180224895477295, "step": 9970 }, { "epoch": 2.8328129435140506, "grad_norm": 17.541913986206055, "learning_rate": 9.716917399943231e-05, "loss": 0.2798717498779297, "step": 9980 }, { "epoch": 2.835651433437411, "grad_norm": 15.366768836975098, "learning_rate": 9.716633550950894e-05, "loss": 0.23516781330108644, "step": 9990 }, { "epoch": 2.838489923360772, "grad_norm": 18.413864135742188, "learning_rate": 9.716349701958558e-05, "loss": 0.2668823003768921, "step": 10000 }, { "epoch": 2.838489923360772, "eval_accuracy": 0.8517835569402937, "eval_loss": 0.4507799446582794, "eval_runtime": 30.7608, "eval_samples_per_second": 511.268, "eval_steps_per_second": 7.997, "step": 10000 }, { "epoch": 2.841328413284133, "grad_norm": 21.122041702270508, "learning_rate": 9.716065852966224e-05, "loss": 0.2700818061828613, "step": 10010 }, { "epoch": 2.8441669032074937, "grad_norm": 18.726993560791016, "learning_rate": 9.715782003973886e-05, "loss": 0.2364410638809204, "step": 10020 }, { "epoch": 2.8470053931308543, "grad_norm": 16.451772689819336, "learning_rate": 9.71549815498155e-05, "loss": 0.2411497116088867, "step": 10030 }, { "epoch": 2.8498438830542154, "grad_norm": 24.123859405517578, "learning_rate": 9.715214305989215e-05, "loss": 0.2889240264892578, "step": 10040 }, { "epoch": 2.8526823729775757, "grad_norm": 13.031628608703613, "learning_rate": 9.714930456996877e-05, "loss": 0.2588303804397583, "step": 10050 }, { "epoch": 2.8555208629009368, "grad_norm": 17.849319458007812, "learning_rate": 9.714646608004542e-05, "loss": 0.25627334117889405, "step": 10060 }, { "epoch": 2.8583593528242974, "grad_norm": 19.63104820251465, "learning_rate": 9.714362759012206e-05, "loss": 0.2885117530822754, "step": 10070 }, { "epoch": 2.861197842747658, "grad_norm": 13.781159400939941, "learning_rate": 9.71407891001987e-05, "loss": 0.2903496265411377, "step": 10080 }, { "epoch": 2.864036332671019, "grad_norm": 12.115455627441406, "learning_rate": 9.713795061027534e-05, "loss": 0.2852003812789917, "step": 10090 }, { "epoch": 2.86687482259438, "grad_norm": 17.76203155517578, "learning_rate": 9.713511212035198e-05, "loss": 0.2570801258087158, "step": 10100 }, { "epoch": 2.8697133125177405, "grad_norm": 18.625986099243164, "learning_rate": 9.713227363042862e-05, "loss": 0.2592076301574707, "step": 10110 }, { "epoch": 2.872551802441101, "grad_norm": 13.73946475982666, "learning_rate": 9.712943514050525e-05, "loss": 0.22207555770874024, "step": 10120 }, { "epoch": 2.875390292364462, "grad_norm": 12.634784698486328, "learning_rate": 9.712659665058189e-05, "loss": 0.24926607608795165, "step": 10130 }, { "epoch": 2.878228782287823, "grad_norm": 19.480297088623047, "learning_rate": 9.712375816065853e-05, "loss": 0.26073637008666994, "step": 10140 }, { "epoch": 2.8810672722111836, "grad_norm": 14.653205871582031, "learning_rate": 9.712091967073517e-05, "loss": 0.23487539291381837, "step": 10150 }, { "epoch": 2.8839057621345443, "grad_norm": 15.37935733795166, "learning_rate": 9.711808118081182e-05, "loss": 0.24443528652191163, "step": 10160 }, { "epoch": 2.8867442520579054, "grad_norm": 17.303863525390625, "learning_rate": 9.711524269088846e-05, "loss": 0.2529600143432617, "step": 10170 }, { "epoch": 2.889582741981266, "grad_norm": 33.37606430053711, "learning_rate": 9.711240420096509e-05, "loss": 0.2778038263320923, "step": 10180 }, { "epoch": 2.8924212319046267, "grad_norm": 19.504133224487305, "learning_rate": 9.710956571104173e-05, "loss": 0.29585649967193606, "step": 10190 }, { "epoch": 2.8952597218279874, "grad_norm": 17.723546981811523, "learning_rate": 9.710672722111837e-05, "loss": 0.24174587726593016, "step": 10200 }, { "epoch": 2.898098211751348, "grad_norm": 15.686098098754883, "learning_rate": 9.710388873119501e-05, "loss": 0.27827978134155273, "step": 10210 }, { "epoch": 2.900936701674709, "grad_norm": 14.3742094039917, "learning_rate": 9.710105024127165e-05, "loss": 0.21491069793701173, "step": 10220 }, { "epoch": 2.90377519159807, "grad_norm": 18.604324340820312, "learning_rate": 9.709821175134829e-05, "loss": 0.23633861541748047, "step": 10230 }, { "epoch": 2.9066136815214305, "grad_norm": 24.143869400024414, "learning_rate": 9.709537326142493e-05, "loss": 0.2693354368209839, "step": 10240 }, { "epoch": 2.9094521714447916, "grad_norm": 21.800477981567383, "learning_rate": 9.709253477150156e-05, "loss": 0.22950847148895265, "step": 10250 }, { "epoch": 2.9122906613681523, "grad_norm": 20.529905319213867, "learning_rate": 9.70896962815782e-05, "loss": 0.2422048568725586, "step": 10260 }, { "epoch": 2.915129151291513, "grad_norm": 20.537445068359375, "learning_rate": 9.708685779165484e-05, "loss": 0.2803179264068604, "step": 10270 }, { "epoch": 2.9179676412148736, "grad_norm": 16.101655960083008, "learning_rate": 9.708401930173149e-05, "loss": 0.24713599681854248, "step": 10280 }, { "epoch": 2.9208061311382343, "grad_norm": 10.84926986694336, "learning_rate": 9.708118081180813e-05, "loss": 0.2222980499267578, "step": 10290 }, { "epoch": 2.9236446210615954, "grad_norm": 12.458867073059082, "learning_rate": 9.707834232188477e-05, "loss": 0.24086337089538573, "step": 10300 }, { "epoch": 2.926483110984956, "grad_norm": 17.04847526550293, "learning_rate": 9.70755038319614e-05, "loss": 0.2354985237121582, "step": 10310 }, { "epoch": 2.9293216009083167, "grad_norm": 25.308412551879883, "learning_rate": 9.707266534203804e-05, "loss": 0.27909610271453855, "step": 10320 }, { "epoch": 2.9321600908316774, "grad_norm": 17.994943618774414, "learning_rate": 9.706982685211468e-05, "loss": 0.2386638879776001, "step": 10330 }, { "epoch": 2.934998580755038, "grad_norm": 14.818497657775879, "learning_rate": 9.706698836219132e-05, "loss": 0.240454363822937, "step": 10340 }, { "epoch": 2.937837070678399, "grad_norm": 13.345973014831543, "learning_rate": 9.706414987226796e-05, "loss": 0.24808545112609864, "step": 10350 }, { "epoch": 2.94067556060176, "grad_norm": 21.07094383239746, "learning_rate": 9.70613113823446e-05, "loss": 0.2733091592788696, "step": 10360 }, { "epoch": 2.9435140505251205, "grad_norm": 18.607572555541992, "learning_rate": 9.705847289242123e-05, "loss": 0.25334250926971436, "step": 10370 }, { "epoch": 2.9463525404484816, "grad_norm": 18.747655868530273, "learning_rate": 9.705563440249787e-05, "loss": 0.2256793737411499, "step": 10380 }, { "epoch": 2.9491910303718423, "grad_norm": 14.889187812805176, "learning_rate": 9.705279591257451e-05, "loss": 0.21279048919677734, "step": 10390 }, { "epoch": 2.952029520295203, "grad_norm": 14.380620002746582, "learning_rate": 9.704995742265115e-05, "loss": 0.24076695442199708, "step": 10400 }, { "epoch": 2.9548680102185636, "grad_norm": 30.139314651489258, "learning_rate": 9.70471189327278e-05, "loss": 0.26405456066131594, "step": 10410 }, { "epoch": 2.9577065001419243, "grad_norm": 22.04210090637207, "learning_rate": 9.704428044280444e-05, "loss": 0.27288987636566164, "step": 10420 }, { "epoch": 2.9605449900652854, "grad_norm": 10.629016876220703, "learning_rate": 9.704144195288108e-05, "loss": 0.19840748310089112, "step": 10430 }, { "epoch": 2.963383479988646, "grad_norm": 14.220978736877441, "learning_rate": 9.70386034629577e-05, "loss": 0.2405843496322632, "step": 10440 }, { "epoch": 2.9662219699120067, "grad_norm": 28.67279815673828, "learning_rate": 9.703576497303435e-05, "loss": 0.23020591735839843, "step": 10450 }, { "epoch": 2.969060459835368, "grad_norm": 17.291454315185547, "learning_rate": 9.703292648311099e-05, "loss": 0.2988447666168213, "step": 10460 }, { "epoch": 2.9718989497587285, "grad_norm": 13.446309089660645, "learning_rate": 9.703008799318762e-05, "loss": 0.268503737449646, "step": 10470 }, { "epoch": 2.974737439682089, "grad_norm": 12.08239459991455, "learning_rate": 9.702724950326427e-05, "loss": 0.2384115695953369, "step": 10480 }, { "epoch": 2.97757592960545, "grad_norm": 12.028255462646484, "learning_rate": 9.702441101334091e-05, "loss": 0.24648849964141845, "step": 10490 }, { "epoch": 2.9804144195288105, "grad_norm": 14.870538711547852, "learning_rate": 9.702157252341754e-05, "loss": 0.26308369636535645, "step": 10500 }, { "epoch": 2.9804144195288105, "eval_accuracy": 0.864373370636485, "eval_loss": 0.41203802824020386, "eval_runtime": 31.377, "eval_samples_per_second": 501.228, "eval_steps_per_second": 7.84, "step": 10500 }, { "epoch": 2.9832529094521716, "grad_norm": 14.513052940368652, "learning_rate": 9.701873403349418e-05, "loss": 0.2901655912399292, "step": 10510 }, { "epoch": 2.9860913993755323, "grad_norm": 24.498777389526367, "learning_rate": 9.701589554357082e-05, "loss": 0.2695554256439209, "step": 10520 }, { "epoch": 2.988929889298893, "grad_norm": 28.75697135925293, "learning_rate": 9.701305705364747e-05, "loss": 0.22772583961486817, "step": 10530 }, { "epoch": 2.991768379222254, "grad_norm": 9.938371658325195, "learning_rate": 9.70102185637241e-05, "loss": 0.2511621952056885, "step": 10540 }, { "epoch": 2.9946068691456147, "grad_norm": 20.586977005004883, "learning_rate": 9.700738007380075e-05, "loss": 0.24575157165527345, "step": 10550 }, { "epoch": 2.9974453590689754, "grad_norm": 12.629380226135254, "learning_rate": 9.700454158387739e-05, "loss": 0.2179649591445923, "step": 10560 }, { "epoch": 3.000283848992336, "grad_norm": 12.893305778503418, "learning_rate": 9.700170309395402e-05, "loss": 0.22755892276763917, "step": 10570 }, { "epoch": 3.0031223389156967, "grad_norm": 14.128294944763184, "learning_rate": 9.699886460403066e-05, "loss": 0.21660025119781495, "step": 10580 }, { "epoch": 3.005960828839058, "grad_norm": 25.21160888671875, "learning_rate": 9.69960261141073e-05, "loss": 0.22867438793182374, "step": 10590 }, { "epoch": 3.0087993187624185, "grad_norm": 13.244644165039062, "learning_rate": 9.699318762418393e-05, "loss": 0.24277143478393554, "step": 10600 }, { "epoch": 3.011637808685779, "grad_norm": 18.281429290771484, "learning_rate": 9.699034913426058e-05, "loss": 0.2278064727783203, "step": 10610 }, { "epoch": 3.01447629860914, "grad_norm": 16.516063690185547, "learning_rate": 9.698751064433722e-05, "loss": 0.21306154727935792, "step": 10620 }, { "epoch": 3.017314788532501, "grad_norm": 19.363893508911133, "learning_rate": 9.698467215441385e-05, "loss": 0.2018261432647705, "step": 10630 }, { "epoch": 3.0201532784558616, "grad_norm": 8.60107707977295, "learning_rate": 9.69818336644905e-05, "loss": 0.1982879161834717, "step": 10640 }, { "epoch": 3.0229917683792222, "grad_norm": 22.62881851196289, "learning_rate": 9.697899517456713e-05, "loss": 0.2999326229095459, "step": 10650 }, { "epoch": 3.025830258302583, "grad_norm": 13.846975326538086, "learning_rate": 9.697615668464378e-05, "loss": 0.23673901557922364, "step": 10660 }, { "epoch": 3.028668748225944, "grad_norm": 18.202268600463867, "learning_rate": 9.69733181947204e-05, "loss": 0.2435927391052246, "step": 10670 }, { "epoch": 3.0315072381493047, "grad_norm": 11.563315391540527, "learning_rate": 9.697047970479706e-05, "loss": 0.2007399320602417, "step": 10680 }, { "epoch": 3.0343457280726653, "grad_norm": 19.236194610595703, "learning_rate": 9.69676412148737e-05, "loss": 0.25865540504455564, "step": 10690 }, { "epoch": 3.037184217996026, "grad_norm": 16.745946884155273, "learning_rate": 9.696480272495033e-05, "loss": 0.20650393962860109, "step": 10700 }, { "epoch": 3.0400227079193867, "grad_norm": 18.521982192993164, "learning_rate": 9.696196423502697e-05, "loss": 0.20056042671203614, "step": 10710 }, { "epoch": 3.042861197842748, "grad_norm": 19.786373138427734, "learning_rate": 9.695912574510361e-05, "loss": 0.23505036830902098, "step": 10720 }, { "epoch": 3.0456996877661084, "grad_norm": 10.31456470489502, "learning_rate": 9.695628725518024e-05, "loss": 0.21039993762969972, "step": 10730 }, { "epoch": 3.048538177689469, "grad_norm": 10.725625991821289, "learning_rate": 9.69534487652569e-05, "loss": 0.255189847946167, "step": 10740 }, { "epoch": 3.0513766676128298, "grad_norm": 16.50446891784668, "learning_rate": 9.695061027533354e-05, "loss": 0.2216968297958374, "step": 10750 }, { "epoch": 3.054215157536191, "grad_norm": 16.032379150390625, "learning_rate": 9.694777178541016e-05, "loss": 0.20543622970581055, "step": 10760 }, { "epoch": 3.0570536474595515, "grad_norm": 12.301016807556152, "learning_rate": 9.69449332954868e-05, "loss": 0.1975337028503418, "step": 10770 }, { "epoch": 3.059892137382912, "grad_norm": 16.790771484375, "learning_rate": 9.694209480556345e-05, "loss": 0.25299646854400637, "step": 10780 }, { "epoch": 3.062730627306273, "grad_norm": 25.425148010253906, "learning_rate": 9.693925631564009e-05, "loss": 0.25143113136291506, "step": 10790 }, { "epoch": 3.065569117229634, "grad_norm": 17.002199172973633, "learning_rate": 9.693641782571671e-05, "loss": 0.23155615329742432, "step": 10800 }, { "epoch": 3.0684076071529947, "grad_norm": 12.678319931030273, "learning_rate": 9.693357933579337e-05, "loss": 0.1883660674095154, "step": 10810 }, { "epoch": 3.0712460970763553, "grad_norm": 17.38966941833496, "learning_rate": 9.693074084587001e-05, "loss": 0.24447345733642578, "step": 10820 }, { "epoch": 3.074084586999716, "grad_norm": 14.343957901000977, "learning_rate": 9.692790235594664e-05, "loss": 0.24958877563476561, "step": 10830 }, { "epoch": 3.076923076923077, "grad_norm": 12.602128028869629, "learning_rate": 9.692506386602328e-05, "loss": 0.18532336950302125, "step": 10840 }, { "epoch": 3.0797615668464378, "grad_norm": 12.524943351745605, "learning_rate": 9.692222537609992e-05, "loss": 0.24769043922424316, "step": 10850 }, { "epoch": 3.0826000567697984, "grad_norm": 13.8504638671875, "learning_rate": 9.691938688617655e-05, "loss": 0.18376538753509522, "step": 10860 }, { "epoch": 3.085438546693159, "grad_norm": 22.01972198486328, "learning_rate": 9.691654839625319e-05, "loss": 0.2254868984222412, "step": 10870 }, { "epoch": 3.08827703661652, "grad_norm": 16.021013259887695, "learning_rate": 9.691370990632985e-05, "loss": 0.21665406227111816, "step": 10880 }, { "epoch": 3.091115526539881, "grad_norm": 23.66881561279297, "learning_rate": 9.691087141640647e-05, "loss": 0.19602859020233154, "step": 10890 }, { "epoch": 3.0939540164632415, "grad_norm": 19.660036087036133, "learning_rate": 9.690803292648312e-05, "loss": 0.2671362400054932, "step": 10900 }, { "epoch": 3.096792506386602, "grad_norm": 19.80447769165039, "learning_rate": 9.690519443655976e-05, "loss": 0.20511860847473146, "step": 10910 }, { "epoch": 3.0996309963099633, "grad_norm": 12.218652725219727, "learning_rate": 9.69023559466364e-05, "loss": 0.22975389957427977, "step": 10920 }, { "epoch": 3.102469486233324, "grad_norm": 15.66241455078125, "learning_rate": 9.689951745671303e-05, "loss": 0.20163025856018066, "step": 10930 }, { "epoch": 3.1053079761566846, "grad_norm": 11.016373634338379, "learning_rate": 9.689667896678968e-05, "loss": 0.1971202850341797, "step": 10940 }, { "epoch": 3.1081464660800453, "grad_norm": 10.28996753692627, "learning_rate": 9.689384047686632e-05, "loss": 0.18606898784637452, "step": 10950 }, { "epoch": 3.1109849560034064, "grad_norm": 13.315267562866211, "learning_rate": 9.689100198694295e-05, "loss": 0.21283600330352784, "step": 10960 }, { "epoch": 3.113823445926767, "grad_norm": 15.513428688049316, "learning_rate": 9.688816349701959e-05, "loss": 0.21595463752746583, "step": 10970 }, { "epoch": 3.1166619358501277, "grad_norm": 14.546529769897461, "learning_rate": 9.688532500709623e-05, "loss": 0.18798352479934693, "step": 10980 }, { "epoch": 3.1195004257734884, "grad_norm": 17.30731964111328, "learning_rate": 9.688248651717286e-05, "loss": 0.21172025203704833, "step": 10990 }, { "epoch": 3.122338915696849, "grad_norm": 17.227052688598633, "learning_rate": 9.68796480272495e-05, "loss": 0.19874925613403321, "step": 11000 }, { "epoch": 3.122338915696849, "eval_accuracy": 0.8715584663317861, "eval_loss": 0.39016368985176086, "eval_runtime": 31.3018, "eval_samples_per_second": 502.432, "eval_steps_per_second": 7.859, "step": 11000 }, { "epoch": 3.12517740562021, "grad_norm": 29.575613021850586, "learning_rate": 9.687680953732616e-05, "loss": 0.2470324993133545, "step": 11010 }, { "epoch": 3.128015895543571, "grad_norm": 11.799863815307617, "learning_rate": 9.687397104740278e-05, "loss": 0.19145163297653198, "step": 11020 }, { "epoch": 3.1308543854669315, "grad_norm": 16.00745391845703, "learning_rate": 9.687113255747943e-05, "loss": 0.21735126972198487, "step": 11030 }, { "epoch": 3.133692875390292, "grad_norm": 16.77241325378418, "learning_rate": 9.686829406755607e-05, "loss": 0.21074838638305665, "step": 11040 }, { "epoch": 3.1365313653136533, "grad_norm": 16.993755340576172, "learning_rate": 9.686545557763271e-05, "loss": 0.23533854484558106, "step": 11050 }, { "epoch": 3.139369855237014, "grad_norm": 13.329765319824219, "learning_rate": 9.686261708770934e-05, "loss": 0.23303022384643554, "step": 11060 }, { "epoch": 3.1422083451603746, "grad_norm": 16.305633544921875, "learning_rate": 9.685977859778598e-05, "loss": 0.22058229446411132, "step": 11070 }, { "epoch": 3.1450468350837353, "grad_norm": 18.921321868896484, "learning_rate": 9.685694010786262e-05, "loss": 0.18921666145324706, "step": 11080 }, { "epoch": 3.1478853250070964, "grad_norm": 13.634056091308594, "learning_rate": 9.685410161793926e-05, "loss": 0.1699698805809021, "step": 11090 }, { "epoch": 3.150723814930457, "grad_norm": 16.445297241210938, "learning_rate": 9.68512631280159e-05, "loss": 0.2500131845474243, "step": 11100 }, { "epoch": 3.1535623048538177, "grad_norm": 14.930386543273926, "learning_rate": 9.684842463809254e-05, "loss": 0.23129620552062988, "step": 11110 }, { "epoch": 3.1564007947771784, "grad_norm": 21.532629013061523, "learning_rate": 9.684558614816917e-05, "loss": 0.24727993011474608, "step": 11120 }, { "epoch": 3.1592392847005395, "grad_norm": 18.688865661621094, "learning_rate": 9.684274765824581e-05, "loss": 0.19990921020507812, "step": 11130 }, { "epoch": 3.1620777746239, "grad_norm": 21.205503463745117, "learning_rate": 9.683990916832247e-05, "loss": 0.23148727416992188, "step": 11140 }, { "epoch": 3.164916264547261, "grad_norm": 17.55402183532715, "learning_rate": 9.68370706783991e-05, "loss": 0.23841476440429688, "step": 11150 }, { "epoch": 3.1677547544706215, "grad_norm": 19.211013793945312, "learning_rate": 9.683423218847574e-05, "loss": 0.20569534301757814, "step": 11160 }, { "epoch": 3.1705932443939826, "grad_norm": 23.154470443725586, "learning_rate": 9.683139369855238e-05, "loss": 0.19128456115722656, "step": 11170 }, { "epoch": 3.1734317343173433, "grad_norm": 20.153133392333984, "learning_rate": 9.682855520862902e-05, "loss": 0.2156306028366089, "step": 11180 }, { "epoch": 3.176270224240704, "grad_norm": 17.49920654296875, "learning_rate": 9.682571671870565e-05, "loss": 0.17330524921417237, "step": 11190 }, { "epoch": 3.1791087141640646, "grad_norm": 11.474637985229492, "learning_rate": 9.682287822878229e-05, "loss": 0.19887797832489013, "step": 11200 }, { "epoch": 3.1819472040874253, "grad_norm": 15.783727645874023, "learning_rate": 9.682003973885893e-05, "loss": 0.220632004737854, "step": 11210 }, { "epoch": 3.1847856940107864, "grad_norm": 27.546283721923828, "learning_rate": 9.681720124893557e-05, "loss": 0.25777783393859866, "step": 11220 }, { "epoch": 3.187624183934147, "grad_norm": 12.006989479064941, "learning_rate": 9.681436275901221e-05, "loss": 0.2657755374908447, "step": 11230 }, { "epoch": 3.1904626738575077, "grad_norm": 21.779420852661133, "learning_rate": 9.681152426908885e-05, "loss": 0.25869054794311525, "step": 11240 }, { "epoch": 3.193301163780869, "grad_norm": 15.037901878356934, "learning_rate": 9.680868577916548e-05, "loss": 0.19070355892181395, "step": 11250 }, { "epoch": 3.1961396537042295, "grad_norm": 13.57245922088623, "learning_rate": 9.680584728924212e-05, "loss": 0.19707906246185303, "step": 11260 }, { "epoch": 3.19897814362759, "grad_norm": 17.515422821044922, "learning_rate": 9.680300879931876e-05, "loss": 0.21522417068481445, "step": 11270 }, { "epoch": 3.201816633550951, "grad_norm": 17.102718353271484, "learning_rate": 9.68001703093954e-05, "loss": 0.2115429401397705, "step": 11280 }, { "epoch": 3.2046551234743115, "grad_norm": 22.954265594482422, "learning_rate": 9.679733181947205e-05, "loss": 0.24711823463439941, "step": 11290 }, { "epoch": 3.2074936133976726, "grad_norm": 12.616451263427734, "learning_rate": 9.679449332954869e-05, "loss": 0.2089317560195923, "step": 11300 }, { "epoch": 3.2103321033210332, "grad_norm": 10.590105056762695, "learning_rate": 9.679165483962532e-05, "loss": 0.219266939163208, "step": 11310 }, { "epoch": 3.213170593244394, "grad_norm": 21.14474105834961, "learning_rate": 9.678881634970196e-05, "loss": 0.22574622631073, "step": 11320 }, { "epoch": 3.2160090831677546, "grad_norm": 29.06281852722168, "learning_rate": 9.67859778597786e-05, "loss": 0.22632551193237305, "step": 11330 }, { "epoch": 3.2188475730911157, "grad_norm": 13.382645606994629, "learning_rate": 9.678313936985524e-05, "loss": 0.2735266208648682, "step": 11340 }, { "epoch": 3.2216860630144764, "grad_norm": 14.074967384338379, "learning_rate": 9.678030087993188e-05, "loss": 0.1919063687324524, "step": 11350 }, { "epoch": 3.224524552937837, "grad_norm": 15.914581298828125, "learning_rate": 9.677746239000852e-05, "loss": 0.2603232145309448, "step": 11360 }, { "epoch": 3.2273630428611977, "grad_norm": 18.473819732666016, "learning_rate": 9.677462390008516e-05, "loss": 0.2510881185531616, "step": 11370 }, { "epoch": 3.230201532784559, "grad_norm": 22.8264217376709, "learning_rate": 9.677178541016179e-05, "loss": 0.2662872076034546, "step": 11380 }, { "epoch": 3.2330400227079195, "grad_norm": 16.1934871673584, "learning_rate": 9.676894692023843e-05, "loss": 0.2068319797515869, "step": 11390 }, { "epoch": 3.23587851263128, "grad_norm": 11.467836380004883, "learning_rate": 9.676610843031508e-05, "loss": 0.20871257781982422, "step": 11400 }, { "epoch": 3.238717002554641, "grad_norm": 15.661177635192871, "learning_rate": 9.676326994039172e-05, "loss": 0.19979265928268433, "step": 11410 }, { "epoch": 3.241555492478002, "grad_norm": 12.25120735168457, "learning_rate": 9.676043145046836e-05, "loss": 0.2359541177749634, "step": 11420 }, { "epoch": 3.2443939824013626, "grad_norm": 15.13478946685791, "learning_rate": 9.6757592960545e-05, "loss": 0.17350263595581056, "step": 11430 }, { "epoch": 3.2472324723247232, "grad_norm": 17.476802825927734, "learning_rate": 9.675475447062163e-05, "loss": 0.224137282371521, "step": 11440 }, { "epoch": 3.250070962248084, "grad_norm": 24.93374252319336, "learning_rate": 9.675191598069827e-05, "loss": 0.20631773471832277, "step": 11450 }, { "epoch": 3.252909452171445, "grad_norm": 26.18758773803711, "learning_rate": 9.674907749077491e-05, "loss": 0.258397650718689, "step": 11460 }, { "epoch": 3.2557479420948057, "grad_norm": 13.750945091247559, "learning_rate": 9.674652284984389e-05, "loss": 0.21751623153686522, "step": 11470 }, { "epoch": 3.2585864320181663, "grad_norm": 17.093978881835938, "learning_rate": 9.674368435992053e-05, "loss": 0.18042036294937133, "step": 11480 }, { "epoch": 3.261424921941527, "grad_norm": 16.70892333984375, "learning_rate": 9.674084586999716e-05, "loss": 0.19773545265197753, "step": 11490 }, { "epoch": 3.2642634118648877, "grad_norm": 15.371891975402832, "learning_rate": 9.67380073800738e-05, "loss": 0.21635334491729735, "step": 11500 }, { "epoch": 3.2642634118648877, "eval_accuracy": 0.8788071469447447, "eval_loss": 0.3605816960334778, "eval_runtime": 30.4556, "eval_samples_per_second": 516.392, "eval_steps_per_second": 8.077, "step": 11500 }, { "epoch": 3.2671019017882488, "grad_norm": 12.340066909790039, "learning_rate": 9.673516889015044e-05, "loss": 0.18395260572433472, "step": 11510 }, { "epoch": 3.2699403917116094, "grad_norm": 15.827431678771973, "learning_rate": 9.673233040022708e-05, "loss": 0.17612080574035643, "step": 11520 }, { "epoch": 3.27277888163497, "grad_norm": 14.852681159973145, "learning_rate": 9.672949191030372e-05, "loss": 0.18402761220932007, "step": 11530 }, { "epoch": 3.275617371558331, "grad_norm": 17.557153701782227, "learning_rate": 9.672665342038037e-05, "loss": 0.20240285396575927, "step": 11540 }, { "epoch": 3.278455861481692, "grad_norm": 18.5665283203125, "learning_rate": 9.672381493045701e-05, "loss": 0.20262906551361085, "step": 11550 }, { "epoch": 3.2812943514050525, "grad_norm": 12.783393859863281, "learning_rate": 9.672097644053364e-05, "loss": 0.2017357349395752, "step": 11560 }, { "epoch": 3.284132841328413, "grad_norm": 18.27359390258789, "learning_rate": 9.671813795061028e-05, "loss": 0.21872055530548096, "step": 11570 }, { "epoch": 3.286971331251774, "grad_norm": 18.7623348236084, "learning_rate": 9.671529946068692e-05, "loss": 0.20855932235717772, "step": 11580 }, { "epoch": 3.289809821175135, "grad_norm": 10.302472114562988, "learning_rate": 9.671246097076356e-05, "loss": 0.18217077255249023, "step": 11590 }, { "epoch": 3.2926483110984957, "grad_norm": 20.435976028442383, "learning_rate": 9.67096224808402e-05, "loss": 0.2270435571670532, "step": 11600 }, { "epoch": 3.2954868010218563, "grad_norm": 10.924196243286133, "learning_rate": 9.670678399091684e-05, "loss": 0.23111917972564697, "step": 11610 }, { "epoch": 3.298325290945217, "grad_norm": 16.25908851623535, "learning_rate": 9.670394550099347e-05, "loss": 0.24706826210021973, "step": 11620 }, { "epoch": 3.301163780868578, "grad_norm": 12.188825607299805, "learning_rate": 9.670110701107011e-05, "loss": 0.20542511940002442, "step": 11630 }, { "epoch": 3.3040022707919388, "grad_norm": 17.599225997924805, "learning_rate": 9.669826852114675e-05, "loss": 0.2173429012298584, "step": 11640 }, { "epoch": 3.3068407607152994, "grad_norm": 14.959057807922363, "learning_rate": 9.66954300312234e-05, "loss": 0.19175952672958374, "step": 11650 }, { "epoch": 3.30967925063866, "grad_norm": 11.093716621398926, "learning_rate": 9.669259154130004e-05, "loss": 0.1973414897918701, "step": 11660 }, { "epoch": 3.312517740562021, "grad_norm": 20.466720581054688, "learning_rate": 9.668975305137668e-05, "loss": 0.24486141204833983, "step": 11670 }, { "epoch": 3.315356230485382, "grad_norm": 13.740006446838379, "learning_rate": 9.668691456145332e-05, "loss": 0.21142239570617677, "step": 11680 }, { "epoch": 3.3181947204087425, "grad_norm": 15.002001762390137, "learning_rate": 9.668407607152995e-05, "loss": 0.20313360691070556, "step": 11690 }, { "epoch": 3.321033210332103, "grad_norm": 8.367109298706055, "learning_rate": 9.668123758160659e-05, "loss": 0.22884137630462648, "step": 11700 }, { "epoch": 3.323871700255464, "grad_norm": 20.374404907226562, "learning_rate": 9.667839909168323e-05, "loss": 0.19571526050567628, "step": 11710 }, { "epoch": 3.326710190178825, "grad_norm": 14.498163223266602, "learning_rate": 9.667556060175987e-05, "loss": 0.20029008388519287, "step": 11720 }, { "epoch": 3.3295486801021856, "grad_norm": 8.970108985900879, "learning_rate": 9.667272211183651e-05, "loss": 0.18074514865875244, "step": 11730 }, { "epoch": 3.3323871700255463, "grad_norm": 18.17815589904785, "learning_rate": 9.666988362191315e-05, "loss": 0.20803744792938234, "step": 11740 }, { "epoch": 3.3352256599489074, "grad_norm": 12.169970512390137, "learning_rate": 9.666704513198978e-05, "loss": 0.21330771446228028, "step": 11750 }, { "epoch": 3.338064149872268, "grad_norm": 17.670808792114258, "learning_rate": 9.666420664206642e-05, "loss": 0.21243536472320557, "step": 11760 }, { "epoch": 3.3409026397956287, "grad_norm": 10.695698738098145, "learning_rate": 9.666136815214306e-05, "loss": 0.20828895568847655, "step": 11770 }, { "epoch": 3.3437411297189894, "grad_norm": 11.600749969482422, "learning_rate": 9.66585296622197e-05, "loss": 0.2504126787185669, "step": 11780 }, { "epoch": 3.34657961964235, "grad_norm": 24.69825553894043, "learning_rate": 9.665569117229635e-05, "loss": 0.2206576108932495, "step": 11790 }, { "epoch": 3.349418109565711, "grad_norm": 18.680410385131836, "learning_rate": 9.665285268237299e-05, "loss": 0.21109144687652587, "step": 11800 }, { "epoch": 3.352256599489072, "grad_norm": 18.611135482788086, "learning_rate": 9.665001419244963e-05, "loss": 0.22174811363220215, "step": 11810 }, { "epoch": 3.3550950894124325, "grad_norm": 17.504030227661133, "learning_rate": 9.664717570252626e-05, "loss": 0.21745755672454833, "step": 11820 }, { "epoch": 3.357933579335793, "grad_norm": 12.793354988098145, "learning_rate": 9.66443372126029e-05, "loss": 0.2557119369506836, "step": 11830 }, { "epoch": 3.3607720692591543, "grad_norm": 13.081313133239746, "learning_rate": 9.664149872267954e-05, "loss": 0.198041570186615, "step": 11840 }, { "epoch": 3.363610559182515, "grad_norm": 31.202713012695312, "learning_rate": 9.663866023275617e-05, "loss": 0.2324148654937744, "step": 11850 }, { "epoch": 3.3664490491058756, "grad_norm": 12.557838439941406, "learning_rate": 9.663582174283282e-05, "loss": 0.1963936448097229, "step": 11860 }, { "epoch": 3.3692875390292363, "grad_norm": 9.48736572265625, "learning_rate": 9.663298325290946e-05, "loss": 0.21634628772735595, "step": 11870 }, { "epoch": 3.3721260289525974, "grad_norm": 16.349489212036133, "learning_rate": 9.663014476298609e-05, "loss": 0.2506218910217285, "step": 11880 }, { "epoch": 3.374964518875958, "grad_norm": 10.301873207092285, "learning_rate": 9.662730627306273e-05, "loss": 0.20182456970214843, "step": 11890 }, { "epoch": 3.3778030087993187, "grad_norm": 24.21087646484375, "learning_rate": 9.662446778313937e-05, "loss": 0.21132652759552, "step": 11900 }, { "epoch": 3.3806414987226794, "grad_norm": 13.918732643127441, "learning_rate": 9.662162929321602e-05, "loss": 0.21867427825927735, "step": 11910 }, { "epoch": 3.3834799886460405, "grad_norm": 13.517630577087402, "learning_rate": 9.661879080329266e-05, "loss": 0.2162034273147583, "step": 11920 }, { "epoch": 3.386318478569401, "grad_norm": 18.84538459777832, "learning_rate": 9.66159523133693e-05, "loss": 0.21188414096832275, "step": 11930 }, { "epoch": 3.389156968492762, "grad_norm": 24.080352783203125, "learning_rate": 9.661311382344594e-05, "loss": 0.21764988899230958, "step": 11940 }, { "epoch": 3.3919954584161225, "grad_norm": 15.010066032409668, "learning_rate": 9.661027533352257e-05, "loss": 0.23972277641296386, "step": 11950 }, { "epoch": 3.3948339483394836, "grad_norm": 11.731785774230957, "learning_rate": 9.660743684359921e-05, "loss": 0.21554033756256102, "step": 11960 }, { "epoch": 3.3976724382628443, "grad_norm": 17.53189468383789, "learning_rate": 9.660459835367585e-05, "loss": 0.1813856840133667, "step": 11970 }, { "epoch": 3.400510928186205, "grad_norm": 26.230566024780273, "learning_rate": 9.660175986375248e-05, "loss": 0.1830596923828125, "step": 11980 }, { "epoch": 3.4033494181095656, "grad_norm": 9.721470832824707, "learning_rate": 9.659892137382913e-05, "loss": 0.22232580184936523, "step": 11990 }, { "epoch": 3.4061879080329263, "grad_norm": 13.806138038635254, "learning_rate": 9.659608288390577e-05, "loss": 0.20747621059417726, "step": 12000 }, { "epoch": 3.4061879080329263, "eval_accuracy": 0.8741018630380873, "eval_loss": 0.38113686442375183, "eval_runtime": 31.2293, "eval_samples_per_second": 503.598, "eval_steps_per_second": 7.877, "step": 12000 }, { "epoch": 3.4090263979562874, "grad_norm": 10.555680274963379, "learning_rate": 9.65932443939824e-05, "loss": 0.2150982141494751, "step": 12010 }, { "epoch": 3.411864887879648, "grad_norm": 13.711437225341797, "learning_rate": 9.659040590405904e-05, "loss": 0.2441176652908325, "step": 12020 }, { "epoch": 3.4147033778030087, "grad_norm": 14.219337463378906, "learning_rate": 9.658756741413569e-05, "loss": 0.20845234394073486, "step": 12030 }, { "epoch": 3.41754186772637, "grad_norm": 19.067289352416992, "learning_rate": 9.658472892421233e-05, "loss": 0.19756081104278564, "step": 12040 }, { "epoch": 3.4203803576497305, "grad_norm": 13.542132377624512, "learning_rate": 9.658189043428895e-05, "loss": 0.220689058303833, "step": 12050 }, { "epoch": 3.423218847573091, "grad_norm": 18.156505584716797, "learning_rate": 9.657905194436561e-05, "loss": 0.18415558338165283, "step": 12060 }, { "epoch": 3.426057337496452, "grad_norm": 17.014469146728516, "learning_rate": 9.657621345444224e-05, "loss": 0.22088563442230225, "step": 12070 }, { "epoch": 3.4288958274198125, "grad_norm": 14.176739692687988, "learning_rate": 9.657337496451888e-05, "loss": 0.2342505931854248, "step": 12080 }, { "epoch": 3.4317343173431736, "grad_norm": 18.460636138916016, "learning_rate": 9.657053647459552e-05, "loss": 0.19163500070571898, "step": 12090 }, { "epoch": 3.4345728072665342, "grad_norm": 21.631229400634766, "learning_rate": 9.656769798467216e-05, "loss": 0.17132577896118165, "step": 12100 }, { "epoch": 3.437411297189895, "grad_norm": 16.773332595825195, "learning_rate": 9.656485949474879e-05, "loss": 0.19617114067077637, "step": 12110 }, { "epoch": 3.4402497871132556, "grad_norm": 16.87520980834961, "learning_rate": 9.656202100482544e-05, "loss": 0.18289507627487184, "step": 12120 }, { "epoch": 3.4430882770366167, "grad_norm": 23.625057220458984, "learning_rate": 9.655918251490209e-05, "loss": 0.18987154960632324, "step": 12130 }, { "epoch": 3.4459267669599773, "grad_norm": 12.312173843383789, "learning_rate": 9.655634402497871e-05, "loss": 0.21520578861236572, "step": 12140 }, { "epoch": 3.448765256883338, "grad_norm": 11.222692489624023, "learning_rate": 9.655350553505535e-05, "loss": 0.19222476482391357, "step": 12150 }, { "epoch": 3.4516037468066987, "grad_norm": 18.204130172729492, "learning_rate": 9.6550667045132e-05, "loss": 0.16466399431228637, "step": 12160 }, { "epoch": 3.45444223673006, "grad_norm": 15.86926555633545, "learning_rate": 9.654782855520862e-05, "loss": 0.1836155414581299, "step": 12170 }, { "epoch": 3.4572807266534205, "grad_norm": 18.80999755859375, "learning_rate": 9.654499006528527e-05, "loss": 0.18175710439682008, "step": 12180 }, { "epoch": 3.460119216576781, "grad_norm": 12.337085723876953, "learning_rate": 9.654215157536192e-05, "loss": 0.22268054485321045, "step": 12190 }, { "epoch": 3.462957706500142, "grad_norm": 17.18171501159668, "learning_rate": 9.653931308543855e-05, "loss": 0.2185718059539795, "step": 12200 }, { "epoch": 3.4657961964235025, "grad_norm": 16.527612686157227, "learning_rate": 9.653647459551519e-05, "loss": 0.17128726243972778, "step": 12210 }, { "epoch": 3.4686346863468636, "grad_norm": 17.636186599731445, "learning_rate": 9.653363610559183e-05, "loss": 0.2028270721435547, "step": 12220 }, { "epoch": 3.4714731762702242, "grad_norm": 14.152024269104004, "learning_rate": 9.653079761566847e-05, "loss": 0.21990017890930175, "step": 12230 }, { "epoch": 3.474311666193585, "grad_norm": 29.320327758789062, "learning_rate": 9.65279591257451e-05, "loss": 0.18856503963470458, "step": 12240 }, { "epoch": 3.477150156116946, "grad_norm": 18.46135902404785, "learning_rate": 9.652512063582174e-05, "loss": 0.17718451023101806, "step": 12250 }, { "epoch": 3.4799886460403067, "grad_norm": 18.069143295288086, "learning_rate": 9.65222821458984e-05, "loss": 0.24148759841918946, "step": 12260 }, { "epoch": 3.4828271359636673, "grad_norm": 19.718914031982422, "learning_rate": 9.651944365597502e-05, "loss": 0.1954998254776001, "step": 12270 }, { "epoch": 3.485665625887028, "grad_norm": 14.599784851074219, "learning_rate": 9.651660516605167e-05, "loss": 0.24097776412963867, "step": 12280 }, { "epoch": 3.4885041158103887, "grad_norm": 21.432050704956055, "learning_rate": 9.65137666761283e-05, "loss": 0.20681569576263428, "step": 12290 }, { "epoch": 3.4913426057337498, "grad_norm": 7.485457420349121, "learning_rate": 9.651092818620493e-05, "loss": 0.18294481039047242, "step": 12300 }, { "epoch": 3.4941810956571104, "grad_norm": 10.234678268432617, "learning_rate": 9.650808969628158e-05, "loss": 0.2259777545928955, "step": 12310 }, { "epoch": 3.497019585580471, "grad_norm": 17.758129119873047, "learning_rate": 9.650525120635823e-05, "loss": 0.22350800037384033, "step": 12320 }, { "epoch": 3.499858075503832, "grad_norm": 16.66558074951172, "learning_rate": 9.650241271643486e-05, "loss": 0.19122059345245362, "step": 12330 }, { "epoch": 3.502696565427193, "grad_norm": 15.982666969299316, "learning_rate": 9.64995742265115e-05, "loss": 0.23698322772979735, "step": 12340 }, { "epoch": 3.5055350553505535, "grad_norm": 21.78820037841797, "learning_rate": 9.649673573658814e-05, "loss": 0.2171928644180298, "step": 12350 }, { "epoch": 3.508373545273914, "grad_norm": 24.82773208618164, "learning_rate": 9.649389724666478e-05, "loss": 0.23034744262695311, "step": 12360 }, { "epoch": 3.511212035197275, "grad_norm": 24.12273597717285, "learning_rate": 9.649105875674141e-05, "loss": 0.24887030124664306, "step": 12370 }, { "epoch": 3.514050525120636, "grad_norm": 14.947895050048828, "learning_rate": 9.648822026681805e-05, "loss": 0.16475038528442382, "step": 12380 }, { "epoch": 3.5168890150439966, "grad_norm": 15.203449249267578, "learning_rate": 9.648538177689471e-05, "loss": 0.22003324031829835, "step": 12390 }, { "epoch": 3.5197275049673573, "grad_norm": 21.137218475341797, "learning_rate": 9.648254328697133e-05, "loss": 0.19501237869262694, "step": 12400 }, { "epoch": 3.522565994890718, "grad_norm": 20.264305114746094, "learning_rate": 9.647970479704798e-05, "loss": 0.21154956817626952, "step": 12410 }, { "epoch": 3.5254044848140786, "grad_norm": 10.199299812316895, "learning_rate": 9.647686630712462e-05, "loss": 0.21863808631896972, "step": 12420 }, { "epoch": 3.5282429747374398, "grad_norm": 14.300342559814453, "learning_rate": 9.647402781720125e-05, "loss": 0.18522413969039916, "step": 12430 }, { "epoch": 3.5310814646608004, "grad_norm": 23.800457000732422, "learning_rate": 9.647118932727789e-05, "loss": 0.20307679176330568, "step": 12440 }, { "epoch": 3.533919954584161, "grad_norm": 14.285547256469727, "learning_rate": 9.646835083735454e-05, "loss": 0.17858246564865113, "step": 12450 }, { "epoch": 3.536758444507522, "grad_norm": 18.144193649291992, "learning_rate": 9.646551234743117e-05, "loss": 0.16114590167999268, "step": 12460 }, { "epoch": 3.539596934430883, "grad_norm": 13.867683410644531, "learning_rate": 9.646267385750781e-05, "loss": 0.19620091915130616, "step": 12470 }, { "epoch": 3.5424354243542435, "grad_norm": 17.450275421142578, "learning_rate": 9.645983536758445e-05, "loss": 0.20018246173858642, "step": 12480 }, { "epoch": 3.545273914277604, "grad_norm": 12.375514030456543, "learning_rate": 9.64569968776611e-05, "loss": 0.18790247440338134, "step": 12490 }, { "epoch": 3.548112404200965, "grad_norm": 14.685271263122559, "learning_rate": 9.645415838773772e-05, "loss": 0.1831412434577942, "step": 12500 }, { "epoch": 3.548112404200965, "eval_accuracy": 0.8881541298404019, "eval_loss": 0.3331910967826843, "eval_runtime": 31.2921, "eval_samples_per_second": 502.587, "eval_steps_per_second": 7.861, "step": 12500 }, { "epoch": 3.550950894124326, "grad_norm": 18.61245346069336, "learning_rate": 9.645131989781436e-05, "loss": 0.19272198677062988, "step": 12510 }, { "epoch": 3.5537893840476866, "grad_norm": 7.7992401123046875, "learning_rate": 9.644848140789102e-05, "loss": 0.18955307006835936, "step": 12520 }, { "epoch": 3.5566278739710473, "grad_norm": 18.963354110717773, "learning_rate": 9.644564291796765e-05, "loss": 0.20854313373565675, "step": 12530 }, { "epoch": 3.5594663638944084, "grad_norm": 21.342796325683594, "learning_rate": 9.644280442804429e-05, "loss": 0.2071152448654175, "step": 12540 }, { "epoch": 3.562304853817769, "grad_norm": 18.822126388549805, "learning_rate": 9.643996593812093e-05, "loss": 0.19297002553939818, "step": 12550 }, { "epoch": 3.5651433437411297, "grad_norm": 9.989988327026367, "learning_rate": 9.643712744819756e-05, "loss": 0.203399395942688, "step": 12560 }, { "epoch": 3.5679818336644904, "grad_norm": 7.88776159286499, "learning_rate": 9.64342889582742e-05, "loss": 0.14284805059432984, "step": 12570 }, { "epoch": 3.570820323587851, "grad_norm": 19.929224014282227, "learning_rate": 9.643145046835084e-05, "loss": 0.20520339012145997, "step": 12580 }, { "epoch": 3.573658813511212, "grad_norm": 13.769087791442871, "learning_rate": 9.642861197842748e-05, "loss": 0.20943028926849366, "step": 12590 }, { "epoch": 3.576497303434573, "grad_norm": 16.053749084472656, "learning_rate": 9.642577348850412e-05, "loss": 0.24009351730346679, "step": 12600 }, { "epoch": 3.5793357933579335, "grad_norm": 18.332489013671875, "learning_rate": 9.642293499858076e-05, "loss": 0.1855130076408386, "step": 12610 }, { "epoch": 3.5821742832812946, "grad_norm": 14.052803039550781, "learning_rate": 9.64200965086574e-05, "loss": 0.16475338935852052, "step": 12620 }, { "epoch": 3.5850127732046553, "grad_norm": 13.83219051361084, "learning_rate": 9.641725801873403e-05, "loss": 0.1916528820991516, "step": 12630 }, { "epoch": 3.587851263128016, "grad_norm": 14.276023864746094, "learning_rate": 9.641441952881067e-05, "loss": 0.18999066352844238, "step": 12640 }, { "epoch": 3.5906897530513766, "grad_norm": 21.867109298706055, "learning_rate": 9.641158103888733e-05, "loss": 0.23952841758728027, "step": 12650 }, { "epoch": 3.5935282429747373, "grad_norm": 14.864910125732422, "learning_rate": 9.640874254896396e-05, "loss": 0.15287126302719117, "step": 12660 }, { "epoch": 3.5963667328980984, "grad_norm": 10.48604679107666, "learning_rate": 9.64059040590406e-05, "loss": 0.15772616863250732, "step": 12670 }, { "epoch": 3.599205222821459, "grad_norm": 14.08375358581543, "learning_rate": 9.640306556911724e-05, "loss": 0.17585846185684204, "step": 12680 }, { "epoch": 3.6020437127448197, "grad_norm": 9.9944429397583, "learning_rate": 9.640022707919387e-05, "loss": 0.17443268299102782, "step": 12690 }, { "epoch": 3.6048822026681804, "grad_norm": 18.99468994140625, "learning_rate": 9.639738858927051e-05, "loss": 0.23786187171936035, "step": 12700 }, { "epoch": 3.607720692591541, "grad_norm": 16.31536102294922, "learning_rate": 9.639455009934715e-05, "loss": 0.20249197483062745, "step": 12710 }, { "epoch": 3.610559182514902, "grad_norm": 18.80646514892578, "learning_rate": 9.639171160942379e-05, "loss": 0.1999928832054138, "step": 12720 }, { "epoch": 3.613397672438263, "grad_norm": 21.134939193725586, "learning_rate": 9.638887311950043e-05, "loss": 0.16092418432235717, "step": 12730 }, { "epoch": 3.6162361623616235, "grad_norm": 18.907377243041992, "learning_rate": 9.638603462957707e-05, "loss": 0.1759500741958618, "step": 12740 }, { "epoch": 3.6190746522849846, "grad_norm": 16.169979095458984, "learning_rate": 9.638319613965371e-05, "loss": 0.1590247392654419, "step": 12750 }, { "epoch": 3.6219131422083453, "grad_norm": 17.707767486572266, "learning_rate": 9.638035764973034e-05, "loss": 0.19387505054473878, "step": 12760 }, { "epoch": 3.624751632131706, "grad_norm": 14.954610824584961, "learning_rate": 9.637751915980698e-05, "loss": 0.22697823047637938, "step": 12770 }, { "epoch": 3.6275901220550666, "grad_norm": 16.924896240234375, "learning_rate": 9.637468066988363e-05, "loss": 0.1832563042640686, "step": 12780 }, { "epoch": 3.6304286119784273, "grad_norm": 9.252005577087402, "learning_rate": 9.637184217996027e-05, "loss": 0.18793096542358398, "step": 12790 }, { "epoch": 3.6332671019017884, "grad_norm": 15.584880828857422, "learning_rate": 9.636900369003691e-05, "loss": 0.16042823791503907, "step": 12800 }, { "epoch": 3.636105591825149, "grad_norm": 21.544593811035156, "learning_rate": 9.636616520011355e-05, "loss": 0.20613737106323243, "step": 12810 }, { "epoch": 3.6389440817485097, "grad_norm": 28.40416145324707, "learning_rate": 9.636332671019018e-05, "loss": 0.20446252822875977, "step": 12820 }, { "epoch": 3.641782571671871, "grad_norm": 21.0313777923584, "learning_rate": 9.636048822026682e-05, "loss": 0.232306170463562, "step": 12830 }, { "epoch": 3.6446210615952315, "grad_norm": 19.987058639526367, "learning_rate": 9.635764973034346e-05, "loss": 0.18236492872238158, "step": 12840 }, { "epoch": 3.647459551518592, "grad_norm": 14.75471305847168, "learning_rate": 9.63548112404201e-05, "loss": 0.2041480302810669, "step": 12850 }, { "epoch": 3.650298041441953, "grad_norm": 10.284089088439941, "learning_rate": 9.635197275049674e-05, "loss": 0.1662386417388916, "step": 12860 }, { "epoch": 3.6531365313653135, "grad_norm": 22.920259475708008, "learning_rate": 9.634913426057338e-05, "loss": 0.20379331111907958, "step": 12870 }, { "epoch": 3.6559750212886746, "grad_norm": 17.91655731201172, "learning_rate": 9.634629577065003e-05, "loss": 0.19459644556045533, "step": 12880 }, { "epoch": 3.6588135112120352, "grad_norm": 14.011137008666992, "learning_rate": 9.634345728072665e-05, "loss": 0.1959167242050171, "step": 12890 }, { "epoch": 3.661652001135396, "grad_norm": 9.336225509643555, "learning_rate": 9.63406187908033e-05, "loss": 0.20528478622436525, "step": 12900 }, { "epoch": 3.664490491058757, "grad_norm": 14.320165634155273, "learning_rate": 9.633778030087994e-05, "loss": 0.19499990940093995, "step": 12910 }, { "epoch": 3.6673289809821172, "grad_norm": 12.04212760925293, "learning_rate": 9.633494181095658e-05, "loss": 0.21102490425109863, "step": 12920 }, { "epoch": 3.6701674709054783, "grad_norm": 17.68199348449707, "learning_rate": 9.633210332103322e-05, "loss": 0.1669550657272339, "step": 12930 }, { "epoch": 3.673005960828839, "grad_norm": 13.608904838562012, "learning_rate": 9.632926483110986e-05, "loss": 0.2338263511657715, "step": 12940 }, { "epoch": 3.6758444507521997, "grad_norm": 20.929216384887695, "learning_rate": 9.632642634118649e-05, "loss": 0.20095372200012207, "step": 12950 }, { "epoch": 3.678682940675561, "grad_norm": 10.120415687561035, "learning_rate": 9.632358785126313e-05, "loss": 0.19782148599624633, "step": 12960 }, { "epoch": 3.6815214305989215, "grad_norm": 17.33092498779297, "learning_rate": 9.632074936133977e-05, "loss": 0.19996261596679688, "step": 12970 }, { "epoch": 3.684359920522282, "grad_norm": 15.53795337677002, "learning_rate": 9.631791087141641e-05, "loss": 0.19260650873184204, "step": 12980 }, { "epoch": 3.687198410445643, "grad_norm": 17.326200485229492, "learning_rate": 9.631507238149305e-05, "loss": 0.1979435443878174, "step": 12990 }, { "epoch": 3.6900369003690034, "grad_norm": 12.190852165222168, "learning_rate": 9.63122338915697e-05, "loss": 0.2052302837371826, "step": 13000 }, { "epoch": 3.6900369003690034, "eval_accuracy": 0.8860558275577033, "eval_loss": 0.3456185460090637, "eval_runtime": 31.2758, "eval_samples_per_second": 502.849, "eval_steps_per_second": 7.866, "step": 13000 }, { "epoch": 3.6928753902923646, "grad_norm": 11.41498851776123, "learning_rate": 9.630939540164632e-05, "loss": 0.16218639612197877, "step": 13010 }, { "epoch": 3.6957138802157252, "grad_norm": 21.549772262573242, "learning_rate": 9.630655691172296e-05, "loss": 0.18964788913726807, "step": 13020 }, { "epoch": 3.698552370139086, "grad_norm": 15.30506706237793, "learning_rate": 9.63037184217996e-05, "loss": 0.17965257167816162, "step": 13030 }, { "epoch": 3.701390860062447, "grad_norm": 18.242279052734375, "learning_rate": 9.630087993187625e-05, "loss": 0.22642974853515624, "step": 13040 }, { "epoch": 3.7042293499858077, "grad_norm": 13.957133293151855, "learning_rate": 9.629804144195289e-05, "loss": 0.2009499788284302, "step": 13050 }, { "epoch": 3.7070678399091683, "grad_norm": 19.624011993408203, "learning_rate": 9.629520295202953e-05, "loss": 0.21221296787261962, "step": 13060 }, { "epoch": 3.709906329832529, "grad_norm": 19.296112060546875, "learning_rate": 9.629236446210617e-05, "loss": 0.1756616234779358, "step": 13070 }, { "epoch": 3.7127448197558897, "grad_norm": 3.713320255279541, "learning_rate": 9.62895259721828e-05, "loss": 0.1612323999404907, "step": 13080 }, { "epoch": 3.7155833096792508, "grad_norm": 12.882383346557617, "learning_rate": 9.628668748225944e-05, "loss": 0.20692853927612304, "step": 13090 }, { "epoch": 3.7184217996026114, "grad_norm": 15.465047836303711, "learning_rate": 9.628384899233608e-05, "loss": 0.23029165267944335, "step": 13100 }, { "epoch": 3.721260289525972, "grad_norm": 20.43402099609375, "learning_rate": 9.628101050241271e-05, "loss": 0.20411965847015381, "step": 13110 }, { "epoch": 3.724098779449333, "grad_norm": 18.477548599243164, "learning_rate": 9.627817201248936e-05, "loss": 0.22115280628204345, "step": 13120 }, { "epoch": 3.726937269372694, "grad_norm": 14.626617431640625, "learning_rate": 9.6275333522566e-05, "loss": 0.20535669326782227, "step": 13130 }, { "epoch": 3.7297757592960545, "grad_norm": 9.742225646972656, "learning_rate": 9.627249503264263e-05, "loss": 0.19262950420379638, "step": 13140 }, { "epoch": 3.732614249219415, "grad_norm": 13.3360013961792, "learning_rate": 9.626965654271928e-05, "loss": 0.20899879932403564, "step": 13150 }, { "epoch": 3.735452739142776, "grad_norm": 21.276222229003906, "learning_rate": 9.626681805279592e-05, "loss": 0.18748929500579833, "step": 13160 }, { "epoch": 3.738291229066137, "grad_norm": 17.293577194213867, "learning_rate": 9.626397956287256e-05, "loss": 0.16237835884094237, "step": 13170 }, { "epoch": 3.7411297189894976, "grad_norm": 21.195192337036133, "learning_rate": 9.626114107294919e-05, "loss": 0.20309970378875733, "step": 13180 }, { "epoch": 3.7439682089128583, "grad_norm": 14.44090747833252, "learning_rate": 9.625830258302584e-05, "loss": 0.235282564163208, "step": 13190 }, { "epoch": 3.746806698836219, "grad_norm": 13.884706497192383, "learning_rate": 9.625546409310248e-05, "loss": 0.18537503480911255, "step": 13200 }, { "epoch": 3.7496451887595796, "grad_norm": 14.233735084533691, "learning_rate": 9.625262560317911e-05, "loss": 0.2119133710861206, "step": 13210 }, { "epoch": 3.7524836786829407, "grad_norm": 16.935958862304688, "learning_rate": 9.624978711325575e-05, "loss": 0.20455503463745117, "step": 13220 }, { "epoch": 3.7553221686063014, "grad_norm": 11.480093002319336, "learning_rate": 9.624694862333239e-05, "loss": 0.2135927677154541, "step": 13230 }, { "epoch": 3.758160658529662, "grad_norm": 16.112228393554688, "learning_rate": 9.624411013340902e-05, "loss": 0.19972648620605468, "step": 13240 }, { "epoch": 3.760999148453023, "grad_norm": 12.924802780151367, "learning_rate": 9.624127164348568e-05, "loss": 0.20564627647399902, "step": 13250 }, { "epoch": 3.763837638376384, "grad_norm": 18.588260650634766, "learning_rate": 9.623843315356232e-05, "loss": 0.20453810691833496, "step": 13260 }, { "epoch": 3.7666761282997445, "grad_norm": 18.94190788269043, "learning_rate": 9.623559466363894e-05, "loss": 0.19725641012191772, "step": 13270 }, { "epoch": 3.769514618223105, "grad_norm": 27.818538665771484, "learning_rate": 9.623275617371559e-05, "loss": 0.17562634944915773, "step": 13280 }, { "epoch": 3.772353108146466, "grad_norm": 16.601388931274414, "learning_rate": 9.622991768379223e-05, "loss": 0.16375694274902344, "step": 13290 }, { "epoch": 3.775191598069827, "grad_norm": 14.693408966064453, "learning_rate": 9.622707919386887e-05, "loss": 0.18652769327163696, "step": 13300 }, { "epoch": 3.7780300879931876, "grad_norm": 25.490880966186523, "learning_rate": 9.62242407039455e-05, "loss": 0.23232064247131348, "step": 13310 }, { "epoch": 3.7808685779165483, "grad_norm": 18.175931930541992, "learning_rate": 9.622140221402215e-05, "loss": 0.17263089418411254, "step": 13320 }, { "epoch": 3.7837070678399094, "grad_norm": 12.457060813903809, "learning_rate": 9.621856372409879e-05, "loss": 0.17455865144729615, "step": 13330 }, { "epoch": 3.78654555776327, "grad_norm": 18.688642501831055, "learning_rate": 9.621572523417542e-05, "loss": 0.20567383766174316, "step": 13340 }, { "epoch": 3.7893840476866307, "grad_norm": 16.189008712768555, "learning_rate": 9.621288674425206e-05, "loss": 0.20075876712799073, "step": 13350 }, { "epoch": 3.7922225376099914, "grad_norm": 13.119763374328613, "learning_rate": 9.62100482543287e-05, "loss": 0.2060337781906128, "step": 13360 }, { "epoch": 3.795061027533352, "grad_norm": 16.44010353088379, "learning_rate": 9.620720976440533e-05, "loss": 0.21363377571105957, "step": 13370 }, { "epoch": 3.797899517456713, "grad_norm": 10.756022453308105, "learning_rate": 9.620437127448197e-05, "loss": 0.17126485109329223, "step": 13380 }, { "epoch": 3.800738007380074, "grad_norm": 21.037017822265625, "learning_rate": 9.620153278455863e-05, "loss": 0.20660436153411865, "step": 13390 }, { "epoch": 3.8035764973034345, "grad_norm": 11.497673988342285, "learning_rate": 9.619869429463526e-05, "loss": 0.1891492486000061, "step": 13400 }, { "epoch": 3.8064149872267956, "grad_norm": 14.413599014282227, "learning_rate": 9.61958558047119e-05, "loss": 0.1766159415245056, "step": 13410 }, { "epoch": 3.8092534771501563, "grad_norm": 17.342639923095703, "learning_rate": 9.619301731478854e-05, "loss": 0.16026729345321655, "step": 13420 }, { "epoch": 3.812091967073517, "grad_norm": 14.935521125793457, "learning_rate": 9.619017882486518e-05, "loss": 0.1667719602584839, "step": 13430 }, { "epoch": 3.8149304569968776, "grad_norm": 9.354028701782227, "learning_rate": 9.618734033494181e-05, "loss": 0.23012104034423828, "step": 13440 }, { "epoch": 3.8177689469202383, "grad_norm": 22.844907760620117, "learning_rate": 9.618450184501846e-05, "loss": 0.23156285285949707, "step": 13450 }, { "epoch": 3.8206074368435994, "grad_norm": 10.652470588684082, "learning_rate": 9.61816633550951e-05, "loss": 0.19911113977432252, "step": 13460 }, { "epoch": 3.82344592676696, "grad_norm": 11.36054801940918, "learning_rate": 9.617882486517173e-05, "loss": 0.21364669799804686, "step": 13470 }, { "epoch": 3.8262844166903207, "grad_norm": 12.714508056640625, "learning_rate": 9.617627022424071e-05, "loss": 0.2182408094406128, "step": 13480 }, { "epoch": 3.8291229066136814, "grad_norm": 16.124755859375, "learning_rate": 9.617343173431734e-05, "loss": 0.1787625789642334, "step": 13490 }, { "epoch": 3.831961396537042, "grad_norm": 17.289695739746094, "learning_rate": 9.6170593244394e-05, "loss": 0.21091160774230958, "step": 13500 }, { "epoch": 3.831961396537042, "eval_accuracy": 0.8963565842182234, "eval_loss": 0.3161424696445465, "eval_runtime": 31.0853, "eval_samples_per_second": 505.93, "eval_steps_per_second": 7.914, "step": 13500 }, { "epoch": 3.834799886460403, "grad_norm": 14.816575050354004, "learning_rate": 9.616775475447064e-05, "loss": 0.18416981697082518, "step": 13510 }, { "epoch": 3.837638376383764, "grad_norm": 20.717742919921875, "learning_rate": 9.616491626454726e-05, "loss": 0.19217147827148437, "step": 13520 }, { "epoch": 3.8404768663071245, "grad_norm": 12.364614486694336, "learning_rate": 9.61620777746239e-05, "loss": 0.1885585069656372, "step": 13530 }, { "epoch": 3.8433153562304856, "grad_norm": 11.606247901916504, "learning_rate": 9.615923928470055e-05, "loss": 0.18612173795700074, "step": 13540 }, { "epoch": 3.8461538461538463, "grad_norm": 17.668453216552734, "learning_rate": 9.615640079477717e-05, "loss": 0.16202316284179688, "step": 13550 }, { "epoch": 3.848992336077207, "grad_norm": 17.570472717285156, "learning_rate": 9.615356230485382e-05, "loss": 0.22200171947479247, "step": 13560 }, { "epoch": 3.8518308260005676, "grad_norm": 17.335346221923828, "learning_rate": 9.615072381493047e-05, "loss": 0.15838404893875122, "step": 13570 }, { "epoch": 3.8546693159239283, "grad_norm": 10.258763313293457, "learning_rate": 9.61478853250071e-05, "loss": 0.14664362668991088, "step": 13580 }, { "epoch": 3.8575078058472894, "grad_norm": 12.704676628112793, "learning_rate": 9.614504683508374e-05, "loss": 0.1908525347709656, "step": 13590 }, { "epoch": 3.86034629577065, "grad_norm": 14.854520797729492, "learning_rate": 9.614220834516038e-05, "loss": 0.18227908611297608, "step": 13600 }, { "epoch": 3.8631847856940107, "grad_norm": 13.429558753967285, "learning_rate": 9.613936985523702e-05, "loss": 0.2027650833129883, "step": 13610 }, { "epoch": 3.866023275617372, "grad_norm": 12.316184043884277, "learning_rate": 9.613653136531365e-05, "loss": 0.17951679229736328, "step": 13620 }, { "epoch": 3.8688617655407325, "grad_norm": 11.702552795410156, "learning_rate": 9.61336928753903e-05, "loss": 0.17083731889724732, "step": 13630 }, { "epoch": 3.871700255464093, "grad_norm": 10.787964820861816, "learning_rate": 9.613085438546695e-05, "loss": 0.19240431785583495, "step": 13640 }, { "epoch": 3.874538745387454, "grad_norm": 22.3664608001709, "learning_rate": 9.612801589554357e-05, "loss": 0.2139369487762451, "step": 13650 }, { "epoch": 3.8773772353108145, "grad_norm": 17.102693557739258, "learning_rate": 9.612517740562022e-05, "loss": 0.1737605571746826, "step": 13660 }, { "epoch": 3.8802157252341756, "grad_norm": 17.290803909301758, "learning_rate": 9.612233891569686e-05, "loss": 0.19548892974853516, "step": 13670 }, { "epoch": 3.8830542151575362, "grad_norm": 14.182625770568848, "learning_rate": 9.611950042577348e-05, "loss": 0.18373520374298097, "step": 13680 }, { "epoch": 3.885892705080897, "grad_norm": 13.956124305725098, "learning_rate": 9.611666193585013e-05, "loss": 0.23870363235473632, "step": 13690 }, { "epoch": 3.888731195004258, "grad_norm": 8.751736640930176, "learning_rate": 9.611382344592678e-05, "loss": 0.14544075727462769, "step": 13700 }, { "epoch": 3.8915696849276182, "grad_norm": 8.248520851135254, "learning_rate": 9.611098495600341e-05, "loss": 0.15393333435058593, "step": 13710 }, { "epoch": 3.8944081748509793, "grad_norm": 18.234996795654297, "learning_rate": 9.610814646608005e-05, "loss": 0.18511407375335692, "step": 13720 }, { "epoch": 3.89724666477434, "grad_norm": 12.562544822692871, "learning_rate": 9.610530797615669e-05, "loss": 0.15003457069396972, "step": 13730 }, { "epoch": 3.9000851546977007, "grad_norm": 16.743684768676758, "learning_rate": 9.610246948623333e-05, "loss": 0.21348919868469238, "step": 13740 }, { "epoch": 3.902923644621062, "grad_norm": 12.473259925842285, "learning_rate": 9.609963099630996e-05, "loss": 0.15301227569580078, "step": 13750 }, { "epoch": 3.9057621345444224, "grad_norm": 14.41502571105957, "learning_rate": 9.60967925063866e-05, "loss": 0.1809757947921753, "step": 13760 }, { "epoch": 3.908600624467783, "grad_norm": 9.003304481506348, "learning_rate": 9.609395401646324e-05, "loss": 0.14672800302505493, "step": 13770 }, { "epoch": 3.911439114391144, "grad_norm": 9.885737419128418, "learning_rate": 9.609111552653988e-05, "loss": 0.12219957113265992, "step": 13780 }, { "epoch": 3.9142776043145044, "grad_norm": 14.666175842285156, "learning_rate": 9.608827703661653e-05, "loss": 0.2119225263595581, "step": 13790 }, { "epoch": 3.9171160942378656, "grad_norm": 17.990489959716797, "learning_rate": 9.608543854669317e-05, "loss": 0.23682405948638915, "step": 13800 }, { "epoch": 3.919954584161226, "grad_norm": 15.3749361038208, "learning_rate": 9.60826000567698e-05, "loss": 0.1940447211265564, "step": 13810 }, { "epoch": 3.922793074084587, "grad_norm": 13.172243118286133, "learning_rate": 9.607976156684644e-05, "loss": 0.14192864894866944, "step": 13820 }, { "epoch": 3.925631564007948, "grad_norm": 13.275805473327637, "learning_rate": 9.607692307692309e-05, "loss": 0.19739904403686523, "step": 13830 }, { "epoch": 3.9284700539313087, "grad_norm": 14.367815017700195, "learning_rate": 9.607408458699972e-05, "loss": 0.19411333799362182, "step": 13840 }, { "epoch": 3.9313085438546693, "grad_norm": 13.982487678527832, "learning_rate": 9.607124609707636e-05, "loss": 0.17054067850112914, "step": 13850 }, { "epoch": 3.93414703377803, "grad_norm": 11.844534873962402, "learning_rate": 9.6068407607153e-05, "loss": 0.24255647659301757, "step": 13860 }, { "epoch": 3.9369855237013907, "grad_norm": 13.950010299682617, "learning_rate": 9.606556911722963e-05, "loss": 0.1747615694999695, "step": 13870 }, { "epoch": 3.9398240136247518, "grad_norm": 14.970807075500488, "learning_rate": 9.606273062730627e-05, "loss": 0.16250706911087037, "step": 13880 }, { "epoch": 3.9426625035481124, "grad_norm": 17.797439575195312, "learning_rate": 9.605989213738291e-05, "loss": 0.18647220134735107, "step": 13890 }, { "epoch": 3.945500993471473, "grad_norm": 8.312163352966309, "learning_rate": 9.605705364745955e-05, "loss": 0.1735267996788025, "step": 13900 }, { "epoch": 3.948339483394834, "grad_norm": 9.662410736083984, "learning_rate": 9.60542151575362e-05, "loss": 0.17853622436523436, "step": 13910 }, { "epoch": 3.951177973318195, "grad_norm": 10.913804054260254, "learning_rate": 9.605137666761284e-05, "loss": 0.17758163213729858, "step": 13920 }, { "epoch": 3.9540164632415555, "grad_norm": 13.551321983337402, "learning_rate": 9.604853817768948e-05, "loss": 0.19117436408996583, "step": 13930 }, { "epoch": 3.956854953164916, "grad_norm": 8.479565620422363, "learning_rate": 9.60456996877661e-05, "loss": 0.18471699953079224, "step": 13940 }, { "epoch": 3.959693443088277, "grad_norm": 18.360044479370117, "learning_rate": 9.604286119784275e-05, "loss": 0.2238318681716919, "step": 13950 }, { "epoch": 3.962531933011638, "grad_norm": 10.409412384033203, "learning_rate": 9.604002270791939e-05, "loss": 0.19304097890853883, "step": 13960 }, { "epoch": 3.9653704229349986, "grad_norm": 8.591785430908203, "learning_rate": 9.603718421799603e-05, "loss": 0.17751789093017578, "step": 13970 }, { "epoch": 3.9682089128583593, "grad_norm": 13.945221900939941, "learning_rate": 9.603434572807267e-05, "loss": 0.17351099252700805, "step": 13980 }, { "epoch": 3.97104740278172, "grad_norm": 7.1689229011535645, "learning_rate": 9.603150723814931e-05, "loss": 0.17921603918075563, "step": 13990 }, { "epoch": 3.9738858927050806, "grad_norm": 17.593414306640625, "learning_rate": 9.602866874822594e-05, "loss": 0.24585111141204835, "step": 14000 }, { "epoch": 3.9738858927050806, "eval_accuracy": 0.902651491066319, "eval_loss": 0.2931208908557892, "eval_runtime": 31.3853, "eval_samples_per_second": 501.095, "eval_steps_per_second": 7.838, "step": 14000 }, { "epoch": 3.9767243826284417, "grad_norm": 14.007991790771484, "learning_rate": 9.602583025830258e-05, "loss": 0.20412559509277345, "step": 14010 }, { "epoch": 3.9795628725518024, "grad_norm": 9.47784423828125, "learning_rate": 9.602299176837922e-05, "loss": 0.1461554765701294, "step": 14020 }, { "epoch": 3.982401362475163, "grad_norm": 19.436687469482422, "learning_rate": 9.602015327845586e-05, "loss": 0.19939231872558594, "step": 14030 }, { "epoch": 3.985239852398524, "grad_norm": 16.153579711914062, "learning_rate": 9.60173147885325e-05, "loss": 0.1849508047103882, "step": 14040 }, { "epoch": 3.988078342321885, "grad_norm": 10.739173889160156, "learning_rate": 9.601447629860915e-05, "loss": 0.17080507278442383, "step": 14050 }, { "epoch": 3.9909168322452455, "grad_norm": 18.857379913330078, "learning_rate": 9.601163780868579e-05, "loss": 0.17809062004089354, "step": 14060 }, { "epoch": 3.993755322168606, "grad_norm": 14.858738899230957, "learning_rate": 9.600879931876242e-05, "loss": 0.2061694622039795, "step": 14070 }, { "epoch": 3.996593812091967, "grad_norm": 9.384162902832031, "learning_rate": 9.600596082883906e-05, "loss": 0.14361640214920043, "step": 14080 }, { "epoch": 3.999432302015328, "grad_norm": 15.557256698608398, "learning_rate": 9.60031223389157e-05, "loss": 0.13269511461257935, "step": 14090 }, { "epoch": 4.002270791938688, "grad_norm": 15.339125633239746, "learning_rate": 9.600028384899234e-05, "loss": 0.15242763757705688, "step": 14100 }, { "epoch": 4.005109281862049, "grad_norm": 11.810029029846191, "learning_rate": 9.599744535906898e-05, "loss": 0.14725091457366943, "step": 14110 }, { "epoch": 4.00794777178541, "grad_norm": 15.168164253234863, "learning_rate": 9.599460686914562e-05, "loss": 0.13273046016693116, "step": 14120 }, { "epoch": 4.010786261708771, "grad_norm": 15.249293327331543, "learning_rate": 9.599176837922225e-05, "loss": 0.17031476497650147, "step": 14130 }, { "epoch": 4.013624751632132, "grad_norm": 10.77449893951416, "learning_rate": 9.598892988929889e-05, "loss": 0.15205686092376708, "step": 14140 }, { "epoch": 4.016463241555493, "grad_norm": 13.917808532714844, "learning_rate": 9.598609139937553e-05, "loss": 0.18895701169967652, "step": 14150 }, { "epoch": 4.019301731478853, "grad_norm": 17.4494686126709, "learning_rate": 9.598325290945218e-05, "loss": 0.12264747619628906, "step": 14160 }, { "epoch": 4.022140221402214, "grad_norm": 13.52995491027832, "learning_rate": 9.598041441952882e-05, "loss": 0.1630455732345581, "step": 14170 }, { "epoch": 4.024978711325574, "grad_norm": 8.253148078918457, "learning_rate": 9.597757592960546e-05, "loss": 0.1761770248413086, "step": 14180 }, { "epoch": 4.0278172012489355, "grad_norm": 16.79362678527832, "learning_rate": 9.59747374396821e-05, "loss": 0.16914713382720947, "step": 14190 }, { "epoch": 4.030655691172297, "grad_norm": 10.3887357711792, "learning_rate": 9.597189894975873e-05, "loss": 0.15404207706451417, "step": 14200 }, { "epoch": 4.033494181095657, "grad_norm": 17.84835433959961, "learning_rate": 9.596906045983537e-05, "loss": 0.15989917516708374, "step": 14210 }, { "epoch": 4.036332671019018, "grad_norm": 10.473235130310059, "learning_rate": 9.596622196991201e-05, "loss": 0.1424857497215271, "step": 14220 }, { "epoch": 4.039171160942379, "grad_norm": 8.463979721069336, "learning_rate": 9.596338347998865e-05, "loss": 0.12286888360977173, "step": 14230 }, { "epoch": 4.042009650865739, "grad_norm": 11.434859275817871, "learning_rate": 9.596054499006529e-05, "loss": 0.13087767362594604, "step": 14240 }, { "epoch": 4.0448481407891, "grad_norm": 14.250101089477539, "learning_rate": 9.595770650014193e-05, "loss": 0.1534764885902405, "step": 14250 }, { "epoch": 4.047686630712461, "grad_norm": 14.397462844848633, "learning_rate": 9.595486801021856e-05, "loss": 0.134521222114563, "step": 14260 }, { "epoch": 4.050525120635822, "grad_norm": 19.012601852416992, "learning_rate": 9.59520295202952e-05, "loss": 0.14702656269073486, "step": 14270 }, { "epoch": 4.053363610559183, "grad_norm": 21.502548217773438, "learning_rate": 9.594919103037185e-05, "loss": 0.16444971561431884, "step": 14280 }, { "epoch": 4.056202100482543, "grad_norm": 9.994865417480469, "learning_rate": 9.594635254044849e-05, "loss": 0.17271435260772705, "step": 14290 }, { "epoch": 4.059040590405904, "grad_norm": 13.898537635803223, "learning_rate": 9.594351405052513e-05, "loss": 0.14536019563674926, "step": 14300 }, { "epoch": 4.061879080329265, "grad_norm": 18.628498077392578, "learning_rate": 9.594067556060177e-05, "loss": 0.15034260749816894, "step": 14310 }, { "epoch": 4.0647175702526255, "grad_norm": 12.507842063903809, "learning_rate": 9.593783707067841e-05, "loss": 0.15725717544555665, "step": 14320 }, { "epoch": 4.067556060175987, "grad_norm": 9.733858108520508, "learning_rate": 9.593499858075504e-05, "loss": 0.1314501404762268, "step": 14330 }, { "epoch": 4.070394550099347, "grad_norm": 11.284711837768555, "learning_rate": 9.593216009083168e-05, "loss": 0.160497784614563, "step": 14340 }, { "epoch": 4.073233040022708, "grad_norm": 10.518166542053223, "learning_rate": 9.592932160090832e-05, "loss": 0.11356463432312011, "step": 14350 }, { "epoch": 4.076071529946069, "grad_norm": 24.16094970703125, "learning_rate": 9.592648311098496e-05, "loss": 0.15804142951965333, "step": 14360 }, { "epoch": 4.078910019869429, "grad_norm": 12.228034019470215, "learning_rate": 9.592392847005394e-05, "loss": 0.15015305280685426, "step": 14370 }, { "epoch": 4.08174850979279, "grad_norm": 12.908282279968262, "learning_rate": 9.592108998013057e-05, "loss": 0.12004361152648926, "step": 14380 }, { "epoch": 4.084586999716151, "grad_norm": 25.25391387939453, "learning_rate": 9.591825149020721e-05, "loss": 0.1858546495437622, "step": 14390 }, { "epoch": 4.087425489639512, "grad_norm": 15.679829597473145, "learning_rate": 9.591541300028385e-05, "loss": 0.18722642660140992, "step": 14400 }, { "epoch": 4.090263979562873, "grad_norm": 8.558819770812988, "learning_rate": 9.59125745103605e-05, "loss": 0.14437999725341796, "step": 14410 }, { "epoch": 4.093102469486233, "grad_norm": 19.275169372558594, "learning_rate": 9.590973602043714e-05, "loss": 0.14627193212509154, "step": 14420 }, { "epoch": 4.095940959409594, "grad_norm": 7.874567031860352, "learning_rate": 9.590689753051378e-05, "loss": 0.12814124822616577, "step": 14430 }, { "epoch": 4.098779449332955, "grad_norm": 11.124398231506348, "learning_rate": 9.59040590405904e-05, "loss": 0.16742610931396484, "step": 14440 }, { "epoch": 4.1016179392563155, "grad_norm": 17.87415313720703, "learning_rate": 9.590122055066705e-05, "loss": 0.15107352733612062, "step": 14450 }, { "epoch": 4.104456429179677, "grad_norm": 11.556680679321289, "learning_rate": 9.589838206074369e-05, "loss": 0.13166494369506837, "step": 14460 }, { "epoch": 4.107294919103037, "grad_norm": 17.838333129882812, "learning_rate": 9.589554357082033e-05, "loss": 0.1744963765144348, "step": 14470 }, { "epoch": 4.110133409026398, "grad_norm": 15.847147941589355, "learning_rate": 9.589270508089697e-05, "loss": 0.1760677695274353, "step": 14480 }, { "epoch": 4.112971898949759, "grad_norm": 16.368736267089844, "learning_rate": 9.588986659097361e-05, "loss": 0.16585955619812012, "step": 14490 }, { "epoch": 4.115810388873119, "grad_norm": 18.406042098999023, "learning_rate": 9.588702810105025e-05, "loss": 0.16642417907714843, "step": 14500 }, { "epoch": 4.115810388873119, "eval_accuracy": 0.8910790360526483, "eval_loss": 0.3261353671550751, "eval_runtime": 31.5065, "eval_samples_per_second": 499.166, "eval_steps_per_second": 7.808, "step": 14500 }, { "epoch": 4.11864887879648, "grad_norm": 15.058733940124512, "learning_rate": 9.588418961112688e-05, "loss": 0.17857513427734376, "step": 14510 }, { "epoch": 4.1214873687198414, "grad_norm": 10.961214065551758, "learning_rate": 9.588135112120352e-05, "loss": 0.14090417623519896, "step": 14520 }, { "epoch": 4.124325858643202, "grad_norm": 13.186880111694336, "learning_rate": 9.587851263128016e-05, "loss": 0.15759789943695068, "step": 14530 }, { "epoch": 4.127164348566563, "grad_norm": 14.095049858093262, "learning_rate": 9.58756741413568e-05, "loss": 0.11720154285430909, "step": 14540 }, { "epoch": 4.130002838489923, "grad_norm": 10.82900333404541, "learning_rate": 9.587283565143345e-05, "loss": 0.13271580934524535, "step": 14550 }, { "epoch": 4.132841328413284, "grad_norm": 12.094626426696777, "learning_rate": 9.586999716151009e-05, "loss": 0.17368659973144532, "step": 14560 }, { "epoch": 4.135679818336645, "grad_norm": 15.703944206237793, "learning_rate": 9.586715867158672e-05, "loss": 0.15307741165161132, "step": 14570 }, { "epoch": 4.138518308260005, "grad_norm": 9.091362953186035, "learning_rate": 9.586432018166336e-05, "loss": 0.12892868518829345, "step": 14580 }, { "epoch": 4.1413567981833665, "grad_norm": 13.104792594909668, "learning_rate": 9.586148169174e-05, "loss": 0.15084669589996338, "step": 14590 }, { "epoch": 4.144195288106728, "grad_norm": 19.178945541381836, "learning_rate": 9.585864320181664e-05, "loss": 0.15708644390106202, "step": 14600 }, { "epoch": 4.147033778030088, "grad_norm": 8.34633731842041, "learning_rate": 9.585580471189328e-05, "loss": 0.16216639280319214, "step": 14610 }, { "epoch": 4.149872267953449, "grad_norm": 17.990982055664062, "learning_rate": 9.585296622196992e-05, "loss": 0.17228389978408815, "step": 14620 }, { "epoch": 4.152710757876809, "grad_norm": 15.340961456298828, "learning_rate": 9.585012773204656e-05, "loss": 0.15551118850708007, "step": 14630 }, { "epoch": 4.15554924780017, "grad_norm": 18.991130828857422, "learning_rate": 9.584728924212319e-05, "loss": 0.12890100479125977, "step": 14640 }, { "epoch": 4.158387737723531, "grad_norm": 21.018190383911133, "learning_rate": 9.584445075219983e-05, "loss": 0.18114593029022216, "step": 14650 }, { "epoch": 4.161226227646892, "grad_norm": 6.306010723114014, "learning_rate": 9.584161226227647e-05, "loss": 0.11897997856140137, "step": 14660 }, { "epoch": 4.164064717570253, "grad_norm": 11.076420783996582, "learning_rate": 9.58387737723531e-05, "loss": 0.1446991205215454, "step": 14670 }, { "epoch": 4.166903207493613, "grad_norm": 12.878436088562012, "learning_rate": 9.583593528242976e-05, "loss": 0.15413979291915894, "step": 14680 }, { "epoch": 4.169741697416974, "grad_norm": 15.184830665588379, "learning_rate": 9.58330967925064e-05, "loss": 0.1326911687850952, "step": 14690 }, { "epoch": 4.172580187340335, "grad_norm": 17.91576385498047, "learning_rate": 9.583025830258303e-05, "loss": 0.17909533977508546, "step": 14700 }, { "epoch": 4.175418677263695, "grad_norm": 14.837722778320312, "learning_rate": 9.582741981265967e-05, "loss": 0.1566817045211792, "step": 14710 }, { "epoch": 4.1782571671870565, "grad_norm": 12.023417472839355, "learning_rate": 9.582458132273631e-05, "loss": 0.1333454966545105, "step": 14720 }, { "epoch": 4.181095657110418, "grad_norm": 7.271909713745117, "learning_rate": 9.582174283281295e-05, "loss": 0.14408345222473146, "step": 14730 }, { "epoch": 4.183934147033778, "grad_norm": 11.925993919372559, "learning_rate": 9.581890434288959e-05, "loss": 0.1441882371902466, "step": 14740 }, { "epoch": 4.186772636957139, "grad_norm": 12.516487121582031, "learning_rate": 9.581606585296623e-05, "loss": 0.13351558446884154, "step": 14750 }, { "epoch": 4.189611126880499, "grad_norm": 13.141271591186523, "learning_rate": 9.581322736304286e-05, "loss": 0.14427276849746704, "step": 14760 }, { "epoch": 4.19244961680386, "grad_norm": 12.104243278503418, "learning_rate": 9.58103888731195e-05, "loss": 0.17315857410430907, "step": 14770 }, { "epoch": 4.195288106727221, "grad_norm": 13.825701713562012, "learning_rate": 9.580755038319614e-05, "loss": 0.19491254091262816, "step": 14780 }, { "epoch": 4.198126596650582, "grad_norm": 8.500093460083008, "learning_rate": 9.580471189327279e-05, "loss": 0.1493491530418396, "step": 14790 }, { "epoch": 4.200965086573943, "grad_norm": 15.029679298400879, "learning_rate": 9.580187340334941e-05, "loss": 0.1404767155647278, "step": 14800 }, { "epoch": 4.203803576497304, "grad_norm": 14.780632972717285, "learning_rate": 9.579903491342607e-05, "loss": 0.15077284574508668, "step": 14810 }, { "epoch": 4.206642066420664, "grad_norm": 16.46811866760254, "learning_rate": 9.579619642350271e-05, "loss": 0.17287200689315796, "step": 14820 }, { "epoch": 4.209480556344025, "grad_norm": 15.16257095336914, "learning_rate": 9.579335793357934e-05, "loss": 0.16240355968475342, "step": 14830 }, { "epoch": 4.212319046267385, "grad_norm": 16.409542083740234, "learning_rate": 9.579051944365598e-05, "loss": 0.1358044147491455, "step": 14840 }, { "epoch": 4.2151575361907465, "grad_norm": 9.065581321716309, "learning_rate": 9.578768095373262e-05, "loss": 0.11479216814041138, "step": 14850 }, { "epoch": 4.217996026114108, "grad_norm": 16.2310791015625, "learning_rate": 9.578484246380925e-05, "loss": 0.16164865493774414, "step": 14860 }, { "epoch": 4.220834516037468, "grad_norm": 10.68352222442627, "learning_rate": 9.578200397388589e-05, "loss": 0.12752039432525636, "step": 14870 }, { "epoch": 4.223673005960829, "grad_norm": 13.836865425109863, "learning_rate": 9.577916548396254e-05, "loss": 0.20279693603515625, "step": 14880 }, { "epoch": 4.226511495884189, "grad_norm": 10.64200210571289, "learning_rate": 9.577632699403917e-05, "loss": 0.1932573914527893, "step": 14890 }, { "epoch": 4.22934998580755, "grad_norm": 11.400484085083008, "learning_rate": 9.577348850411581e-05, "loss": 0.16212347745895386, "step": 14900 }, { "epoch": 4.232188475730911, "grad_norm": 15.307188987731934, "learning_rate": 9.577065001419245e-05, "loss": 0.20311076641082765, "step": 14910 }, { "epoch": 4.235026965654272, "grad_norm": 15.208539962768555, "learning_rate": 9.57678115242691e-05, "loss": 0.15278089046478271, "step": 14920 }, { "epoch": 4.237865455577633, "grad_norm": 12.419407844543457, "learning_rate": 9.576497303434572e-05, "loss": 0.1347472071647644, "step": 14930 }, { "epoch": 4.240703945500994, "grad_norm": 21.679683685302734, "learning_rate": 9.576213454442238e-05, "loss": 0.16810553073883056, "step": 14940 }, { "epoch": 4.243542435424354, "grad_norm": 14.06842041015625, "learning_rate": 9.575929605449902e-05, "loss": 0.15204988718032836, "step": 14950 }, { "epoch": 4.246380925347715, "grad_norm": 8.801273345947266, "learning_rate": 9.575645756457565e-05, "loss": 0.15220837593078612, "step": 14960 }, { "epoch": 4.249219415271075, "grad_norm": 19.13870620727539, "learning_rate": 9.575361907465229e-05, "loss": 0.19406039714813234, "step": 14970 }, { "epoch": 4.2520579051944365, "grad_norm": 17.72483253479004, "learning_rate": 9.575078058472893e-05, "loss": 0.15963964462280272, "step": 14980 }, { "epoch": 4.254896395117798, "grad_norm": 19.06657600402832, "learning_rate": 9.574794209480556e-05, "loss": 0.15715559720993041, "step": 14990 }, { "epoch": 4.257734885041158, "grad_norm": 10.211169242858887, "learning_rate": 9.57451036048822e-05, "loss": 0.1485607624053955, "step": 15000 }, { "epoch": 4.257734885041158, "eval_accuracy": 0.8908246963820182, "eval_loss": 0.3205018937587738, "eval_runtime": 31.4678, "eval_samples_per_second": 499.781, "eval_steps_per_second": 7.818, "step": 15000 }, { "epoch": 4.260573374964519, "grad_norm": 18.0892276763916, "learning_rate": 9.574226511495885e-05, "loss": 0.1473289966583252, "step": 15010 }, { "epoch": 4.26341186488788, "grad_norm": 16.001670837402344, "learning_rate": 9.573942662503548e-05, "loss": 0.1674429416656494, "step": 15020 }, { "epoch": 4.26625035481124, "grad_norm": 18.877471923828125, "learning_rate": 9.573658813511212e-05, "loss": 0.16755801439285278, "step": 15030 }, { "epoch": 4.269088844734601, "grad_norm": 7.050081253051758, "learning_rate": 9.573374964518877e-05, "loss": 0.15270453691482544, "step": 15040 }, { "epoch": 4.271927334657962, "grad_norm": 14.125801086425781, "learning_rate": 9.573091115526541e-05, "loss": 0.15930687189102172, "step": 15050 }, { "epoch": 4.274765824581323, "grad_norm": 14.005635261535645, "learning_rate": 9.572807266534203e-05, "loss": 0.1637762665748596, "step": 15060 }, { "epoch": 4.277604314504684, "grad_norm": 13.851651191711426, "learning_rate": 9.572523417541868e-05, "loss": 0.1451090931892395, "step": 15070 }, { "epoch": 4.280442804428044, "grad_norm": 11.01707649230957, "learning_rate": 9.572239568549533e-05, "loss": 0.13447017669677735, "step": 15080 }, { "epoch": 4.283281294351405, "grad_norm": 16.21165657043457, "learning_rate": 9.571955719557196e-05, "loss": 0.1568738341331482, "step": 15090 }, { "epoch": 4.286119784274765, "grad_norm": 13.440966606140137, "learning_rate": 9.57167187056486e-05, "loss": 0.15765141248703002, "step": 15100 }, { "epoch": 4.2889582741981265, "grad_norm": 8.969966888427734, "learning_rate": 9.571388021572524e-05, "loss": 0.11767923831939697, "step": 15110 }, { "epoch": 4.291796764121488, "grad_norm": 18.897871017456055, "learning_rate": 9.571104172580187e-05, "loss": 0.1935199975967407, "step": 15120 }, { "epoch": 4.294635254044848, "grad_norm": 7.486785888671875, "learning_rate": 9.570820323587851e-05, "loss": 0.15177110433578492, "step": 15130 }, { "epoch": 4.297473743968209, "grad_norm": 14.913260459899902, "learning_rate": 9.570536474595517e-05, "loss": 0.14765549898147584, "step": 15140 }, { "epoch": 4.30031223389157, "grad_norm": 16.408605575561523, "learning_rate": 9.57025262560318e-05, "loss": 0.14995042085647584, "step": 15150 }, { "epoch": 4.30315072381493, "grad_norm": 19.2226505279541, "learning_rate": 9.569968776610843e-05, "loss": 0.13153890371322632, "step": 15160 }, { "epoch": 4.305989213738291, "grad_norm": 13.076022148132324, "learning_rate": 9.569684927618508e-05, "loss": 0.1534234404563904, "step": 15170 }, { "epoch": 4.3088277036616525, "grad_norm": 12.501521110534668, "learning_rate": 9.569401078626172e-05, "loss": 0.11859639883041381, "step": 15180 }, { "epoch": 4.311666193585013, "grad_norm": 25.320295333862305, "learning_rate": 9.569117229633835e-05, "loss": 0.14102661609649658, "step": 15190 }, { "epoch": 4.314504683508374, "grad_norm": 11.505056381225586, "learning_rate": 9.568833380641499e-05, "loss": 0.1731319546699524, "step": 15200 }, { "epoch": 4.317343173431734, "grad_norm": 9.780108451843262, "learning_rate": 9.568549531649164e-05, "loss": 0.17432491779327391, "step": 15210 }, { "epoch": 4.320181663355095, "grad_norm": 5.729079723358154, "learning_rate": 9.568265682656827e-05, "loss": 0.12856547832489013, "step": 15220 }, { "epoch": 4.323020153278456, "grad_norm": 17.16070556640625, "learning_rate": 9.567981833664491e-05, "loss": 0.1171339750289917, "step": 15230 }, { "epoch": 4.3258586432018165, "grad_norm": 19.715206146240234, "learning_rate": 9.567697984672155e-05, "loss": 0.18454134464263916, "step": 15240 }, { "epoch": 4.328697133125178, "grad_norm": 14.777257919311523, "learning_rate": 9.567414135679818e-05, "loss": 0.1561437129974365, "step": 15250 }, { "epoch": 4.331535623048538, "grad_norm": 12.500276565551758, "learning_rate": 9.567130286687482e-05, "loss": 0.13659229278564453, "step": 15260 }, { "epoch": 4.334374112971899, "grad_norm": 17.76792335510254, "learning_rate": 9.566846437695146e-05, "loss": 0.17823370695114135, "step": 15270 }, { "epoch": 4.33721260289526, "grad_norm": 10.953511238098145, "learning_rate": 9.56656258870281e-05, "loss": 0.1756179451942444, "step": 15280 }, { "epoch": 4.34005109281862, "grad_norm": 9.660861015319824, "learning_rate": 9.566278739710475e-05, "loss": 0.15249955654144287, "step": 15290 }, { "epoch": 4.342889582741981, "grad_norm": 14.300312042236328, "learning_rate": 9.565994890718139e-05, "loss": 0.16629996299743652, "step": 15300 }, { "epoch": 4.3457280726653424, "grad_norm": 14.198461532592773, "learning_rate": 9.565711041725803e-05, "loss": 0.15525127649307252, "step": 15310 }, { "epoch": 4.348566562588703, "grad_norm": 17.374862670898438, "learning_rate": 9.565427192733466e-05, "loss": 0.1522218942642212, "step": 15320 }, { "epoch": 4.351405052512064, "grad_norm": 11.194106101989746, "learning_rate": 9.56514334374113e-05, "loss": 0.14212888479232788, "step": 15330 }, { "epoch": 4.354243542435424, "grad_norm": 9.271810531616211, "learning_rate": 9.564859494748795e-05, "loss": 0.13489923477172852, "step": 15340 }, { "epoch": 4.357082032358785, "grad_norm": 14.644876480102539, "learning_rate": 9.564575645756458e-05, "loss": 0.11552230119705201, "step": 15350 }, { "epoch": 4.359920522282146, "grad_norm": 13.42244815826416, "learning_rate": 9.564291796764122e-05, "loss": 0.14517542123794555, "step": 15360 }, { "epoch": 4.362759012205506, "grad_norm": 17.174306869506836, "learning_rate": 9.564007947771786e-05, "loss": 0.14905043840408325, "step": 15370 }, { "epoch": 4.3655975021288675, "grad_norm": 14.001357078552246, "learning_rate": 9.563724098779449e-05, "loss": 0.1530600070953369, "step": 15380 }, { "epoch": 4.368435992052229, "grad_norm": 20.78911018371582, "learning_rate": 9.563440249787113e-05, "loss": 0.16638636589050293, "step": 15390 }, { "epoch": 4.371274481975589, "grad_norm": 14.357483863830566, "learning_rate": 9.563156400794777e-05, "loss": 0.14346433877944947, "step": 15400 }, { "epoch": 4.37411297189895, "grad_norm": 12.634615898132324, "learning_rate": 9.562872551802442e-05, "loss": 0.15297379493713378, "step": 15410 }, { "epoch": 4.37695146182231, "grad_norm": 12.296581268310547, "learning_rate": 9.562588702810106e-05, "loss": 0.13567272424697877, "step": 15420 }, { "epoch": 4.379789951745671, "grad_norm": 14.986875534057617, "learning_rate": 9.56230485381777e-05, "loss": 0.1665493965148926, "step": 15430 }, { "epoch": 4.382628441669032, "grad_norm": 6.609609127044678, "learning_rate": 9.562021004825434e-05, "loss": 0.12005674839019775, "step": 15440 }, { "epoch": 4.385466931592393, "grad_norm": 11.34575366973877, "learning_rate": 9.561737155833097e-05, "loss": 0.1485649585723877, "step": 15450 }, { "epoch": 4.388305421515754, "grad_norm": 14.887136459350586, "learning_rate": 9.561453306840761e-05, "loss": 0.1593350887298584, "step": 15460 }, { "epoch": 4.391143911439114, "grad_norm": 17.065351486206055, "learning_rate": 9.561169457848425e-05, "loss": 0.1566593885421753, "step": 15470 }, { "epoch": 4.393982401362475, "grad_norm": 12.026158332824707, "learning_rate": 9.560885608856089e-05, "loss": 0.13626867532730103, "step": 15480 }, { "epoch": 4.396820891285836, "grad_norm": 13.356855392456055, "learning_rate": 9.560601759863753e-05, "loss": 0.11659452915191651, "step": 15490 }, { "epoch": 4.399659381209196, "grad_norm": 19.59092140197754, "learning_rate": 9.560317910871417e-05, "loss": 0.15149736404418945, "step": 15500 }, { "epoch": 4.399659381209196, "eval_accuracy": 0.900489603865963, "eval_loss": 0.2935667335987091, "eval_runtime": 31.6322, "eval_samples_per_second": 497.184, "eval_steps_per_second": 7.777, "step": 15500 }, { "epoch": 4.4024978711325575, "grad_norm": 11.592808723449707, "learning_rate": 9.56003406187908e-05, "loss": 0.1285654067993164, "step": 15510 }, { "epoch": 4.405336361055919, "grad_norm": 9.41304874420166, "learning_rate": 9.559750212886744e-05, "loss": 0.13084110021591186, "step": 15520 }, { "epoch": 4.408174850979279, "grad_norm": 11.23768138885498, "learning_rate": 9.559466363894408e-05, "loss": 0.1178362488746643, "step": 15530 }, { "epoch": 4.41101334090264, "grad_norm": 10.919793128967285, "learning_rate": 9.559182514902073e-05, "loss": 0.16446350812911986, "step": 15540 }, { "epoch": 4.413851830826, "grad_norm": 17.316804885864258, "learning_rate": 9.558898665909737e-05, "loss": 0.1307805061340332, "step": 15550 }, { "epoch": 4.416690320749361, "grad_norm": 15.07625675201416, "learning_rate": 9.558614816917401e-05, "loss": 0.15423996448516847, "step": 15560 }, { "epoch": 4.419528810672722, "grad_norm": 18.693843841552734, "learning_rate": 9.558330967925065e-05, "loss": 0.18187485933303832, "step": 15570 }, { "epoch": 4.422367300596083, "grad_norm": 9.033263206481934, "learning_rate": 9.558047118932728e-05, "loss": 0.17740638256073, "step": 15580 }, { "epoch": 4.425205790519444, "grad_norm": 12.151775360107422, "learning_rate": 9.557763269940392e-05, "loss": 0.13484522104263305, "step": 15590 }, { "epoch": 4.428044280442805, "grad_norm": 17.459026336669922, "learning_rate": 9.557479420948056e-05, "loss": 0.14064306020736694, "step": 15600 }, { "epoch": 4.430882770366165, "grad_norm": 17.531124114990234, "learning_rate": 9.55719557195572e-05, "loss": 0.1496012330055237, "step": 15610 }, { "epoch": 4.433721260289526, "grad_norm": 18.614593505859375, "learning_rate": 9.556911722963384e-05, "loss": 0.1877504825592041, "step": 15620 }, { "epoch": 4.436559750212886, "grad_norm": 15.525225639343262, "learning_rate": 9.556627873971048e-05, "loss": 0.16716893911361694, "step": 15630 }, { "epoch": 4.4393982401362475, "grad_norm": 11.920730590820312, "learning_rate": 9.556344024978711e-05, "loss": 0.17789096832275392, "step": 15640 }, { "epoch": 4.442236730059609, "grad_norm": 14.344074249267578, "learning_rate": 9.556060175986375e-05, "loss": 0.1514929175376892, "step": 15650 }, { "epoch": 4.445075219982969, "grad_norm": 14.779162406921387, "learning_rate": 9.55577632699404e-05, "loss": 0.12468938827514649, "step": 15660 }, { "epoch": 4.44791370990633, "grad_norm": 10.21207332611084, "learning_rate": 9.555492478001704e-05, "loss": 0.13630890846252441, "step": 15670 }, { "epoch": 4.45075219982969, "grad_norm": 13.229500770568848, "learning_rate": 9.555208629009368e-05, "loss": 0.1446927785873413, "step": 15680 }, { "epoch": 4.453590689753051, "grad_norm": 8.497537612915039, "learning_rate": 9.554924780017032e-05, "loss": 0.1572495937347412, "step": 15690 }, { "epoch": 4.456429179676412, "grad_norm": 6.93145751953125, "learning_rate": 9.554640931024695e-05, "loss": 0.13637943267822267, "step": 15700 }, { "epoch": 4.459267669599773, "grad_norm": 8.381741523742676, "learning_rate": 9.554357082032359e-05, "loss": 0.15186114311218263, "step": 15710 }, { "epoch": 4.462106159523134, "grad_norm": 15.00367546081543, "learning_rate": 9.554073233040023e-05, "loss": 0.15030059814453126, "step": 15720 }, { "epoch": 4.464944649446495, "grad_norm": 17.346628189086914, "learning_rate": 9.553789384047687e-05, "loss": 0.1418553590774536, "step": 15730 }, { "epoch": 4.467783139369855, "grad_norm": 13.140084266662598, "learning_rate": 9.553505535055351e-05, "loss": 0.12391524314880371, "step": 15740 }, { "epoch": 4.470621629293216, "grad_norm": 9.749346733093262, "learning_rate": 9.553221686063015e-05, "loss": 0.12974709272384644, "step": 15750 }, { "epoch": 4.473460119216576, "grad_norm": 10.463181495666504, "learning_rate": 9.55293783707068e-05, "loss": 0.17821048498153685, "step": 15760 }, { "epoch": 4.4762986091399375, "grad_norm": 7.689849376678467, "learning_rate": 9.552653988078342e-05, "loss": 0.1456715941429138, "step": 15770 }, { "epoch": 4.479137099063299, "grad_norm": 16.416494369506836, "learning_rate": 9.552370139086006e-05, "loss": 0.15434129238128663, "step": 15780 }, { "epoch": 4.481975588986659, "grad_norm": 12.501565933227539, "learning_rate": 9.55208629009367e-05, "loss": 0.15272268056869506, "step": 15790 }, { "epoch": 4.48481407891002, "grad_norm": 16.444988250732422, "learning_rate": 9.551802441101333e-05, "loss": 0.144976806640625, "step": 15800 }, { "epoch": 4.487652568833381, "grad_norm": 12.045256614685059, "learning_rate": 9.551518592108999e-05, "loss": 0.13219764232635497, "step": 15810 }, { "epoch": 4.490491058756741, "grad_norm": 13.199865341186523, "learning_rate": 9.551234743116663e-05, "loss": 0.17169543504714965, "step": 15820 }, { "epoch": 4.493329548680102, "grad_norm": 18.176637649536133, "learning_rate": 9.550950894124326e-05, "loss": 0.16755325794219972, "step": 15830 }, { "epoch": 4.496168038603463, "grad_norm": 11.754714965820312, "learning_rate": 9.55066704513199e-05, "loss": 0.16987472772598267, "step": 15840 }, { "epoch": 4.499006528526824, "grad_norm": 17.328136444091797, "learning_rate": 9.550383196139654e-05, "loss": 0.1631556987762451, "step": 15850 }, { "epoch": 4.501845018450185, "grad_norm": 10.793828010559082, "learning_rate": 9.550099347147318e-05, "loss": 0.11724253892898559, "step": 15860 }, { "epoch": 4.504683508373545, "grad_norm": 18.185853958129883, "learning_rate": 9.549815498154982e-05, "loss": 0.16927456855773926, "step": 15870 }, { "epoch": 4.507521998296906, "grad_norm": 18.749469757080078, "learning_rate": 9.549531649162646e-05, "loss": 0.1657538652420044, "step": 15880 }, { "epoch": 4.510360488220266, "grad_norm": 14.60150146484375, "learning_rate": 9.54924780017031e-05, "loss": 0.13404953479766846, "step": 15890 }, { "epoch": 4.5131989781436275, "grad_norm": 11.472158432006836, "learning_rate": 9.548963951177973e-05, "loss": 0.15821380615234376, "step": 15900 }, { "epoch": 4.516037468066989, "grad_norm": 11.78376293182373, "learning_rate": 9.548680102185638e-05, "loss": 0.16594191789627075, "step": 15910 }, { "epoch": 4.518875957990349, "grad_norm": 15.846101760864258, "learning_rate": 9.548396253193302e-05, "loss": 0.12899974584579468, "step": 15920 }, { "epoch": 4.52171444791371, "grad_norm": 11.677336692810059, "learning_rate": 9.548112404200964e-05, "loss": 0.13508477210998535, "step": 15930 }, { "epoch": 4.524552937837071, "grad_norm": 11.68274974822998, "learning_rate": 9.54782855520863e-05, "loss": 0.10200138092041015, "step": 15940 }, { "epoch": 4.527391427760431, "grad_norm": 14.795760154724121, "learning_rate": 9.547544706216294e-05, "loss": 0.14780919551849364, "step": 15950 }, { "epoch": 4.530229917683792, "grad_norm": 10.435012817382812, "learning_rate": 9.547260857223957e-05, "loss": 0.1737540364265442, "step": 15960 }, { "epoch": 4.5330684076071535, "grad_norm": 10.23220157623291, "learning_rate": 9.546977008231621e-05, "loss": 0.12276431322097778, "step": 15970 }, { "epoch": 4.535906897530514, "grad_norm": 8.545750617980957, "learning_rate": 9.546693159239285e-05, "loss": 0.1404184579849243, "step": 15980 }, { "epoch": 4.538745387453875, "grad_norm": 17.83550262451172, "learning_rate": 9.546409310246949e-05, "loss": 0.1576627016067505, "step": 15990 }, { "epoch": 4.541583877377235, "grad_norm": 19.00641632080078, "learning_rate": 9.546125461254612e-05, "loss": 0.14220670461654664, "step": 16000 }, { "epoch": 4.541583877377235, "eval_accuracy": 0.9019520569720862, "eval_loss": 0.2884344756603241, "eval_runtime": 31.391, "eval_samples_per_second": 501.004, "eval_steps_per_second": 7.837, "step": 16000 }, { "epoch": 4.544422367300596, "grad_norm": 13.240455627441406, "learning_rate": 9.545841612262278e-05, "loss": 0.1689929962158203, "step": 16010 }, { "epoch": 4.547260857223957, "grad_norm": 19.55441665649414, "learning_rate": 9.545557763269942e-05, "loss": 0.14297375679016114, "step": 16020 }, { "epoch": 4.5500993471473175, "grad_norm": 10.37378978729248, "learning_rate": 9.545273914277604e-05, "loss": 0.14217883348464966, "step": 16030 }, { "epoch": 4.552937837070679, "grad_norm": 22.87603759765625, "learning_rate": 9.544990065285269e-05, "loss": 0.17013124227523804, "step": 16040 }, { "epoch": 4.555776326994039, "grad_norm": 16.714683532714844, "learning_rate": 9.544706216292933e-05, "loss": 0.18330156803131104, "step": 16050 }, { "epoch": 4.5586148169174, "grad_norm": 12.552401542663574, "learning_rate": 9.544422367300596e-05, "loss": 0.13127771615982056, "step": 16060 }, { "epoch": 4.561453306840761, "grad_norm": 20.79747772216797, "learning_rate": 9.544138518308261e-05, "loss": 0.1300564408302307, "step": 16070 }, { "epoch": 4.564291796764121, "grad_norm": 18.15772819519043, "learning_rate": 9.543854669315925e-05, "loss": 0.15373085737228392, "step": 16080 }, { "epoch": 4.567130286687482, "grad_norm": 12.785666465759277, "learning_rate": 9.543570820323588e-05, "loss": 0.16391913890838622, "step": 16090 }, { "epoch": 4.5699687766108426, "grad_norm": 7.867143630981445, "learning_rate": 9.543286971331252e-05, "loss": 0.14312713146209716, "step": 16100 }, { "epoch": 4.572807266534204, "grad_norm": 12.859415054321289, "learning_rate": 9.543003122338916e-05, "loss": 0.13727431297302245, "step": 16110 }, { "epoch": 4.575645756457565, "grad_norm": 16.947664260864258, "learning_rate": 9.54271927334658e-05, "loss": 0.14375555515289307, "step": 16120 }, { "epoch": 4.578484246380925, "grad_norm": 10.144893646240234, "learning_rate": 9.542435424354243e-05, "loss": 0.13479676246643066, "step": 16130 }, { "epoch": 4.581322736304286, "grad_norm": 15.607450485229492, "learning_rate": 9.542151575361909e-05, "loss": 0.15900163650512694, "step": 16140 }, { "epoch": 4.584161226227647, "grad_norm": 10.937230110168457, "learning_rate": 9.541867726369573e-05, "loss": 0.15017277002334595, "step": 16150 }, { "epoch": 4.586999716151007, "grad_norm": 13.372411727905273, "learning_rate": 9.541583877377236e-05, "loss": 0.16198084354400635, "step": 16160 }, { "epoch": 4.5898382060743685, "grad_norm": 16.571212768554688, "learning_rate": 9.5413000283849e-05, "loss": 0.13171954154968263, "step": 16170 }, { "epoch": 4.59267669599773, "grad_norm": 13.738970756530762, "learning_rate": 9.541016179392564e-05, "loss": 0.15878405570983886, "step": 16180 }, { "epoch": 4.59551518592109, "grad_norm": 18.799890518188477, "learning_rate": 9.540732330400227e-05, "loss": 0.15071109533309937, "step": 16190 }, { "epoch": 4.598353675844451, "grad_norm": 24.42254638671875, "learning_rate": 9.540448481407891e-05, "loss": 0.17055392265319824, "step": 16200 }, { "epoch": 4.601192165767811, "grad_norm": 13.94593334197998, "learning_rate": 9.540164632415556e-05, "loss": 0.12507585287094117, "step": 16210 }, { "epoch": 4.604030655691172, "grad_norm": 12.092589378356934, "learning_rate": 9.539880783423219e-05, "loss": 0.12754325866699218, "step": 16220 }, { "epoch": 4.606869145614533, "grad_norm": 9.539417266845703, "learning_rate": 9.539596934430883e-05, "loss": 0.15025877952575684, "step": 16230 }, { "epoch": 4.609707635537894, "grad_norm": 15.437283515930176, "learning_rate": 9.539313085438547e-05, "loss": 0.1816194772720337, "step": 16240 }, { "epoch": 4.612546125461255, "grad_norm": 17.008625030517578, "learning_rate": 9.539029236446211e-05, "loss": 0.18534044027328492, "step": 16250 }, { "epoch": 4.615384615384615, "grad_norm": 16.05769157409668, "learning_rate": 9.538745387453874e-05, "loss": 0.12883687019348145, "step": 16260 }, { "epoch": 4.618223105307976, "grad_norm": 13.185182571411133, "learning_rate": 9.53846153846154e-05, "loss": 0.12575019598007203, "step": 16270 }, { "epoch": 4.621061595231337, "grad_norm": 7.532161235809326, "learning_rate": 9.538177689469204e-05, "loss": 0.11277498006820678, "step": 16280 }, { "epoch": 4.623900085154697, "grad_norm": 12.064852714538574, "learning_rate": 9.537893840476867e-05, "loss": 0.1498566150665283, "step": 16290 }, { "epoch": 4.6267385750780585, "grad_norm": 15.776162147521973, "learning_rate": 9.537609991484531e-05, "loss": 0.2174987554550171, "step": 16300 }, { "epoch": 4.62957706500142, "grad_norm": 6.656815528869629, "learning_rate": 9.537326142492195e-05, "loss": 0.11988822221755982, "step": 16310 }, { "epoch": 4.63241555492478, "grad_norm": 10.42570686340332, "learning_rate": 9.537042293499858e-05, "loss": 0.153936505317688, "step": 16320 }, { "epoch": 4.635254044848141, "grad_norm": 10.324726104736328, "learning_rate": 9.536758444507522e-05, "loss": 0.13733071088790894, "step": 16330 }, { "epoch": 4.638092534771501, "grad_norm": 16.021486282348633, "learning_rate": 9.536474595515187e-05, "loss": 0.1552325129508972, "step": 16340 }, { "epoch": 4.640931024694862, "grad_norm": 10.999706268310547, "learning_rate": 9.53619074652285e-05, "loss": 0.1193424105644226, "step": 16350 }, { "epoch": 4.643769514618223, "grad_norm": 7.4361138343811035, "learning_rate": 9.535906897530514e-05, "loss": 0.12221949100494385, "step": 16360 }, { "epoch": 4.646608004541584, "grad_norm": 23.843963623046875, "learning_rate": 9.535623048538178e-05, "loss": 0.18717553615570068, "step": 16370 }, { "epoch": 4.649446494464945, "grad_norm": 9.559157371520996, "learning_rate": 9.535339199545843e-05, "loss": 0.11549685001373292, "step": 16380 }, { "epoch": 4.652284984388306, "grad_norm": 9.604438781738281, "learning_rate": 9.535055350553505e-05, "loss": 0.1513538956642151, "step": 16390 }, { "epoch": 4.655123474311666, "grad_norm": 12.756381034851074, "learning_rate": 9.53477150156117e-05, "loss": 0.14272687435150147, "step": 16400 }, { "epoch": 4.657961964235027, "grad_norm": 13.752169609069824, "learning_rate": 9.534487652568835e-05, "loss": 0.13836480379104615, "step": 16410 }, { "epoch": 4.660800454158387, "grad_norm": 10.84335708618164, "learning_rate": 9.534203803576498e-05, "loss": 0.16139088869094848, "step": 16420 }, { "epoch": 4.6636389440817485, "grad_norm": 18.47174072265625, "learning_rate": 9.533919954584162e-05, "loss": 0.15852797031402588, "step": 16430 }, { "epoch": 4.66647743400511, "grad_norm": 23.645231246948242, "learning_rate": 9.533636105591826e-05, "loss": 0.14505040645599365, "step": 16440 }, { "epoch": 4.66931592392847, "grad_norm": 8.398719787597656, "learning_rate": 9.533352256599489e-05, "loss": 0.15253835916519165, "step": 16450 }, { "epoch": 4.672154413851831, "grad_norm": 14.186738967895508, "learning_rate": 9.533068407607153e-05, "loss": 0.1989857077598572, "step": 16460 }, { "epoch": 4.674992903775191, "grad_norm": 10.147993087768555, "learning_rate": 9.532784558614818e-05, "loss": 0.1429014563560486, "step": 16470 }, { "epoch": 4.677831393698552, "grad_norm": 16.096384048461914, "learning_rate": 9.532500709622481e-05, "loss": 0.14322514533996583, "step": 16480 }, { "epoch": 4.680669883621913, "grad_norm": 18.274959564208984, "learning_rate": 9.532216860630145e-05, "loss": 0.1792846441268921, "step": 16490 }, { "epoch": 4.683508373545274, "grad_norm": 12.338505744934082, "learning_rate": 9.53193301163781e-05, "loss": 0.18460575342178345, "step": 16500 }, { "epoch": 4.683508373545274, "eval_accuracy": 0.9016341323837985, "eval_loss": 0.28733178973197937, "eval_runtime": 30.4821, "eval_samples_per_second": 515.942, "eval_steps_per_second": 8.07, "step": 16500 }, { "epoch": 4.686346863468635, "grad_norm": 12.89003849029541, "learning_rate": 9.531649162645474e-05, "loss": 0.1602902889251709, "step": 16510 }, { "epoch": 4.689185353391996, "grad_norm": 19.42878532409668, "learning_rate": 9.531365313653136e-05, "loss": 0.16873326301574706, "step": 16520 }, { "epoch": 4.692023843315356, "grad_norm": 16.303058624267578, "learning_rate": 9.5310814646608e-05, "loss": 0.1738981008529663, "step": 16530 }, { "epoch": 4.694862333238717, "grad_norm": 15.617766380310059, "learning_rate": 9.530797615668465e-05, "loss": 0.19199310541152953, "step": 16540 }, { "epoch": 4.697700823162078, "grad_norm": 19.003225326538086, "learning_rate": 9.530513766676129e-05, "loss": 0.12804521322250367, "step": 16550 }, { "epoch": 4.7005393130854385, "grad_norm": 11.42994499206543, "learning_rate": 9.530229917683793e-05, "loss": 0.17322299480438233, "step": 16560 }, { "epoch": 4.7033778030088, "grad_norm": 14.426518440246582, "learning_rate": 9.529946068691457e-05, "loss": 0.16375750303268433, "step": 16570 }, { "epoch": 4.70621629293216, "grad_norm": 18.268091201782227, "learning_rate": 9.52966221969912e-05, "loss": 0.13991639614105225, "step": 16580 }, { "epoch": 4.709054782855521, "grad_norm": 15.145362854003906, "learning_rate": 9.529378370706784e-05, "loss": 0.14734928607940673, "step": 16590 }, { "epoch": 4.711893272778882, "grad_norm": 12.734895706176758, "learning_rate": 9.529094521714448e-05, "loss": 0.18087154626846313, "step": 16600 }, { "epoch": 4.714731762702242, "grad_norm": 13.470159530639648, "learning_rate": 9.528810672722112e-05, "loss": 0.1604411482810974, "step": 16610 }, { "epoch": 4.717570252625603, "grad_norm": 14.53005599975586, "learning_rate": 9.528526823729776e-05, "loss": 0.13916300535202025, "step": 16620 }, { "epoch": 4.720408742548964, "grad_norm": 19.849550247192383, "learning_rate": 9.52824297473744e-05, "loss": 0.1767083764076233, "step": 16630 }, { "epoch": 4.723247232472325, "grad_norm": 8.63602066040039, "learning_rate": 9.527959125745103e-05, "loss": 0.1522284507751465, "step": 16640 }, { "epoch": 4.726085722395686, "grad_norm": 10.36530590057373, "learning_rate": 9.527675276752767e-05, "loss": 0.14065526723861693, "step": 16650 }, { "epoch": 4.728924212319046, "grad_norm": 9.742255210876465, "learning_rate": 9.527391427760432e-05, "loss": 0.14950262308120726, "step": 16660 }, { "epoch": 4.731762702242407, "grad_norm": 10.825611114501953, "learning_rate": 9.527107578768096e-05, "loss": 0.15421334505081177, "step": 16670 }, { "epoch": 4.734601192165767, "grad_norm": 6.3446946144104, "learning_rate": 9.52682372977576e-05, "loss": 0.11357299089431763, "step": 16680 }, { "epoch": 4.7374396820891285, "grad_norm": 12.798687934875488, "learning_rate": 9.526539880783424e-05, "loss": 0.13540432453155518, "step": 16690 }, { "epoch": 4.74027817201249, "grad_norm": 25.06239891052246, "learning_rate": 9.526256031791088e-05, "loss": 0.16007757186889648, "step": 16700 }, { "epoch": 4.74311666193585, "grad_norm": 16.52006721496582, "learning_rate": 9.525972182798751e-05, "loss": 0.15668725967407227, "step": 16710 }, { "epoch": 4.745955151859211, "grad_norm": 22.51520538330078, "learning_rate": 9.525688333806415e-05, "loss": 0.1538001298904419, "step": 16720 }, { "epoch": 4.748793641782572, "grad_norm": 27.855426788330078, "learning_rate": 9.525404484814079e-05, "loss": 0.1712619423866272, "step": 16730 }, { "epoch": 4.751632131705932, "grad_norm": 20.748098373413086, "learning_rate": 9.525120635821743e-05, "loss": 0.12831650972366332, "step": 16740 }, { "epoch": 4.754470621629293, "grad_norm": 12.2571439743042, "learning_rate": 9.524836786829407e-05, "loss": 0.1558891773223877, "step": 16750 }, { "epoch": 4.7573091115526545, "grad_norm": 8.167339324951172, "learning_rate": 9.524552937837072e-05, "loss": 0.11506112813949584, "step": 16760 }, { "epoch": 4.760147601476015, "grad_norm": 15.929369926452637, "learning_rate": 9.524269088844734e-05, "loss": 0.12471755743026733, "step": 16770 }, { "epoch": 4.762986091399376, "grad_norm": 30.671676635742188, "learning_rate": 9.523985239852399e-05, "loss": 0.174582839012146, "step": 16780 }, { "epoch": 4.765824581322736, "grad_norm": 14.295692443847656, "learning_rate": 9.523701390860063e-05, "loss": 0.15542949438095094, "step": 16790 }, { "epoch": 4.768663071246097, "grad_norm": 12.083245277404785, "learning_rate": 9.523417541867727e-05, "loss": 0.15535662174224854, "step": 16800 }, { "epoch": 4.771501561169458, "grad_norm": 6.681707859039307, "learning_rate": 9.523133692875391e-05, "loss": 0.140674364566803, "step": 16810 }, { "epoch": 4.7743400510928184, "grad_norm": 22.708444595336914, "learning_rate": 9.522849843883055e-05, "loss": 0.11735382080078124, "step": 16820 }, { "epoch": 4.77717854101618, "grad_norm": 14.901098251342773, "learning_rate": 9.522565994890719e-05, "loss": 0.1418764591217041, "step": 16830 }, { "epoch": 4.78001703093954, "grad_norm": 10.739974021911621, "learning_rate": 9.522282145898382e-05, "loss": 0.15952656269073487, "step": 16840 }, { "epoch": 4.782855520862901, "grad_norm": 10.795823097229004, "learning_rate": 9.521998296906046e-05, "loss": 0.1389915704727173, "step": 16850 }, { "epoch": 4.785694010786262, "grad_norm": 20.781103134155273, "learning_rate": 9.52171444791371e-05, "loss": 0.1436087965965271, "step": 16860 }, { "epoch": 4.788532500709622, "grad_norm": 13.771445274353027, "learning_rate": 9.521430598921374e-05, "loss": 0.13742592334747314, "step": 16870 }, { "epoch": 4.791370990632983, "grad_norm": 7.738908767700195, "learning_rate": 9.521146749929039e-05, "loss": 0.16615771055221557, "step": 16880 }, { "epoch": 4.7942094805563436, "grad_norm": 8.936503410339355, "learning_rate": 9.520862900936703e-05, "loss": 0.13377295732498168, "step": 16890 }, { "epoch": 4.797047970479705, "grad_norm": 11.112290382385254, "learning_rate": 9.520579051944365e-05, "loss": 0.1536064028739929, "step": 16900 }, { "epoch": 4.799886460403066, "grad_norm": 14.887866973876953, "learning_rate": 9.52029520295203e-05, "loss": 0.20455670356750488, "step": 16910 }, { "epoch": 4.802724950326426, "grad_norm": 11.61521053314209, "learning_rate": 9.520011353959694e-05, "loss": 0.1540137529373169, "step": 16920 }, { "epoch": 4.805563440249787, "grad_norm": 8.329435348510742, "learning_rate": 9.519727504967358e-05, "loss": 0.13170146942138672, "step": 16930 }, { "epoch": 4.808401930173148, "grad_norm": 15.267801284790039, "learning_rate": 9.519443655975022e-05, "loss": 0.12724708318710326, "step": 16940 }, { "epoch": 4.811240420096508, "grad_norm": 14.530689239501953, "learning_rate": 9.519159806982686e-05, "loss": 0.12465198040008545, "step": 16950 }, { "epoch": 4.8140789100198695, "grad_norm": 13.377323150634766, "learning_rate": 9.51887595799035e-05, "loss": 0.12636666297912597, "step": 16960 }, { "epoch": 4.816917399943231, "grad_norm": 16.755334854125977, "learning_rate": 9.518592108998013e-05, "loss": 0.11700376272201538, "step": 16970 }, { "epoch": 4.819755889866591, "grad_norm": 11.901659965515137, "learning_rate": 9.518308260005677e-05, "loss": 0.15317986011505128, "step": 16980 }, { "epoch": 4.822594379789952, "grad_norm": 15.167632102966309, "learning_rate": 9.518024411013341e-05, "loss": 0.15732300281524658, "step": 16990 }, { "epoch": 4.825432869713312, "grad_norm": 10.112268447875977, "learning_rate": 9.517740562021005e-05, "loss": 0.15507109165191652, "step": 17000 }, { "epoch": 4.825432869713312, "eval_accuracy": 0.9168309277039486, "eval_loss": 0.2476796805858612, "eval_runtime": 30.1905, "eval_samples_per_second": 520.925, "eval_steps_per_second": 8.148, "step": 17000 }, { "epoch": 4.828271359636673, "grad_norm": 15.758947372436523, "learning_rate": 9.51745671302867e-05, "loss": 0.1439222812652588, "step": 17010 }, { "epoch": 4.831109849560034, "grad_norm": 12.99755573272705, "learning_rate": 9.517172864036334e-05, "loss": 0.12416312694549561, "step": 17020 }, { "epoch": 4.833948339483395, "grad_norm": 11.258211135864258, "learning_rate": 9.516889015043997e-05, "loss": 0.14969966411590577, "step": 17030 }, { "epoch": 4.836786829406756, "grad_norm": 8.632542610168457, "learning_rate": 9.516605166051661e-05, "loss": 0.14188437461853026, "step": 17040 }, { "epoch": 4.839625319330116, "grad_norm": 12.363591194152832, "learning_rate": 9.516321317059325e-05, "loss": 0.12322738170623779, "step": 17050 }, { "epoch": 4.842463809253477, "grad_norm": 14.226353645324707, "learning_rate": 9.516037468066989e-05, "loss": 0.11890636682510376, "step": 17060 }, { "epoch": 4.845302299176838, "grad_norm": 7.976502418518066, "learning_rate": 9.515753619074653e-05, "loss": 0.13820751905441284, "step": 17070 }, { "epoch": 4.848140789100198, "grad_norm": 9.916215896606445, "learning_rate": 9.515469770082317e-05, "loss": 0.12979166507720946, "step": 17080 }, { "epoch": 4.8509792790235595, "grad_norm": 14.709095001220703, "learning_rate": 9.515185921089981e-05, "loss": 0.1539337158203125, "step": 17090 }, { "epoch": 4.85381776894692, "grad_norm": 16.777894973754883, "learning_rate": 9.514902072097644e-05, "loss": 0.13372565507888795, "step": 17100 }, { "epoch": 4.856656258870281, "grad_norm": 16.204736709594727, "learning_rate": 9.514618223105308e-05, "loss": 0.18906499147415162, "step": 17110 }, { "epoch": 4.859494748793642, "grad_norm": 15.491095542907715, "learning_rate": 9.514334374112972e-05, "loss": 0.1642136812210083, "step": 17120 }, { "epoch": 4.862333238717002, "grad_norm": 16.478290557861328, "learning_rate": 9.514050525120635e-05, "loss": 0.16236262321472167, "step": 17130 }, { "epoch": 4.865171728640363, "grad_norm": 6.901222229003906, "learning_rate": 9.513766676128301e-05, "loss": 0.2116683006286621, "step": 17140 }, { "epoch": 4.868010218563724, "grad_norm": 8.407983779907227, "learning_rate": 9.513482827135965e-05, "loss": 0.12644619941711427, "step": 17150 }, { "epoch": 4.870848708487085, "grad_norm": 12.428938865661621, "learning_rate": 9.513198978143628e-05, "loss": 0.14198888540267945, "step": 17160 }, { "epoch": 4.873687198410446, "grad_norm": 11.895127296447754, "learning_rate": 9.512915129151292e-05, "loss": 0.17176514863967896, "step": 17170 }, { "epoch": 4.876525688333807, "grad_norm": 12.704463958740234, "learning_rate": 9.512631280158956e-05, "loss": 0.15700401067733766, "step": 17180 }, { "epoch": 4.879364178257167, "grad_norm": 9.89565372467041, "learning_rate": 9.51234743116662e-05, "loss": 0.13275704383850098, "step": 17190 }, { "epoch": 4.882202668180528, "grad_norm": 10.422303199768066, "learning_rate": 9.512063582174284e-05, "loss": 0.13211022615432738, "step": 17200 }, { "epoch": 4.885041158103888, "grad_norm": 22.244260787963867, "learning_rate": 9.511779733181948e-05, "loss": 0.17186893224716188, "step": 17210 }, { "epoch": 4.8878796480272495, "grad_norm": 12.746564865112305, "learning_rate": 9.511495884189612e-05, "loss": 0.10704275369644164, "step": 17220 }, { "epoch": 4.890718137950611, "grad_norm": 11.201715469360352, "learning_rate": 9.511212035197275e-05, "loss": 0.13733270168304443, "step": 17230 }, { "epoch": 4.893556627873971, "grad_norm": 11.69204330444336, "learning_rate": 9.51092818620494e-05, "loss": 0.13713716268539428, "step": 17240 }, { "epoch": 4.896395117797332, "grad_norm": 8.279518127441406, "learning_rate": 9.510644337212603e-05, "loss": 0.08992868661880493, "step": 17250 }, { "epoch": 4.899233607720692, "grad_norm": 16.476802825927734, "learning_rate": 9.510360488220266e-05, "loss": 0.12493051290512085, "step": 17260 }, { "epoch": 4.902072097644053, "grad_norm": 15.440129280090332, "learning_rate": 9.510076639227932e-05, "loss": 0.14535372257232665, "step": 17270 }, { "epoch": 4.904910587567414, "grad_norm": 15.233850479125977, "learning_rate": 9.509792790235596e-05, "loss": 0.1533084034919739, "step": 17280 }, { "epoch": 4.907749077490775, "grad_norm": 7.581547260284424, "learning_rate": 9.509508941243259e-05, "loss": 0.1507856249809265, "step": 17290 }, { "epoch": 4.910587567414136, "grad_norm": 7.301828384399414, "learning_rate": 9.509225092250923e-05, "loss": 0.11710257530212402, "step": 17300 }, { "epoch": 4.913426057337497, "grad_norm": 16.295379638671875, "learning_rate": 9.508941243258587e-05, "loss": 0.14207034111022948, "step": 17310 }, { "epoch": 4.916264547260857, "grad_norm": 15.208168029785156, "learning_rate": 9.508657394266251e-05, "loss": 0.2113130807876587, "step": 17320 }, { "epoch": 4.919103037184218, "grad_norm": 17.72686004638672, "learning_rate": 9.508373545273914e-05, "loss": 0.1869381308555603, "step": 17330 }, { "epoch": 4.921941527107579, "grad_norm": 6.089931964874268, "learning_rate": 9.50808969628158e-05, "loss": 0.14198582172393798, "step": 17340 }, { "epoch": 4.9247800170309395, "grad_norm": 18.309528350830078, "learning_rate": 9.507805847289242e-05, "loss": 0.13088276386260986, "step": 17350 }, { "epoch": 4.927618506954301, "grad_norm": 10.71265697479248, "learning_rate": 9.507521998296906e-05, "loss": 0.14974571466445924, "step": 17360 }, { "epoch": 4.930456996877661, "grad_norm": 17.39009666442871, "learning_rate": 9.50723814930457e-05, "loss": 0.1483999013900757, "step": 17370 }, { "epoch": 4.933295486801022, "grad_norm": 14.069708824157715, "learning_rate": 9.506954300312235e-05, "loss": 0.1300961494445801, "step": 17380 }, { "epoch": 4.936133976724383, "grad_norm": 16.622167587280273, "learning_rate": 9.506670451319897e-05, "loss": 0.1281723976135254, "step": 17390 }, { "epoch": 4.938972466647743, "grad_norm": 11.97647476196289, "learning_rate": 9.506386602327563e-05, "loss": 0.12952659130096436, "step": 17400 }, { "epoch": 4.941810956571104, "grad_norm": 16.319665908813477, "learning_rate": 9.506102753335227e-05, "loss": 0.10434367656707763, "step": 17410 }, { "epoch": 4.944649446494465, "grad_norm": 15.70507526397705, "learning_rate": 9.50581890434289e-05, "loss": 0.12239481210708618, "step": 17420 }, { "epoch": 4.947487936417826, "grad_norm": 15.570923805236816, "learning_rate": 9.505535055350554e-05, "loss": 0.1408989191055298, "step": 17430 }, { "epoch": 4.950326426341187, "grad_norm": 9.592642784118652, "learning_rate": 9.505251206358218e-05, "loss": 0.15614187717437744, "step": 17440 }, { "epoch": 4.953164916264547, "grad_norm": 13.035661697387695, "learning_rate": 9.504967357365882e-05, "loss": 0.1270779013633728, "step": 17450 }, { "epoch": 4.956003406187908, "grad_norm": 9.722272872924805, "learning_rate": 9.504683508373545e-05, "loss": 0.11495968103408813, "step": 17460 }, { "epoch": 4.958841896111268, "grad_norm": 14.22462272644043, "learning_rate": 9.50439965938121e-05, "loss": 0.156838059425354, "step": 17470 }, { "epoch": 4.9616803860346295, "grad_norm": 12.47933578491211, "learning_rate": 9.504115810388873e-05, "loss": 0.14552901983261107, "step": 17480 }, { "epoch": 4.964518875957991, "grad_norm": 12.599534034729004, "learning_rate": 9.503831961396537e-05, "loss": 0.1571561336517334, "step": 17490 }, { "epoch": 4.967357365881351, "grad_norm": 9.423718452453613, "learning_rate": 9.503548112404202e-05, "loss": 0.15664663314819335, "step": 17500 }, { "epoch": 4.967357365881351, "eval_accuracy": 0.9085648884084695, "eval_loss": 0.2735145092010498, "eval_runtime": 30.6411, "eval_samples_per_second": 513.265, "eval_steps_per_second": 8.028, "step": 17500 }, { "epoch": 4.970195855804712, "grad_norm": 8.23122787475586, "learning_rate": 9.503264263411866e-05, "loss": 0.15310896635055543, "step": 17510 }, { "epoch": 4.973034345728073, "grad_norm": 11.388163566589355, "learning_rate": 9.502980414419528e-05, "loss": 0.12541829347610473, "step": 17520 }, { "epoch": 4.975872835651433, "grad_norm": 8.645771980285645, "learning_rate": 9.502696565427193e-05, "loss": 0.11650108098983765, "step": 17530 }, { "epoch": 4.978711325574794, "grad_norm": 8.968881607055664, "learning_rate": 9.502412716434858e-05, "loss": 0.160172176361084, "step": 17540 }, { "epoch": 4.9815498154981555, "grad_norm": 17.769880294799805, "learning_rate": 9.502128867442521e-05, "loss": 0.16284531354904175, "step": 17550 }, { "epoch": 4.984388305421516, "grad_norm": 15.417827606201172, "learning_rate": 9.501845018450185e-05, "loss": 0.14024698734283447, "step": 17560 }, { "epoch": 4.987226795344877, "grad_norm": 18.253232955932617, "learning_rate": 9.501561169457849e-05, "loss": 0.16308448314666749, "step": 17570 }, { "epoch": 4.990065285268237, "grad_norm": 8.247485160827637, "learning_rate": 9.501277320465512e-05, "loss": 0.1292736291885376, "step": 17580 }, { "epoch": 4.992903775191598, "grad_norm": 17.825857162475586, "learning_rate": 9.500993471473176e-05, "loss": 0.14941771030426027, "step": 17590 }, { "epoch": 4.995742265114959, "grad_norm": 17.01661491394043, "learning_rate": 9.500709622480842e-05, "loss": 0.14126372337341309, "step": 17600 }, { "epoch": 4.9985807550383194, "grad_norm": 11.892715454101562, "learning_rate": 9.500425773488504e-05, "loss": 0.1165812611579895, "step": 17610 }, { "epoch": 5.0014192449616806, "grad_norm": 13.998254776000977, "learning_rate": 9.500141924496168e-05, "loss": 0.1303948163986206, "step": 17620 }, { "epoch": 5.004257734885041, "grad_norm": 13.037029266357422, "learning_rate": 9.499858075503833e-05, "loss": 0.10122346878051758, "step": 17630 }, { "epoch": 5.007096224808402, "grad_norm": 13.354364395141602, "learning_rate": 9.499574226511497e-05, "loss": 0.12721903324127198, "step": 17640 }, { "epoch": 5.009934714731763, "grad_norm": 14.000283241271973, "learning_rate": 9.49929037751916e-05, "loss": 0.11292828321456909, "step": 17650 }, { "epoch": 5.012773204655123, "grad_norm": 13.819226264953613, "learning_rate": 9.499006528526824e-05, "loss": 0.12236974239349366, "step": 17660 }, { "epoch": 5.015611694578484, "grad_norm": 13.245518684387207, "learning_rate": 9.498722679534489e-05, "loss": 0.14156126976013184, "step": 17670 }, { "epoch": 5.018450184501845, "grad_norm": 15.717375755310059, "learning_rate": 9.498438830542152e-05, "loss": 0.0959846019744873, "step": 17680 }, { "epoch": 5.021288674425206, "grad_norm": 15.254039764404297, "learning_rate": 9.498154981549816e-05, "loss": 0.10405156612396241, "step": 17690 }, { "epoch": 5.024127164348567, "grad_norm": 9.055869102478027, "learning_rate": 9.49787113255748e-05, "loss": 0.10309066772460937, "step": 17700 }, { "epoch": 5.026965654271927, "grad_norm": 8.052003860473633, "learning_rate": 9.497587283565143e-05, "loss": 0.10911027193069459, "step": 17710 }, { "epoch": 5.029804144195288, "grad_norm": 7.9866228103637695, "learning_rate": 9.497303434572807e-05, "loss": 0.13897613286972046, "step": 17720 }, { "epoch": 5.032642634118649, "grad_norm": 15.06964111328125, "learning_rate": 9.497019585580471e-05, "loss": 0.11954238414764404, "step": 17730 }, { "epoch": 5.035481124042009, "grad_norm": 10.784893035888672, "learning_rate": 9.496735736588135e-05, "loss": 0.13433692455291749, "step": 17740 }, { "epoch": 5.0383196139653705, "grad_norm": 14.187119483947754, "learning_rate": 9.4964518875958e-05, "loss": 0.10665247440338135, "step": 17750 }, { "epoch": 5.041158103888731, "grad_norm": 12.727693557739258, "learning_rate": 9.496168038603464e-05, "loss": 0.1077520489692688, "step": 17760 }, { "epoch": 5.043996593812092, "grad_norm": 11.795356750488281, "learning_rate": 9.495884189611128e-05, "loss": 0.12967787981033324, "step": 17770 }, { "epoch": 5.046835083735453, "grad_norm": 16.47637367248535, "learning_rate": 9.49560034061879e-05, "loss": 0.11486667394638062, "step": 17780 }, { "epoch": 5.049673573658813, "grad_norm": 14.412755966186523, "learning_rate": 9.495316491626455e-05, "loss": 0.12002516984939575, "step": 17790 }, { "epoch": 5.052512063582174, "grad_norm": 19.657102584838867, "learning_rate": 9.49503264263412e-05, "loss": 0.12900152206420898, "step": 17800 }, { "epoch": 5.055350553505535, "grad_norm": 10.46070384979248, "learning_rate": 9.494748793641783e-05, "loss": 0.12214690446853638, "step": 17810 }, { "epoch": 5.058189043428896, "grad_norm": 10.935361862182617, "learning_rate": 9.494464944649447e-05, "loss": 0.11936023235321044, "step": 17820 }, { "epoch": 5.061027533352257, "grad_norm": 14.286049842834473, "learning_rate": 9.494181095657111e-05, "loss": 0.12046651840209961, "step": 17830 }, { "epoch": 5.063866023275617, "grad_norm": 15.395772933959961, "learning_rate": 9.493897246664774e-05, "loss": 0.15066107511520385, "step": 17840 }, { "epoch": 5.066704513198978, "grad_norm": 14.674049377441406, "learning_rate": 9.493613397672438e-05, "loss": 0.09819283485412597, "step": 17850 }, { "epoch": 5.069543003122339, "grad_norm": 18.47867774963379, "learning_rate": 9.493329548680102e-05, "loss": 0.15087668895721434, "step": 17860 }, { "epoch": 5.072381493045699, "grad_norm": 7.503854751586914, "learning_rate": 9.493045699687766e-05, "loss": 0.1093890905380249, "step": 17870 }, { "epoch": 5.0752199829690605, "grad_norm": 12.142801284790039, "learning_rate": 9.49276185069543e-05, "loss": 0.1050212025642395, "step": 17880 }, { "epoch": 5.078058472892422, "grad_norm": 12.580880165100098, "learning_rate": 9.492478001703095e-05, "loss": 0.146125328540802, "step": 17890 }, { "epoch": 5.080896962815782, "grad_norm": 13.052600860595703, "learning_rate": 9.492194152710759e-05, "loss": 0.10773894786834717, "step": 17900 }, { "epoch": 5.083735452739143, "grad_norm": 17.98420524597168, "learning_rate": 9.491910303718422e-05, "loss": 0.1375831127166748, "step": 17910 }, { "epoch": 5.086573942662503, "grad_norm": 11.845067024230957, "learning_rate": 9.491626454726086e-05, "loss": 0.10428259372711182, "step": 17920 }, { "epoch": 5.089412432585864, "grad_norm": 9.506842613220215, "learning_rate": 9.49134260573375e-05, "loss": 0.113460111618042, "step": 17930 }, { "epoch": 5.092250922509225, "grad_norm": 10.9423246383667, "learning_rate": 9.491058756741414e-05, "loss": 0.11058430671691895, "step": 17940 }, { "epoch": 5.095089412432586, "grad_norm": 11.014501571655273, "learning_rate": 9.490774907749078e-05, "loss": 0.11267154216766358, "step": 17950 }, { "epoch": 5.097927902355947, "grad_norm": 7.578512668609619, "learning_rate": 9.490491058756742e-05, "loss": 0.1101533055305481, "step": 17960 }, { "epoch": 5.100766392279308, "grad_norm": 15.54187297821045, "learning_rate": 9.490207209764405e-05, "loss": 0.12509812116622926, "step": 17970 }, { "epoch": 5.103604882202668, "grad_norm": 12.168378829956055, "learning_rate": 9.489923360772069e-05, "loss": 0.12506475448608398, "step": 17980 }, { "epoch": 5.106443372126029, "grad_norm": 16.660964965820312, "learning_rate": 9.489639511779733e-05, "loss": 0.11895430088043213, "step": 17990 }, { "epoch": 5.109281862049389, "grad_norm": 10.629940032958984, "learning_rate": 9.489355662787398e-05, "loss": 0.08650976419448853, "step": 18000 }, { "epoch": 5.109281862049389, "eval_accuracy": 0.9173396070452089, "eval_loss": 0.24206331372261047, "eval_runtime": 31.1193, "eval_samples_per_second": 505.378, "eval_steps_per_second": 7.905, "step": 18000 }, { "epoch": 5.1121203519727505, "grad_norm": 11.882974624633789, "learning_rate": 9.489071813795062e-05, "loss": 0.13628501892089845, "step": 18010 }, { "epoch": 5.114958841896112, "grad_norm": 11.58086109161377, "learning_rate": 9.488787964802726e-05, "loss": 0.11117228269577026, "step": 18020 }, { "epoch": 5.117797331819472, "grad_norm": 5.872004985809326, "learning_rate": 9.48850411581039e-05, "loss": 0.09934443831443787, "step": 18030 }, { "epoch": 5.120635821742833, "grad_norm": 27.13557243347168, "learning_rate": 9.488220266818053e-05, "loss": 0.12184182405471802, "step": 18040 }, { "epoch": 5.123474311666193, "grad_norm": 7.560076713562012, "learning_rate": 9.487936417825717e-05, "loss": 0.11554148197174072, "step": 18050 }, { "epoch": 5.126312801589554, "grad_norm": 8.092820167541504, "learning_rate": 9.487652568833381e-05, "loss": 0.083376944065094, "step": 18060 }, { "epoch": 5.129151291512915, "grad_norm": 14.400120735168457, "learning_rate": 9.487368719841045e-05, "loss": 0.12058475017547607, "step": 18070 }, { "epoch": 5.131989781436276, "grad_norm": 7.0960469245910645, "learning_rate": 9.487084870848709e-05, "loss": 0.08904277682304382, "step": 18080 }, { "epoch": 5.134828271359637, "grad_norm": 11.378924369812012, "learning_rate": 9.486801021856373e-05, "loss": 0.11610628366470337, "step": 18090 }, { "epoch": 5.137666761282998, "grad_norm": 14.249679565429688, "learning_rate": 9.486517172864036e-05, "loss": 0.1266200304031372, "step": 18100 }, { "epoch": 5.140505251206358, "grad_norm": 13.318857192993164, "learning_rate": 9.4862333238717e-05, "loss": 0.13533298969268798, "step": 18110 }, { "epoch": 5.143343741129719, "grad_norm": 7.622172832489014, "learning_rate": 9.485949474879364e-05, "loss": 0.13788855075836182, "step": 18120 }, { "epoch": 5.146182231053079, "grad_norm": 10.998847007751465, "learning_rate": 9.485665625887029e-05, "loss": 0.11215749979019166, "step": 18130 }, { "epoch": 5.1490207209764405, "grad_norm": 7.949224472045898, "learning_rate": 9.485381776894693e-05, "loss": 0.08397881388664245, "step": 18140 }, { "epoch": 5.151859210899802, "grad_norm": 7.0520195960998535, "learning_rate": 9.485097927902357e-05, "loss": 0.08436256647109985, "step": 18150 }, { "epoch": 5.154697700823162, "grad_norm": 7.95007848739624, "learning_rate": 9.484814078910021e-05, "loss": 0.08683756589889527, "step": 18160 }, { "epoch": 5.157536190746523, "grad_norm": 16.676387786865234, "learning_rate": 9.484530229917684e-05, "loss": 0.09507114887237549, "step": 18170 }, { "epoch": 5.160374680669884, "grad_norm": 9.202858924865723, "learning_rate": 9.484246380925348e-05, "loss": 0.09739378094673157, "step": 18180 }, { "epoch": 5.163213170593244, "grad_norm": 13.040538787841797, "learning_rate": 9.483962531933012e-05, "loss": 0.136370849609375, "step": 18190 }, { "epoch": 5.166051660516605, "grad_norm": 12.115922927856445, "learning_rate": 9.483678682940676e-05, "loss": 0.11529729366302491, "step": 18200 }, { "epoch": 5.168890150439966, "grad_norm": 10.48679256439209, "learning_rate": 9.48339483394834e-05, "loss": 0.13028812408447266, "step": 18210 }, { "epoch": 5.171728640363327, "grad_norm": 14.922074317932129, "learning_rate": 9.483110984956005e-05, "loss": 0.11457020044326782, "step": 18220 }, { "epoch": 5.174567130286688, "grad_norm": 8.332785606384277, "learning_rate": 9.482827135963667e-05, "loss": 0.10092849731445312, "step": 18230 }, { "epoch": 5.177405620210048, "grad_norm": 14.685294151306152, "learning_rate": 9.482543286971331e-05, "loss": 0.118309485912323, "step": 18240 }, { "epoch": 5.180244110133409, "grad_norm": 3.95269513130188, "learning_rate": 9.482259437978996e-05, "loss": 0.09387722015380859, "step": 18250 }, { "epoch": 5.183082600056769, "grad_norm": 7.29939079284668, "learning_rate": 9.48197558898666e-05, "loss": 0.10037614107131958, "step": 18260 }, { "epoch": 5.1859210899801305, "grad_norm": 9.66841983795166, "learning_rate": 9.481691739994324e-05, "loss": 0.12975598573684693, "step": 18270 }, { "epoch": 5.188759579903492, "grad_norm": 25.23455238342285, "learning_rate": 9.481407891001988e-05, "loss": 0.18081820011138916, "step": 18280 }, { "epoch": 5.191598069826852, "grad_norm": 14.022313117980957, "learning_rate": 9.481124042009651e-05, "loss": 0.11226980686187744, "step": 18290 }, { "epoch": 5.194436559750213, "grad_norm": 4.847559928894043, "learning_rate": 9.480840193017315e-05, "loss": 0.09739605188369752, "step": 18300 }, { "epoch": 5.197275049673574, "grad_norm": 14.528687477111816, "learning_rate": 9.480556344024979e-05, "loss": 0.09656654000282287, "step": 18310 }, { "epoch": 5.200113539596934, "grad_norm": 15.67531967163086, "learning_rate": 9.480272495032643e-05, "loss": 0.11359162330627441, "step": 18320 }, { "epoch": 5.202952029520295, "grad_norm": 11.566393852233887, "learning_rate": 9.479988646040307e-05, "loss": 0.11102167367935181, "step": 18330 }, { "epoch": 5.205790519443656, "grad_norm": 18.80540657043457, "learning_rate": 9.479704797047971e-05, "loss": 0.14986209869384765, "step": 18340 }, { "epoch": 5.208629009367017, "grad_norm": 14.605559349060059, "learning_rate": 9.479420948055636e-05, "loss": 0.09588061571121216, "step": 18350 }, { "epoch": 5.211467499290378, "grad_norm": 24.594322204589844, "learning_rate": 9.479137099063298e-05, "loss": 0.16140575408935548, "step": 18360 }, { "epoch": 5.214305989213738, "grad_norm": 9.228494644165039, "learning_rate": 9.478853250070963e-05, "loss": 0.12389692068099975, "step": 18370 }, { "epoch": 5.217144479137099, "grad_norm": 11.408028602600098, "learning_rate": 9.478569401078627e-05, "loss": 0.11617081165313721, "step": 18380 }, { "epoch": 5.21998296906046, "grad_norm": 13.211589813232422, "learning_rate": 9.47828555208629e-05, "loss": 0.1088978886604309, "step": 18390 }, { "epoch": 5.22282145898382, "grad_norm": 9.789724349975586, "learning_rate": 9.478030087993189e-05, "loss": 0.10675936937332153, "step": 18400 }, { "epoch": 5.2256599489071816, "grad_norm": 5.0881266593933105, "learning_rate": 9.477746239000852e-05, "loss": 0.11985863447189331, "step": 18410 }, { "epoch": 5.228498438830542, "grad_norm": 22.814834594726562, "learning_rate": 9.477462390008516e-05, "loss": 0.13029314279556276, "step": 18420 }, { "epoch": 5.231336928753903, "grad_norm": 11.96026611328125, "learning_rate": 9.47717854101618e-05, "loss": 0.1350261926651001, "step": 18430 }, { "epoch": 5.234175418677264, "grad_norm": 9.897076606750488, "learning_rate": 9.476894692023843e-05, "loss": 0.1350799560546875, "step": 18440 }, { "epoch": 5.237013908600624, "grad_norm": 12.540698051452637, "learning_rate": 9.476610843031508e-05, "loss": 0.11492029428482056, "step": 18450 }, { "epoch": 5.239852398523985, "grad_norm": 13.65211296081543, "learning_rate": 9.476326994039172e-05, "loss": 0.12582682371139525, "step": 18460 }, { "epoch": 5.242690888447346, "grad_norm": 13.934420585632324, "learning_rate": 9.476043145046835e-05, "loss": 0.11009331941604614, "step": 18470 }, { "epoch": 5.245529378370707, "grad_norm": 13.780197143554688, "learning_rate": 9.475759296054499e-05, "loss": 0.1336338996887207, "step": 18480 }, { "epoch": 5.248367868294068, "grad_norm": 14.090015411376953, "learning_rate": 9.475475447062163e-05, "loss": 0.13067694902420043, "step": 18490 }, { "epoch": 5.251206358217428, "grad_norm": 15.602925300598145, "learning_rate": 9.475191598069827e-05, "loss": 0.12691853046417237, "step": 18500 }, { "epoch": 5.251206358217428, "eval_accuracy": 0.9096458320086476, "eval_loss": 0.270553857088089, "eval_runtime": 31.0448, "eval_samples_per_second": 506.59, "eval_steps_per_second": 7.924, "step": 18500 }, { "epoch": 5.254044848140789, "grad_norm": 14.531049728393555, "learning_rate": 9.47490774907749e-05, "loss": 0.10472745895385742, "step": 18510 }, { "epoch": 5.25688333806415, "grad_norm": 6.957664966583252, "learning_rate": 9.474623900085156e-05, "loss": 0.10247851610183716, "step": 18520 }, { "epoch": 5.25972182798751, "grad_norm": 13.993846893310547, "learning_rate": 9.47434005109282e-05, "loss": 0.14197319746017456, "step": 18530 }, { "epoch": 5.2625603179108715, "grad_norm": 21.744396209716797, "learning_rate": 9.474056202100483e-05, "loss": 0.1284420371055603, "step": 18540 }, { "epoch": 5.265398807834233, "grad_norm": 10.779120445251465, "learning_rate": 9.473772353108147e-05, "loss": 0.10145989656448365, "step": 18550 }, { "epoch": 5.268237297757593, "grad_norm": 13.402883529663086, "learning_rate": 9.473488504115811e-05, "loss": 0.13248554468154908, "step": 18560 }, { "epoch": 5.271075787680954, "grad_norm": 13.94957160949707, "learning_rate": 9.473204655123474e-05, "loss": 0.12498775720596314, "step": 18570 }, { "epoch": 5.273914277604314, "grad_norm": 8.157875061035156, "learning_rate": 9.472920806131139e-05, "loss": 0.12708642482757568, "step": 18580 }, { "epoch": 5.276752767527675, "grad_norm": 20.33916664123535, "learning_rate": 9.472636957138803e-05, "loss": 0.09488003253936768, "step": 18590 }, { "epoch": 5.279591257451036, "grad_norm": 11.896894454956055, "learning_rate": 9.472353108146466e-05, "loss": 0.1267958402633667, "step": 18600 }, { "epoch": 5.282429747374397, "grad_norm": 13.123417854309082, "learning_rate": 9.47206925915413e-05, "loss": 0.10330753326416016, "step": 18610 }, { "epoch": 5.285268237297758, "grad_norm": 10.870304107666016, "learning_rate": 9.471785410161794e-05, "loss": 0.12085975408554077, "step": 18620 }, { "epoch": 5.288106727221118, "grad_norm": 17.0112361907959, "learning_rate": 9.471501561169459e-05, "loss": 0.13956127166748047, "step": 18630 }, { "epoch": 5.290945217144479, "grad_norm": 10.220715522766113, "learning_rate": 9.471217712177121e-05, "loss": 0.12341954708099365, "step": 18640 }, { "epoch": 5.29378370706784, "grad_norm": 14.950878143310547, "learning_rate": 9.470933863184787e-05, "loss": 0.14755876064300538, "step": 18650 }, { "epoch": 5.2966221969912, "grad_norm": 17.65648651123047, "learning_rate": 9.470650014192451e-05, "loss": 0.139887273311615, "step": 18660 }, { "epoch": 5.2994606869145615, "grad_norm": 10.992133140563965, "learning_rate": 9.470366165200114e-05, "loss": 0.10425843000411987, "step": 18670 }, { "epoch": 5.302299176837923, "grad_norm": 8.081459999084473, "learning_rate": 9.470082316207778e-05, "loss": 0.14321550130844116, "step": 18680 }, { "epoch": 5.305137666761283, "grad_norm": 19.88718032836914, "learning_rate": 9.469798467215442e-05, "loss": 0.1343953251838684, "step": 18690 }, { "epoch": 5.307976156684644, "grad_norm": 11.913741111755371, "learning_rate": 9.469514618223105e-05, "loss": 0.11695908308029175, "step": 18700 }, { "epoch": 5.310814646608004, "grad_norm": 16.517282485961914, "learning_rate": 9.46923076923077e-05, "loss": 0.12505130767822265, "step": 18710 }, { "epoch": 5.313653136531365, "grad_norm": 8.039443016052246, "learning_rate": 9.468946920238434e-05, "loss": 0.10022202730178834, "step": 18720 }, { "epoch": 5.316491626454726, "grad_norm": 16.427576065063477, "learning_rate": 9.468663071246097e-05, "loss": 0.13602070808410643, "step": 18730 }, { "epoch": 5.319330116378087, "grad_norm": 12.405729293823242, "learning_rate": 9.468379222253761e-05, "loss": 0.09898651242256165, "step": 18740 }, { "epoch": 5.322168606301448, "grad_norm": 5.045668601989746, "learning_rate": 9.468095373261425e-05, "loss": 0.11487486362457275, "step": 18750 }, { "epoch": 5.325007096224809, "grad_norm": 8.068799018859863, "learning_rate": 9.46781152426909e-05, "loss": 0.074915611743927, "step": 18760 }, { "epoch": 5.327845586148169, "grad_norm": 9.10688304901123, "learning_rate": 9.467527675276752e-05, "loss": 0.1302726984024048, "step": 18770 }, { "epoch": 5.33068407607153, "grad_norm": 18.554283142089844, "learning_rate": 9.467243826284418e-05, "loss": 0.12635024785995483, "step": 18780 }, { "epoch": 5.33352256599489, "grad_norm": 9.430561065673828, "learning_rate": 9.466959977292082e-05, "loss": 0.13685967922210693, "step": 18790 }, { "epoch": 5.3363610559182515, "grad_norm": 6.459030628204346, "learning_rate": 9.466676128299745e-05, "loss": 0.13198906183242798, "step": 18800 }, { "epoch": 5.339199545841613, "grad_norm": 6.607358455657959, "learning_rate": 9.466392279307409e-05, "loss": 0.12675567865371704, "step": 18810 }, { "epoch": 5.342038035764973, "grad_norm": 14.684468269348145, "learning_rate": 9.466108430315073e-05, "loss": 0.13726824522018433, "step": 18820 }, { "epoch": 5.344876525688334, "grad_norm": 12.43384838104248, "learning_rate": 9.465824581322736e-05, "loss": 0.11709840297698974, "step": 18830 }, { "epoch": 5.347715015611694, "grad_norm": 7.708503246307373, "learning_rate": 9.4655407323304e-05, "loss": 0.11707191467285157, "step": 18840 }, { "epoch": 5.350553505535055, "grad_norm": 12.617171287536621, "learning_rate": 9.465256883338065e-05, "loss": 0.1273653030395508, "step": 18850 }, { "epoch": 5.353391995458416, "grad_norm": 11.896539688110352, "learning_rate": 9.464973034345728e-05, "loss": 0.11616818904876709, "step": 18860 }, { "epoch": 5.356230485381777, "grad_norm": 10.566817283630371, "learning_rate": 9.464689185353392e-05, "loss": 0.12600181102752686, "step": 18870 }, { "epoch": 5.359068975305138, "grad_norm": 2.9978444576263428, "learning_rate": 9.464405336361057e-05, "loss": 0.09637483358383178, "step": 18880 }, { "epoch": 5.361907465228499, "grad_norm": 6.65971040725708, "learning_rate": 9.46412148736872e-05, "loss": 0.13089643716812133, "step": 18890 }, { "epoch": 5.364745955151859, "grad_norm": 18.470901489257812, "learning_rate": 9.463837638376383e-05, "loss": 0.09059785604476929, "step": 18900 }, { "epoch": 5.36758444507522, "grad_norm": 13.260082244873047, "learning_rate": 9.463553789384049e-05, "loss": 0.11366134881973267, "step": 18910 }, { "epoch": 5.37042293499858, "grad_norm": 14.940563201904297, "learning_rate": 9.463269940391713e-05, "loss": 0.09691439867019654, "step": 18920 }, { "epoch": 5.3732614249219415, "grad_norm": 10.499874114990234, "learning_rate": 9.462986091399376e-05, "loss": 0.10260868072509766, "step": 18930 }, { "epoch": 5.376099914845303, "grad_norm": 10.452183723449707, "learning_rate": 9.46270224240704e-05, "loss": 0.09258331656455994, "step": 18940 }, { "epoch": 5.378938404768663, "grad_norm": 20.69902801513672, "learning_rate": 9.462418393414704e-05, "loss": 0.1747816801071167, "step": 18950 }, { "epoch": 5.381776894692024, "grad_norm": 14.414055824279785, "learning_rate": 9.462134544422367e-05, "loss": 0.12152936458587646, "step": 18960 }, { "epoch": 5.384615384615385, "grad_norm": 6.830984115600586, "learning_rate": 9.461850695430031e-05, "loss": 0.10256987810134888, "step": 18970 }, { "epoch": 5.387453874538745, "grad_norm": 16.298723220825195, "learning_rate": 9.461566846437697e-05, "loss": 0.13600679636001586, "step": 18980 }, { "epoch": 5.390292364462106, "grad_norm": 14.064005851745605, "learning_rate": 9.46128299744536e-05, "loss": 0.10061171054840087, "step": 18990 }, { "epoch": 5.393130854385467, "grad_norm": 12.299553871154785, "learning_rate": 9.460999148453023e-05, "loss": 0.11337920427322387, "step": 19000 }, { "epoch": 5.393130854385467, "eval_accuracy": 0.9239524384815921, "eval_loss": 0.23114651441574097, "eval_runtime": 31.3393, "eval_samples_per_second": 501.83, "eval_steps_per_second": 7.85, "step": 19000 }, { "epoch": 5.395969344308828, "grad_norm": 12.65729808807373, "learning_rate": 9.460715299460688e-05, "loss": 0.10458855628967285, "step": 19010 }, { "epoch": 5.398807834232189, "grad_norm": 11.864614486694336, "learning_rate": 9.460431450468352e-05, "loss": 0.12202335596084594, "step": 19020 }, { "epoch": 5.401646324155549, "grad_norm": 10.886442184448242, "learning_rate": 9.460147601476015e-05, "loss": 0.12154321670532227, "step": 19030 }, { "epoch": 5.40448481407891, "grad_norm": 13.320350646972656, "learning_rate": 9.459863752483679e-05, "loss": 0.10878154039382934, "step": 19040 }, { "epoch": 5.40732330400227, "grad_norm": 8.635933876037598, "learning_rate": 9.459579903491344e-05, "loss": 0.12345335483551026, "step": 19050 }, { "epoch": 5.4101617939256315, "grad_norm": 16.82415771484375, "learning_rate": 9.459296054499007e-05, "loss": 0.1340232253074646, "step": 19060 }, { "epoch": 5.413000283848993, "grad_norm": 5.474141597747803, "learning_rate": 9.459012205506671e-05, "loss": 0.0995680570602417, "step": 19070 }, { "epoch": 5.415838773772353, "grad_norm": 8.26756763458252, "learning_rate": 9.458728356514335e-05, "loss": 0.10731569528579712, "step": 19080 }, { "epoch": 5.418677263695714, "grad_norm": 12.569828987121582, "learning_rate": 9.458444507521998e-05, "loss": 0.10253074169158935, "step": 19090 }, { "epoch": 5.421515753619075, "grad_norm": 23.679319381713867, "learning_rate": 9.458160658529662e-05, "loss": 0.13541394472122192, "step": 19100 }, { "epoch": 5.424354243542435, "grad_norm": 10.604979515075684, "learning_rate": 9.457876809537328e-05, "loss": 0.1214879035949707, "step": 19110 }, { "epoch": 5.427192733465796, "grad_norm": 13.791625022888184, "learning_rate": 9.45759296054499e-05, "loss": 0.13752079010009766, "step": 19120 }, { "epoch": 5.430031223389157, "grad_norm": 20.201370239257812, "learning_rate": 9.457309111552655e-05, "loss": 0.12793558835983276, "step": 19130 }, { "epoch": 5.432869713312518, "grad_norm": 14.36790657043457, "learning_rate": 9.457025262560319e-05, "loss": 0.1193717360496521, "step": 19140 }, { "epoch": 5.435708203235879, "grad_norm": 9.850201606750488, "learning_rate": 9.456741413567983e-05, "loss": 0.12861897945404052, "step": 19150 }, { "epoch": 5.438546693159239, "grad_norm": 6.269471645355225, "learning_rate": 9.456457564575646e-05, "loss": 0.0933423638343811, "step": 19160 }, { "epoch": 5.4413851830826, "grad_norm": 19.406147003173828, "learning_rate": 9.45617371558331e-05, "loss": 0.12077394723892212, "step": 19170 }, { "epoch": 5.444223673005961, "grad_norm": 12.03589153289795, "learning_rate": 9.455889866590974e-05, "loss": 0.1468037486076355, "step": 19180 }, { "epoch": 5.447062162929321, "grad_norm": 12.559707641601562, "learning_rate": 9.455606017598638e-05, "loss": 0.12684504985809325, "step": 19190 }, { "epoch": 5.4499006528526825, "grad_norm": 5.962762355804443, "learning_rate": 9.455322168606302e-05, "loss": 0.10693166255950928, "step": 19200 }, { "epoch": 5.452739142776043, "grad_norm": 11.452478408813477, "learning_rate": 9.455038319613966e-05, "loss": 0.10182032585144044, "step": 19210 }, { "epoch": 5.455577632699404, "grad_norm": 15.097282409667969, "learning_rate": 9.454754470621629e-05, "loss": 0.11651647090911865, "step": 19220 }, { "epoch": 5.458416122622765, "grad_norm": 8.956564903259277, "learning_rate": 9.454470621629293e-05, "loss": 0.13075654506683348, "step": 19230 }, { "epoch": 5.461254612546125, "grad_norm": 15.3197603225708, "learning_rate": 9.454186772636957e-05, "loss": 0.11533890962600708, "step": 19240 }, { "epoch": 5.464093102469486, "grad_norm": 11.846577644348145, "learning_rate": 9.453902923644621e-05, "loss": 0.12017837762832642, "step": 19250 }, { "epoch": 5.4669315923928465, "grad_norm": 4.201696872711182, "learning_rate": 9.453619074652286e-05, "loss": 0.12061038017272949, "step": 19260 }, { "epoch": 5.469770082316208, "grad_norm": 11.541903495788574, "learning_rate": 9.45333522565995e-05, "loss": 0.11912944316864013, "step": 19270 }, { "epoch": 5.472608572239569, "grad_norm": 12.73418140411377, "learning_rate": 9.453051376667613e-05, "loss": 0.1129902720451355, "step": 19280 }, { "epoch": 5.475447062162929, "grad_norm": 5.762692451477051, "learning_rate": 9.452767527675277e-05, "loss": 0.07790331244468689, "step": 19290 }, { "epoch": 5.47828555208629, "grad_norm": 13.306546211242676, "learning_rate": 9.452483678682941e-05, "loss": 0.13790605068206788, "step": 19300 }, { "epoch": 5.481124042009651, "grad_norm": 13.98701000213623, "learning_rate": 9.452199829690605e-05, "loss": 0.12352221012115479, "step": 19310 }, { "epoch": 5.483962531933011, "grad_norm": 9.166779518127441, "learning_rate": 9.451915980698269e-05, "loss": 0.09548219442367553, "step": 19320 }, { "epoch": 5.4868010218563725, "grad_norm": 16.246734619140625, "learning_rate": 9.451632131705933e-05, "loss": 0.1522259831428528, "step": 19330 }, { "epoch": 5.489639511779734, "grad_norm": 9.676566123962402, "learning_rate": 9.451348282713597e-05, "loss": 0.11040685176849366, "step": 19340 }, { "epoch": 5.492478001703094, "grad_norm": 12.970476150512695, "learning_rate": 9.45106443372126e-05, "loss": 0.14873800277709961, "step": 19350 }, { "epoch": 5.495316491626455, "grad_norm": 3.8955798149108887, "learning_rate": 9.450780584728924e-05, "loss": 0.12185837030410766, "step": 19360 }, { "epoch": 5.498154981549815, "grad_norm": 12.275150299072266, "learning_rate": 9.450496735736588e-05, "loss": 0.1141619086265564, "step": 19370 }, { "epoch": 5.500993471473176, "grad_norm": 7.498418807983398, "learning_rate": 9.450212886744253e-05, "loss": 0.09539836645126343, "step": 19380 }, { "epoch": 5.503831961396537, "grad_norm": 9.892207145690918, "learning_rate": 9.449929037751917e-05, "loss": 0.09321224093437194, "step": 19390 }, { "epoch": 5.506670451319898, "grad_norm": 9.417450904846191, "learning_rate": 9.449645188759581e-05, "loss": 0.13593860864639282, "step": 19400 }, { "epoch": 5.509508941243259, "grad_norm": 12.9541597366333, "learning_rate": 9.449361339767244e-05, "loss": 0.12845546007156372, "step": 19410 }, { "epoch": 5.512347431166619, "grad_norm": 12.885693550109863, "learning_rate": 9.449077490774908e-05, "loss": 0.10136137008666993, "step": 19420 }, { "epoch": 5.51518592108998, "grad_norm": 11.74797248840332, "learning_rate": 9.448793641782572e-05, "loss": 0.14557892084121704, "step": 19430 }, { "epoch": 5.518024411013341, "grad_norm": 11.357150077819824, "learning_rate": 9.448509792790236e-05, "loss": 0.09730833172798156, "step": 19440 }, { "epoch": 5.520862900936701, "grad_norm": 11.29161262512207, "learning_rate": 9.4482259437979e-05, "loss": 0.10603079795837403, "step": 19450 }, { "epoch": 5.5237013908600625, "grad_norm": 28.32952308654785, "learning_rate": 9.447942094805564e-05, "loss": 0.1581097960472107, "step": 19460 }, { "epoch": 5.526539880783423, "grad_norm": 4.153390407562256, "learning_rate": 9.447658245813228e-05, "loss": 0.11754984855651855, "step": 19470 }, { "epoch": 5.529378370706784, "grad_norm": 10.587778091430664, "learning_rate": 9.447374396820891e-05, "loss": 0.15817960500717163, "step": 19480 }, { "epoch": 5.532216860630145, "grad_norm": 9.358624458312988, "learning_rate": 9.447090547828555e-05, "loss": 0.12570657730102539, "step": 19490 }, { "epoch": 5.535055350553505, "grad_norm": 10.29257583618164, "learning_rate": 9.44680669883622e-05, "loss": 0.12990448474884034, "step": 19500 }, { "epoch": 5.535055350553505, "eval_accuracy": 0.9264958351878935, "eval_loss": 0.22490718960762024, "eval_runtime": 32.3009, "eval_samples_per_second": 486.891, "eval_steps_per_second": 7.616, "step": 19500 }, { "epoch": 5.537893840476866, "grad_norm": 25.854448318481445, "learning_rate": 9.446522849843884e-05, "loss": 0.136379873752594, "step": 19510 }, { "epoch": 5.540732330400227, "grad_norm": 7.664098739624023, "learning_rate": 9.446239000851548e-05, "loss": 0.14082088470458984, "step": 19520 }, { "epoch": 5.543570820323588, "grad_norm": 16.849760055541992, "learning_rate": 9.445955151859212e-05, "loss": 0.11256475448608398, "step": 19530 }, { "epoch": 5.546409310246949, "grad_norm": 7.661827564239502, "learning_rate": 9.445671302866875e-05, "loss": 0.12128474712371826, "step": 19540 }, { "epoch": 5.54924780017031, "grad_norm": 13.366745948791504, "learning_rate": 9.445387453874539e-05, "loss": 0.12486085891723633, "step": 19550 }, { "epoch": 5.55208629009367, "grad_norm": 19.34494972229004, "learning_rate": 9.445103604882203e-05, "loss": 0.08632761240005493, "step": 19560 }, { "epoch": 5.554924780017031, "grad_norm": 16.57925796508789, "learning_rate": 9.444819755889867e-05, "loss": 0.1167499303817749, "step": 19570 }, { "epoch": 5.557763269940391, "grad_norm": 7.807219982147217, "learning_rate": 9.444535906897531e-05, "loss": 0.10497434139251709, "step": 19580 }, { "epoch": 5.5606017598637525, "grad_norm": 8.378154754638672, "learning_rate": 9.444252057905195e-05, "loss": 0.13328760862350464, "step": 19590 }, { "epoch": 5.563440249787114, "grad_norm": 17.223222732543945, "learning_rate": 9.44396820891286e-05, "loss": 0.12755695581436158, "step": 19600 }, { "epoch": 5.566278739710474, "grad_norm": 11.267366409301758, "learning_rate": 9.443684359920522e-05, "loss": 0.1144278883934021, "step": 19610 }, { "epoch": 5.569117229633835, "grad_norm": 12.408498764038086, "learning_rate": 9.443400510928186e-05, "loss": 0.09208456873893738, "step": 19620 }, { "epoch": 5.571955719557195, "grad_norm": 12.417707443237305, "learning_rate": 9.44311666193585e-05, "loss": 0.09806557297706604, "step": 19630 }, { "epoch": 5.574794209480556, "grad_norm": 10.28376293182373, "learning_rate": 9.442832812943513e-05, "loss": 0.1239471435546875, "step": 19640 }, { "epoch": 5.577632699403917, "grad_norm": 13.795941352844238, "learning_rate": 9.442548963951179e-05, "loss": 0.10845887660980225, "step": 19650 }, { "epoch": 5.580471189327278, "grad_norm": 19.030147552490234, "learning_rate": 9.442265114958843e-05, "loss": 0.11543688774108887, "step": 19660 }, { "epoch": 5.583309679250639, "grad_norm": 9.811711311340332, "learning_rate": 9.441981265966506e-05, "loss": 0.11494889259338378, "step": 19670 }, { "epoch": 5.586148169174, "grad_norm": 12.787882804870605, "learning_rate": 9.44169741697417e-05, "loss": 0.0939164400100708, "step": 19680 }, { "epoch": 5.58898665909736, "grad_norm": 12.61274528503418, "learning_rate": 9.441413567981834e-05, "loss": 0.09798465967178345, "step": 19690 }, { "epoch": 5.591825149020721, "grad_norm": 16.24980354309082, "learning_rate": 9.441129718989498e-05, "loss": 0.14927272796630858, "step": 19700 }, { "epoch": 5.594663638944081, "grad_norm": 11.760284423828125, "learning_rate": 9.440845869997162e-05, "loss": 0.12690526247024536, "step": 19710 }, { "epoch": 5.5975021288674425, "grad_norm": 20.020917892456055, "learning_rate": 9.440562021004826e-05, "loss": 0.1077381730079651, "step": 19720 }, { "epoch": 5.600340618790804, "grad_norm": 10.436101913452148, "learning_rate": 9.44027817201249e-05, "loss": 0.1191368818283081, "step": 19730 }, { "epoch": 5.603179108714164, "grad_norm": 9.501431465148926, "learning_rate": 9.439994323020153e-05, "loss": 0.10124939680099487, "step": 19740 }, { "epoch": 5.606017598637525, "grad_norm": 13.978364944458008, "learning_rate": 9.439710474027818e-05, "loss": 0.11254279613494873, "step": 19750 }, { "epoch": 5.608856088560886, "grad_norm": 13.982394218444824, "learning_rate": 9.439426625035482e-05, "loss": 0.10022561550140381, "step": 19760 }, { "epoch": 5.611694578484246, "grad_norm": 9.231678009033203, "learning_rate": 9.439142776043144e-05, "loss": 0.07619082927703857, "step": 19770 }, { "epoch": 5.614533068407607, "grad_norm": 10.90194320678711, "learning_rate": 9.43885892705081e-05, "loss": 0.11572556495666504, "step": 19780 }, { "epoch": 5.617371558330968, "grad_norm": 4.109992980957031, "learning_rate": 9.438575078058474e-05, "loss": 0.09055798649787902, "step": 19790 }, { "epoch": 5.620210048254329, "grad_norm": 6.505393028259277, "learning_rate": 9.438291229066137e-05, "loss": 0.08564833402633668, "step": 19800 }, { "epoch": 5.62304853817769, "grad_norm": 11.80860424041748, "learning_rate": 9.438007380073801e-05, "loss": 0.1683119058609009, "step": 19810 }, { "epoch": 5.62588702810105, "grad_norm": 19.342016220092773, "learning_rate": 9.437723531081465e-05, "loss": 0.13672653436660767, "step": 19820 }, { "epoch": 5.628725518024411, "grad_norm": 28.116464614868164, "learning_rate": 9.437439682089129e-05, "loss": 0.09803901314735412, "step": 19830 }, { "epoch": 5.631564007947771, "grad_norm": 9.503965377807617, "learning_rate": 9.437155833096792e-05, "loss": 0.10152112245559693, "step": 19840 }, { "epoch": 5.6344024978711325, "grad_norm": 16.11463165283203, "learning_rate": 9.436871984104458e-05, "loss": 0.11753343343734741, "step": 19850 }, { "epoch": 5.637240987794494, "grad_norm": 10.268158912658691, "learning_rate": 9.436588135112122e-05, "loss": 0.1271757960319519, "step": 19860 }, { "epoch": 5.640079477717854, "grad_norm": 23.734561920166016, "learning_rate": 9.436304286119784e-05, "loss": 0.12662642002105712, "step": 19870 }, { "epoch": 5.642917967641215, "grad_norm": 12.347908973693848, "learning_rate": 9.436020437127449e-05, "loss": 0.12965919971466064, "step": 19880 }, { "epoch": 5.645756457564576, "grad_norm": 22.378528594970703, "learning_rate": 9.435736588135113e-05, "loss": 0.1164097785949707, "step": 19890 }, { "epoch": 5.648594947487936, "grad_norm": 7.571527004241943, "learning_rate": 9.435452739142776e-05, "loss": 0.09827263355255127, "step": 19900 }, { "epoch": 5.651433437411297, "grad_norm": 13.348270416259766, "learning_rate": 9.435168890150441e-05, "loss": 0.1280358076095581, "step": 19910 }, { "epoch": 5.654271927334658, "grad_norm": 10.951181411743164, "learning_rate": 9.434885041158105e-05, "loss": 0.13780926465988158, "step": 19920 }, { "epoch": 5.657110417258019, "grad_norm": 3.0407769680023193, "learning_rate": 9.434601192165768e-05, "loss": 0.11337422132492066, "step": 19930 }, { "epoch": 5.65994890718138, "grad_norm": 10.648685455322266, "learning_rate": 9.434317343173432e-05, "loss": 0.11053813695907592, "step": 19940 }, { "epoch": 5.66278739710474, "grad_norm": 10.185513496398926, "learning_rate": 9.434033494181096e-05, "loss": 0.1217140793800354, "step": 19950 }, { "epoch": 5.665625887028101, "grad_norm": 9.673163414001465, "learning_rate": 9.43374964518876e-05, "loss": 0.1199615478515625, "step": 19960 }, { "epoch": 5.668464376951462, "grad_norm": 17.210432052612305, "learning_rate": 9.433465796196423e-05, "loss": 0.10232281684875488, "step": 19970 }, { "epoch": 5.671302866874822, "grad_norm": 7.972498416900635, "learning_rate": 9.433181947204089e-05, "loss": 0.1017485499382019, "step": 19980 }, { "epoch": 5.6741413567981835, "grad_norm": 25.447080612182617, "learning_rate": 9.432898098211753e-05, "loss": 0.13863906860351563, "step": 19990 }, { "epoch": 5.676979846721544, "grad_norm": 14.742323875427246, "learning_rate": 9.432614249219416e-05, "loss": 0.1260089874267578, "step": 20000 }, { "epoch": 5.676979846721544, "eval_accuracy": 0.9208367775163732, "eval_loss": 0.23497049510478973, "eval_runtime": 31.2279, "eval_samples_per_second": 503.62, "eval_steps_per_second": 7.878, "step": 20000 }, { "epoch": 5.679818336644905, "grad_norm": 22.31974220275879, "learning_rate": 9.43233040022708e-05, "loss": 0.09978458285331726, "step": 20010 }, { "epoch": 5.682656826568266, "grad_norm": 4.697038650512695, "learning_rate": 9.432046551234744e-05, "loss": 0.1066719651222229, "step": 20020 }, { "epoch": 5.685495316491626, "grad_norm": 7.4572529792785645, "learning_rate": 9.431762702242407e-05, "loss": 0.10995265245437622, "step": 20030 }, { "epoch": 5.688333806414987, "grad_norm": 10.074922561645508, "learning_rate": 9.431478853250072e-05, "loss": 0.11781272888183594, "step": 20040 }, { "epoch": 5.6911722963383475, "grad_norm": 12.172504425048828, "learning_rate": 9.431195004257736e-05, "loss": 0.12341436147689819, "step": 20050 }, { "epoch": 5.694010786261709, "grad_norm": 13.814973831176758, "learning_rate": 9.430911155265399e-05, "loss": 0.08532713651657105, "step": 20060 }, { "epoch": 5.69684927618507, "grad_norm": 8.905369758605957, "learning_rate": 9.430627306273063e-05, "loss": 0.09130767583847046, "step": 20070 }, { "epoch": 5.69968776610843, "grad_norm": 10.224356651306152, "learning_rate": 9.430343457280727e-05, "loss": 0.1232654333114624, "step": 20080 }, { "epoch": 5.702526256031791, "grad_norm": 9.009565353393555, "learning_rate": 9.430059608288391e-05, "loss": 0.11865953207015992, "step": 20090 }, { "epoch": 5.705364745955152, "grad_norm": 8.753589630126953, "learning_rate": 9.429775759296054e-05, "loss": 0.11695575714111328, "step": 20100 }, { "epoch": 5.708203235878512, "grad_norm": 5.8907880783081055, "learning_rate": 9.42949191030372e-05, "loss": 0.09317396879196167, "step": 20110 }, { "epoch": 5.7110417258018735, "grad_norm": 12.783473014831543, "learning_rate": 9.429208061311382e-05, "loss": 0.14268021583557128, "step": 20120 }, { "epoch": 5.713880215725235, "grad_norm": 13.65078067779541, "learning_rate": 9.428924212319047e-05, "loss": 0.17956950664520263, "step": 20130 }, { "epoch": 5.716718705648595, "grad_norm": 13.46239948272705, "learning_rate": 9.428640363326711e-05, "loss": 0.10398170948028565, "step": 20140 }, { "epoch": 5.719557195571956, "grad_norm": 13.398715019226074, "learning_rate": 9.428356514334375e-05, "loss": 0.08553057909011841, "step": 20150 }, { "epoch": 5.722395685495316, "grad_norm": 13.870940208435059, "learning_rate": 9.428072665342038e-05, "loss": 0.12373956441879272, "step": 20160 }, { "epoch": 5.725234175418677, "grad_norm": 7.184823036193848, "learning_rate": 9.427788816349702e-05, "loss": 0.11054706573486328, "step": 20170 }, { "epoch": 5.728072665342038, "grad_norm": 16.883848190307617, "learning_rate": 9.427504967357367e-05, "loss": 0.11671416759490967, "step": 20180 }, { "epoch": 5.730911155265399, "grad_norm": 7.285149574279785, "learning_rate": 9.42722111836503e-05, "loss": 0.10194882154464721, "step": 20190 }, { "epoch": 5.73374964518876, "grad_norm": 17.83062744140625, "learning_rate": 9.426937269372694e-05, "loss": 0.12979027032852172, "step": 20200 }, { "epoch": 5.73658813511212, "grad_norm": 10.121158599853516, "learning_rate": 9.426653420380358e-05, "loss": 0.11723473072052001, "step": 20210 }, { "epoch": 5.739426625035481, "grad_norm": 9.345041275024414, "learning_rate": 9.426369571388021e-05, "loss": 0.09484395980834961, "step": 20220 }, { "epoch": 5.742265114958842, "grad_norm": 16.191144943237305, "learning_rate": 9.426085722395685e-05, "loss": 0.13823035955429078, "step": 20230 }, { "epoch": 5.745103604882202, "grad_norm": 8.47496509552002, "learning_rate": 9.425801873403351e-05, "loss": 0.09822333455085755, "step": 20240 }, { "epoch": 5.7479420948055635, "grad_norm": 9.439685821533203, "learning_rate": 9.425518024411014e-05, "loss": 0.1134137749671936, "step": 20250 }, { "epoch": 5.750780584728924, "grad_norm": 17.277421951293945, "learning_rate": 9.425234175418678e-05, "loss": 0.10476200580596924, "step": 20260 }, { "epoch": 5.753619074652285, "grad_norm": 6.557984828948975, "learning_rate": 9.424950326426342e-05, "loss": 0.09636909365653992, "step": 20270 }, { "epoch": 5.756457564575646, "grad_norm": 7.983254909515381, "learning_rate": 9.424666477434006e-05, "loss": 0.08869478702545167, "step": 20280 }, { "epoch": 5.759296054499006, "grad_norm": 12.20524787902832, "learning_rate": 9.424382628441669e-05, "loss": 0.1241506814956665, "step": 20290 }, { "epoch": 5.762134544422367, "grad_norm": 11.937776565551758, "learning_rate": 9.424098779449333e-05, "loss": 0.10028040409088135, "step": 20300 }, { "epoch": 5.764973034345728, "grad_norm": 11.134846687316895, "learning_rate": 9.423814930456998e-05, "loss": 0.10375795364379883, "step": 20310 }, { "epoch": 5.767811524269089, "grad_norm": 5.081900596618652, "learning_rate": 9.423531081464661e-05, "loss": 0.08871475458145142, "step": 20320 }, { "epoch": 5.77065001419245, "grad_norm": 13.92850399017334, "learning_rate": 9.423247232472325e-05, "loss": 0.11963483095169067, "step": 20330 }, { "epoch": 5.773488504115811, "grad_norm": 19.87070083618164, "learning_rate": 9.42296338347999e-05, "loss": 0.13282688856124877, "step": 20340 }, { "epoch": 5.776326994039171, "grad_norm": 10.71762466430664, "learning_rate": 9.422679534487652e-05, "loss": 0.11580984592437744, "step": 20350 }, { "epoch": 5.779165483962532, "grad_norm": 17.5373592376709, "learning_rate": 9.422395685495316e-05, "loss": 0.1278702974319458, "step": 20360 }, { "epoch": 5.782003973885892, "grad_norm": 8.061185836791992, "learning_rate": 9.42211183650298e-05, "loss": 0.11274677515029907, "step": 20370 }, { "epoch": 5.7848424638092535, "grad_norm": 7.343835830688477, "learning_rate": 9.421827987510645e-05, "loss": 0.10580188035964966, "step": 20380 }, { "epoch": 5.787680953732615, "grad_norm": 7.6651177406311035, "learning_rate": 9.421544138518309e-05, "loss": 0.11666873693466187, "step": 20390 }, { "epoch": 5.790519443655975, "grad_norm": 14.452144622802734, "learning_rate": 9.421260289525973e-05, "loss": 0.12069786787033081, "step": 20400 }, { "epoch": 5.793357933579336, "grad_norm": 16.657556533813477, "learning_rate": 9.420976440533637e-05, "loss": 0.10045976638793945, "step": 20410 }, { "epoch": 5.796196423502696, "grad_norm": 9.178298950195312, "learning_rate": 9.4206925915413e-05, "loss": 0.12472000122070312, "step": 20420 }, { "epoch": 5.799034913426057, "grad_norm": 11.092296600341797, "learning_rate": 9.420408742548964e-05, "loss": 0.12591469287872314, "step": 20430 }, { "epoch": 5.801873403349418, "grad_norm": 7.71895170211792, "learning_rate": 9.42012489355663e-05, "loss": 0.11758935451507568, "step": 20440 }, { "epoch": 5.804711893272779, "grad_norm": 11.795618057250977, "learning_rate": 9.419841044564292e-05, "loss": 0.10448135137557983, "step": 20450 }, { "epoch": 5.80755038319614, "grad_norm": 9.91723918914795, "learning_rate": 9.419557195571956e-05, "loss": 0.10342755317687988, "step": 20460 }, { "epoch": 5.810388873119501, "grad_norm": 22.000638961791992, "learning_rate": 9.41927334657962e-05, "loss": 0.14892503023147582, "step": 20470 }, { "epoch": 5.813227363042861, "grad_norm": 7.675416469573975, "learning_rate": 9.418989497587283e-05, "loss": 0.09208612442016602, "step": 20480 }, { "epoch": 5.816065852966222, "grad_norm": 16.408016204833984, "learning_rate": 9.418705648594947e-05, "loss": 0.08327261209487916, "step": 20490 }, { "epoch": 5.818904342889582, "grad_norm": 7.683635711669922, "learning_rate": 9.418421799602612e-05, "loss": 0.1132471203804016, "step": 20500 }, { "epoch": 5.818904342889582, "eval_accuracy": 0.9238888535639347, "eval_loss": 0.22741714119911194, "eval_runtime": 31.0664, "eval_samples_per_second": 506.237, "eval_steps_per_second": 7.919, "step": 20500 }, { "epoch": 5.8217428328129435, "grad_norm": 8.033111572265625, "learning_rate": 9.41816633550951e-05, "loss": 0.10498391389846802, "step": 20510 }, { "epoch": 5.824581322736305, "grad_norm": 16.93895149230957, "learning_rate": 9.417882486517174e-05, "loss": 0.13192824125289918, "step": 20520 }, { "epoch": 5.827419812659665, "grad_norm": 17.369476318359375, "learning_rate": 9.417598637524836e-05, "loss": 0.15906789302825927, "step": 20530 }, { "epoch": 5.830258302583026, "grad_norm": 25.607196807861328, "learning_rate": 9.4173147885325e-05, "loss": 0.08330153226852417, "step": 20540 }, { "epoch": 5.833096792506387, "grad_norm": 10.629730224609375, "learning_rate": 9.417030939540165e-05, "loss": 0.13406455516815186, "step": 20550 }, { "epoch": 5.835935282429747, "grad_norm": 8.932452201843262, "learning_rate": 9.416747090547829e-05, "loss": 0.11395784616470336, "step": 20560 }, { "epoch": 5.838773772353108, "grad_norm": 14.644490242004395, "learning_rate": 9.416463241555493e-05, "loss": 0.13847874402999877, "step": 20570 }, { "epoch": 5.841612262276469, "grad_norm": 16.633623123168945, "learning_rate": 9.416179392563157e-05, "loss": 0.1258102297782898, "step": 20580 }, { "epoch": 5.84445075219983, "grad_norm": 14.985760688781738, "learning_rate": 9.415895543570821e-05, "loss": 0.12671819925308228, "step": 20590 }, { "epoch": 5.847289242123191, "grad_norm": 7.696166515350342, "learning_rate": 9.415611694578484e-05, "loss": 0.14628607034683228, "step": 20600 }, { "epoch": 5.850127732046551, "grad_norm": 15.762514114379883, "learning_rate": 9.415327845586148e-05, "loss": 0.11489802598953247, "step": 20610 }, { "epoch": 5.852966221969912, "grad_norm": 12.863702774047852, "learning_rate": 9.415043996593814e-05, "loss": 0.09319427013397216, "step": 20620 }, { "epoch": 5.855804711893272, "grad_norm": 13.40141773223877, "learning_rate": 9.414760147601476e-05, "loss": 0.13039968013763428, "step": 20630 }, { "epoch": 5.8586432018166335, "grad_norm": 6.219179153442383, "learning_rate": 9.41447629860914e-05, "loss": 0.08061777353286743, "step": 20640 }, { "epoch": 5.861481691739995, "grad_norm": 23.412900924682617, "learning_rate": 9.414192449616805e-05, "loss": 0.11036337614059448, "step": 20650 }, { "epoch": 5.864320181663355, "grad_norm": 11.657022476196289, "learning_rate": 9.413908600624468e-05, "loss": 0.09171515107154846, "step": 20660 }, { "epoch": 5.867158671586716, "grad_norm": 10.708574295043945, "learning_rate": 9.413624751632132e-05, "loss": 0.1260165572166443, "step": 20670 }, { "epoch": 5.869997161510077, "grad_norm": 4.3276777267456055, "learning_rate": 9.413340902639796e-05, "loss": 0.10439838171005249, "step": 20680 }, { "epoch": 5.872835651433437, "grad_norm": 13.661375045776367, "learning_rate": 9.41305705364746e-05, "loss": 0.11782879829406738, "step": 20690 }, { "epoch": 5.875674141356798, "grad_norm": 10.946717262268066, "learning_rate": 9.412773204655124e-05, "loss": 0.11104187965393067, "step": 20700 }, { "epoch": 5.878512631280159, "grad_norm": 14.004988670349121, "learning_rate": 9.412489355662788e-05, "loss": 0.0921678900718689, "step": 20710 }, { "epoch": 5.88135112120352, "grad_norm": 7.5447773933410645, "learning_rate": 9.412205506670452e-05, "loss": 0.13092324733734131, "step": 20720 }, { "epoch": 5.884189611126881, "grad_norm": 8.072649955749512, "learning_rate": 9.411921657678115e-05, "loss": 0.1444801926612854, "step": 20730 }, { "epoch": 5.887028101050241, "grad_norm": 16.72834587097168, "learning_rate": 9.411637808685779e-05, "loss": 0.13325690031051635, "step": 20740 }, { "epoch": 5.889866590973602, "grad_norm": 21.628686904907227, "learning_rate": 9.411353959693443e-05, "loss": 0.12417876720428467, "step": 20750 }, { "epoch": 5.892705080896963, "grad_norm": 10.818861961364746, "learning_rate": 9.411070110701108e-05, "loss": 0.12724164724349976, "step": 20760 }, { "epoch": 5.895543570820323, "grad_norm": 8.504688262939453, "learning_rate": 9.410786261708772e-05, "loss": 0.10596181154251098, "step": 20770 }, { "epoch": 5.8983820607436845, "grad_norm": 14.816200256347656, "learning_rate": 9.410502412716436e-05, "loss": 0.094748854637146, "step": 20780 }, { "epoch": 5.901220550667045, "grad_norm": 18.645090103149414, "learning_rate": 9.410218563724099e-05, "loss": 0.15787179470062257, "step": 20790 }, { "epoch": 5.904059040590406, "grad_norm": 7.746403694152832, "learning_rate": 9.409934714731763e-05, "loss": 0.11731476783752441, "step": 20800 }, { "epoch": 5.906897530513767, "grad_norm": 13.322840690612793, "learning_rate": 9.409650865739427e-05, "loss": 0.0997210144996643, "step": 20810 }, { "epoch": 5.909736020437127, "grad_norm": 9.661260604858398, "learning_rate": 9.409367016747091e-05, "loss": 0.08642529845237731, "step": 20820 }, { "epoch": 5.912574510360488, "grad_norm": 10.458847045898438, "learning_rate": 9.409083167754755e-05, "loss": 0.12587547302246094, "step": 20830 }, { "epoch": 5.9154130002838485, "grad_norm": 14.758914947509766, "learning_rate": 9.408799318762419e-05, "loss": 0.07983130216598511, "step": 20840 }, { "epoch": 5.91825149020721, "grad_norm": 13.798670768737793, "learning_rate": 9.408515469770083e-05, "loss": 0.10943112373352051, "step": 20850 }, { "epoch": 5.921089980130571, "grad_norm": 9.510903358459473, "learning_rate": 9.408231620777746e-05, "loss": 0.12384045124053955, "step": 20860 }, { "epoch": 5.923928470053931, "grad_norm": 24.714670181274414, "learning_rate": 9.40794777178541e-05, "loss": 0.11439346075057984, "step": 20870 }, { "epoch": 5.926766959977292, "grad_norm": 11.630462646484375, "learning_rate": 9.407663922793075e-05, "loss": 0.10922585725784302, "step": 20880 }, { "epoch": 5.929605449900653, "grad_norm": 6.719127178192139, "learning_rate": 9.407380073800739e-05, "loss": 0.10444743633270263, "step": 20890 }, { "epoch": 5.932443939824013, "grad_norm": 12.36545467376709, "learning_rate": 9.407096224808403e-05, "loss": 0.12218999862670898, "step": 20900 }, { "epoch": 5.9352824297473745, "grad_norm": 11.014594078063965, "learning_rate": 9.406812375816067e-05, "loss": 0.08795750737190247, "step": 20910 }, { "epoch": 5.938120919670736, "grad_norm": 5.8085174560546875, "learning_rate": 9.40652852682373e-05, "loss": 0.09594220519065857, "step": 20920 }, { "epoch": 5.940959409594096, "grad_norm": 17.007211685180664, "learning_rate": 9.406244677831394e-05, "loss": 0.08541787266731263, "step": 20930 }, { "epoch": 5.943797899517457, "grad_norm": 12.685742378234863, "learning_rate": 9.405960828839058e-05, "loss": 0.10849156379699706, "step": 20940 }, { "epoch": 5.946636389440817, "grad_norm": 6.9238715171813965, "learning_rate": 9.405676979846722e-05, "loss": 0.12375425100326538, "step": 20950 }, { "epoch": 5.949474879364178, "grad_norm": 4.230216026306152, "learning_rate": 9.405393130854386e-05, "loss": 0.08689563870429992, "step": 20960 }, { "epoch": 5.952313369287539, "grad_norm": 10.104366302490234, "learning_rate": 9.40510928186205e-05, "loss": 0.13967742919921874, "step": 20970 }, { "epoch": 5.9551518592109, "grad_norm": 11.409494400024414, "learning_rate": 9.404825432869713e-05, "loss": 0.1268508553504944, "step": 20980 }, { "epoch": 5.957990349134261, "grad_norm": 8.809844017028809, "learning_rate": 9.404541583877377e-05, "loss": 0.11903468370437623, "step": 20990 }, { "epoch": 5.960828839057621, "grad_norm": 8.836562156677246, "learning_rate": 9.404257734885041e-05, "loss": 0.09153556823730469, "step": 21000 }, { "epoch": 5.960828839057621, "eval_accuracy": 0.9313282889298659, "eval_loss": 0.2052992433309555, "eval_runtime": 30.996, "eval_samples_per_second": 507.389, "eval_steps_per_second": 7.937, "step": 21000 }, { "epoch": 5.963667328980982, "grad_norm": 12.423620223999023, "learning_rate": 9.403973885892706e-05, "loss": 0.11621305942535401, "step": 21010 }, { "epoch": 5.966505818904343, "grad_norm": 1.8047871589660645, "learning_rate": 9.40369003690037e-05, "loss": 0.09838481545448304, "step": 21020 }, { "epoch": 5.969344308827703, "grad_norm": 13.596325874328613, "learning_rate": 9.403406187908034e-05, "loss": 0.12000309228897095, "step": 21030 }, { "epoch": 5.9721827987510645, "grad_norm": 15.063260078430176, "learning_rate": 9.403122338915698e-05, "loss": 0.16305012702941896, "step": 21040 }, { "epoch": 5.975021288674425, "grad_norm": 9.36721420288086, "learning_rate": 9.402838489923361e-05, "loss": 0.1107431173324585, "step": 21050 }, { "epoch": 5.977859778597786, "grad_norm": 16.601165771484375, "learning_rate": 9.402554640931025e-05, "loss": 0.13110706806182862, "step": 21060 }, { "epoch": 5.980698268521147, "grad_norm": 16.701623916625977, "learning_rate": 9.402270791938689e-05, "loss": 0.14415010213851928, "step": 21070 }, { "epoch": 5.983536758444507, "grad_norm": 15.612679481506348, "learning_rate": 9.401986942946352e-05, "loss": 0.12169110774993896, "step": 21080 }, { "epoch": 5.986375248367868, "grad_norm": 12.671293258666992, "learning_rate": 9.401703093954017e-05, "loss": 0.14987602233886718, "step": 21090 }, { "epoch": 5.989213738291229, "grad_norm": 17.24456214904785, "learning_rate": 9.401419244961681e-05, "loss": 0.15189071893692016, "step": 21100 }, { "epoch": 5.99205222821459, "grad_norm": 21.735652923583984, "learning_rate": 9.401135395969344e-05, "loss": 0.14007269144058226, "step": 21110 }, { "epoch": 5.994890718137951, "grad_norm": 20.80341339111328, "learning_rate": 9.400851546977008e-05, "loss": 0.10591450929641724, "step": 21120 }, { "epoch": 5.997729208061312, "grad_norm": 14.216853141784668, "learning_rate": 9.400567697984673e-05, "loss": 0.09699013829231262, "step": 21130 }, { "epoch": 6.000567697984672, "grad_norm": 10.743067741394043, "learning_rate": 9.400283848992337e-05, "loss": 0.08979021310806275, "step": 21140 }, { "epoch": 6.003406187908033, "grad_norm": 3.1110355854034424, "learning_rate": 9.4e-05, "loss": 0.07359349727630615, "step": 21150 }, { "epoch": 6.006244677831393, "grad_norm": 11.48859977722168, "learning_rate": 9.399716151007665e-05, "loss": 0.08522689342498779, "step": 21160 }, { "epoch": 6.0090831677547545, "grad_norm": 6.143226623535156, "learning_rate": 9.399432302015329e-05, "loss": 0.09884411692619324, "step": 21170 }, { "epoch": 6.011921657678116, "grad_norm": 11.420652389526367, "learning_rate": 9.399148453022992e-05, "loss": 0.09911879301071166, "step": 21180 }, { "epoch": 6.014760147601476, "grad_norm": 7.774734973907471, "learning_rate": 9.398864604030656e-05, "loss": 0.10043747425079345, "step": 21190 }, { "epoch": 6.017598637524837, "grad_norm": 4.420595169067383, "learning_rate": 9.39858075503832e-05, "loss": 0.07095736861228943, "step": 21200 }, { "epoch": 6.020437127448197, "grad_norm": 11.891218185424805, "learning_rate": 9.398296906045983e-05, "loss": 0.07457776069641113, "step": 21210 }, { "epoch": 6.023275617371558, "grad_norm": 11.142216682434082, "learning_rate": 9.398013057053648e-05, "loss": 0.072478985786438, "step": 21220 }, { "epoch": 6.026114107294919, "grad_norm": 10.591689109802246, "learning_rate": 9.397729208061313e-05, "loss": 0.10340570211410523, "step": 21230 }, { "epoch": 6.02895259721828, "grad_norm": 15.661985397338867, "learning_rate": 9.397445359068975e-05, "loss": 0.10229082107543945, "step": 21240 }, { "epoch": 6.031791087141641, "grad_norm": 6.8917317390441895, "learning_rate": 9.39716151007664e-05, "loss": 0.11099512577056884, "step": 21250 }, { "epoch": 6.034629577065002, "grad_norm": 7.677479267120361, "learning_rate": 9.396877661084304e-05, "loss": 0.07190749645233155, "step": 21260 }, { "epoch": 6.037468066988362, "grad_norm": 10.08597469329834, "learning_rate": 9.396593812091968e-05, "loss": 0.08615765571594239, "step": 21270 }, { "epoch": 6.040306556911723, "grad_norm": 14.094669342041016, "learning_rate": 9.39630996309963e-05, "loss": 0.10557165145874023, "step": 21280 }, { "epoch": 6.043145046835083, "grad_norm": 11.102452278137207, "learning_rate": 9.396026114107296e-05, "loss": 0.08731478452682495, "step": 21290 }, { "epoch": 6.0459835367584445, "grad_norm": 18.687788009643555, "learning_rate": 9.39574226511496e-05, "loss": 0.11136163473129272, "step": 21300 }, { "epoch": 6.048822026681806, "grad_norm": 12.847024917602539, "learning_rate": 9.395458416122623e-05, "loss": 0.09557934403419495, "step": 21310 }, { "epoch": 6.051660516605166, "grad_norm": 8.790975570678711, "learning_rate": 9.395174567130287e-05, "loss": 0.10499650239944458, "step": 21320 }, { "epoch": 6.054499006528527, "grad_norm": 11.057319641113281, "learning_rate": 9.394890718137951e-05, "loss": 0.09857409000396729, "step": 21330 }, { "epoch": 6.057337496451888, "grad_norm": 18.362462997436523, "learning_rate": 9.394606869145614e-05, "loss": 0.09969000816345215, "step": 21340 }, { "epoch": 6.060175986375248, "grad_norm": 13.130828857421875, "learning_rate": 9.394323020153278e-05, "loss": 0.10471675395965577, "step": 21350 }, { "epoch": 6.063014476298609, "grad_norm": 9.789393424987793, "learning_rate": 9.394039171160944e-05, "loss": 0.08259748220443726, "step": 21360 }, { "epoch": 6.06585296622197, "grad_norm": 11.308144569396973, "learning_rate": 9.393755322168606e-05, "loss": 0.11735329627990723, "step": 21370 }, { "epoch": 6.068691456145331, "grad_norm": 5.1346025466918945, "learning_rate": 9.39347147317627e-05, "loss": 0.08542198538780213, "step": 21380 }, { "epoch": 6.071529946068692, "grad_norm": 9.559159278869629, "learning_rate": 9.393187624183935e-05, "loss": 0.0788351058959961, "step": 21390 }, { "epoch": 6.074368435992052, "grad_norm": 9.88121509552002, "learning_rate": 9.392903775191599e-05, "loss": 0.0921436071395874, "step": 21400 }, { "epoch": 6.077206925915413, "grad_norm": 18.608478546142578, "learning_rate": 9.392619926199262e-05, "loss": 0.0845526933670044, "step": 21410 }, { "epoch": 6.080045415838773, "grad_norm": 4.436546325683594, "learning_rate": 9.392336077206927e-05, "loss": 0.07260103225708008, "step": 21420 }, { "epoch": 6.0828839057621344, "grad_norm": 9.707710266113281, "learning_rate": 9.392052228214591e-05, "loss": 0.08143643736839294, "step": 21430 }, { "epoch": 6.085722395685496, "grad_norm": 7.061699390411377, "learning_rate": 9.391768379222254e-05, "loss": 0.09043810963630676, "step": 21440 }, { "epoch": 6.088560885608856, "grad_norm": 13.426015853881836, "learning_rate": 9.391484530229918e-05, "loss": 0.060395896434783936, "step": 21450 }, { "epoch": 6.091399375532217, "grad_norm": 15.57006549835205, "learning_rate": 9.391200681237582e-05, "loss": 0.11533644199371337, "step": 21460 }, { "epoch": 6.094237865455578, "grad_norm": 5.301523208618164, "learning_rate": 9.390916832245245e-05, "loss": 0.09352814555168151, "step": 21470 }, { "epoch": 6.097076355378938, "grad_norm": 14.802569389343262, "learning_rate": 9.390632983252909e-05, "loss": 0.11143943071365356, "step": 21480 }, { "epoch": 6.099914845302299, "grad_norm": 10.134464263916016, "learning_rate": 9.390349134260575e-05, "loss": 0.09743857979774476, "step": 21490 }, { "epoch": 6.1027533352256595, "grad_norm": 10.993227005004883, "learning_rate": 9.390065285268237e-05, "loss": 0.10054000616073608, "step": 21500 }, { "epoch": 6.1027533352256595, "eval_accuracy": 0.9324728174477014, "eval_loss": 0.20066311955451965, "eval_runtime": 31.6206, "eval_samples_per_second": 497.366, "eval_steps_per_second": 7.78, "step": 21500 }, { "epoch": 6.105591825149021, "grad_norm": 13.049242973327637, "learning_rate": 9.389781436275902e-05, "loss": 0.09531475901603699, "step": 21510 }, { "epoch": 6.108430315072382, "grad_norm": 10.597549438476562, "learning_rate": 9.389497587283566e-05, "loss": 0.08681356906890869, "step": 21520 }, { "epoch": 6.111268804995742, "grad_norm": 13.929612159729004, "learning_rate": 9.38921373829123e-05, "loss": 0.09679096937179565, "step": 21530 }, { "epoch": 6.114107294919103, "grad_norm": 9.482136726379395, "learning_rate": 9.388929889298893e-05, "loss": 0.08291661143302917, "step": 21540 }, { "epoch": 6.116945784842464, "grad_norm": 10.108871459960938, "learning_rate": 9.388646040306557e-05, "loss": 0.12714382410049438, "step": 21550 }, { "epoch": 6.119784274765824, "grad_norm": 15.778446197509766, "learning_rate": 9.388362191314222e-05, "loss": 0.10163508653640747, "step": 21560 }, { "epoch": 6.1226227646891855, "grad_norm": 18.318805694580078, "learning_rate": 9.388078342321885e-05, "loss": 0.09296117424964905, "step": 21570 }, { "epoch": 6.125461254612546, "grad_norm": 5.008502960205078, "learning_rate": 9.387794493329549e-05, "loss": 0.06724562048912049, "step": 21580 }, { "epoch": 6.128299744535907, "grad_norm": 10.594789505004883, "learning_rate": 9.387510644337213e-05, "loss": 0.084965181350708, "step": 21590 }, { "epoch": 6.131138234459268, "grad_norm": 8.914656639099121, "learning_rate": 9.387226795344876e-05, "loss": 0.06451302170753478, "step": 21600 }, { "epoch": 6.133976724382628, "grad_norm": 5.62416934967041, "learning_rate": 9.38694294635254e-05, "loss": 0.09203835725784301, "step": 21610 }, { "epoch": 6.136815214305989, "grad_norm": 13.042671203613281, "learning_rate": 9.386659097360206e-05, "loss": 0.0830959439277649, "step": 21620 }, { "epoch": 6.1396537042293495, "grad_norm": 15.792468070983887, "learning_rate": 9.386375248367869e-05, "loss": 0.1132962703704834, "step": 21630 }, { "epoch": 6.142492194152711, "grad_norm": 12.245312690734863, "learning_rate": 9.386091399375533e-05, "loss": 0.11983513832092285, "step": 21640 }, { "epoch": 6.145330684076072, "grad_norm": 11.293359756469727, "learning_rate": 9.385807550383197e-05, "loss": 0.09297634959220887, "step": 21650 }, { "epoch": 6.148169173999432, "grad_norm": 7.058413028717041, "learning_rate": 9.385523701390861e-05, "loss": 0.0682492196559906, "step": 21660 }, { "epoch": 6.151007663922793, "grad_norm": 21.44993019104004, "learning_rate": 9.385239852398524e-05, "loss": 0.1442980408668518, "step": 21670 }, { "epoch": 6.153846153846154, "grad_norm": 7.716861724853516, "learning_rate": 9.384956003406188e-05, "loss": 0.09743369221687317, "step": 21680 }, { "epoch": 6.156684643769514, "grad_norm": 8.692341804504395, "learning_rate": 9.384672154413853e-05, "loss": 0.08535637259483338, "step": 21690 }, { "epoch": 6.1595231336928755, "grad_norm": 24.293867111206055, "learning_rate": 9.384388305421516e-05, "loss": 0.116846764087677, "step": 21700 }, { "epoch": 6.162361623616236, "grad_norm": 7.000239849090576, "learning_rate": 9.38410445642918e-05, "loss": 0.10343834161758422, "step": 21710 }, { "epoch": 6.165200113539597, "grad_norm": 14.319605827331543, "learning_rate": 9.383820607436844e-05, "loss": 0.09635480642318725, "step": 21720 }, { "epoch": 6.168038603462958, "grad_norm": 12.32835865020752, "learning_rate": 9.383536758444507e-05, "loss": 0.10151472091674804, "step": 21730 }, { "epoch": 6.170877093386318, "grad_norm": 8.093631744384766, "learning_rate": 9.383252909452171e-05, "loss": 0.08099229335784912, "step": 21740 }, { "epoch": 6.173715583309679, "grad_norm": 3.1243510246276855, "learning_rate": 9.382969060459837e-05, "loss": 0.07208704352378845, "step": 21750 }, { "epoch": 6.17655407323304, "grad_norm": 12.66645336151123, "learning_rate": 9.3826852114675e-05, "loss": 0.0819431185722351, "step": 21760 }, { "epoch": 6.179392563156401, "grad_norm": 20.28594970703125, "learning_rate": 9.382401362475164e-05, "loss": 0.08638877868652343, "step": 21770 }, { "epoch": 6.182231053079762, "grad_norm": 11.864381790161133, "learning_rate": 9.382117513482828e-05, "loss": 0.12183315753936767, "step": 21780 }, { "epoch": 6.185069543003122, "grad_norm": 10.574756622314453, "learning_rate": 9.381833664490492e-05, "loss": 0.11219758987426758, "step": 21790 }, { "epoch": 6.187908032926483, "grad_norm": 9.806113243103027, "learning_rate": 9.381549815498155e-05, "loss": 0.07526425123214722, "step": 21800 }, { "epoch": 6.190746522849844, "grad_norm": 12.798103332519531, "learning_rate": 9.381265966505819e-05, "loss": 0.07418632507324219, "step": 21810 }, { "epoch": 6.193585012773204, "grad_norm": 12.292802810668945, "learning_rate": 9.380982117513483e-05, "loss": 0.08453023433685303, "step": 21820 }, { "epoch": 6.1964235026965655, "grad_norm": 17.0206356048584, "learning_rate": 9.380698268521147e-05, "loss": 0.09070110321044922, "step": 21830 }, { "epoch": 6.199261992619927, "grad_norm": 6.9051079750061035, "learning_rate": 9.380414419528811e-05, "loss": 0.09951165318489075, "step": 21840 }, { "epoch": 6.202100482543287, "grad_norm": 8.99085521697998, "learning_rate": 9.380130570536476e-05, "loss": 0.11273281574249268, "step": 21850 }, { "epoch": 6.204938972466648, "grad_norm": 10.897320747375488, "learning_rate": 9.379846721544138e-05, "loss": 0.08586341142654419, "step": 21860 }, { "epoch": 6.207777462390008, "grad_norm": 7.794835090637207, "learning_rate": 9.379562872551802e-05, "loss": 0.07840330600738525, "step": 21870 }, { "epoch": 6.210615952313369, "grad_norm": 9.387540817260742, "learning_rate": 9.379279023559467e-05, "loss": 0.09344335794448852, "step": 21880 }, { "epoch": 6.21345444223673, "grad_norm": 9.73838996887207, "learning_rate": 9.378995174567131e-05, "loss": 0.08935476541519165, "step": 21890 }, { "epoch": 6.216292932160091, "grad_norm": 12.763219833374023, "learning_rate": 9.378711325574795e-05, "loss": 0.0860913872718811, "step": 21900 }, { "epoch": 6.219131422083452, "grad_norm": 6.4986443519592285, "learning_rate": 9.378427476582459e-05, "loss": 0.11419270038604737, "step": 21910 }, { "epoch": 6.221969912006813, "grad_norm": 9.286530494689941, "learning_rate": 9.378143627590122e-05, "loss": 0.10870224237442017, "step": 21920 }, { "epoch": 6.224808401930173, "grad_norm": 5.983080863952637, "learning_rate": 9.377859778597786e-05, "loss": 0.08914061188697815, "step": 21930 }, { "epoch": 6.227646891853534, "grad_norm": 11.714787483215332, "learning_rate": 9.37757592960545e-05, "loss": 0.08509978652000427, "step": 21940 }, { "epoch": 6.230485381776894, "grad_norm": 20.62018585205078, "learning_rate": 9.377292080613114e-05, "loss": 0.11447678804397583, "step": 21950 }, { "epoch": 6.2333238717002555, "grad_norm": 12.139809608459473, "learning_rate": 9.377008231620778e-05, "loss": 0.10462168455123902, "step": 21960 }, { "epoch": 6.236162361623617, "grad_norm": 12.046883583068848, "learning_rate": 9.376724382628442e-05, "loss": 0.07254236936569214, "step": 21970 }, { "epoch": 6.239000851546977, "grad_norm": 25.686941146850586, "learning_rate": 9.376440533636107e-05, "loss": 0.08625777959823608, "step": 21980 }, { "epoch": 6.241839341470338, "grad_norm": 19.759035110473633, "learning_rate": 9.37615668464377e-05, "loss": 0.13934781551361083, "step": 21990 }, { "epoch": 6.244677831393698, "grad_norm": 8.881654739379883, "learning_rate": 9.375872835651434e-05, "loss": 0.0812686800956726, "step": 22000 }, { "epoch": 6.244677831393698, "eval_accuracy": 0.9270045145291537, "eval_loss": 0.21792440116405487, "eval_runtime": 31.0504, "eval_samples_per_second": 506.499, "eval_steps_per_second": 7.923, "step": 22000 }, { "epoch": 6.247516321317059, "grad_norm": 11.264602661132812, "learning_rate": 9.375588986659098e-05, "loss": 0.10338951349258423, "step": 22010 }, { "epoch": 6.25035481124042, "grad_norm": 15.521607398986816, "learning_rate": 9.375305137666762e-05, "loss": 0.09795397520065308, "step": 22020 }, { "epoch": 6.253193301163781, "grad_norm": 11.035134315490723, "learning_rate": 9.375021288674426e-05, "loss": 0.10320013761520386, "step": 22030 }, { "epoch": 6.256031791087142, "grad_norm": 16.9571590423584, "learning_rate": 9.37473743968209e-05, "loss": 0.10504947900772095, "step": 22040 }, { "epoch": 6.258870281010503, "grad_norm": 13.23913860321045, "learning_rate": 9.374453590689753e-05, "loss": 0.11344741582870484, "step": 22050 }, { "epoch": 6.261708770933863, "grad_norm": 9.76651668548584, "learning_rate": 9.374169741697417e-05, "loss": 0.08526365160942077, "step": 22060 }, { "epoch": 6.264547260857224, "grad_norm": 17.147823333740234, "learning_rate": 9.373885892705081e-05, "loss": 0.0857745885848999, "step": 22070 }, { "epoch": 6.267385750780584, "grad_norm": 11.726737976074219, "learning_rate": 9.373602043712745e-05, "loss": 0.07491317987442017, "step": 22080 }, { "epoch": 6.2702242407039455, "grad_norm": 5.284913063049316, "learning_rate": 9.37331819472041e-05, "loss": 0.05360146760940552, "step": 22090 }, { "epoch": 6.273062730627307, "grad_norm": 9.205575942993164, "learning_rate": 9.373034345728074e-05, "loss": 0.0956966757774353, "step": 22100 }, { "epoch": 6.275901220550667, "grad_norm": 18.47577476501465, "learning_rate": 9.372750496735738e-05, "loss": 0.0897540032863617, "step": 22110 }, { "epoch": 6.278739710474028, "grad_norm": 13.804877281188965, "learning_rate": 9.3724666477434e-05, "loss": 0.06971631646156311, "step": 22120 }, { "epoch": 6.281578200397389, "grad_norm": 13.640564918518066, "learning_rate": 9.372182798751065e-05, "loss": 0.09559376239776611, "step": 22130 }, { "epoch": 6.284416690320749, "grad_norm": 7.796805381774902, "learning_rate": 9.371898949758729e-05, "loss": 0.087092125415802, "step": 22140 }, { "epoch": 6.28725518024411, "grad_norm": 17.170866012573242, "learning_rate": 9.371615100766393e-05, "loss": 0.1096609354019165, "step": 22150 }, { "epoch": 6.290093670167471, "grad_norm": 6.7061309814453125, "learning_rate": 9.371331251774057e-05, "loss": 0.0764557421207428, "step": 22160 }, { "epoch": 6.292932160090832, "grad_norm": 8.954500198364258, "learning_rate": 9.371047402781721e-05, "loss": 0.0790553092956543, "step": 22170 }, { "epoch": 6.295770650014193, "grad_norm": 10.993725776672363, "learning_rate": 9.370763553789384e-05, "loss": 0.09968703389167785, "step": 22180 }, { "epoch": 6.298609139937553, "grad_norm": 2.6468212604522705, "learning_rate": 9.370479704797048e-05, "loss": 0.08798613548278808, "step": 22190 }, { "epoch": 6.301447629860914, "grad_norm": 12.747579574584961, "learning_rate": 9.370195855804712e-05, "loss": 0.09578362703323365, "step": 22200 }, { "epoch": 6.304286119784274, "grad_norm": 15.204248428344727, "learning_rate": 9.369912006812376e-05, "loss": 0.1036005973815918, "step": 22210 }, { "epoch": 6.307124609707635, "grad_norm": 18.201011657714844, "learning_rate": 9.36962815782004e-05, "loss": 0.11049052476882934, "step": 22220 }, { "epoch": 6.3099630996309966, "grad_norm": 6.651350975036621, "learning_rate": 9.369344308827705e-05, "loss": 0.058932000398635866, "step": 22230 }, { "epoch": 6.312801589554357, "grad_norm": 13.46920108795166, "learning_rate": 9.369060459835369e-05, "loss": 0.09388837814331055, "step": 22240 }, { "epoch": 6.315640079477718, "grad_norm": 6.388342380523682, "learning_rate": 9.368776610843032e-05, "loss": 0.08493118286132813, "step": 22250 }, { "epoch": 6.318478569401079, "grad_norm": 10.680513381958008, "learning_rate": 9.368492761850696e-05, "loss": 0.11544784307479858, "step": 22260 }, { "epoch": 6.321317059324439, "grad_norm": 12.826972961425781, "learning_rate": 9.36820891285836e-05, "loss": 0.07117531895637512, "step": 22270 }, { "epoch": 6.3241555492478, "grad_norm": 11.348532676696777, "learning_rate": 9.367925063866023e-05, "loss": 0.12110992670059204, "step": 22280 }, { "epoch": 6.3269940391711605, "grad_norm": 16.930397033691406, "learning_rate": 9.367641214873688e-05, "loss": 0.16743370294570922, "step": 22290 }, { "epoch": 6.329832529094522, "grad_norm": 9.169883728027344, "learning_rate": 9.367357365881352e-05, "loss": 0.07741034030914307, "step": 22300 }, { "epoch": 6.332671019017883, "grad_norm": 7.381467342376709, "learning_rate": 9.367073516889015e-05, "loss": 0.11755729913711548, "step": 22310 }, { "epoch": 6.335509508941243, "grad_norm": 10.883224487304688, "learning_rate": 9.366789667896679e-05, "loss": 0.1140102505683899, "step": 22320 }, { "epoch": 6.338347998864604, "grad_norm": 15.579988479614258, "learning_rate": 9.366505818904343e-05, "loss": 0.10063846111297607, "step": 22330 }, { "epoch": 6.341186488787965, "grad_norm": 10.76476001739502, "learning_rate": 9.366221969912007e-05, "loss": 0.13289889097213745, "step": 22340 }, { "epoch": 6.344024978711325, "grad_norm": 6.35222053527832, "learning_rate": 9.365938120919672e-05, "loss": 0.09211413860321045, "step": 22350 }, { "epoch": 6.3468634686346865, "grad_norm": 10.59709358215332, "learning_rate": 9.365654271927336e-05, "loss": 0.09625858068466187, "step": 22360 }, { "epoch": 6.349701958558047, "grad_norm": 13.786771774291992, "learning_rate": 9.365370422935e-05, "loss": 0.07938930988311768, "step": 22370 }, { "epoch": 6.352540448481408, "grad_norm": 11.998483657836914, "learning_rate": 9.365086573942663e-05, "loss": 0.14438967704772948, "step": 22380 }, { "epoch": 6.355378938404769, "grad_norm": 8.744522094726562, "learning_rate": 9.364802724950327e-05, "loss": 0.10890406370162964, "step": 22390 }, { "epoch": 6.358217428328129, "grad_norm": 4.687183380126953, "learning_rate": 9.364518875957991e-05, "loss": 0.07402298450469971, "step": 22400 }, { "epoch": 6.36105591825149, "grad_norm": 17.525976181030273, "learning_rate": 9.364235026965654e-05, "loss": 0.118218994140625, "step": 22410 }, { "epoch": 6.3638944081748505, "grad_norm": 6.600802421569824, "learning_rate": 9.363951177973319e-05, "loss": 0.11827734708786011, "step": 22420 }, { "epoch": 6.366732898098212, "grad_norm": 8.856054306030273, "learning_rate": 9.363667328980983e-05, "loss": 0.06804376244544982, "step": 22430 }, { "epoch": 6.369571388021573, "grad_norm": 29.660568237304688, "learning_rate": 9.363383479988646e-05, "loss": 0.09639835357666016, "step": 22440 }, { "epoch": 6.372409877944933, "grad_norm": 16.50444984436035, "learning_rate": 9.36309963099631e-05, "loss": 0.10021114349365234, "step": 22450 }, { "epoch": 6.375248367868294, "grad_norm": 20.486492156982422, "learning_rate": 9.362815782003974e-05, "loss": 0.10686763525009155, "step": 22460 }, { "epoch": 6.378086857791655, "grad_norm": 10.710816383361816, "learning_rate": 9.362531933011638e-05, "loss": 0.08526849746704102, "step": 22470 }, { "epoch": 6.380925347715015, "grad_norm": 13.139506340026855, "learning_rate": 9.362248084019301e-05, "loss": 0.09127248525619507, "step": 22480 }, { "epoch": 6.3837638376383765, "grad_norm": 9.385111808776855, "learning_rate": 9.361964235026967e-05, "loss": 0.0844616413116455, "step": 22490 }, { "epoch": 6.386602327561738, "grad_norm": 5.706073760986328, "learning_rate": 9.361680386034631e-05, "loss": 0.09170372486114502, "step": 22500 }, { "epoch": 6.386602327561738, "eval_accuracy": 0.9301837604120302, "eval_loss": 0.2105872929096222, "eval_runtime": 31.9278, "eval_samples_per_second": 492.58, "eval_steps_per_second": 7.705, "step": 22500 }, { "epoch": 6.389440817485098, "grad_norm": 9.788163185119629, "learning_rate": 9.361396537042294e-05, "loss": 0.08152780532836915, "step": 22510 }, { "epoch": 6.392279307408459, "grad_norm": 15.817776679992676, "learning_rate": 9.361112688049958e-05, "loss": 0.09825285077095032, "step": 22520 }, { "epoch": 6.395117797331819, "grad_norm": 11.550987243652344, "learning_rate": 9.360828839057622e-05, "loss": 0.1070898175239563, "step": 22530 }, { "epoch": 6.39795628725518, "grad_norm": 14.608263969421387, "learning_rate": 9.360544990065285e-05, "loss": 0.0923076331615448, "step": 22540 }, { "epoch": 6.400794777178541, "grad_norm": 12.77895736694336, "learning_rate": 9.36026114107295e-05, "loss": 0.10174696445465088, "step": 22550 }, { "epoch": 6.403633267101902, "grad_norm": 7.593853950500488, "learning_rate": 9.359977292080614e-05, "loss": 0.0696295976638794, "step": 22560 }, { "epoch": 6.406471757025263, "grad_norm": 11.700136184692383, "learning_rate": 9.359693443088277e-05, "loss": 0.06687184572219848, "step": 22570 }, { "epoch": 6.409310246948623, "grad_norm": 7.089961528778076, "learning_rate": 9.359409594095941e-05, "loss": 0.0826976478099823, "step": 22580 }, { "epoch": 6.412148736871984, "grad_norm": 24.44202423095703, "learning_rate": 9.359154130002838e-05, "loss": 0.13295964002609253, "step": 22590 }, { "epoch": 6.414987226795345, "grad_norm": 6.154026985168457, "learning_rate": 9.358870281010503e-05, "loss": 0.09617015719413757, "step": 22600 }, { "epoch": 6.417825716718705, "grad_norm": 14.29837703704834, "learning_rate": 9.358586432018168e-05, "loss": 0.1076723575592041, "step": 22610 }, { "epoch": 6.4206642066420665, "grad_norm": 6.377991199493408, "learning_rate": 9.35830258302583e-05, "loss": 0.10216890573501587, "step": 22620 }, { "epoch": 6.423502696565428, "grad_norm": 1.7779245376586914, "learning_rate": 9.358018734033494e-05, "loss": 0.07874549627304077, "step": 22630 }, { "epoch": 6.426341186488788, "grad_norm": 17.45425033569336, "learning_rate": 9.357734885041159e-05, "loss": 0.09025533795356751, "step": 22640 }, { "epoch": 6.429179676412149, "grad_norm": 14.70903491973877, "learning_rate": 9.357451036048823e-05, "loss": 0.11120609045028687, "step": 22650 }, { "epoch": 6.432018166335509, "grad_norm": 16.77193260192871, "learning_rate": 9.357167187056486e-05, "loss": 0.10498535633087158, "step": 22660 }, { "epoch": 6.43485665625887, "grad_norm": 4.400278568267822, "learning_rate": 9.356883338064151e-05, "loss": 0.10595424175262451, "step": 22670 }, { "epoch": 6.437695146182231, "grad_norm": 11.257668495178223, "learning_rate": 9.356599489071815e-05, "loss": 0.07874829769134521, "step": 22680 }, { "epoch": 6.440533636105592, "grad_norm": 12.653393745422363, "learning_rate": 9.356315640079478e-05, "loss": 0.09990564584732056, "step": 22690 }, { "epoch": 6.443372126028953, "grad_norm": 10.325603485107422, "learning_rate": 9.356031791087142e-05, "loss": 0.07696129083633423, "step": 22700 }, { "epoch": 6.446210615952314, "grad_norm": 10.782560348510742, "learning_rate": 9.355747942094806e-05, "loss": 0.08075250387191772, "step": 22710 }, { "epoch": 6.449049105875674, "grad_norm": 4.658320426940918, "learning_rate": 9.355464093102469e-05, "loss": 0.06197248101234436, "step": 22720 }, { "epoch": 6.451887595799035, "grad_norm": 14.929058074951172, "learning_rate": 9.355180244110134e-05, "loss": 0.11998610496520996, "step": 22730 }, { "epoch": 6.454726085722395, "grad_norm": 7.436862468719482, "learning_rate": 9.354896395117799e-05, "loss": 0.0861608326435089, "step": 22740 }, { "epoch": 6.4575645756457565, "grad_norm": 5.92672061920166, "learning_rate": 9.354612546125461e-05, "loss": 0.07573016881942748, "step": 22750 }, { "epoch": 6.460403065569118, "grad_norm": 11.298891067504883, "learning_rate": 9.354328697133126e-05, "loss": 0.10512068271636962, "step": 22760 }, { "epoch": 6.463241555492478, "grad_norm": 3.5694100856781006, "learning_rate": 9.35404484814079e-05, "loss": 0.08416117429733276, "step": 22770 }, { "epoch": 6.466080045415839, "grad_norm": 14.55178165435791, "learning_rate": 9.353760999148454e-05, "loss": 0.10444525480270386, "step": 22780 }, { "epoch": 6.468918535339199, "grad_norm": 11.943623542785645, "learning_rate": 9.353477150156117e-05, "loss": 0.11375449895858765, "step": 22790 }, { "epoch": 6.47175702526256, "grad_norm": 14.246231079101562, "learning_rate": 9.353193301163782e-05, "loss": 0.07867357134819031, "step": 22800 }, { "epoch": 6.474595515185921, "grad_norm": 12.942447662353516, "learning_rate": 9.352909452171445e-05, "loss": 0.08727706670761108, "step": 22810 }, { "epoch": 6.477434005109282, "grad_norm": 15.228692054748535, "learning_rate": 9.352625603179109e-05, "loss": 0.11858946084976196, "step": 22820 }, { "epoch": 6.480272495032643, "grad_norm": 9.932172775268555, "learning_rate": 9.352341754186773e-05, "loss": 0.07700104713439941, "step": 22830 }, { "epoch": 6.483110984956004, "grad_norm": 18.2576904296875, "learning_rate": 9.352057905194437e-05, "loss": 0.09337477684020996, "step": 22840 }, { "epoch": 6.485949474879364, "grad_norm": 9.911008834838867, "learning_rate": 9.3517740562021e-05, "loss": 0.08646316528320312, "step": 22850 }, { "epoch": 6.488787964802725, "grad_norm": 14.605977058410645, "learning_rate": 9.351490207209764e-05, "loss": 0.09186390638351441, "step": 22860 }, { "epoch": 6.491626454726085, "grad_norm": 8.759679794311523, "learning_rate": 9.35120635821743e-05, "loss": 0.08041915893554688, "step": 22870 }, { "epoch": 6.4944649446494465, "grad_norm": 8.287078857421875, "learning_rate": 9.350922509225092e-05, "loss": 0.09436277747154236, "step": 22880 }, { "epoch": 6.497303434572808, "grad_norm": 12.57153034210205, "learning_rate": 9.350638660232757e-05, "loss": 0.09365052580833436, "step": 22890 }, { "epoch": 6.500141924496168, "grad_norm": 11.6986083984375, "learning_rate": 9.350354811240421e-05, "loss": 0.08615879416465759, "step": 22900 }, { "epoch": 6.502980414419529, "grad_norm": 8.28325080871582, "learning_rate": 9.350070962248084e-05, "loss": 0.14441734552383423, "step": 22910 }, { "epoch": 6.50581890434289, "grad_norm": 2.1102166175842285, "learning_rate": 9.349787113255748e-05, "loss": 0.0824457049369812, "step": 22920 }, { "epoch": 6.50865739426625, "grad_norm": 9.971158981323242, "learning_rate": 9.349503264263413e-05, "loss": 0.10625452995300293, "step": 22930 }, { "epoch": 6.511495884189611, "grad_norm": 4.650021076202393, "learning_rate": 9.349219415271076e-05, "loss": 0.07723555564880372, "step": 22940 }, { "epoch": 6.514334374112972, "grad_norm": 8.099427223205566, "learning_rate": 9.34893556627874e-05, "loss": 0.12295387983322144, "step": 22950 }, { "epoch": 6.517172864036333, "grad_norm": 13.784622192382812, "learning_rate": 9.348651717286404e-05, "loss": 0.0961941123008728, "step": 22960 }, { "epoch": 6.520011353959694, "grad_norm": 2.625196695327759, "learning_rate": 9.348367868294068e-05, "loss": 0.11788196563720703, "step": 22970 }, { "epoch": 6.522849843883054, "grad_norm": 3.4829862117767334, "learning_rate": 9.348084019301731e-05, "loss": 0.06416496038436889, "step": 22980 }, { "epoch": 6.525688333806415, "grad_norm": 11.49492073059082, "learning_rate": 9.347800170309395e-05, "loss": 0.1067654013633728, "step": 22990 }, { "epoch": 6.528526823729775, "grad_norm": 13.783095359802246, "learning_rate": 9.347516321317061e-05, "loss": 0.08907094001770019, "step": 23000 }, { "epoch": 6.528526823729775, "eval_accuracy": 0.931582628600496, "eval_loss": 0.2043585479259491, "eval_runtime": 32.1399, "eval_samples_per_second": 489.33, "eval_steps_per_second": 7.654, "step": 23000 }, { "epoch": 6.531365313653136, "grad_norm": 12.19428539276123, "learning_rate": 9.347232472324724e-05, "loss": 0.11192913055419922, "step": 23010 }, { "epoch": 6.5342038035764975, "grad_norm": 9.6806058883667, "learning_rate": 9.346948623332388e-05, "loss": 0.11081634759902954, "step": 23020 }, { "epoch": 6.537042293499858, "grad_norm": 9.315773963928223, "learning_rate": 9.346664774340052e-05, "loss": 0.09858989119529724, "step": 23030 }, { "epoch": 6.539880783423219, "grad_norm": 7.326742649078369, "learning_rate": 9.346380925347715e-05, "loss": 0.0827315092086792, "step": 23040 }, { "epoch": 6.54271927334658, "grad_norm": 14.56912612915039, "learning_rate": 9.346097076355379e-05, "loss": 0.10821828842163086, "step": 23050 }, { "epoch": 6.54555776326994, "grad_norm": 4.597448825836182, "learning_rate": 9.345813227363043e-05, "loss": 0.09526298642158508, "step": 23060 }, { "epoch": 6.548396253193301, "grad_norm": 11.362309455871582, "learning_rate": 9.345529378370707e-05, "loss": 0.09052236676216126, "step": 23070 }, { "epoch": 6.551234743116662, "grad_norm": 5.615175247192383, "learning_rate": 9.345245529378371e-05, "loss": 0.07679804563522338, "step": 23080 }, { "epoch": 6.554073233040023, "grad_norm": 9.740547180175781, "learning_rate": 9.344961680386035e-05, "loss": 0.08708915710449219, "step": 23090 }, { "epoch": 6.556911722963384, "grad_norm": 4.926576614379883, "learning_rate": 9.3446778313937e-05, "loss": 0.0837101936340332, "step": 23100 }, { "epoch": 6.559750212886744, "grad_norm": 7.401463031768799, "learning_rate": 9.344393982401362e-05, "loss": 0.09549596309661865, "step": 23110 }, { "epoch": 6.562588702810105, "grad_norm": 19.454238891601562, "learning_rate": 9.344110133409026e-05, "loss": 0.06996254324913025, "step": 23120 }, { "epoch": 6.565427192733466, "grad_norm": 12.590356826782227, "learning_rate": 9.343826284416692e-05, "loss": 0.11268901824951172, "step": 23130 }, { "epoch": 6.568265682656826, "grad_norm": 15.136454582214355, "learning_rate": 9.343542435424355e-05, "loss": 0.11089028120040893, "step": 23140 }, { "epoch": 6.5711041725801875, "grad_norm": 20.47833251953125, "learning_rate": 9.343258586432019e-05, "loss": 0.16502034664154053, "step": 23150 }, { "epoch": 6.573942662503548, "grad_norm": 18.76143455505371, "learning_rate": 9.342974737439683e-05, "loss": 0.1134371042251587, "step": 23160 }, { "epoch": 6.576781152426909, "grad_norm": 5.621868133544922, "learning_rate": 9.342690888447346e-05, "loss": 0.07961916327476501, "step": 23170 }, { "epoch": 6.57961964235027, "grad_norm": 4.496213912963867, "learning_rate": 9.34240703945501e-05, "loss": 0.06913825273513793, "step": 23180 }, { "epoch": 6.58245813227363, "grad_norm": 5.536193370819092, "learning_rate": 9.342123190462674e-05, "loss": 0.0882467806339264, "step": 23190 }, { "epoch": 6.585296622196991, "grad_norm": 7.135318756103516, "learning_rate": 9.341839341470338e-05, "loss": 0.08491207361221313, "step": 23200 }, { "epoch": 6.5881351121203515, "grad_norm": 10.8344087600708, "learning_rate": 9.341555492478002e-05, "loss": 0.0774510383605957, "step": 23210 }, { "epoch": 6.590973602043713, "grad_norm": 13.20254898071289, "learning_rate": 9.341271643485666e-05, "loss": 0.08600361347198486, "step": 23220 }, { "epoch": 6.593812091967074, "grad_norm": 10.479114532470703, "learning_rate": 9.34098779449333e-05, "loss": 0.11895914077758789, "step": 23230 }, { "epoch": 6.596650581890434, "grad_norm": 15.753108024597168, "learning_rate": 9.340703945500993e-05, "loss": 0.09777660369873047, "step": 23240 }, { "epoch": 6.599489071813795, "grad_norm": 6.3389081954956055, "learning_rate": 9.340420096508657e-05, "loss": 0.08923123478889465, "step": 23250 }, { "epoch": 6.602327561737156, "grad_norm": 7.197924613952637, "learning_rate": 9.340136247516322e-05, "loss": 0.08538019061088561, "step": 23260 }, { "epoch": 6.605166051660516, "grad_norm": 12.7660493850708, "learning_rate": 9.339852398523986e-05, "loss": 0.11998507976531983, "step": 23270 }, { "epoch": 6.6080045415838775, "grad_norm": 17.26128578186035, "learning_rate": 9.33956854953165e-05, "loss": 0.08162992000579834, "step": 23280 }, { "epoch": 6.610843031507239, "grad_norm": 23.859159469604492, "learning_rate": 9.339284700539314e-05, "loss": 0.10878651142120362, "step": 23290 }, { "epoch": 6.613681521430599, "grad_norm": 5.97392463684082, "learning_rate": 9.339000851546977e-05, "loss": 0.08939759135246277, "step": 23300 }, { "epoch": 6.61652001135396, "grad_norm": 6.42591667175293, "learning_rate": 9.338717002554641e-05, "loss": 0.0937576413154602, "step": 23310 }, { "epoch": 6.61935850127732, "grad_norm": 12.023765563964844, "learning_rate": 9.338433153562305e-05, "loss": 0.0607508659362793, "step": 23320 }, { "epoch": 6.622196991200681, "grad_norm": 11.77810001373291, "learning_rate": 9.338149304569969e-05, "loss": 0.10685676336288452, "step": 23330 }, { "epoch": 6.625035481124042, "grad_norm": 7.256436824798584, "learning_rate": 9.337865455577633e-05, "loss": 0.10113931894302368, "step": 23340 }, { "epoch": 6.627873971047403, "grad_norm": 11.541536331176758, "learning_rate": 9.337581606585297e-05, "loss": 0.060240209102630615, "step": 23350 }, { "epoch": 6.630712460970764, "grad_norm": 9.527950286865234, "learning_rate": 9.337297757592962e-05, "loss": 0.09862492084503174, "step": 23360 }, { "epoch": 6.633550950894124, "grad_norm": 5.018553256988525, "learning_rate": 9.337013908600624e-05, "loss": 0.0929749608039856, "step": 23370 }, { "epoch": 6.636389440817485, "grad_norm": 15.911595344543457, "learning_rate": 9.336730059608289e-05, "loss": 0.07330633997917176, "step": 23380 }, { "epoch": 6.639227930740846, "grad_norm": 8.000524520874023, "learning_rate": 9.336446210615953e-05, "loss": 0.09455389976501465, "step": 23390 }, { "epoch": 6.642066420664206, "grad_norm": 8.035616874694824, "learning_rate": 9.336162361623617e-05, "loss": 0.08320001363754273, "step": 23400 }, { "epoch": 6.6449049105875675, "grad_norm": 7.5174760818481445, "learning_rate": 9.335878512631281e-05, "loss": 0.06982924342155457, "step": 23410 }, { "epoch": 6.647743400510928, "grad_norm": 4.479267120361328, "learning_rate": 9.335594663638945e-05, "loss": 0.07252361178398133, "step": 23420 }, { "epoch": 6.650581890434289, "grad_norm": 7.234189510345459, "learning_rate": 9.335310814646608e-05, "loss": 0.07169376611709595, "step": 23430 }, { "epoch": 6.65342038035765, "grad_norm": 18.071443557739258, "learning_rate": 9.335026965654272e-05, "loss": 0.09714398384094239, "step": 23440 }, { "epoch": 6.65625887028101, "grad_norm": 8.038203239440918, "learning_rate": 9.334743116661936e-05, "loss": 0.07264513969421386, "step": 23450 }, { "epoch": 6.659097360204371, "grad_norm": 9.719202995300293, "learning_rate": 9.3344592676696e-05, "loss": 0.10534716844558716, "step": 23460 }, { "epoch": 6.661935850127732, "grad_norm": 5.747554302215576, "learning_rate": 9.334175418677264e-05, "loss": 0.11357256174087524, "step": 23470 }, { "epoch": 6.664774340051093, "grad_norm": 7.582512855529785, "learning_rate": 9.333891569684929e-05, "loss": 0.10980550050735474, "step": 23480 }, { "epoch": 6.667612829974454, "grad_norm": 11.435025215148926, "learning_rate": 9.333607720692593e-05, "loss": 0.08182085752487182, "step": 23490 }, { "epoch": 6.670451319897815, "grad_norm": 4.0055623054504395, "learning_rate": 9.333323871700255e-05, "loss": 0.10198774337768554, "step": 23500 }, { "epoch": 6.670451319897815, "eval_accuracy": 0.9365422521777834, "eval_loss": 0.19689252972602844, "eval_runtime": 31.32, "eval_samples_per_second": 502.14, "eval_steps_per_second": 7.854, "step": 23500 }, { "epoch": 6.673289809821175, "grad_norm": 5.353410720825195, "learning_rate": 9.33304002270792e-05, "loss": 0.07309556007385254, "step": 23510 }, { "epoch": 6.676128299744536, "grad_norm": 18.983692169189453, "learning_rate": 9.332756173715584e-05, "loss": 0.10379167795181274, "step": 23520 }, { "epoch": 6.678966789667896, "grad_norm": 12.102860450744629, "learning_rate": 9.332472324723248e-05, "loss": 0.14615061283111572, "step": 23530 }, { "epoch": 6.6818052795912575, "grad_norm": 16.388975143432617, "learning_rate": 9.332188475730912e-05, "loss": 0.0960249125957489, "step": 23540 }, { "epoch": 6.684643769514619, "grad_norm": 9.716329574584961, "learning_rate": 9.331904626738576e-05, "loss": 0.10273417234420776, "step": 23550 }, { "epoch": 6.687482259437979, "grad_norm": 7.821311950683594, "learning_rate": 9.331620777746239e-05, "loss": 0.11158208847045899, "step": 23560 }, { "epoch": 6.69032074936134, "grad_norm": 15.425642013549805, "learning_rate": 9.331336928753903e-05, "loss": 0.11695325374603271, "step": 23570 }, { "epoch": 6.6931592392847, "grad_norm": 8.43350601196289, "learning_rate": 9.331053079761567e-05, "loss": 0.12026833295822144, "step": 23580 }, { "epoch": 6.695997729208061, "grad_norm": 12.40167236328125, "learning_rate": 9.330769230769231e-05, "loss": 0.10209051370620728, "step": 23590 }, { "epoch": 6.698836219131422, "grad_norm": 9.377254486083984, "learning_rate": 9.330485381776895e-05, "loss": 0.10837109088897705, "step": 23600 }, { "epoch": 6.701674709054783, "grad_norm": 6.663228511810303, "learning_rate": 9.33020153278456e-05, "loss": 0.11400752067565918, "step": 23610 }, { "epoch": 6.704513198978144, "grad_norm": 9.539036750793457, "learning_rate": 9.329917683792224e-05, "loss": 0.07644888758659363, "step": 23620 }, { "epoch": 6.707351688901504, "grad_norm": 12.6395845413208, "learning_rate": 9.329633834799887e-05, "loss": 0.11350473165512084, "step": 23630 }, { "epoch": 6.710190178824865, "grad_norm": 9.589407920837402, "learning_rate": 9.329349985807551e-05, "loss": 0.08152202367782593, "step": 23640 }, { "epoch": 6.713028668748226, "grad_norm": 15.663517951965332, "learning_rate": 9.329066136815215e-05, "loss": 0.11970263719558716, "step": 23650 }, { "epoch": 6.715867158671586, "grad_norm": 12.597955703735352, "learning_rate": 9.328782287822879e-05, "loss": 0.09735053777694702, "step": 23660 }, { "epoch": 6.7187056485949475, "grad_norm": 18.34467124938965, "learning_rate": 9.328498438830543e-05, "loss": 0.10283708572387695, "step": 23670 }, { "epoch": 6.721544138518309, "grad_norm": 12.121549606323242, "learning_rate": 9.328214589838207e-05, "loss": 0.11331470012664795, "step": 23680 }, { "epoch": 6.724382628441669, "grad_norm": 13.72669792175293, "learning_rate": 9.32793074084587e-05, "loss": 0.09874246716499328, "step": 23690 }, { "epoch": 6.72722111836503, "grad_norm": 11.512922286987305, "learning_rate": 9.327646891853534e-05, "loss": 0.08080302476882935, "step": 23700 }, { "epoch": 6.730059608288391, "grad_norm": 10.64036750793457, "learning_rate": 9.327363042861198e-05, "loss": 0.1081117868423462, "step": 23710 }, { "epoch": 6.732898098211751, "grad_norm": 19.117401123046875, "learning_rate": 9.327079193868862e-05, "loss": 0.09446322917938232, "step": 23720 }, { "epoch": 6.735736588135112, "grad_norm": 5.025933742523193, "learning_rate": 9.326795344876527e-05, "loss": 0.11468440294265747, "step": 23730 }, { "epoch": 6.738575078058473, "grad_norm": 6.562473773956299, "learning_rate": 9.326511495884191e-05, "loss": 0.08148735761642456, "step": 23740 }, { "epoch": 6.741413567981834, "grad_norm": 10.234820365905762, "learning_rate": 9.326227646891853e-05, "loss": 0.10205699205398559, "step": 23750 }, { "epoch": 6.744252057905195, "grad_norm": 8.898704528808594, "learning_rate": 9.325943797899518e-05, "loss": 0.06831822991371155, "step": 23760 }, { "epoch": 6.747090547828555, "grad_norm": 17.5389347076416, "learning_rate": 9.325659948907182e-05, "loss": 0.1090964913368225, "step": 23770 }, { "epoch": 6.749929037751916, "grad_norm": 9.973577499389648, "learning_rate": 9.325376099914846e-05, "loss": 0.08267690539360047, "step": 23780 }, { "epoch": 6.752767527675276, "grad_norm": 9.735045433044434, "learning_rate": 9.325092250922509e-05, "loss": 0.09638917446136475, "step": 23790 }, { "epoch": 6.755606017598637, "grad_norm": 14.622360229492188, "learning_rate": 9.324808401930174e-05, "loss": 0.1093931794166565, "step": 23800 }, { "epoch": 6.7584445075219985, "grad_norm": 10.59725284576416, "learning_rate": 9.324524552937838e-05, "loss": 0.11809722185134888, "step": 23810 }, { "epoch": 6.761282997445359, "grad_norm": 10.137466430664062, "learning_rate": 9.324240703945501e-05, "loss": 0.11568174362182618, "step": 23820 }, { "epoch": 6.76412148736872, "grad_norm": 14.032526016235352, "learning_rate": 9.323956854953165e-05, "loss": 0.09648122787475585, "step": 23830 }, { "epoch": 6.766959977292081, "grad_norm": 9.677608489990234, "learning_rate": 9.32367300596083e-05, "loss": 0.06552197337150574, "step": 23840 }, { "epoch": 6.769798467215441, "grad_norm": 12.235411643981934, "learning_rate": 9.323389156968492e-05, "loss": 0.10965950489044189, "step": 23850 }, { "epoch": 6.772636957138802, "grad_norm": 9.106744766235352, "learning_rate": 9.323105307976158e-05, "loss": 0.08939334750175476, "step": 23860 }, { "epoch": 6.775475447062163, "grad_norm": 6.746470928192139, "learning_rate": 9.322821458983822e-05, "loss": 0.07405282855033875, "step": 23870 }, { "epoch": 6.778313936985524, "grad_norm": 12.039777755737305, "learning_rate": 9.322537609991485e-05, "loss": 0.09850831627845764, "step": 23880 }, { "epoch": 6.781152426908885, "grad_norm": 15.966470718383789, "learning_rate": 9.322253760999149e-05, "loss": 0.08547778129577636, "step": 23890 }, { "epoch": 6.783990916832245, "grad_norm": 18.932947158813477, "learning_rate": 9.321969912006813e-05, "loss": 0.09407666325569153, "step": 23900 }, { "epoch": 6.786829406755606, "grad_norm": 16.5733585357666, "learning_rate": 9.321686063014477e-05, "loss": 0.12048181295394897, "step": 23910 }, { "epoch": 6.789667896678967, "grad_norm": 10.802082061767578, "learning_rate": 9.32140221402214e-05, "loss": 0.13048343658447265, "step": 23920 }, { "epoch": 6.792506386602327, "grad_norm": 8.369768142700195, "learning_rate": 9.321118365029805e-05, "loss": 0.08966717720031739, "step": 23930 }, { "epoch": 6.7953448765256885, "grad_norm": 10.59682559967041, "learning_rate": 9.32083451603747e-05, "loss": 0.053786516189575195, "step": 23940 }, { "epoch": 6.798183366449049, "grad_norm": 10.559469223022461, "learning_rate": 9.320550667045132e-05, "loss": 0.08415335416793823, "step": 23950 }, { "epoch": 6.80102185637241, "grad_norm": 11.114952087402344, "learning_rate": 9.320266818052796e-05, "loss": 0.12069662809371948, "step": 23960 }, { "epoch": 6.803860346295771, "grad_norm": 10.029012680053711, "learning_rate": 9.31998296906046e-05, "loss": 0.09029229879379272, "step": 23970 }, { "epoch": 6.806698836219131, "grad_norm": 14.684560775756836, "learning_rate": 9.319699120068123e-05, "loss": 0.07971057891845704, "step": 23980 }, { "epoch": 6.809537326142492, "grad_norm": 10.083139419555664, "learning_rate": 9.319415271075787e-05, "loss": 0.08903594613075257, "step": 23990 }, { "epoch": 6.8123758160658525, "grad_norm": 17.14006233215332, "learning_rate": 9.319131422083453e-05, "loss": 0.10128430128097535, "step": 24000 }, { "epoch": 6.8123758160658525, "eval_accuracy": 0.9203280981751129, "eval_loss": 0.23221370577812195, "eval_runtime": 31.6746, "eval_samples_per_second": 496.518, "eval_steps_per_second": 7.766, "step": 24000 }, { "epoch": 6.815214305989214, "grad_norm": 5.353432655334473, "learning_rate": 9.318847573091116e-05, "loss": 0.09382016658782959, "step": 24010 }, { "epoch": 6.818052795912575, "grad_norm": 9.091730117797852, "learning_rate": 9.31856372409878e-05, "loss": 0.08397729992866516, "step": 24020 }, { "epoch": 6.820891285835935, "grad_norm": 8.414477348327637, "learning_rate": 9.318279875106444e-05, "loss": 0.10697766542434692, "step": 24030 }, { "epoch": 6.823729775759296, "grad_norm": 8.535869598388672, "learning_rate": 9.317996026114108e-05, "loss": 0.07649033665657043, "step": 24040 }, { "epoch": 6.826568265682657, "grad_norm": 23.232709884643555, "learning_rate": 9.317712177121771e-05, "loss": 0.09270018935203553, "step": 24050 }, { "epoch": 6.829406755606017, "grad_norm": 12.610830307006836, "learning_rate": 9.317428328129436e-05, "loss": 0.07556609511375427, "step": 24060 }, { "epoch": 6.8322452455293785, "grad_norm": 6.084565162658691, "learning_rate": 9.3171444791371e-05, "loss": 0.10252330303192139, "step": 24070 }, { "epoch": 6.83508373545274, "grad_norm": 4.155466079711914, "learning_rate": 9.316860630144763e-05, "loss": 0.06510661244392395, "step": 24080 }, { "epoch": 6.8379222253761, "grad_norm": 16.93148422241211, "learning_rate": 9.316576781152427e-05, "loss": 0.10527651309967041, "step": 24090 }, { "epoch": 6.840760715299461, "grad_norm": 20.688505172729492, "learning_rate": 9.316292932160092e-05, "loss": 0.09468711614608764, "step": 24100 }, { "epoch": 6.843599205222821, "grad_norm": 18.16554832458496, "learning_rate": 9.316009083167754e-05, "loss": 0.11274542808532714, "step": 24110 }, { "epoch": 6.846437695146182, "grad_norm": 9.878766059875488, "learning_rate": 9.315725234175418e-05, "loss": 0.10573322772979736, "step": 24120 }, { "epoch": 6.849276185069543, "grad_norm": 17.459016799926758, "learning_rate": 9.315441385183084e-05, "loss": 0.11389918327331543, "step": 24130 }, { "epoch": 6.852114674992904, "grad_norm": 11.15742015838623, "learning_rate": 9.315157536190747e-05, "loss": 0.11135003566741944, "step": 24140 }, { "epoch": 6.854953164916265, "grad_norm": 18.712249755859375, "learning_rate": 9.314873687198411e-05, "loss": 0.11905930042266846, "step": 24150 }, { "epoch": 6.857791654839625, "grad_norm": 11.562289237976074, "learning_rate": 9.314589838206075e-05, "loss": 0.08661167025566101, "step": 24160 }, { "epoch": 6.860630144762986, "grad_norm": 13.272250175476074, "learning_rate": 9.314305989213739e-05, "loss": 0.10623058080673217, "step": 24170 }, { "epoch": 6.863468634686347, "grad_norm": 9.34141731262207, "learning_rate": 9.314022140221402e-05, "loss": 0.09637885093688965, "step": 24180 }, { "epoch": 6.866307124609707, "grad_norm": 17.596288681030273, "learning_rate": 9.313738291229066e-05, "loss": 0.09468052983283996, "step": 24190 }, { "epoch": 6.8691456145330685, "grad_norm": 19.334457397460938, "learning_rate": 9.313454442236732e-05, "loss": 0.09870043992996216, "step": 24200 }, { "epoch": 6.871984104456429, "grad_norm": 7.195592403411865, "learning_rate": 9.313170593244394e-05, "loss": 0.09635913968086243, "step": 24210 }, { "epoch": 6.87482259437979, "grad_norm": 4.203941822052002, "learning_rate": 9.312886744252058e-05, "loss": 0.09320802688598633, "step": 24220 }, { "epoch": 6.877661084303151, "grad_norm": 5.168039798736572, "learning_rate": 9.312602895259723e-05, "loss": 0.08890237808227539, "step": 24230 }, { "epoch": 6.880499574226511, "grad_norm": 11.354249000549316, "learning_rate": 9.312319046267385e-05, "loss": 0.1269654154777527, "step": 24240 }, { "epoch": 6.883338064149872, "grad_norm": 11.18812084197998, "learning_rate": 9.31203519727505e-05, "loss": 0.09434882998466491, "step": 24250 }, { "epoch": 6.886176554073233, "grad_norm": 9.15461540222168, "learning_rate": 9.311751348282715e-05, "loss": 0.09702085852622985, "step": 24260 }, { "epoch": 6.889015043996594, "grad_norm": 13.384735107421875, "learning_rate": 9.311467499290378e-05, "loss": 0.05970205664634705, "step": 24270 }, { "epoch": 6.891853533919955, "grad_norm": 9.660673141479492, "learning_rate": 9.311183650298042e-05, "loss": 0.07755666375160217, "step": 24280 }, { "epoch": 6.894692023843316, "grad_norm": 13.448535919189453, "learning_rate": 9.310899801305706e-05, "loss": 0.09545423984527587, "step": 24290 }, { "epoch": 6.897530513766676, "grad_norm": 15.696562767028809, "learning_rate": 9.31061595231337e-05, "loss": 0.12689557075500488, "step": 24300 }, { "epoch": 6.900369003690037, "grad_norm": 9.575885772705078, "learning_rate": 9.310332103321033e-05, "loss": 0.08498870134353638, "step": 24310 }, { "epoch": 6.903207493613397, "grad_norm": 2.8332626819610596, "learning_rate": 9.310048254328697e-05, "loss": 0.08259831666946411, "step": 24320 }, { "epoch": 6.9060459835367585, "grad_norm": 12.012787818908691, "learning_rate": 9.309764405336363e-05, "loss": 0.12632443904876708, "step": 24330 }, { "epoch": 6.90888447346012, "grad_norm": 4.253190040588379, "learning_rate": 9.309480556344025e-05, "loss": 0.0795013964176178, "step": 24340 }, { "epoch": 6.91172296338348, "grad_norm": 8.718367576599121, "learning_rate": 9.30919670735169e-05, "loss": 0.11982712745666504, "step": 24350 }, { "epoch": 6.914561453306841, "grad_norm": 10.600751876831055, "learning_rate": 9.308912858359354e-05, "loss": 0.10106738805770873, "step": 24360 }, { "epoch": 6.917399943230201, "grad_norm": 12.918252944946289, "learning_rate": 9.308629009367016e-05, "loss": 0.08397529125213624, "step": 24370 }, { "epoch": 6.920238433153562, "grad_norm": 7.796765327453613, "learning_rate": 9.30834516037468e-05, "loss": 0.062130266427993776, "step": 24380 }, { "epoch": 6.923076923076923, "grad_norm": 11.75627613067627, "learning_rate": 9.308061311382345e-05, "loss": 0.0711436927318573, "step": 24390 }, { "epoch": 6.925915413000284, "grad_norm": 14.818982124328613, "learning_rate": 9.307777462390009e-05, "loss": 0.09907492399215698, "step": 24400 }, { "epoch": 6.928753902923645, "grad_norm": 8.472296714782715, "learning_rate": 9.307493613397673e-05, "loss": 0.10106189250946045, "step": 24410 }, { "epoch": 6.931592392847005, "grad_norm": 10.537667274475098, "learning_rate": 9.307209764405337e-05, "loss": 0.07931021451950074, "step": 24420 }, { "epoch": 6.934430882770366, "grad_norm": 18.851049423217773, "learning_rate": 9.306925915413001e-05, "loss": 0.11357226371765136, "step": 24430 }, { "epoch": 6.937269372693727, "grad_norm": 21.69588279724121, "learning_rate": 9.306642066420664e-05, "loss": 0.13082100152969361, "step": 24440 }, { "epoch": 6.940107862617087, "grad_norm": 9.200159072875977, "learning_rate": 9.306358217428328e-05, "loss": 0.08630244731903076, "step": 24450 }, { "epoch": 6.9429463525404485, "grad_norm": 8.029813766479492, "learning_rate": 9.306074368435992e-05, "loss": 0.08214585185050964, "step": 24460 }, { "epoch": 6.94578484246381, "grad_norm": 10.202174186706543, "learning_rate": 9.305790519443656e-05, "loss": 0.1129606008529663, "step": 24470 }, { "epoch": 6.94862333238717, "grad_norm": 11.96861457824707, "learning_rate": 9.30550667045132e-05, "loss": 0.15480034351348876, "step": 24480 }, { "epoch": 6.951461822310531, "grad_norm": 11.820459365844727, "learning_rate": 9.305222821458985e-05, "loss": 0.07899882793426513, "step": 24490 }, { "epoch": 6.954300312233892, "grad_norm": 10.035128593444824, "learning_rate": 9.304938972466648e-05, "loss": 0.10380600690841675, "step": 24500 }, { "epoch": 6.954300312233892, "eval_accuracy": 0.9291028168118523, "eval_loss": 0.2089700549840927, "eval_runtime": 31.3539, "eval_samples_per_second": 501.597, "eval_steps_per_second": 7.846, "step": 24500 }, { "epoch": 6.957138802157252, "grad_norm": 5.837090015411377, "learning_rate": 9.304655123474312e-05, "loss": 0.09950883984565735, "step": 24510 }, { "epoch": 6.959977292080613, "grad_norm": 11.904397964477539, "learning_rate": 9.304371274481976e-05, "loss": 0.08639371395111084, "step": 24520 }, { "epoch": 6.9628157820039736, "grad_norm": 5.764144420623779, "learning_rate": 9.30408742548964e-05, "loss": 0.09941230416297912, "step": 24530 }, { "epoch": 6.965654271927335, "grad_norm": 16.358959197998047, "learning_rate": 9.303803576497304e-05, "loss": 0.09886674880981446, "step": 24540 }, { "epoch": 6.968492761850696, "grad_norm": 6.199477195739746, "learning_rate": 9.303519727504968e-05, "loss": 0.09478334188461304, "step": 24550 }, { "epoch": 6.971331251774056, "grad_norm": 19.868894577026367, "learning_rate": 9.303235878512631e-05, "loss": 0.10347808599472046, "step": 24560 }, { "epoch": 6.974169741697417, "grad_norm": 11.553400993347168, "learning_rate": 9.302952029520295e-05, "loss": 0.10487091541290283, "step": 24570 }, { "epoch": 6.977008231620777, "grad_norm": 6.1727447509765625, "learning_rate": 9.302668180527959e-05, "loss": 0.1479036331176758, "step": 24580 }, { "epoch": 6.979846721544138, "grad_norm": 6.307447910308838, "learning_rate": 9.302384331535623e-05, "loss": 0.09229648709297181, "step": 24590 }, { "epoch": 6.9826852114674995, "grad_norm": 9.719098091125488, "learning_rate": 9.302100482543288e-05, "loss": 0.0741078019142151, "step": 24600 }, { "epoch": 6.98552370139086, "grad_norm": 10.375625610351562, "learning_rate": 9.301816633550952e-05, "loss": 0.08486036658287048, "step": 24610 }, { "epoch": 6.988362191314221, "grad_norm": 10.996009826660156, "learning_rate": 9.301532784558616e-05, "loss": 0.11396535634994506, "step": 24620 }, { "epoch": 6.991200681237582, "grad_norm": 5.173250675201416, "learning_rate": 9.301248935566279e-05, "loss": 0.08213579654693604, "step": 24630 }, { "epoch": 6.994039171160942, "grad_norm": 12.531560897827148, "learning_rate": 9.300965086573943e-05, "loss": 0.11228445768356324, "step": 24640 }, { "epoch": 6.996877661084303, "grad_norm": 12.625089645385742, "learning_rate": 9.300681237581607e-05, "loss": 0.10983151197433472, "step": 24650 }, { "epoch": 6.999716151007664, "grad_norm": 12.65639591217041, "learning_rate": 9.300397388589271e-05, "loss": 0.07384233474731446, "step": 24660 }, { "epoch": 7.002554640931025, "grad_norm": 8.278828620910645, "learning_rate": 9.300141924496169e-05, "loss": 0.08259215950965881, "step": 24670 }, { "epoch": 7.005393130854386, "grad_norm": 3.697892665863037, "learning_rate": 9.299858075503832e-05, "loss": 0.06144115328788757, "step": 24680 }, { "epoch": 7.008231620777746, "grad_norm": 13.145040512084961, "learning_rate": 9.299574226511496e-05, "loss": 0.06724100708961486, "step": 24690 }, { "epoch": 7.011070110701107, "grad_norm": 6.598389148712158, "learning_rate": 9.29929037751916e-05, "loss": 0.06451724171638488, "step": 24700 }, { "epoch": 7.013908600624468, "grad_norm": 12.674330711364746, "learning_rate": 9.299006528526824e-05, "loss": 0.05395633578300476, "step": 24710 }, { "epoch": 7.016747090547828, "grad_norm": 10.196317672729492, "learning_rate": 9.298722679534488e-05, "loss": 0.07047267556190491, "step": 24720 }, { "epoch": 7.0195855804711895, "grad_norm": 10.404425621032715, "learning_rate": 9.298438830542152e-05, "loss": 0.09644900560379029, "step": 24730 }, { "epoch": 7.02242407039455, "grad_norm": 3.890166759490967, "learning_rate": 9.298154981549815e-05, "loss": 0.07013571858406067, "step": 24740 }, { "epoch": 7.025262560317911, "grad_norm": 6.217899799346924, "learning_rate": 9.29787113255748e-05, "loss": 0.06245779395103455, "step": 24750 }, { "epoch": 7.028101050241272, "grad_norm": 6.580312252044678, "learning_rate": 9.297587283565144e-05, "loss": 0.06836444139480591, "step": 24760 }, { "epoch": 7.030939540164632, "grad_norm": 6.5935516357421875, "learning_rate": 9.297303434572808e-05, "loss": 0.07499512434005737, "step": 24770 }, { "epoch": 7.033778030087993, "grad_norm": 5.788921356201172, "learning_rate": 9.297019585580472e-05, "loss": 0.09312869906425476, "step": 24780 }, { "epoch": 7.0366165200113535, "grad_norm": 2.3077564239501953, "learning_rate": 9.296735736588136e-05, "loss": 0.0823489785194397, "step": 24790 }, { "epoch": 7.039455009934715, "grad_norm": 15.5554838180542, "learning_rate": 9.2964518875958e-05, "loss": 0.08530140519142151, "step": 24800 }, { "epoch": 7.042293499858076, "grad_norm": 6.212428569793701, "learning_rate": 9.296168038603463e-05, "loss": 0.05804241299629211, "step": 24810 }, { "epoch": 7.045131989781436, "grad_norm": 22.636356353759766, "learning_rate": 9.295884189611127e-05, "loss": 0.09581531882286072, "step": 24820 }, { "epoch": 7.047970479704797, "grad_norm": 16.66684913635254, "learning_rate": 9.295600340618791e-05, "loss": 0.09299579858779908, "step": 24830 }, { "epoch": 7.050808969628158, "grad_norm": 15.202995300292969, "learning_rate": 9.295316491626455e-05, "loss": 0.0945294976234436, "step": 24840 }, { "epoch": 7.053647459551518, "grad_norm": 9.286093711853027, "learning_rate": 9.29503264263412e-05, "loss": 0.08990670442581176, "step": 24850 }, { "epoch": 7.0564859494748795, "grad_norm": 10.716316223144531, "learning_rate": 9.294748793641784e-05, "loss": 0.0868717074394226, "step": 24860 }, { "epoch": 7.05932443939824, "grad_norm": 14.390135765075684, "learning_rate": 9.294464944649446e-05, "loss": 0.06352605223655701, "step": 24870 }, { "epoch": 7.062162929321601, "grad_norm": 9.363687515258789, "learning_rate": 9.29418109565711e-05, "loss": 0.0662489116191864, "step": 24880 }, { "epoch": 7.065001419244962, "grad_norm": 16.993261337280273, "learning_rate": 9.293897246664775e-05, "loss": 0.0787426471710205, "step": 24890 }, { "epoch": 7.067839909168322, "grad_norm": 13.533523559570312, "learning_rate": 9.293613397672439e-05, "loss": 0.08902983665466309, "step": 24900 }, { "epoch": 7.070678399091683, "grad_norm": 13.833831787109375, "learning_rate": 9.293329548680103e-05, "loss": 0.09883632063865662, "step": 24910 }, { "epoch": 7.073516889015044, "grad_norm": 4.1236491203308105, "learning_rate": 9.293045699687767e-05, "loss": 0.06012184619903564, "step": 24920 }, { "epoch": 7.076355378938405, "grad_norm": 8.891768455505371, "learning_rate": 9.292761850695431e-05, "loss": 0.0735208511352539, "step": 24930 }, { "epoch": 7.079193868861766, "grad_norm": 15.003506660461426, "learning_rate": 9.292478001703094e-05, "loss": 0.07380845546722412, "step": 24940 }, { "epoch": 7.082032358785126, "grad_norm": 9.788448333740234, "learning_rate": 9.292194152710758e-05, "loss": 0.0688049852848053, "step": 24950 }, { "epoch": 7.084870848708487, "grad_norm": 5.41404914855957, "learning_rate": 9.291910303718422e-05, "loss": 0.08030549287796021, "step": 24960 }, { "epoch": 7.087709338631848, "grad_norm": 12.876459121704102, "learning_rate": 9.291626454726085e-05, "loss": 0.07650284171104431, "step": 24970 }, { "epoch": 7.090547828555208, "grad_norm": 9.432177543640137, "learning_rate": 9.29134260573375e-05, "loss": 0.06650417447090148, "step": 24980 }, { "epoch": 7.0933863184785695, "grad_norm": 8.356846809387207, "learning_rate": 9.291058756741415e-05, "loss": 0.07502039670944213, "step": 24990 }, { "epoch": 7.096224808401931, "grad_norm": 13.065507888793945, "learning_rate": 9.290774907749077e-05, "loss": 0.07929702997207641, "step": 25000 }, { "epoch": 7.096224808401931, "eval_accuracy": 0.9442360272143447, "eval_loss": 0.16748574376106262, "eval_runtime": 31.0544, "eval_samples_per_second": 506.433, "eval_steps_per_second": 7.922, "step": 25000 }, { "epoch": 7.099063298325291, "grad_norm": 16.9394588470459, "learning_rate": 9.290491058756742e-05, "loss": 0.06433024406433105, "step": 25010 }, { "epoch": 7.101901788248652, "grad_norm": 12.390459060668945, "learning_rate": 9.290207209764406e-05, "loss": 0.07576498985290528, "step": 25020 }, { "epoch": 7.104740278172012, "grad_norm": 12.055755615234375, "learning_rate": 9.28992336077207e-05, "loss": 0.1016196370124817, "step": 25030 }, { "epoch": 7.107578768095373, "grad_norm": 10.71986198425293, "learning_rate": 9.289639511779734e-05, "loss": 0.1038313627243042, "step": 25040 }, { "epoch": 7.110417258018734, "grad_norm": 10.531457901000977, "learning_rate": 9.289355662787398e-05, "loss": 0.07117497324943542, "step": 25050 }, { "epoch": 7.113255747942095, "grad_norm": 16.930017471313477, "learning_rate": 9.289071813795062e-05, "loss": 0.0703173041343689, "step": 25060 }, { "epoch": 7.116094237865456, "grad_norm": 21.771995544433594, "learning_rate": 9.288787964802725e-05, "loss": 0.09848902225494385, "step": 25070 }, { "epoch": 7.118932727788816, "grad_norm": 13.917851448059082, "learning_rate": 9.288504115810389e-05, "loss": 0.0688615083694458, "step": 25080 }, { "epoch": 7.121771217712177, "grad_norm": 18.24556541442871, "learning_rate": 9.288220266818053e-05, "loss": 0.10535664558410644, "step": 25090 }, { "epoch": 7.124609707635538, "grad_norm": 5.248446464538574, "learning_rate": 9.287936417825716e-05, "loss": 0.0779831051826477, "step": 25100 }, { "epoch": 7.127448197558898, "grad_norm": 9.42318058013916, "learning_rate": 9.287652568833382e-05, "loss": 0.08774685859680176, "step": 25110 }, { "epoch": 7.1302866874822595, "grad_norm": 13.35573673248291, "learning_rate": 9.287368719841046e-05, "loss": 0.0786705732345581, "step": 25120 }, { "epoch": 7.133125177405621, "grad_norm": 9.384979248046875, "learning_rate": 9.287084870848708e-05, "loss": 0.06326441168785095, "step": 25130 }, { "epoch": 7.135963667328981, "grad_norm": 9.870256423950195, "learning_rate": 9.286801021856373e-05, "loss": 0.08181899189949035, "step": 25140 }, { "epoch": 7.138802157252342, "grad_norm": 3.5451319217681885, "learning_rate": 9.286517172864037e-05, "loss": 0.060170650482177734, "step": 25150 }, { "epoch": 7.141640647175702, "grad_norm": 5.058857440948486, "learning_rate": 9.286233323871701e-05, "loss": 0.0686713457107544, "step": 25160 }, { "epoch": 7.144479137099063, "grad_norm": 14.10214900970459, "learning_rate": 9.285949474879365e-05, "loss": 0.08296550512313842, "step": 25170 }, { "epoch": 7.147317627022424, "grad_norm": 9.43695068359375, "learning_rate": 9.285665625887029e-05, "loss": 0.07093438506126404, "step": 25180 }, { "epoch": 7.150156116945785, "grad_norm": 9.93986988067627, "learning_rate": 9.285381776894693e-05, "loss": 0.08041448593139648, "step": 25190 }, { "epoch": 7.152994606869146, "grad_norm": 8.851746559143066, "learning_rate": 9.285097927902356e-05, "loss": 0.07922419905662537, "step": 25200 }, { "epoch": 7.155833096792507, "grad_norm": 6.401785850524902, "learning_rate": 9.28481407891002e-05, "loss": 0.06062203049659729, "step": 25210 }, { "epoch": 7.158671586715867, "grad_norm": 18.311893463134766, "learning_rate": 9.284530229917684e-05, "loss": 0.08666465282440186, "step": 25220 }, { "epoch": 7.161510076639228, "grad_norm": 11.923754692077637, "learning_rate": 9.284246380925347e-05, "loss": 0.06378366947174072, "step": 25230 }, { "epoch": 7.164348566562588, "grad_norm": 5.056766033172607, "learning_rate": 9.283962531933013e-05, "loss": 0.05328802466392517, "step": 25240 }, { "epoch": 7.1671870564859494, "grad_norm": 9.68974781036377, "learning_rate": 9.283678682940677e-05, "loss": 0.09437194466590881, "step": 25250 }, { "epoch": 7.170025546409311, "grad_norm": 7.316320896148682, "learning_rate": 9.28339483394834e-05, "loss": 0.06968687176704406, "step": 25260 }, { "epoch": 7.172864036332671, "grad_norm": 11.415355682373047, "learning_rate": 9.283110984956004e-05, "loss": 0.05861054658889771, "step": 25270 }, { "epoch": 7.175702526256032, "grad_norm": 7.921270847320557, "learning_rate": 9.282827135963668e-05, "loss": 0.07577463984489441, "step": 25280 }, { "epoch": 7.178541016179393, "grad_norm": 12.708183288574219, "learning_rate": 9.282543286971332e-05, "loss": 0.0669076383113861, "step": 25290 }, { "epoch": 7.181379506102753, "grad_norm": 11.058527946472168, "learning_rate": 9.282259437978995e-05, "loss": 0.07173556685447693, "step": 25300 }, { "epoch": 7.184217996026114, "grad_norm": 4.351419925689697, "learning_rate": 9.28197558898666e-05, "loss": 0.08426600694656372, "step": 25310 }, { "epoch": 7.1870564859494745, "grad_norm": 11.54439926147461, "learning_rate": 9.281691739994324e-05, "loss": 0.066134774684906, "step": 25320 }, { "epoch": 7.189894975872836, "grad_norm": 16.16973114013672, "learning_rate": 9.281407891001987e-05, "loss": 0.08736181855201722, "step": 25330 }, { "epoch": 7.192733465796197, "grad_norm": 8.346450805664062, "learning_rate": 9.281124042009651e-05, "loss": 0.06326338052749633, "step": 25340 }, { "epoch": 7.195571955719557, "grad_norm": 3.7849199771881104, "learning_rate": 9.280840193017315e-05, "loss": 0.08217431902885437, "step": 25350 }, { "epoch": 7.198410445642918, "grad_norm": 9.105610847473145, "learning_rate": 9.280556344024978e-05, "loss": 0.09630203247070312, "step": 25360 }, { "epoch": 7.201248935566278, "grad_norm": 20.390913009643555, "learning_rate": 9.280272495032644e-05, "loss": 0.10159130096435547, "step": 25370 }, { "epoch": 7.204087425489639, "grad_norm": 10.270111083984375, "learning_rate": 9.279988646040308e-05, "loss": 0.073513263463974, "step": 25380 }, { "epoch": 7.2069259154130005, "grad_norm": 12.678959846496582, "learning_rate": 9.27970479704797e-05, "loss": 0.059249311685562134, "step": 25390 }, { "epoch": 7.209764405336361, "grad_norm": 22.306011199951172, "learning_rate": 9.279420948055635e-05, "loss": 0.08024435043334961, "step": 25400 }, { "epoch": 7.212602895259722, "grad_norm": 4.443155288696289, "learning_rate": 9.279137099063299e-05, "loss": 0.07389181852340698, "step": 25410 }, { "epoch": 7.215441385183083, "grad_norm": 5.50084114074707, "learning_rate": 9.278853250070963e-05, "loss": 0.07785683870315552, "step": 25420 }, { "epoch": 7.218279875106443, "grad_norm": 8.419054985046387, "learning_rate": 9.278569401078626e-05, "loss": 0.08136615753173829, "step": 25430 }, { "epoch": 7.221118365029804, "grad_norm": 8.237689018249512, "learning_rate": 9.278285552086291e-05, "loss": 0.05619444847106934, "step": 25440 }, { "epoch": 7.2239568549531645, "grad_norm": 8.519165992736816, "learning_rate": 9.278001703093954e-05, "loss": 0.05788261890411377, "step": 25450 }, { "epoch": 7.226795344876526, "grad_norm": 11.493070602416992, "learning_rate": 9.277717854101618e-05, "loss": 0.08106710910797119, "step": 25460 }, { "epoch": 7.229633834799887, "grad_norm": 10.802931785583496, "learning_rate": 9.277434005109282e-05, "loss": 0.05255587697029114, "step": 25470 }, { "epoch": 7.232472324723247, "grad_norm": 10.36487865447998, "learning_rate": 9.277150156116947e-05, "loss": 0.10177537202835082, "step": 25480 }, { "epoch": 7.235310814646608, "grad_norm": 6.762228965759277, "learning_rate": 9.276866307124609e-05, "loss": 0.0843475341796875, "step": 25490 }, { "epoch": 7.238149304569969, "grad_norm": 12.662060737609863, "learning_rate": 9.276582458132273e-05, "loss": 0.0893014907836914, "step": 25500 }, { "epoch": 7.238149304569969, "eval_accuracy": 0.9397850829783175, "eval_loss": 0.1832467019557953, "eval_runtime": 31.3095, "eval_samples_per_second": 502.308, "eval_steps_per_second": 7.857, "step": 25500 }, { "epoch": 7.240987794493329, "grad_norm": 8.080511093139648, "learning_rate": 9.276298609139939e-05, "loss": 0.05834176540374756, "step": 25510 }, { "epoch": 7.2438262844166905, "grad_norm": 6.830284118652344, "learning_rate": 9.276014760147602e-05, "loss": 0.06360492706298829, "step": 25520 }, { "epoch": 7.246664774340051, "grad_norm": 17.548480987548828, "learning_rate": 9.275730911155266e-05, "loss": 0.0802126407623291, "step": 25530 }, { "epoch": 7.249503264263412, "grad_norm": 9.045568466186523, "learning_rate": 9.27544706216293e-05, "loss": 0.07861078977584839, "step": 25540 }, { "epoch": 7.252341754186773, "grad_norm": 7.015875816345215, "learning_rate": 9.275163213170593e-05, "loss": 0.09540387988090515, "step": 25550 }, { "epoch": 7.255180244110133, "grad_norm": 3.82190203666687, "learning_rate": 9.274879364178257e-05, "loss": 0.06535564661026001, "step": 25560 }, { "epoch": 7.258018734033494, "grad_norm": 9.843457221984863, "learning_rate": 9.274595515185922e-05, "loss": 0.0892251968383789, "step": 25570 }, { "epoch": 7.2608572239568545, "grad_norm": 9.680808067321777, "learning_rate": 9.274311666193585e-05, "loss": 0.0860748529434204, "step": 25580 }, { "epoch": 7.263695713880216, "grad_norm": 2.475742816925049, "learning_rate": 9.27402781720125e-05, "loss": 0.08024742007255554, "step": 25590 }, { "epoch": 7.266534203803577, "grad_norm": 6.437875747680664, "learning_rate": 9.273743968208913e-05, "loss": 0.09326879382133484, "step": 25600 }, { "epoch": 7.269372693726937, "grad_norm": 7.342555999755859, "learning_rate": 9.273460119216578e-05, "loss": 0.10031281709671021, "step": 25610 }, { "epoch": 7.272211183650298, "grad_norm": 6.315887451171875, "learning_rate": 9.27317627022424e-05, "loss": 0.09980835318565369, "step": 25620 }, { "epoch": 7.275049673573659, "grad_norm": 12.160181045532227, "learning_rate": 9.272892421231905e-05, "loss": 0.07057795524597169, "step": 25630 }, { "epoch": 7.277888163497019, "grad_norm": 10.519721984863281, "learning_rate": 9.27260857223957e-05, "loss": 0.09350953102111817, "step": 25640 }, { "epoch": 7.2807266534203805, "grad_norm": 8.615524291992188, "learning_rate": 9.272324723247233e-05, "loss": 0.08921960592269898, "step": 25650 }, { "epoch": 7.283565143343741, "grad_norm": 14.594928741455078, "learning_rate": 9.272040874254897e-05, "loss": 0.07720339298248291, "step": 25660 }, { "epoch": 7.286403633267102, "grad_norm": 2.612436532974243, "learning_rate": 9.271757025262561e-05, "loss": 0.059430062770843506, "step": 25670 }, { "epoch": 7.289242123190463, "grad_norm": 12.778146743774414, "learning_rate": 9.271473176270224e-05, "loss": 0.054558998346328734, "step": 25680 }, { "epoch": 7.292080613113823, "grad_norm": 10.291651725769043, "learning_rate": 9.271189327277888e-05, "loss": 0.08524532318115234, "step": 25690 }, { "epoch": 7.294919103037184, "grad_norm": 3.862149715423584, "learning_rate": 9.270905478285552e-05, "loss": 0.08555809259414673, "step": 25700 }, { "epoch": 7.297757592960545, "grad_norm": 18.30660057067871, "learning_rate": 9.270621629293216e-05, "loss": 0.07146743535995484, "step": 25710 }, { "epoch": 7.300596082883906, "grad_norm": 19.888212203979492, "learning_rate": 9.27033778030088e-05, "loss": 0.08293269872665406, "step": 25720 }, { "epoch": 7.303434572807267, "grad_norm": 17.063730239868164, "learning_rate": 9.270053931308545e-05, "loss": 0.07968592047691345, "step": 25730 }, { "epoch": 7.306273062730627, "grad_norm": 2.37996244430542, "learning_rate": 9.269770082316209e-05, "loss": 0.06406975388526917, "step": 25740 }, { "epoch": 7.309111552653988, "grad_norm": 4.29535436630249, "learning_rate": 9.269486233323871e-05, "loss": 0.05949164628982544, "step": 25750 }, { "epoch": 7.311950042577349, "grad_norm": 7.4490251541137695, "learning_rate": 9.269202384331536e-05, "loss": 0.04965499639511108, "step": 25760 }, { "epoch": 7.314788532500709, "grad_norm": 8.656577110290527, "learning_rate": 9.268918535339201e-05, "loss": 0.10513595342636109, "step": 25770 }, { "epoch": 7.3176270224240705, "grad_norm": 4.1642374992370605, "learning_rate": 9.268634686346864e-05, "loss": 0.06156970858573914, "step": 25780 }, { "epoch": 7.320465512347431, "grad_norm": 12.691108703613281, "learning_rate": 9.268350837354528e-05, "loss": 0.08913788199424744, "step": 25790 }, { "epoch": 7.323304002270792, "grad_norm": 14.294926643371582, "learning_rate": 9.268066988362192e-05, "loss": 0.11012557744979859, "step": 25800 }, { "epoch": 7.326142492194153, "grad_norm": 8.799478530883789, "learning_rate": 9.267783139369855e-05, "loss": 0.07678711414337158, "step": 25810 }, { "epoch": 7.328980982117513, "grad_norm": 6.116623878479004, "learning_rate": 9.267499290377519e-05, "loss": 0.06817195415496827, "step": 25820 }, { "epoch": 7.331819472040874, "grad_norm": 16.788000106811523, "learning_rate": 9.267215441385183e-05, "loss": 0.07637510299682618, "step": 25830 }, { "epoch": 7.334657961964235, "grad_norm": 12.286406517028809, "learning_rate": 9.266931592392847e-05, "loss": 0.08467528223991394, "step": 25840 }, { "epoch": 7.337496451887596, "grad_norm": 10.662586212158203, "learning_rate": 9.266647743400511e-05, "loss": 0.10300173759460449, "step": 25850 }, { "epoch": 7.340334941810957, "grad_norm": 7.952162265777588, "learning_rate": 9.266363894408176e-05, "loss": 0.0618781566619873, "step": 25860 }, { "epoch": 7.343173431734318, "grad_norm": 12.708575248718262, "learning_rate": 9.26608004541584e-05, "loss": 0.07204200029373169, "step": 25870 }, { "epoch": 7.346011921657678, "grad_norm": 3.645596504211426, "learning_rate": 9.265796196423503e-05, "loss": 0.0725257933139801, "step": 25880 }, { "epoch": 7.348850411581039, "grad_norm": 7.456458568572998, "learning_rate": 9.265512347431167e-05, "loss": 0.0889801800251007, "step": 25890 }, { "epoch": 7.351688901504399, "grad_norm": 9.785014152526855, "learning_rate": 9.265228498438831e-05, "loss": 0.06083427667617798, "step": 25900 }, { "epoch": 7.3545273914277605, "grad_norm": 7.05605936050415, "learning_rate": 9.264944649446495e-05, "loss": 0.07014908790588378, "step": 25910 }, { "epoch": 7.357365881351122, "grad_norm": 6.651464939117432, "learning_rate": 9.264660800454159e-05, "loss": 0.11011708974838257, "step": 25920 }, { "epoch": 7.360204371274482, "grad_norm": 10.310638427734375, "learning_rate": 9.264376951461823e-05, "loss": 0.06599188446998597, "step": 25930 }, { "epoch": 7.363042861197843, "grad_norm": 15.391011238098145, "learning_rate": 9.264093102469486e-05, "loss": 0.08896167874336243, "step": 25940 }, { "epoch": 7.365881351121203, "grad_norm": 9.73863410949707, "learning_rate": 9.26380925347715e-05, "loss": 0.07937895059585572, "step": 25950 }, { "epoch": 7.368719841044564, "grad_norm": 9.990856170654297, "learning_rate": 9.263525404484814e-05, "loss": 0.09625785946846008, "step": 25960 }, { "epoch": 7.371558330967925, "grad_norm": 10.198019981384277, "learning_rate": 9.263241555492478e-05, "loss": 0.07414093613624573, "step": 25970 }, { "epoch": 7.374396820891286, "grad_norm": 4.457877159118652, "learning_rate": 9.262957706500143e-05, "loss": 0.06367641687393188, "step": 25980 }, { "epoch": 7.377235310814647, "grad_norm": 16.091041564941406, "learning_rate": 9.262673857507807e-05, "loss": 0.08975282311439514, "step": 25990 }, { "epoch": 7.380073800738008, "grad_norm": 9.023719787597656, "learning_rate": 9.262390008515471e-05, "loss": 0.08124292492866517, "step": 26000 }, { "epoch": 7.380073800738008, "eval_accuracy": 0.9362879125071533, "eval_loss": 0.1925540417432785, "eval_runtime": 31.0429, "eval_samples_per_second": 506.621, "eval_steps_per_second": 7.925, "step": 26000 }, { "epoch": 7.382912290661368, "grad_norm": 7.7657036781311035, "learning_rate": 9.262106159523134e-05, "loss": 0.0755397617816925, "step": 26010 }, { "epoch": 7.385750780584729, "grad_norm": 12.042271614074707, "learning_rate": 9.261822310530798e-05, "loss": 0.08097591400146484, "step": 26020 }, { "epoch": 7.388589270508089, "grad_norm": 2.8813228607177734, "learning_rate": 9.261538461538462e-05, "loss": 0.0630192518234253, "step": 26030 }, { "epoch": 7.3914277604314504, "grad_norm": 16.896039962768555, "learning_rate": 9.261254612546126e-05, "loss": 0.0758962631225586, "step": 26040 }, { "epoch": 7.3942662503548116, "grad_norm": 11.112716674804688, "learning_rate": 9.26097076355379e-05, "loss": 0.09962148070335389, "step": 26050 }, { "epoch": 7.397104740278172, "grad_norm": 9.072155952453613, "learning_rate": 9.260686914561454e-05, "loss": 0.08826958537101745, "step": 26060 }, { "epoch": 7.399943230201533, "grad_norm": 11.082853317260742, "learning_rate": 9.260403065569117e-05, "loss": 0.09242720603942871, "step": 26070 }, { "epoch": 7.402781720124894, "grad_norm": 9.824169158935547, "learning_rate": 9.260119216576781e-05, "loss": 0.08743641376495362, "step": 26080 }, { "epoch": 7.405620210048254, "grad_norm": 4.256606578826904, "learning_rate": 9.259835367584445e-05, "loss": 0.06762970089912415, "step": 26090 }, { "epoch": 7.408458699971615, "grad_norm": 8.099950790405273, "learning_rate": 9.25955151859211e-05, "loss": 0.09003919959068299, "step": 26100 }, { "epoch": 7.4112971898949755, "grad_norm": 5.497006893157959, "learning_rate": 9.259267669599774e-05, "loss": 0.06235863566398621, "step": 26110 }, { "epoch": 7.414135679818337, "grad_norm": 8.308137893676758, "learning_rate": 9.258983820607438e-05, "loss": 0.05843211412429809, "step": 26120 }, { "epoch": 7.416974169741698, "grad_norm": 8.485076904296875, "learning_rate": 9.258699971615102e-05, "loss": 0.08134673237800598, "step": 26130 }, { "epoch": 7.419812659665058, "grad_norm": 12.66151237487793, "learning_rate": 9.258416122622765e-05, "loss": 0.07153877019882202, "step": 26140 }, { "epoch": 7.422651149588419, "grad_norm": 9.781754493713379, "learning_rate": 9.258132273630429e-05, "loss": 0.08094428777694702, "step": 26150 }, { "epoch": 7.425489639511779, "grad_norm": 11.617583274841309, "learning_rate": 9.257848424638093e-05, "loss": 0.08042703866958618, "step": 26160 }, { "epoch": 7.42832812943514, "grad_norm": 4.851502895355225, "learning_rate": 9.257564575645757e-05, "loss": 0.07313272953033448, "step": 26170 }, { "epoch": 7.4311666193585015, "grad_norm": 12.004612922668457, "learning_rate": 9.257280726653421e-05, "loss": 0.06990544199943542, "step": 26180 }, { "epoch": 7.434005109281862, "grad_norm": 8.36447525024414, "learning_rate": 9.256996877661085e-05, "loss": 0.06553784012794495, "step": 26190 }, { "epoch": 7.436843599205223, "grad_norm": 11.0001802444458, "learning_rate": 9.256713028668748e-05, "loss": 0.10173889398574829, "step": 26200 }, { "epoch": 7.439682089128584, "grad_norm": 10.433905601501465, "learning_rate": 9.256429179676412e-05, "loss": 0.11588386297225953, "step": 26210 }, { "epoch": 7.442520579051944, "grad_norm": 8.971481323242188, "learning_rate": 9.256145330684076e-05, "loss": 0.06656346321105958, "step": 26220 }, { "epoch": 7.445359068975305, "grad_norm": 10.237892150878906, "learning_rate": 9.25586148169174e-05, "loss": 0.09673588871955871, "step": 26230 }, { "epoch": 7.4481975588986655, "grad_norm": 7.237141132354736, "learning_rate": 9.255577632699405e-05, "loss": 0.11670624017715454, "step": 26240 }, { "epoch": 7.451036048822027, "grad_norm": 8.963961601257324, "learning_rate": 9.255293783707069e-05, "loss": 0.0728587567806244, "step": 26250 }, { "epoch": 7.453874538745388, "grad_norm": 9.15424633026123, "learning_rate": 9.255009934714733e-05, "loss": 0.07129738330841065, "step": 26260 }, { "epoch": 7.456713028668748, "grad_norm": 23.21555519104004, "learning_rate": 9.254726085722396e-05, "loss": 0.11262639760971069, "step": 26270 }, { "epoch": 7.459551518592109, "grad_norm": 8.273276329040527, "learning_rate": 9.25444223673006e-05, "loss": 0.11315351724624634, "step": 26280 }, { "epoch": 7.46239000851547, "grad_norm": 10.806933403015137, "learning_rate": 9.254158387737724e-05, "loss": 0.08202406167984008, "step": 26290 }, { "epoch": 7.46522849843883, "grad_norm": 13.437762260437012, "learning_rate": 9.253874538745387e-05, "loss": 0.07173805236816407, "step": 26300 }, { "epoch": 7.4680669883621915, "grad_norm": 8.283321380615234, "learning_rate": 9.253590689753052e-05, "loss": 0.06738618612289429, "step": 26310 }, { "epoch": 7.470905478285552, "grad_norm": 6.9814300537109375, "learning_rate": 9.253306840760716e-05, "loss": 0.07975713014602662, "step": 26320 }, { "epoch": 7.473743968208913, "grad_norm": 11.191336631774902, "learning_rate": 9.253022991768379e-05, "loss": 0.08927326202392578, "step": 26330 }, { "epoch": 7.476582458132274, "grad_norm": 8.341598510742188, "learning_rate": 9.252739142776043e-05, "loss": 0.0828757643699646, "step": 26340 }, { "epoch": 7.479420948055634, "grad_norm": 11.498359680175781, "learning_rate": 9.252455293783708e-05, "loss": 0.08253850936889648, "step": 26350 }, { "epoch": 7.482259437978995, "grad_norm": 18.451519012451172, "learning_rate": 9.252171444791372e-05, "loss": 0.07351793646812439, "step": 26360 }, { "epoch": 7.4850979279023555, "grad_norm": 6.21812105178833, "learning_rate": 9.251887595799036e-05, "loss": 0.05628350377082825, "step": 26370 }, { "epoch": 7.487936417825717, "grad_norm": 7.223512172698975, "learning_rate": 9.2516037468067e-05, "loss": 0.07334859371185302, "step": 26380 }, { "epoch": 7.490774907749078, "grad_norm": 10.63648509979248, "learning_rate": 9.251319897814363e-05, "loss": 0.07936392426490783, "step": 26390 }, { "epoch": 7.493613397672438, "grad_norm": 2.5491249561309814, "learning_rate": 9.251036048822027e-05, "loss": 0.07642608284950256, "step": 26400 }, { "epoch": 7.496451887595799, "grad_norm": 9.710280418395996, "learning_rate": 9.250752199829691e-05, "loss": 0.09321855306625366, "step": 26410 }, { "epoch": 7.49929037751916, "grad_norm": 5.702704429626465, "learning_rate": 9.250468350837355e-05, "loss": 0.06718577146530151, "step": 26420 }, { "epoch": 7.50212886744252, "grad_norm": 7.61082649230957, "learning_rate": 9.250184501845018e-05, "loss": 0.07724987268447876, "step": 26430 }, { "epoch": 7.5049673573658815, "grad_norm": 3.6927883625030518, "learning_rate": 9.249900652852683e-05, "loss": 0.0692366361618042, "step": 26440 }, { "epoch": 7.507805847289243, "grad_norm": 6.430363178253174, "learning_rate": 9.249616803860348e-05, "loss": 0.07843012213706971, "step": 26450 }, { "epoch": 7.510644337212603, "grad_norm": 11.592170715332031, "learning_rate": 9.24933295486801e-05, "loss": 0.08535722494125367, "step": 26460 }, { "epoch": 7.513482827135964, "grad_norm": 12.934331893920898, "learning_rate": 9.249049105875674e-05, "loss": 0.0967501938343048, "step": 26470 }, { "epoch": 7.516321317059324, "grad_norm": 8.168246269226074, "learning_rate": 9.248765256883339e-05, "loss": 0.057279002666473386, "step": 26480 }, { "epoch": 7.519159806982685, "grad_norm": 6.954280376434326, "learning_rate": 9.248481407891001e-05, "loss": 0.06227178573608398, "step": 26490 }, { "epoch": 7.521998296906046, "grad_norm": 9.486580848693848, "learning_rate": 9.248197558898667e-05, "loss": 0.09544625282287597, "step": 26500 }, { "epoch": 7.521998296906046, "eval_accuracy": 0.9319641381064412, "eval_loss": 0.19461214542388916, "eval_runtime": 31.5747, "eval_samples_per_second": 498.088, "eval_steps_per_second": 7.791, "step": 26500 }, { "epoch": 7.524836786829407, "grad_norm": 12.489873886108398, "learning_rate": 9.247913709906331e-05, "loss": 0.09371354579925537, "step": 26510 }, { "epoch": 7.527675276752768, "grad_norm": 7.647334575653076, "learning_rate": 9.247629860913994e-05, "loss": 0.07879270315170288, "step": 26520 }, { "epoch": 7.530513766676128, "grad_norm": 9.045136451721191, "learning_rate": 9.247346011921658e-05, "loss": 0.08057585954666138, "step": 26530 }, { "epoch": 7.533352256599489, "grad_norm": 12.849018096923828, "learning_rate": 9.247062162929322e-05, "loss": 0.08601323962211609, "step": 26540 }, { "epoch": 7.53619074652285, "grad_norm": 9.870349884033203, "learning_rate": 9.246778313936986e-05, "loss": 0.08743392825126647, "step": 26550 }, { "epoch": 7.53902923644621, "grad_norm": 3.759732723236084, "learning_rate": 9.246494464944649e-05, "loss": 0.08128655552864075, "step": 26560 }, { "epoch": 7.5418677263695715, "grad_norm": 8.3211088180542, "learning_rate": 9.246210615952314e-05, "loss": 0.056488925218582155, "step": 26570 }, { "epoch": 7.544706216292932, "grad_norm": 16.501707077026367, "learning_rate": 9.245926766959979e-05, "loss": 0.05979335308074951, "step": 26580 }, { "epoch": 7.547544706216293, "grad_norm": 16.16169548034668, "learning_rate": 9.245642917967641e-05, "loss": 0.07142907381057739, "step": 26590 }, { "epoch": 7.550383196139654, "grad_norm": 16.38370704650879, "learning_rate": 9.245359068975306e-05, "loss": 0.09806442260742188, "step": 26600 }, { "epoch": 7.553221686063014, "grad_norm": 13.115726470947266, "learning_rate": 9.24507521998297e-05, "loss": 0.09101833701133728, "step": 26610 }, { "epoch": 7.556060175986375, "grad_norm": 11.214066505432129, "learning_rate": 9.244791370990632e-05, "loss": 0.09396579265594482, "step": 26620 }, { "epoch": 7.558898665909736, "grad_norm": 9.216989517211914, "learning_rate": 9.244507521998297e-05, "loss": 0.06569300889968872, "step": 26630 }, { "epoch": 7.561737155833097, "grad_norm": 10.804996490478516, "learning_rate": 9.244223673005962e-05, "loss": 0.09532834887504578, "step": 26640 }, { "epoch": 7.564575645756458, "grad_norm": 17.074663162231445, "learning_rate": 9.243939824013625e-05, "loss": 0.05799614787101746, "step": 26650 }, { "epoch": 7.567414135679819, "grad_norm": 5.555571556091309, "learning_rate": 9.243655975021289e-05, "loss": 0.055042868852615355, "step": 26660 }, { "epoch": 7.570252625603179, "grad_norm": 17.987028121948242, "learning_rate": 9.243372126028953e-05, "loss": 0.09240897893905639, "step": 26670 }, { "epoch": 7.57309111552654, "grad_norm": 2.771324634552002, "learning_rate": 9.243088277036617e-05, "loss": 0.07774472236633301, "step": 26680 }, { "epoch": 7.5759296054499, "grad_norm": 15.184845924377441, "learning_rate": 9.24280442804428e-05, "loss": 0.0839342713356018, "step": 26690 }, { "epoch": 7.5787680953732615, "grad_norm": 11.460173606872559, "learning_rate": 9.242520579051946e-05, "loss": 0.07380360960960389, "step": 26700 }, { "epoch": 7.581606585296623, "grad_norm": 1.1493022441864014, "learning_rate": 9.24223673005961e-05, "loss": 0.07841793298721314, "step": 26710 }, { "epoch": 7.584445075219983, "grad_norm": 4.56388521194458, "learning_rate": 9.241952881067272e-05, "loss": 0.06686785221099853, "step": 26720 }, { "epoch": 7.587283565143344, "grad_norm": 4.601447582244873, "learning_rate": 9.241669032074937e-05, "loss": 0.06081027388572693, "step": 26730 }, { "epoch": 7.590122055066704, "grad_norm": 6.164660930633545, "learning_rate": 9.241385183082601e-05, "loss": 0.06688077449798584, "step": 26740 }, { "epoch": 7.592960544990065, "grad_norm": 11.12303638458252, "learning_rate": 9.241101334090264e-05, "loss": 0.08214783072471618, "step": 26750 }, { "epoch": 7.595799034913426, "grad_norm": 16.310251235961914, "learning_rate": 9.240817485097928e-05, "loss": 0.13177411556243895, "step": 26760 }, { "epoch": 7.598637524836787, "grad_norm": 9.675166130065918, "learning_rate": 9.240533636105593e-05, "loss": 0.055321496725082395, "step": 26770 }, { "epoch": 7.601476014760148, "grad_norm": 6.835315704345703, "learning_rate": 9.24027817201249e-05, "loss": 0.09739679098129272, "step": 26780 }, { "epoch": 7.604314504683508, "grad_norm": 5.439204216003418, "learning_rate": 9.239994323020154e-05, "loss": 0.06536415219306946, "step": 26790 }, { "epoch": 7.607152994606869, "grad_norm": 9.276590347290039, "learning_rate": 9.239710474027817e-05, "loss": 0.07503811717033386, "step": 26800 }, { "epoch": 7.60999148453023, "grad_norm": 9.135767936706543, "learning_rate": 9.239426625035481e-05, "loss": 0.0628929615020752, "step": 26810 }, { "epoch": 7.61282997445359, "grad_norm": 1.528260588645935, "learning_rate": 9.239142776043146e-05, "loss": 0.06634442210197448, "step": 26820 }, { "epoch": 7.615668464376951, "grad_norm": 6.177563190460205, "learning_rate": 9.238858927050809e-05, "loss": 0.07088123559951783, "step": 26830 }, { "epoch": 7.6185069543003126, "grad_norm": 1.8341660499572754, "learning_rate": 9.238575078058473e-05, "loss": 0.09534658789634705, "step": 26840 }, { "epoch": 7.621345444223673, "grad_norm": 13.950006484985352, "learning_rate": 9.238291229066137e-05, "loss": 0.10807677507400512, "step": 26850 }, { "epoch": 7.624183934147034, "grad_norm": 12.056605339050293, "learning_rate": 9.238007380073802e-05, "loss": 0.0799710988998413, "step": 26860 }, { "epoch": 7.627022424070395, "grad_norm": 5.1240153312683105, "learning_rate": 9.237723531081464e-05, "loss": 0.08616395592689514, "step": 26870 }, { "epoch": 7.629860913993755, "grad_norm": 8.71175479888916, "learning_rate": 9.23743968208913e-05, "loss": 0.07502010464668274, "step": 26880 }, { "epoch": 7.632699403917116, "grad_norm": 12.677748680114746, "learning_rate": 9.237155833096794e-05, "loss": 0.0809528648853302, "step": 26890 }, { "epoch": 7.6355378938404765, "grad_norm": 8.12935733795166, "learning_rate": 9.236871984104457e-05, "loss": 0.11166051626205445, "step": 26900 }, { "epoch": 7.638376383763838, "grad_norm": 8.465171813964844, "learning_rate": 9.236588135112121e-05, "loss": 0.07791109681129456, "step": 26910 }, { "epoch": 7.641214873687199, "grad_norm": 5.829082489013672, "learning_rate": 9.236304286119785e-05, "loss": 0.05595495700836182, "step": 26920 }, { "epoch": 7.644053363610559, "grad_norm": 8.654165267944336, "learning_rate": 9.236020437127448e-05, "loss": 0.08592514991760254, "step": 26930 }, { "epoch": 7.64689185353392, "grad_norm": 7.819706916809082, "learning_rate": 9.235736588135112e-05, "loss": 0.05950862169265747, "step": 26940 }, { "epoch": 7.64973034345728, "grad_norm": 11.006224632263184, "learning_rate": 9.235452739142777e-05, "loss": 0.06624175906181336, "step": 26950 }, { "epoch": 7.652568833380641, "grad_norm": 13.52245807647705, "learning_rate": 9.23516889015044e-05, "loss": 0.0643926978111267, "step": 26960 }, { "epoch": 7.6554073233040025, "grad_norm": 8.29045295715332, "learning_rate": 9.234885041158104e-05, "loss": 0.09127501845359802, "step": 26970 }, { "epoch": 7.658245813227363, "grad_norm": 6.466549873352051, "learning_rate": 9.234601192165768e-05, "loss": 0.08462011814117432, "step": 26980 }, { "epoch": 7.661084303150724, "grad_norm": 10.676262855529785, "learning_rate": 9.234317343173433e-05, "loss": 0.08582854270935059, "step": 26990 }, { "epoch": 7.663922793074085, "grad_norm": 10.218280792236328, "learning_rate": 9.234033494181095e-05, "loss": 0.06660287976264953, "step": 27000 }, { "epoch": 7.663922793074085, "eval_accuracy": 0.9253513066700578, "eval_loss": 0.21773947775363922, "eval_runtime": 31.841, "eval_samples_per_second": 493.923, "eval_steps_per_second": 7.726, "step": 27000 }, { "epoch": 7.666761282997445, "grad_norm": 10.956585884094238, "learning_rate": 9.23374964518876e-05, "loss": 0.07425179481506347, "step": 27010 }, { "epoch": 7.669599772920806, "grad_norm": 17.00250244140625, "learning_rate": 9.233465796196425e-05, "loss": 0.07376413345336914, "step": 27020 }, { "epoch": 7.6724382628441665, "grad_norm": 21.057472229003906, "learning_rate": 9.233181947204088e-05, "loss": 0.09075461030006408, "step": 27030 }, { "epoch": 7.675276752767528, "grad_norm": 10.701294898986816, "learning_rate": 9.232898098211752e-05, "loss": 0.07240979075431823, "step": 27040 }, { "epoch": 7.678115242690889, "grad_norm": 6.992650032043457, "learning_rate": 9.232614249219416e-05, "loss": 0.06210773587226868, "step": 27050 }, { "epoch": 7.680953732614249, "grad_norm": 9.65118408203125, "learning_rate": 9.232330400227079e-05, "loss": 0.1013252854347229, "step": 27060 }, { "epoch": 7.68379222253761, "grad_norm": 18.297809600830078, "learning_rate": 9.232046551234743e-05, "loss": 0.08987092971801758, "step": 27070 }, { "epoch": 7.686630712460971, "grad_norm": 8.46811580657959, "learning_rate": 9.231762702242409e-05, "loss": 0.08325701951980591, "step": 27080 }, { "epoch": 7.689469202384331, "grad_norm": 12.01207447052002, "learning_rate": 9.231478853250071e-05, "loss": 0.10193637609481812, "step": 27090 }, { "epoch": 7.6923076923076925, "grad_norm": 8.483120918273926, "learning_rate": 9.231195004257735e-05, "loss": 0.07533005475997925, "step": 27100 }, { "epoch": 7.695146182231053, "grad_norm": 4.285157203674316, "learning_rate": 9.2309111552654e-05, "loss": 0.07308659553527833, "step": 27110 }, { "epoch": 7.697984672154414, "grad_norm": 13.469322204589844, "learning_rate": 9.230627306273064e-05, "loss": 0.11812728643417358, "step": 27120 }, { "epoch": 7.700823162077775, "grad_norm": 4.122330188751221, "learning_rate": 9.230343457280726e-05, "loss": 0.06805981993675232, "step": 27130 }, { "epoch": 7.703661652001135, "grad_norm": 8.195808410644531, "learning_rate": 9.23005960828839e-05, "loss": 0.10464376211166382, "step": 27140 }, { "epoch": 7.706500141924496, "grad_norm": 2.0590085983276367, "learning_rate": 9.229775759296055e-05, "loss": 0.05893133878707886, "step": 27150 }, { "epoch": 7.7093386318478565, "grad_norm": 6.839287757873535, "learning_rate": 9.229491910303719e-05, "loss": 0.07113541960716248, "step": 27160 }, { "epoch": 7.712177121771218, "grad_norm": 10.108999252319336, "learning_rate": 9.229208061311383e-05, "loss": 0.06880993843078613, "step": 27170 }, { "epoch": 7.715015611694579, "grad_norm": 7.260905742645264, "learning_rate": 9.228924212319047e-05, "loss": 0.07458056211471557, "step": 27180 }, { "epoch": 7.717854101617939, "grad_norm": 14.39460277557373, "learning_rate": 9.22864036332671e-05, "loss": 0.09929774403572082, "step": 27190 }, { "epoch": 7.7206925915413, "grad_norm": 8.72842025756836, "learning_rate": 9.228356514334374e-05, "loss": 0.09915777444839477, "step": 27200 }, { "epoch": 7.723531081464661, "grad_norm": 10.42147445678711, "learning_rate": 9.228072665342038e-05, "loss": 0.05212537050247192, "step": 27210 }, { "epoch": 7.726369571388021, "grad_norm": 8.22713565826416, "learning_rate": 9.227788816349702e-05, "loss": 0.07159795165061951, "step": 27220 }, { "epoch": 7.7292080613113825, "grad_norm": 4.017706394195557, "learning_rate": 9.227504967357366e-05, "loss": 0.06240721940994263, "step": 27230 }, { "epoch": 7.732046551234744, "grad_norm": 5.211182117462158, "learning_rate": 9.22722111836503e-05, "loss": 0.0729503571987152, "step": 27240 }, { "epoch": 7.734885041158104, "grad_norm": 10.763618469238281, "learning_rate": 9.226937269372693e-05, "loss": 0.06937309503555297, "step": 27250 }, { "epoch": 7.737723531081465, "grad_norm": 7.215002059936523, "learning_rate": 9.226653420380358e-05, "loss": 0.08836377263069153, "step": 27260 }, { "epoch": 7.740562021004825, "grad_norm": 12.056900024414062, "learning_rate": 9.226369571388022e-05, "loss": 0.11528704166412354, "step": 27270 }, { "epoch": 7.743400510928186, "grad_norm": 2.8099069595336914, "learning_rate": 9.226085722395686e-05, "loss": 0.0929962933063507, "step": 27280 }, { "epoch": 7.746239000851547, "grad_norm": 7.90421199798584, "learning_rate": 9.22580187340335e-05, "loss": 0.06480119228363038, "step": 27290 }, { "epoch": 7.749077490774908, "grad_norm": 5.393097400665283, "learning_rate": 9.225518024411014e-05, "loss": 0.05637313723564148, "step": 27300 }, { "epoch": 7.751915980698269, "grad_norm": 11.397273063659668, "learning_rate": 9.225234175418678e-05, "loss": 0.08215314745903016, "step": 27310 }, { "epoch": 7.754754470621629, "grad_norm": 10.50705623626709, "learning_rate": 9.224950326426341e-05, "loss": 0.0628060519695282, "step": 27320 }, { "epoch": 7.75759296054499, "grad_norm": 5.726906776428223, "learning_rate": 9.224666477434005e-05, "loss": 0.07564097046852111, "step": 27330 }, { "epoch": 7.760431450468351, "grad_norm": 4.556398868560791, "learning_rate": 9.224382628441669e-05, "loss": 0.0793717384338379, "step": 27340 }, { "epoch": 7.763269940391711, "grad_norm": 5.672148704528809, "learning_rate": 9.224098779449333e-05, "loss": 0.06801968216896057, "step": 27350 }, { "epoch": 7.7661084303150725, "grad_norm": 7.562779903411865, "learning_rate": 9.223814930456998e-05, "loss": 0.06255950331687928, "step": 27360 }, { "epoch": 7.768946920238433, "grad_norm": 4.586869239807129, "learning_rate": 9.223531081464662e-05, "loss": 0.07646456360816956, "step": 27370 }, { "epoch": 7.771785410161794, "grad_norm": 6.574797630310059, "learning_rate": 9.223247232472324e-05, "loss": 0.08726893067359924, "step": 27380 }, { "epoch": 7.774623900085155, "grad_norm": 11.990139961242676, "learning_rate": 9.222963383479989e-05, "loss": 0.10619642734527587, "step": 27390 }, { "epoch": 7.777462390008515, "grad_norm": 4.124191761016846, "learning_rate": 9.222679534487653e-05, "loss": 0.06511511206626892, "step": 27400 }, { "epoch": 7.780300879931876, "grad_norm": 11.842705726623535, "learning_rate": 9.222395685495317e-05, "loss": 0.07857871651649476, "step": 27410 }, { "epoch": 7.783139369855237, "grad_norm": 9.203516960144043, "learning_rate": 9.222111836502981e-05, "loss": 0.07252969741821289, "step": 27420 }, { "epoch": 7.785977859778598, "grad_norm": 7.54423713684082, "learning_rate": 9.221827987510645e-05, "loss": 0.06294538974761962, "step": 27430 }, { "epoch": 7.788816349701959, "grad_norm": 7.835594177246094, "learning_rate": 9.221544138518309e-05, "loss": 0.06422609686851502, "step": 27440 }, { "epoch": 7.79165483962532, "grad_norm": 10.204795837402344, "learning_rate": 9.221260289525972e-05, "loss": 0.07313728332519531, "step": 27450 }, { "epoch": 7.79449332954868, "grad_norm": 12.939783096313477, "learning_rate": 9.220976440533636e-05, "loss": 0.08524999022483826, "step": 27460 }, { "epoch": 7.797331819472041, "grad_norm": 12.29412841796875, "learning_rate": 9.2206925915413e-05, "loss": 0.0500885009765625, "step": 27470 }, { "epoch": 7.800170309395401, "grad_norm": 8.158082962036133, "learning_rate": 9.220408742548965e-05, "loss": 0.04963379502296448, "step": 27480 }, { "epoch": 7.8030087993187625, "grad_norm": 8.88704776763916, "learning_rate": 9.220124893556629e-05, "loss": 0.0734395980834961, "step": 27490 }, { "epoch": 7.805847289242124, "grad_norm": 3.642496347427368, "learning_rate": 9.219841044564293e-05, "loss": 0.05631873607635498, "step": 27500 }, { "epoch": 7.805847289242124, "eval_accuracy": 0.9385133846251669, "eval_loss": 0.17873375117778778, "eval_runtime": 31.7264, "eval_samples_per_second": 495.708, "eval_steps_per_second": 7.754, "step": 27500 }, { "epoch": 7.808685779165484, "grad_norm": 9.62083911895752, "learning_rate": 9.219557195571956e-05, "loss": 0.10155099630355835, "step": 27510 }, { "epoch": 7.811524269088845, "grad_norm": 9.295912742614746, "learning_rate": 9.21927334657962e-05, "loss": 0.0950278401374817, "step": 27520 }, { "epoch": 7.814362759012205, "grad_norm": 9.448451042175293, "learning_rate": 9.218989497587284e-05, "loss": 0.07441433668136596, "step": 27530 }, { "epoch": 7.817201248935566, "grad_norm": 14.678438186645508, "learning_rate": 9.218705648594948e-05, "loss": 0.0783860981464386, "step": 27540 }, { "epoch": 7.820039738858927, "grad_norm": 10.288312911987305, "learning_rate": 9.218421799602612e-05, "loss": 0.09926932454109191, "step": 27550 }, { "epoch": 7.822878228782288, "grad_norm": 15.592484474182129, "learning_rate": 9.218137950610276e-05, "loss": 0.07857686877250672, "step": 27560 }, { "epoch": 7.825716718705649, "grad_norm": 10.722553253173828, "learning_rate": 9.21785410161794e-05, "loss": 0.08373225927352905, "step": 27570 }, { "epoch": 7.828555208629009, "grad_norm": 21.01876449584961, "learning_rate": 9.217570252625603e-05, "loss": 0.1304453730583191, "step": 27580 }, { "epoch": 7.83139369855237, "grad_norm": 11.549372673034668, "learning_rate": 9.217286403633267e-05, "loss": 0.06365420818328857, "step": 27590 }, { "epoch": 7.834232188475731, "grad_norm": 7.73263692855835, "learning_rate": 9.217002554640931e-05, "loss": 0.07818495035171509, "step": 27600 }, { "epoch": 7.837070678399091, "grad_norm": 4.6471781730651855, "learning_rate": 9.216718705648594e-05, "loss": 0.11928218603134155, "step": 27610 }, { "epoch": 7.839909168322452, "grad_norm": 8.833198547363281, "learning_rate": 9.21643485665626e-05, "loss": 0.07078934907913208, "step": 27620 }, { "epoch": 7.8427476582458135, "grad_norm": 6.99755859375, "learning_rate": 9.216151007663924e-05, "loss": 0.06474398374557495, "step": 27630 }, { "epoch": 7.845586148169174, "grad_norm": 17.104679107666016, "learning_rate": 9.215867158671587e-05, "loss": 0.08713191747665405, "step": 27640 }, { "epoch": 7.848424638092535, "grad_norm": 12.586068153381348, "learning_rate": 9.215583309679251e-05, "loss": 0.09169603586196899, "step": 27650 }, { "epoch": 7.851263128015896, "grad_norm": 5.165184497833252, "learning_rate": 9.215299460686915e-05, "loss": 0.07519745230674743, "step": 27660 }, { "epoch": 7.854101617939256, "grad_norm": 6.144222736358643, "learning_rate": 9.215015611694579e-05, "loss": 0.08131247162818908, "step": 27670 }, { "epoch": 7.856940107862617, "grad_norm": 8.348605155944824, "learning_rate": 9.214731762702243e-05, "loss": 0.08561167716979981, "step": 27680 }, { "epoch": 7.8597785977859775, "grad_norm": 8.29742431640625, "learning_rate": 9.214447913709907e-05, "loss": 0.0758174180984497, "step": 27690 }, { "epoch": 7.862617087709339, "grad_norm": 4.052884101867676, "learning_rate": 9.214164064717571e-05, "loss": 0.05107748508453369, "step": 27700 }, { "epoch": 7.8654555776327, "grad_norm": 5.622657299041748, "learning_rate": 9.213880215725234e-05, "loss": 0.04839843213558197, "step": 27710 }, { "epoch": 7.86829406755606, "grad_norm": 8.668671607971191, "learning_rate": 9.213596366732898e-05, "loss": 0.07239719629287719, "step": 27720 }, { "epoch": 7.871132557479421, "grad_norm": 11.74494457244873, "learning_rate": 9.213312517740563e-05, "loss": 0.10596381425857544, "step": 27730 }, { "epoch": 7.873971047402781, "grad_norm": 8.483918190002441, "learning_rate": 9.213028668748225e-05, "loss": 0.08127362132072449, "step": 27740 }, { "epoch": 7.876809537326142, "grad_norm": 7.552752494812012, "learning_rate": 9.212744819755891e-05, "loss": 0.047620663046836854, "step": 27750 }, { "epoch": 7.8796480272495035, "grad_norm": 5.005000591278076, "learning_rate": 9.212460970763555e-05, "loss": 0.07075139880180359, "step": 27760 }, { "epoch": 7.882486517172864, "grad_norm": 7.97688102722168, "learning_rate": 9.212177121771218e-05, "loss": 0.06970332860946656, "step": 27770 }, { "epoch": 7.885325007096225, "grad_norm": 13.781211853027344, "learning_rate": 9.211893272778882e-05, "loss": 0.06361839771270753, "step": 27780 }, { "epoch": 7.888163497019586, "grad_norm": 9.488520622253418, "learning_rate": 9.211609423786546e-05, "loss": 0.07577272653579711, "step": 27790 }, { "epoch": 7.891001986942946, "grad_norm": 10.51278018951416, "learning_rate": 9.21132557479421e-05, "loss": 0.07654934525489807, "step": 27800 }, { "epoch": 7.893840476866307, "grad_norm": 7.967763423919678, "learning_rate": 9.211041725801873e-05, "loss": 0.075218665599823, "step": 27810 }, { "epoch": 7.8966789667896675, "grad_norm": 4.935878753662109, "learning_rate": 9.210757876809538e-05, "loss": 0.07743616700172425, "step": 27820 }, { "epoch": 7.899517456713029, "grad_norm": 14.868453979492188, "learning_rate": 9.210474027817203e-05, "loss": 0.08831820487976075, "step": 27830 }, { "epoch": 7.90235594663639, "grad_norm": 12.458539962768555, "learning_rate": 9.210190178824865e-05, "loss": 0.10484296083450317, "step": 27840 }, { "epoch": 7.90519443655975, "grad_norm": 8.402612686157227, "learning_rate": 9.20990632983253e-05, "loss": 0.0750116229057312, "step": 27850 }, { "epoch": 7.908032926483111, "grad_norm": 5.604598045349121, "learning_rate": 9.209622480840194e-05, "loss": 0.07691766023635864, "step": 27860 }, { "epoch": 7.910871416406472, "grad_norm": 9.31263256072998, "learning_rate": 9.209338631847856e-05, "loss": 0.08029322624206543, "step": 27870 }, { "epoch": 7.913709906329832, "grad_norm": 8.123852729797363, "learning_rate": 9.209054782855522e-05, "loss": 0.08302119970321656, "step": 27880 }, { "epoch": 7.9165483962531935, "grad_norm": 4.332707405090332, "learning_rate": 9.208770933863186e-05, "loss": 0.08256133794784545, "step": 27890 }, { "epoch": 7.919386886176554, "grad_norm": 10.565896034240723, "learning_rate": 9.208487084870849e-05, "loss": 0.06437480449676514, "step": 27900 }, { "epoch": 7.922225376099915, "grad_norm": 6.135404109954834, "learning_rate": 9.208203235878513e-05, "loss": 0.06738330125808716, "step": 27910 }, { "epoch": 7.925063866023276, "grad_norm": 4.180898189544678, "learning_rate": 9.207919386886177e-05, "loss": 0.06374726891517639, "step": 27920 }, { "epoch": 7.927902355946636, "grad_norm": 18.677400588989258, "learning_rate": 9.207635537893841e-05, "loss": 0.08649280071258544, "step": 27930 }, { "epoch": 7.930740845869997, "grad_norm": 19.61292839050293, "learning_rate": 9.207351688901504e-05, "loss": 0.1137991189956665, "step": 27940 }, { "epoch": 7.9335793357933575, "grad_norm": 8.618695259094238, "learning_rate": 9.20706783990917e-05, "loss": 0.09259873032569885, "step": 27950 }, { "epoch": 7.936417825716719, "grad_norm": 2.9410247802734375, "learning_rate": 9.206783990916834e-05, "loss": 0.06983211636543274, "step": 27960 }, { "epoch": 7.93925631564008, "grad_norm": 7.65075159072876, "learning_rate": 9.206500141924496e-05, "loss": 0.05612436532974243, "step": 27970 }, { "epoch": 7.94209480556344, "grad_norm": 13.11274528503418, "learning_rate": 9.20621629293216e-05, "loss": 0.06669648289680481, "step": 27980 }, { "epoch": 7.944933295486801, "grad_norm": 13.256776809692383, "learning_rate": 9.205932443939825e-05, "loss": 0.11692190170288086, "step": 27990 }, { "epoch": 7.947771785410162, "grad_norm": 4.469211578369141, "learning_rate": 9.205648594947487e-05, "loss": 0.07598663568496704, "step": 28000 }, { "epoch": 7.947771785410162, "eval_accuracy": 0.9387041393781395, "eval_loss": 0.18467707931995392, "eval_runtime": 31.9806, "eval_samples_per_second": 491.767, "eval_steps_per_second": 7.692, "step": 28000 }, { "epoch": 7.950610275333522, "grad_norm": 8.834558486938477, "learning_rate": 9.205364745955152e-05, "loss": 0.06588622331619262, "step": 28010 }, { "epoch": 7.9534487652568835, "grad_norm": 12.618034362792969, "learning_rate": 9.205080896962817e-05, "loss": 0.07139828205108642, "step": 28020 }, { "epoch": 7.956287255180245, "grad_norm": 10.613566398620605, "learning_rate": 9.20479704797048e-05, "loss": 0.07414588332176208, "step": 28030 }, { "epoch": 7.959125745103605, "grad_norm": 11.469747543334961, "learning_rate": 9.204513198978144e-05, "loss": 0.0742957592010498, "step": 28040 }, { "epoch": 7.961964235026966, "grad_norm": 4.077260971069336, "learning_rate": 9.204229349985808e-05, "loss": 0.06864491701126099, "step": 28050 }, { "epoch": 7.964802724950326, "grad_norm": 3.775050640106201, "learning_rate": 9.203945500993472e-05, "loss": 0.0753418266773224, "step": 28060 }, { "epoch": 7.967641214873687, "grad_norm": 1.6595792770385742, "learning_rate": 9.203661652001135e-05, "loss": 0.06280696988105774, "step": 28070 }, { "epoch": 7.970479704797048, "grad_norm": 7.248383522033691, "learning_rate": 9.2033778030088e-05, "loss": 0.06148518323898315, "step": 28080 }, { "epoch": 7.973318194720409, "grad_norm": 7.503737449645996, "learning_rate": 9.203093954016463e-05, "loss": 0.08175054788589478, "step": 28090 }, { "epoch": 7.97615668464377, "grad_norm": 6.599333763122559, "learning_rate": 9.202810105024127e-05, "loss": 0.06689843535423279, "step": 28100 }, { "epoch": 7.97899517456713, "grad_norm": 9.845291137695312, "learning_rate": 9.202526256031792e-05, "loss": 0.11563538312911988, "step": 28110 }, { "epoch": 7.981833664490491, "grad_norm": 7.681792259216309, "learning_rate": 9.202242407039456e-05, "loss": 0.08608756065368653, "step": 28120 }, { "epoch": 7.984672154413852, "grad_norm": 6.084494590759277, "learning_rate": 9.201958558047119e-05, "loss": 0.05544511079788208, "step": 28130 }, { "epoch": 7.987510644337212, "grad_norm": 13.77053165435791, "learning_rate": 9.201674709054783e-05, "loss": 0.08298943042755128, "step": 28140 }, { "epoch": 7.9903491342605735, "grad_norm": 26.657520294189453, "learning_rate": 9.201390860062448e-05, "loss": 0.11479487419128417, "step": 28150 }, { "epoch": 7.993187624183934, "grad_norm": 15.225860595703125, "learning_rate": 9.201107011070111e-05, "loss": 0.10137089490890502, "step": 28160 }, { "epoch": 7.996026114107295, "grad_norm": 4.095785140991211, "learning_rate": 9.200823162077775e-05, "loss": 0.09867247343063354, "step": 28170 }, { "epoch": 7.998864604030656, "grad_norm": 11.450681686401367, "learning_rate": 9.200539313085439e-05, "loss": 0.0811159610748291, "step": 28180 }, { "epoch": 8.001703093954017, "grad_norm": 4.138267993927002, "learning_rate": 9.200255464093102e-05, "loss": 0.08176198601722717, "step": 28190 }, { "epoch": 8.004541583877376, "grad_norm": 7.668015480041504, "learning_rate": 9.199971615100766e-05, "loss": 0.050739753246307376, "step": 28200 }, { "epoch": 8.007380073800737, "grad_norm": 18.287296295166016, "learning_rate": 9.199687766108432e-05, "loss": 0.08709633946418763, "step": 28210 }, { "epoch": 8.010218563724099, "grad_norm": 8.561509132385254, "learning_rate": 9.199403917116094e-05, "loss": 0.04590723216533661, "step": 28220 }, { "epoch": 8.01305705364746, "grad_norm": 5.986148834228516, "learning_rate": 9.199120068123759e-05, "loss": 0.06425231099128723, "step": 28230 }, { "epoch": 8.01589554357082, "grad_norm": 6.2063117027282715, "learning_rate": 9.198836219131423e-05, "loss": 0.05600226521492004, "step": 28240 }, { "epoch": 8.018734033494182, "grad_norm": 4.819368362426758, "learning_rate": 9.198552370139087e-05, "loss": 0.0593639612197876, "step": 28250 }, { "epoch": 8.021572523417541, "grad_norm": 8.739453315734863, "learning_rate": 9.19826852114675e-05, "loss": 0.10399658679962158, "step": 28260 }, { "epoch": 8.024411013340902, "grad_norm": 6.652143478393555, "learning_rate": 9.197984672154414e-05, "loss": 0.07376932501792907, "step": 28270 }, { "epoch": 8.027249503264263, "grad_norm": 6.728353977203369, "learning_rate": 9.197700823162079e-05, "loss": 0.05037533044815064, "step": 28280 }, { "epoch": 8.030087993187625, "grad_norm": 6.354711055755615, "learning_rate": 9.197416974169742e-05, "loss": 0.06774352788925171, "step": 28290 }, { "epoch": 8.032926483110986, "grad_norm": 7.2082061767578125, "learning_rate": 9.197133125177406e-05, "loss": 0.06029695272445679, "step": 28300 }, { "epoch": 8.035764973034345, "grad_norm": 4.566948413848877, "learning_rate": 9.19684927618507e-05, "loss": 0.07258274555206298, "step": 28310 }, { "epoch": 8.038603462957706, "grad_norm": 4.820693016052246, "learning_rate": 9.196565427192733e-05, "loss": 0.05814773440361023, "step": 28320 }, { "epoch": 8.041441952881067, "grad_norm": 8.485374450683594, "learning_rate": 9.196281578200397e-05, "loss": 0.05457184910774231, "step": 28330 }, { "epoch": 8.044280442804428, "grad_norm": 10.01970100402832, "learning_rate": 9.195997729208061e-05, "loss": 0.07092044353485108, "step": 28340 }, { "epoch": 8.04711893272779, "grad_norm": 5.771849632263184, "learning_rate": 9.195713880215726e-05, "loss": 0.06729248762130738, "step": 28350 }, { "epoch": 8.049957422651149, "grad_norm": 9.445206642150879, "learning_rate": 9.19543003122339e-05, "loss": 0.06455802917480469, "step": 28360 }, { "epoch": 8.05279591257451, "grad_norm": 5.551255226135254, "learning_rate": 9.195146182231054e-05, "loss": 0.0712252676486969, "step": 28370 }, { "epoch": 8.055634402497871, "grad_norm": 3.5603044033050537, "learning_rate": 9.194862333238718e-05, "loss": 0.09181907773017883, "step": 28380 }, { "epoch": 8.058472892421232, "grad_norm": 2.0351345539093018, "learning_rate": 9.194578484246381e-05, "loss": 0.07351831197738648, "step": 28390 }, { "epoch": 8.061311382344593, "grad_norm": 19.573463439941406, "learning_rate": 9.194294635254045e-05, "loss": 0.08498290181159973, "step": 28400 }, { "epoch": 8.064149872267954, "grad_norm": 12.892037391662598, "learning_rate": 9.19401078626171e-05, "loss": 0.07225368618965149, "step": 28410 }, { "epoch": 8.066988362191314, "grad_norm": 11.845551490783691, "learning_rate": 9.193726937269373e-05, "loss": 0.065793377161026, "step": 28420 }, { "epoch": 8.069826852114675, "grad_norm": 11.925344467163086, "learning_rate": 9.193443088277037e-05, "loss": 0.05553908944129944, "step": 28430 }, { "epoch": 8.072665342038036, "grad_norm": 12.499534606933594, "learning_rate": 9.193159239284701e-05, "loss": 0.06909813284873963, "step": 28440 }, { "epoch": 8.075503831961397, "grad_norm": 7.718222618103027, "learning_rate": 9.192875390292364e-05, "loss": 0.05608686208724976, "step": 28450 }, { "epoch": 8.078342321884758, "grad_norm": 12.7086820602417, "learning_rate": 9.192591541300028e-05, "loss": 0.05288501381874085, "step": 28460 }, { "epoch": 8.081180811808117, "grad_norm": 8.647099494934082, "learning_rate": 9.192307692307692e-05, "loss": 0.053787410259246826, "step": 28470 }, { "epoch": 8.084019301731479, "grad_norm": 9.51988410949707, "learning_rate": 9.192023843315357e-05, "loss": 0.08311404585838318, "step": 28480 }, { "epoch": 8.08685779165484, "grad_norm": 8.066847801208496, "learning_rate": 9.191739994323021e-05, "loss": 0.04294182658195496, "step": 28490 }, { "epoch": 8.0896962815782, "grad_norm": 5.057689189910889, "learning_rate": 9.191456145330685e-05, "loss": 0.049669665098190305, "step": 28500 }, { "epoch": 8.0896962815782, "eval_accuracy": 0.9378775354485916, "eval_loss": 0.1790970265865326, "eval_runtime": 31.1557, "eval_samples_per_second": 504.788, "eval_steps_per_second": 7.896, "step": 28500 }, { "epoch": 8.092534771501562, "grad_norm": 12.8748197555542, "learning_rate": 9.191172296338349e-05, "loss": 0.07101553678512573, "step": 28510 }, { "epoch": 8.095373261424921, "grad_norm": 2.00642728805542, "learning_rate": 9.190888447346012e-05, "loss": 0.04895746111869812, "step": 28520 }, { "epoch": 8.098211751348282, "grad_norm": 9.168696403503418, "learning_rate": 9.190604598353676e-05, "loss": 0.06775805354118347, "step": 28530 }, { "epoch": 8.101050241271643, "grad_norm": 6.544206619262695, "learning_rate": 9.19032074936134e-05, "loss": 0.05526372194290161, "step": 28540 }, { "epoch": 8.103888731195005, "grad_norm": 7.154520511627197, "learning_rate": 9.190036900369004e-05, "loss": 0.05225864052772522, "step": 28550 }, { "epoch": 8.106727221118366, "grad_norm": 8.968255996704102, "learning_rate": 9.189753051376668e-05, "loss": 0.04409788846969605, "step": 28560 }, { "epoch": 8.109565711041725, "grad_norm": 4.260835647583008, "learning_rate": 9.189469202384332e-05, "loss": 0.07063850164413452, "step": 28570 }, { "epoch": 8.112404200965086, "grad_norm": 11.586590766906738, "learning_rate": 9.189185353391995e-05, "loss": 0.05927335619926453, "step": 28580 }, { "epoch": 8.115242690888447, "grad_norm": 9.707047462463379, "learning_rate": 9.18890150439966e-05, "loss": 0.06357197761535645, "step": 28590 }, { "epoch": 8.118081180811808, "grad_norm": 8.005043029785156, "learning_rate": 9.188617655407324e-05, "loss": 0.05256074070930481, "step": 28600 }, { "epoch": 8.12091967073517, "grad_norm": 9.046860694885254, "learning_rate": 9.188333806414988e-05, "loss": 0.04542839527130127, "step": 28610 }, { "epoch": 8.12375816065853, "grad_norm": 2.8076603412628174, "learning_rate": 9.188049957422652e-05, "loss": 0.052019888162612916, "step": 28620 }, { "epoch": 8.12659665058189, "grad_norm": 6.1467204093933105, "learning_rate": 9.187766108430316e-05, "loss": 0.06769022941589356, "step": 28630 }, { "epoch": 8.129435140505251, "grad_norm": 5.422061920166016, "learning_rate": 9.18748225943798e-05, "loss": 0.06597175598144531, "step": 28640 }, { "epoch": 8.132273630428612, "grad_norm": 13.432371139526367, "learning_rate": 9.187198410445643e-05, "loss": 0.0722927987575531, "step": 28650 }, { "epoch": 8.135112120351973, "grad_norm": 5.142928600311279, "learning_rate": 9.186914561453307e-05, "loss": 0.06466186046600342, "step": 28660 }, { "epoch": 8.137950610275334, "grad_norm": 10.77366828918457, "learning_rate": 9.186630712460971e-05, "loss": 0.05489208698272705, "step": 28670 }, { "epoch": 8.140789100198694, "grad_norm": 18.801244735717773, "learning_rate": 9.186346863468635e-05, "loss": 0.07088869214057922, "step": 28680 }, { "epoch": 8.143627590122055, "grad_norm": 8.477251052856445, "learning_rate": 9.1860630144763e-05, "loss": 0.059319323301315306, "step": 28690 }, { "epoch": 8.146466080045416, "grad_norm": 15.10538101196289, "learning_rate": 9.185779165483964e-05, "loss": 0.05679097175598145, "step": 28700 }, { "epoch": 8.149304569968777, "grad_norm": 13.824700355529785, "learning_rate": 9.185495316491626e-05, "loss": 0.06503130793571472, "step": 28710 }, { "epoch": 8.152143059892138, "grad_norm": 6.980968952178955, "learning_rate": 9.18521146749929e-05, "loss": 0.048087412118911745, "step": 28720 }, { "epoch": 8.154981549815497, "grad_norm": 8.005522727966309, "learning_rate": 9.184927618506955e-05, "loss": 0.09411823153495788, "step": 28730 }, { "epoch": 8.157820039738858, "grad_norm": 8.153717041015625, "learning_rate": 9.184643769514619e-05, "loss": 0.050446927547454834, "step": 28740 }, { "epoch": 8.16065852966222, "grad_norm": 14.12834644317627, "learning_rate": 9.184359920522283e-05, "loss": 0.07808389663696289, "step": 28750 }, { "epoch": 8.16349701958558, "grad_norm": 7.5934367179870605, "learning_rate": 9.184076071529947e-05, "loss": 0.05060620307922363, "step": 28760 }, { "epoch": 8.166335509508942, "grad_norm": 1.4182673692703247, "learning_rate": 9.183792222537611e-05, "loss": 0.05429837703704834, "step": 28770 }, { "epoch": 8.169173999432301, "grad_norm": 4.7587361335754395, "learning_rate": 9.183508373545274e-05, "loss": 0.060312533378601076, "step": 28780 }, { "epoch": 8.172012489355662, "grad_norm": 2.2295658588409424, "learning_rate": 9.183224524552938e-05, "loss": 0.05230412483215332, "step": 28790 }, { "epoch": 8.174850979279023, "grad_norm": 8.094844818115234, "learning_rate": 9.182940675560602e-05, "loss": 0.06559881567955017, "step": 28800 }, { "epoch": 8.177689469202384, "grad_norm": 5.727514266967773, "learning_rate": 9.182656826568266e-05, "loss": 0.05493706464767456, "step": 28810 }, { "epoch": 8.180527959125746, "grad_norm": 9.533031463623047, "learning_rate": 9.18237297757593e-05, "loss": 0.05957727432250977, "step": 28820 }, { "epoch": 8.183366449049107, "grad_norm": 4.9983811378479, "learning_rate": 9.182089128583595e-05, "loss": 0.052783387899398806, "step": 28830 }, { "epoch": 8.186204938972466, "grad_norm": 6.4478607177734375, "learning_rate": 9.181805279591257e-05, "loss": 0.06514946818351745, "step": 28840 }, { "epoch": 8.189043428895827, "grad_norm": 4.356100082397461, "learning_rate": 9.181521430598922e-05, "loss": 0.05870385766029358, "step": 28850 }, { "epoch": 8.191881918819188, "grad_norm": 6.167853832244873, "learning_rate": 9.181237581606586e-05, "loss": 0.06018507480621338, "step": 28860 }, { "epoch": 8.19472040874255, "grad_norm": 11.0339994430542, "learning_rate": 9.18095373261425e-05, "loss": 0.0852200984954834, "step": 28870 }, { "epoch": 8.19755889866591, "grad_norm": 9.826255798339844, "learning_rate": 9.180669883621914e-05, "loss": 0.06306055784225464, "step": 28880 }, { "epoch": 8.20039738858927, "grad_norm": 7.294203758239746, "learning_rate": 9.180386034629578e-05, "loss": 0.08102589845657349, "step": 28890 }, { "epoch": 8.203235878512631, "grad_norm": 13.558586120605469, "learning_rate": 9.180102185637242e-05, "loss": 0.06372936964035034, "step": 28900 }, { "epoch": 8.206074368435992, "grad_norm": 6.082158088684082, "learning_rate": 9.179818336644905e-05, "loss": 0.07231396436691284, "step": 28910 }, { "epoch": 8.208912858359353, "grad_norm": 17.394548416137695, "learning_rate": 9.179534487652569e-05, "loss": 0.04497288167476654, "step": 28920 }, { "epoch": 8.211751348282714, "grad_norm": 3.8369526863098145, "learning_rate": 9.179250638660233e-05, "loss": 0.06243351697921753, "step": 28930 }, { "epoch": 8.214589838206074, "grad_norm": 7.327316761016846, "learning_rate": 9.178966789667896e-05, "loss": 0.04064055681228638, "step": 28940 }, { "epoch": 8.217428328129435, "grad_norm": 14.72048568725586, "learning_rate": 9.178682940675562e-05, "loss": 0.07328947186470032, "step": 28950 }, { "epoch": 8.220266818052796, "grad_norm": 9.032869338989258, "learning_rate": 9.178399091683226e-05, "loss": 0.06302196383476258, "step": 28960 }, { "epoch": 8.223105307976157, "grad_norm": 4.357512950897217, "learning_rate": 9.178115242690888e-05, "loss": 0.06843396425247192, "step": 28970 }, { "epoch": 8.225943797899518, "grad_norm": 6.721951007843018, "learning_rate": 9.177831393698553e-05, "loss": 0.0600142240524292, "step": 28980 }, { "epoch": 8.228782287822877, "grad_norm": 3.4084107875823975, "learning_rate": 9.17757592960545e-05, "loss": 0.07553284764289855, "step": 28990 }, { "epoch": 8.231620777746238, "grad_norm": 8.18293571472168, "learning_rate": 9.177292080613115e-05, "loss": 0.04516300559043884, "step": 29000 }, { "epoch": 8.231620777746238, "eval_accuracy": 0.943790932790742, "eval_loss": 0.1692141890525818, "eval_runtime": 30.9015, "eval_samples_per_second": 508.939, "eval_steps_per_second": 7.961, "step": 29000 }, { "epoch": 8.2344592676696, "grad_norm": 10.485038757324219, "learning_rate": 9.177008231620779e-05, "loss": 0.07149056792259216, "step": 29010 }, { "epoch": 8.23729775759296, "grad_norm": 4.556738376617432, "learning_rate": 9.176724382628442e-05, "loss": 0.06517475843429565, "step": 29020 }, { "epoch": 8.240136247516322, "grad_norm": 6.041149616241455, "learning_rate": 9.176440533636106e-05, "loss": 0.0854555606842041, "step": 29030 }, { "epoch": 8.242974737439683, "grad_norm": 5.430018901824951, "learning_rate": 9.17615668464377e-05, "loss": 0.07756624817848205, "step": 29040 }, { "epoch": 8.245813227363042, "grad_norm": 4.9481048583984375, "learning_rate": 9.175872835651434e-05, "loss": 0.07038364410400391, "step": 29050 }, { "epoch": 8.248651717286403, "grad_norm": 8.902161598205566, "learning_rate": 9.175588986659098e-05, "loss": 0.06555392742156982, "step": 29060 }, { "epoch": 8.251490207209764, "grad_norm": 8.555258750915527, "learning_rate": 9.175305137666762e-05, "loss": 0.05040958523750305, "step": 29070 }, { "epoch": 8.254328697133126, "grad_norm": 9.97897720336914, "learning_rate": 9.175021288674425e-05, "loss": 0.062173879146575926, "step": 29080 }, { "epoch": 8.257167187056487, "grad_norm": 5.084324836730957, "learning_rate": 9.174737439682089e-05, "loss": 0.06158629059791565, "step": 29090 }, { "epoch": 8.260005676979846, "grad_norm": 5.335748195648193, "learning_rate": 9.174453590689753e-05, "loss": 0.054487550258636476, "step": 29100 }, { "epoch": 8.262844166903207, "grad_norm": 4.3890228271484375, "learning_rate": 9.174169741697418e-05, "loss": 0.07088085412979125, "step": 29110 }, { "epoch": 8.265682656826568, "grad_norm": 10.776468276977539, "learning_rate": 9.17388589270508e-05, "loss": 0.05529135465621948, "step": 29120 }, { "epoch": 8.26852114674993, "grad_norm": 12.11426830291748, "learning_rate": 9.173602043712746e-05, "loss": 0.06611025333404541, "step": 29130 }, { "epoch": 8.27135963667329, "grad_norm": 7.765328884124756, "learning_rate": 9.17331819472041e-05, "loss": 0.05948413610458374, "step": 29140 }, { "epoch": 8.27419812659665, "grad_norm": 11.50073528289795, "learning_rate": 9.173034345728073e-05, "loss": 0.11590793132781982, "step": 29150 }, { "epoch": 8.27703661652001, "grad_norm": 2.4201109409332275, "learning_rate": 9.172750496735737e-05, "loss": 0.06780180931091309, "step": 29160 }, { "epoch": 8.279875106443372, "grad_norm": 10.273046493530273, "learning_rate": 9.172466647743401e-05, "loss": 0.050675714015960695, "step": 29170 }, { "epoch": 8.282713596366733, "grad_norm": 12.870804786682129, "learning_rate": 9.172182798751064e-05, "loss": 0.08268274068832397, "step": 29180 }, { "epoch": 8.285552086290094, "grad_norm": 10.322364807128906, "learning_rate": 9.171898949758729e-05, "loss": 0.06419029235839843, "step": 29190 }, { "epoch": 8.288390576213455, "grad_norm": 15.34072494506836, "learning_rate": 9.171615100766393e-05, "loss": 0.0902258574962616, "step": 29200 }, { "epoch": 8.291229066136815, "grad_norm": 12.890625953674316, "learning_rate": 9.171331251774056e-05, "loss": 0.07161271572113037, "step": 29210 }, { "epoch": 8.294067556060176, "grad_norm": 5.4624505043029785, "learning_rate": 9.17104740278172e-05, "loss": 0.06565551161766052, "step": 29220 }, { "epoch": 8.296906045983537, "grad_norm": 8.181920051574707, "learning_rate": 9.170763553789384e-05, "loss": 0.06308490633964539, "step": 29230 }, { "epoch": 8.299744535906898, "grad_norm": 3.0428454875946045, "learning_rate": 9.170479704797049e-05, "loss": 0.05067722201347351, "step": 29240 }, { "epoch": 8.302583025830259, "grad_norm": 5.010264873504639, "learning_rate": 9.170195855804711e-05, "loss": 0.04812777042388916, "step": 29250 }, { "epoch": 8.305421515753618, "grad_norm": 9.04201602935791, "learning_rate": 9.169912006812377e-05, "loss": 0.06117487549781799, "step": 29260 }, { "epoch": 8.30826000567698, "grad_norm": 9.197172164916992, "learning_rate": 9.169628157820041e-05, "loss": 0.062438076734542845, "step": 29270 }, { "epoch": 8.31109849560034, "grad_norm": 8.5620756149292, "learning_rate": 9.169344308827704e-05, "loss": 0.07440378665924072, "step": 29280 }, { "epoch": 8.313936985523702, "grad_norm": 5.338595390319824, "learning_rate": 9.169060459835368e-05, "loss": 0.05974234938621521, "step": 29290 }, { "epoch": 8.316775475447063, "grad_norm": 15.110782623291016, "learning_rate": 9.168776610843032e-05, "loss": 0.08244252800941468, "step": 29300 }, { "epoch": 8.319613965370422, "grad_norm": 17.116798400878906, "learning_rate": 9.168492761850695e-05, "loss": 0.06338911056518555, "step": 29310 }, { "epoch": 8.322452455293783, "grad_norm": 5.806769847869873, "learning_rate": 9.168208912858359e-05, "loss": 0.050322186946868894, "step": 29320 }, { "epoch": 8.325290945217144, "grad_norm": 9.002280235290527, "learning_rate": 9.167925063866025e-05, "loss": 0.05529931783676147, "step": 29330 }, { "epoch": 8.328129435140506, "grad_norm": 13.09355640411377, "learning_rate": 9.167641214873687e-05, "loss": 0.06681718230247498, "step": 29340 }, { "epoch": 8.330967925063867, "grad_norm": 7.570567607879639, "learning_rate": 9.167357365881351e-05, "loss": 0.08058772683143615, "step": 29350 }, { "epoch": 8.333806414987226, "grad_norm": 2.704773426055908, "learning_rate": 9.167073516889016e-05, "loss": 0.06773853898048401, "step": 29360 }, { "epoch": 8.336644904910587, "grad_norm": 3.5711374282836914, "learning_rate": 9.16678966789668e-05, "loss": 0.05489592552185059, "step": 29370 }, { "epoch": 8.339483394833948, "grad_norm": 5.974013328552246, "learning_rate": 9.166505818904342e-05, "loss": 0.05106773376464844, "step": 29380 }, { "epoch": 8.34232188475731, "grad_norm": 20.108932495117188, "learning_rate": 9.166221969912008e-05, "loss": 0.0673336923122406, "step": 29390 }, { "epoch": 8.34516037468067, "grad_norm": 18.985143661499023, "learning_rate": 9.165938120919672e-05, "loss": 0.05588265657424927, "step": 29400 }, { "epoch": 8.347998864604032, "grad_norm": 13.223711967468262, "learning_rate": 9.165654271927335e-05, "loss": 0.07614171504974365, "step": 29410 }, { "epoch": 8.35083735452739, "grad_norm": 4.557998180389404, "learning_rate": 9.165370422934999e-05, "loss": 0.04518270790576935, "step": 29420 }, { "epoch": 8.353675844450752, "grad_norm": 7.949966907501221, "learning_rate": 9.165086573942663e-05, "loss": 0.049933457374572755, "step": 29430 }, { "epoch": 8.356514334374113, "grad_norm": 7.423627853393555, "learning_rate": 9.164802724950326e-05, "loss": 0.07519654631614685, "step": 29440 }, { "epoch": 8.359352824297474, "grad_norm": 1.7148159742355347, "learning_rate": 9.16451887595799e-05, "loss": 0.05791993141174316, "step": 29450 }, { "epoch": 8.362191314220835, "grad_norm": 16.687705993652344, "learning_rate": 9.164235026965656e-05, "loss": 0.07638731598854065, "step": 29460 }, { "epoch": 8.365029804144195, "grad_norm": 5.448736190795898, "learning_rate": 9.163951177973318e-05, "loss": 0.058822745084762575, "step": 29470 }, { "epoch": 8.367868294067556, "grad_norm": 16.446035385131836, "learning_rate": 9.163667328980982e-05, "loss": 0.08842889070510865, "step": 29480 }, { "epoch": 8.370706783990917, "grad_norm": 17.496366500854492, "learning_rate": 9.163383479988647e-05, "loss": 0.06101993918418884, "step": 29490 }, { "epoch": 8.373545273914278, "grad_norm": 14.626163482666016, "learning_rate": 9.163099630996311e-05, "loss": 0.06540396213531494, "step": 29500 }, { "epoch": 8.373545273914278, "eval_accuracy": 0.938894894131112, "eval_loss": 0.1882154941558838, "eval_runtime": 31.3142, "eval_samples_per_second": 502.232, "eval_steps_per_second": 7.856, "step": 29500 }, { "epoch": 8.376383763837639, "grad_norm": 13.651522636413574, "learning_rate": 9.162815782003974e-05, "loss": 0.07866971492767334, "step": 29510 }, { "epoch": 8.379222253760998, "grad_norm": 10.000460624694824, "learning_rate": 9.162531933011638e-05, "loss": 0.06670709252357483, "step": 29520 }, { "epoch": 8.38206074368436, "grad_norm": 14.086359977722168, "learning_rate": 9.162248084019303e-05, "loss": 0.07480021119117737, "step": 29530 }, { "epoch": 8.38489923360772, "grad_norm": 4.935381889343262, "learning_rate": 9.161964235026966e-05, "loss": 0.06456262469291688, "step": 29540 }, { "epoch": 8.387737723531082, "grad_norm": 3.3612170219421387, "learning_rate": 9.16168038603463e-05, "loss": 0.05752266645431518, "step": 29550 }, { "epoch": 8.390576213454443, "grad_norm": 8.120400428771973, "learning_rate": 9.161396537042294e-05, "loss": 0.05767455101013184, "step": 29560 }, { "epoch": 8.393414703377802, "grad_norm": 6.8282880783081055, "learning_rate": 9.161112688049957e-05, "loss": 0.08247737884521485, "step": 29570 }, { "epoch": 8.396253193301163, "grad_norm": 17.539871215820312, "learning_rate": 9.160828839057621e-05, "loss": 0.05251510143280029, "step": 29580 }, { "epoch": 8.399091683224524, "grad_norm": 4.805350303649902, "learning_rate": 9.160544990065287e-05, "loss": 0.0473786473274231, "step": 29590 }, { "epoch": 8.401930173147885, "grad_norm": 10.188152313232422, "learning_rate": 9.16026114107295e-05, "loss": 0.08898124694824219, "step": 29600 }, { "epoch": 8.404768663071247, "grad_norm": 15.48466682434082, "learning_rate": 9.159977292080614e-05, "loss": 0.06543077230453491, "step": 29610 }, { "epoch": 8.407607152994608, "grad_norm": 12.283458709716797, "learning_rate": 9.159693443088278e-05, "loss": 0.09538312554359436, "step": 29620 }, { "epoch": 8.410445642917967, "grad_norm": 13.443482398986816, "learning_rate": 9.159409594095942e-05, "loss": 0.06765753626823426, "step": 29630 }, { "epoch": 8.413284132841328, "grad_norm": 11.031046867370605, "learning_rate": 9.159125745103605e-05, "loss": 0.08531753420829773, "step": 29640 }, { "epoch": 8.41612262276469, "grad_norm": 8.386163711547852, "learning_rate": 9.158841896111269e-05, "loss": 0.07212099432945251, "step": 29650 }, { "epoch": 8.41896111268805, "grad_norm": 5.738980293273926, "learning_rate": 9.158558047118934e-05, "loss": 0.07749001383781433, "step": 29660 }, { "epoch": 8.421799602611411, "grad_norm": 14.240184783935547, "learning_rate": 9.158274198126597e-05, "loss": 0.07213364839553833, "step": 29670 }, { "epoch": 8.42463809253477, "grad_norm": 10.006685256958008, "learning_rate": 9.157990349134261e-05, "loss": 0.06452087163925171, "step": 29680 }, { "epoch": 8.427476582458132, "grad_norm": 6.8914055824279785, "learning_rate": 9.157706500141925e-05, "loss": 0.04866733551025391, "step": 29690 }, { "epoch": 8.430315072381493, "grad_norm": 7.061382293701172, "learning_rate": 9.157422651149588e-05, "loss": 0.05150160789489746, "step": 29700 }, { "epoch": 8.433153562304854, "grad_norm": 14.592506408691406, "learning_rate": 9.157138802157252e-05, "loss": 0.0820405900478363, "step": 29710 }, { "epoch": 8.435992052228215, "grad_norm": 16.972883224487305, "learning_rate": 9.156854953164916e-05, "loss": 0.08067867755889893, "step": 29720 }, { "epoch": 8.438830542151575, "grad_norm": 6.584667205810547, "learning_rate": 9.15657110417258e-05, "loss": 0.07627401351928711, "step": 29730 }, { "epoch": 8.441669032074936, "grad_norm": 3.4748692512512207, "learning_rate": 9.156287255180245e-05, "loss": 0.05884630680084228, "step": 29740 }, { "epoch": 8.444507521998297, "grad_norm": 10.529985427856445, "learning_rate": 9.156003406187909e-05, "loss": 0.06511694788932801, "step": 29750 }, { "epoch": 8.447346011921658, "grad_norm": 7.230268955230713, "learning_rate": 9.155719557195573e-05, "loss": 0.06866088509559631, "step": 29760 }, { "epoch": 8.450184501845019, "grad_norm": 10.624114990234375, "learning_rate": 9.155435708203236e-05, "loss": 0.07058738470077515, "step": 29770 }, { "epoch": 8.453022991768378, "grad_norm": 6.953898906707764, "learning_rate": 9.1551518592109e-05, "loss": 0.057642459869384766, "step": 29780 }, { "epoch": 8.45586148169174, "grad_norm": 6.2965312004089355, "learning_rate": 9.154868010218564e-05, "loss": 0.07264969348907471, "step": 29790 }, { "epoch": 8.4586999716151, "grad_norm": 10.869126319885254, "learning_rate": 9.154584161226228e-05, "loss": 0.07255736589431763, "step": 29800 }, { "epoch": 8.461538461538462, "grad_norm": 6.415733337402344, "learning_rate": 9.154300312233892e-05, "loss": 0.04898870587348938, "step": 29810 }, { "epoch": 8.464376951461823, "grad_norm": 7.364101886749268, "learning_rate": 9.154016463241556e-05, "loss": 0.06439247727394104, "step": 29820 }, { "epoch": 8.467215441385184, "grad_norm": 6.468971252441406, "learning_rate": 9.153732614249219e-05, "loss": 0.06732615828514099, "step": 29830 }, { "epoch": 8.470053931308543, "grad_norm": 3.9857325553894043, "learning_rate": 9.153448765256883e-05, "loss": 0.06769530177116394, "step": 29840 }, { "epoch": 8.472892421231904, "grad_norm": 15.828653335571289, "learning_rate": 9.153164916264547e-05, "loss": 0.05919771790504456, "step": 29850 }, { "epoch": 8.475730911155265, "grad_norm": 17.331520080566406, "learning_rate": 9.152881067272212e-05, "loss": 0.08205833435058593, "step": 29860 }, { "epoch": 8.478569401078627, "grad_norm": 17.98587417602539, "learning_rate": 9.152597218279876e-05, "loss": 0.0660819411277771, "step": 29870 }, { "epoch": 8.481407891001988, "grad_norm": 3.2231264114379883, "learning_rate": 9.15231336928754e-05, "loss": 0.05351273417472839, "step": 29880 }, { "epoch": 8.484246380925347, "grad_norm": 4.374488353729248, "learning_rate": 9.152029520295204e-05, "loss": 0.06690125465393067, "step": 29890 }, { "epoch": 8.487084870848708, "grad_norm": 20.5726375579834, "learning_rate": 9.151745671302867e-05, "loss": 0.10268547534942626, "step": 29900 }, { "epoch": 8.48992336077207, "grad_norm": 9.212912559509277, "learning_rate": 9.151461822310531e-05, "loss": 0.07267470359802246, "step": 29910 }, { "epoch": 8.49276185069543, "grad_norm": 9.972260475158691, "learning_rate": 9.151177973318195e-05, "loss": 0.06243507862091065, "step": 29920 }, { "epoch": 8.495600340618791, "grad_norm": 9.37176513671875, "learning_rate": 9.150894124325859e-05, "loss": 0.07114928960800171, "step": 29930 }, { "epoch": 8.49843883054215, "grad_norm": 1.9846758842468262, "learning_rate": 9.150610275333523e-05, "loss": 0.06344807744026185, "step": 29940 }, { "epoch": 8.501277320465512, "grad_norm": 4.739149570465088, "learning_rate": 9.150326426341187e-05, "loss": 0.0565528929233551, "step": 29950 }, { "epoch": 8.504115810388873, "grad_norm": 6.5317254066467285, "learning_rate": 9.15004257734885e-05, "loss": 0.07424017786979675, "step": 29960 }, { "epoch": 8.506954300312234, "grad_norm": 9.611992835998535, "learning_rate": 9.149758728356514e-05, "loss": 0.08838900327682495, "step": 29970 }, { "epoch": 8.509792790235595, "grad_norm": 8.434793472290039, "learning_rate": 9.149474879364179e-05, "loss": 0.07297046780586243, "step": 29980 }, { "epoch": 8.512631280158956, "grad_norm": 3.4695658683776855, "learning_rate": 9.149191030371843e-05, "loss": 0.04186428189277649, "step": 29990 }, { "epoch": 8.515469770082316, "grad_norm": 9.727413177490234, "learning_rate": 9.148907181379507e-05, "loss": 0.05960800051689148, "step": 30000 }, { "epoch": 8.515469770082316, "eval_accuracy": 0.9387041393781395, "eval_loss": 0.189566969871521, "eval_runtime": 31.0727, "eval_samples_per_second": 506.135, "eval_steps_per_second": 7.917, "step": 30000 }, { "epoch": 8.518308260005677, "grad_norm": 6.594583511352539, "learning_rate": 9.148623332387171e-05, "loss": 0.058769834041595456, "step": 30010 }, { "epoch": 8.521146749929038, "grad_norm": 12.190415382385254, "learning_rate": 9.148339483394834e-05, "loss": 0.06734001636505127, "step": 30020 }, { "epoch": 8.523985239852399, "grad_norm": 9.373311042785645, "learning_rate": 9.148055634402498e-05, "loss": 0.065748530626297, "step": 30030 }, { "epoch": 8.52682372977576, "grad_norm": 3.8623650074005127, "learning_rate": 9.147771785410162e-05, "loss": 0.06235647201538086, "step": 30040 }, { "epoch": 8.52966221969912, "grad_norm": 6.599777698516846, "learning_rate": 9.147487936417826e-05, "loss": 0.04081343710422516, "step": 30050 }, { "epoch": 8.53250070962248, "grad_norm": 6.091607570648193, "learning_rate": 9.14720408742549e-05, "loss": 0.08156938552856445, "step": 30060 }, { "epoch": 8.535339199545842, "grad_norm": 10.18403434753418, "learning_rate": 9.146920238433154e-05, "loss": 0.053347927331924436, "step": 30070 }, { "epoch": 8.538177689469203, "grad_norm": 3.583003282546997, "learning_rate": 9.146636389440819e-05, "loss": 0.051168018579483034, "step": 30080 }, { "epoch": 8.541016179392564, "grad_norm": 5.291472911834717, "learning_rate": 9.146352540448481e-05, "loss": 0.05486744046211243, "step": 30090 }, { "epoch": 8.543854669315923, "grad_norm": 14.551093101501465, "learning_rate": 9.146068691456145e-05, "loss": 0.0818892240524292, "step": 30100 }, { "epoch": 8.546693159239284, "grad_norm": 7.438065528869629, "learning_rate": 9.14578484246381e-05, "loss": 0.053384286165237424, "step": 30110 }, { "epoch": 8.549531649162645, "grad_norm": 10.797991752624512, "learning_rate": 9.145500993471474e-05, "loss": 0.06102468371391297, "step": 30120 }, { "epoch": 8.552370139086007, "grad_norm": 9.833860397338867, "learning_rate": 9.145217144479138e-05, "loss": 0.0709060251712799, "step": 30130 }, { "epoch": 8.555208629009368, "grad_norm": 9.727563858032227, "learning_rate": 9.144933295486802e-05, "loss": 0.056763797998428345, "step": 30140 }, { "epoch": 8.558047118932727, "grad_norm": 12.185494422912598, "learning_rate": 9.144649446494465e-05, "loss": 0.057816308736801145, "step": 30150 }, { "epoch": 8.560885608856088, "grad_norm": 11.085196495056152, "learning_rate": 9.144365597502129e-05, "loss": 0.08229559659957886, "step": 30160 }, { "epoch": 8.56372409877945, "grad_norm": 13.693253517150879, "learning_rate": 9.144081748509793e-05, "loss": 0.08204988241195679, "step": 30170 }, { "epoch": 8.56656258870281, "grad_norm": 10.15668773651123, "learning_rate": 9.143797899517457e-05, "loss": 0.06671963930130005, "step": 30180 }, { "epoch": 8.569401078626171, "grad_norm": 8.673599243164062, "learning_rate": 9.143514050525121e-05, "loss": 0.04791229963302612, "step": 30190 }, { "epoch": 8.57223956854953, "grad_norm": 9.761473655700684, "learning_rate": 9.143230201532785e-05, "loss": 0.06229044198989868, "step": 30200 }, { "epoch": 8.575078058472892, "grad_norm": 6.504861831665039, "learning_rate": 9.14294635254045e-05, "loss": 0.07771341204643249, "step": 30210 }, { "epoch": 8.577916548396253, "grad_norm": 14.052194595336914, "learning_rate": 9.142662503548112e-05, "loss": 0.0685269832611084, "step": 30220 }, { "epoch": 8.580755038319614, "grad_norm": 9.634164810180664, "learning_rate": 9.142378654555777e-05, "loss": 0.09259766340255737, "step": 30230 }, { "epoch": 8.583593528242975, "grad_norm": 11.475071907043457, "learning_rate": 9.142094805563441e-05, "loss": 0.07257603406906128, "step": 30240 }, { "epoch": 8.586432018166336, "grad_norm": 5.292922496795654, "learning_rate": 9.141810956571103e-05, "loss": 0.07653214931488037, "step": 30250 }, { "epoch": 8.589270508089696, "grad_norm": 9.08526611328125, "learning_rate": 9.141527107578769e-05, "loss": 0.08168586492538452, "step": 30260 }, { "epoch": 8.592108998013057, "grad_norm": 5.779264450073242, "learning_rate": 9.141243258586433e-05, "loss": 0.05767713785171509, "step": 30270 }, { "epoch": 8.594947487936418, "grad_norm": 5.022219181060791, "learning_rate": 9.140959409594096e-05, "loss": 0.05964950919151306, "step": 30280 }, { "epoch": 8.597785977859779, "grad_norm": 5.085413455963135, "learning_rate": 9.14067556060176e-05, "loss": 0.04928558766841888, "step": 30290 }, { "epoch": 8.60062446778314, "grad_norm": 13.999133110046387, "learning_rate": 9.140391711609424e-05, "loss": 0.07592015266418457, "step": 30300 }, { "epoch": 8.6034629577065, "grad_norm": 10.314279556274414, "learning_rate": 9.140107862617088e-05, "loss": 0.08794921636581421, "step": 30310 }, { "epoch": 8.60630144762986, "grad_norm": 5.452056884765625, "learning_rate": 9.139824013624752e-05, "loss": 0.06838034391403199, "step": 30320 }, { "epoch": 8.609139937553222, "grad_norm": 12.394155502319336, "learning_rate": 9.139540164632417e-05, "loss": 0.05805119276046753, "step": 30330 }, { "epoch": 8.611978427476583, "grad_norm": 3.0079991817474365, "learning_rate": 9.139256315640081e-05, "loss": 0.057649272680282596, "step": 30340 }, { "epoch": 8.614816917399944, "grad_norm": 3.536961555480957, "learning_rate": 9.138972466647743e-05, "loss": 0.04464294612407684, "step": 30350 }, { "epoch": 8.617655407323305, "grad_norm": 12.767498970031738, "learning_rate": 9.138688617655408e-05, "loss": 0.07630480527877807, "step": 30360 }, { "epoch": 8.620493897246664, "grad_norm": 4.636853218078613, "learning_rate": 9.138404768663072e-05, "loss": 0.061662924289703366, "step": 30370 }, { "epoch": 8.623332387170025, "grad_norm": 11.533599853515625, "learning_rate": 9.138120919670735e-05, "loss": 0.057594358921051025, "step": 30380 }, { "epoch": 8.626170877093386, "grad_norm": 3.9659109115600586, "learning_rate": 9.1378370706784e-05, "loss": 0.034635528922080994, "step": 30390 }, { "epoch": 8.629009367016748, "grad_norm": 5.484599590301514, "learning_rate": 9.137553221686064e-05, "loss": 0.05701947212219238, "step": 30400 }, { "epoch": 8.631847856940109, "grad_norm": 11.841418266296387, "learning_rate": 9.137269372693727e-05, "loss": 0.05404433608055115, "step": 30410 }, { "epoch": 8.634686346863468, "grad_norm": 4.232126235961914, "learning_rate": 9.136985523701391e-05, "loss": 0.08561909198760986, "step": 30420 }, { "epoch": 8.63752483678683, "grad_norm": 4.9084014892578125, "learning_rate": 9.136701674709055e-05, "loss": 0.052912741899490356, "step": 30430 }, { "epoch": 8.64036332671019, "grad_norm": 2.036550760269165, "learning_rate": 9.13641782571672e-05, "loss": 0.07551093697547913, "step": 30440 }, { "epoch": 8.643201816633551, "grad_norm": 12.768614768981934, "learning_rate": 9.136133976724382e-05, "loss": 0.0871202826499939, "step": 30450 }, { "epoch": 8.646040306556912, "grad_norm": 10.173173904418945, "learning_rate": 9.135850127732048e-05, "loss": 0.0725557565689087, "step": 30460 }, { "epoch": 8.648878796480272, "grad_norm": 4.290877819061279, "learning_rate": 9.135566278739712e-05, "loss": 0.03578062057495117, "step": 30470 }, { "epoch": 8.651717286403633, "grad_norm": 10.177022933959961, "learning_rate": 9.135282429747375e-05, "loss": 0.07076327204704284, "step": 30480 }, { "epoch": 8.654555776326994, "grad_norm": 11.57591438293457, "learning_rate": 9.134998580755039e-05, "loss": 0.08516298532485962, "step": 30490 }, { "epoch": 8.657394266250355, "grad_norm": 6.045260906219482, "learning_rate": 9.134714731762703e-05, "loss": 0.06630693674087525, "step": 30500 }, { "epoch": 8.657394266250355, "eval_accuracy": 0.9411839511667832, "eval_loss": 0.1725481003522873, "eval_runtime": 31.1701, "eval_samples_per_second": 504.554, "eval_steps_per_second": 7.892, "step": 30500 }, { "epoch": 8.660232756173716, "grad_norm": 5.8542022705078125, "learning_rate": 9.134430882770366e-05, "loss": 0.07063242793083191, "step": 30510 }, { "epoch": 8.663071246097076, "grad_norm": 6.192205429077148, "learning_rate": 9.134147033778031e-05, "loss": 0.07235486507415771, "step": 30520 }, { "epoch": 8.665909736020437, "grad_norm": 7.132839679718018, "learning_rate": 9.133863184785695e-05, "loss": 0.07178015112876893, "step": 30530 }, { "epoch": 8.668748225943798, "grad_norm": 5.959991455078125, "learning_rate": 9.133579335793358e-05, "loss": 0.05568063259124756, "step": 30540 }, { "epoch": 8.671586715867159, "grad_norm": 10.190081596374512, "learning_rate": 9.133295486801022e-05, "loss": 0.06881506443023681, "step": 30550 }, { "epoch": 8.67442520579052, "grad_norm": 11.966773986816406, "learning_rate": 9.133011637808686e-05, "loss": 0.05807017683982849, "step": 30560 }, { "epoch": 8.67726369571388, "grad_norm": 8.806239128112793, "learning_rate": 9.13272778881635e-05, "loss": 0.0771269977092743, "step": 30570 }, { "epoch": 8.68010218563724, "grad_norm": 1.1770923137664795, "learning_rate": 9.132443939824013e-05, "loss": 0.03302620351314545, "step": 30580 }, { "epoch": 8.682940675560602, "grad_norm": 16.780710220336914, "learning_rate": 9.132160090831679e-05, "loss": 0.08350026607513428, "step": 30590 }, { "epoch": 8.685779165483963, "grad_norm": 6.750619888305664, "learning_rate": 9.131876241839343e-05, "loss": 0.09315245151519776, "step": 30600 }, { "epoch": 8.688617655407324, "grad_norm": 5.321597576141357, "learning_rate": 9.131592392847006e-05, "loss": 0.06254659295082092, "step": 30610 }, { "epoch": 8.691456145330685, "grad_norm": 6.383153915405273, "learning_rate": 9.13130854385467e-05, "loss": 0.07338873147964478, "step": 30620 }, { "epoch": 8.694294635254044, "grad_norm": 13.765811920166016, "learning_rate": 9.131024694862334e-05, "loss": 0.05769875645637512, "step": 30630 }, { "epoch": 8.697133125177405, "grad_norm": 8.510196685791016, "learning_rate": 9.130740845869997e-05, "loss": 0.05774034261703491, "step": 30640 }, { "epoch": 8.699971615100766, "grad_norm": 9.130453109741211, "learning_rate": 9.130456996877661e-05, "loss": 0.056436973810195926, "step": 30650 }, { "epoch": 8.702810105024128, "grad_norm": 11.908205032348633, "learning_rate": 9.130173147885326e-05, "loss": 0.0650800883769989, "step": 30660 }, { "epoch": 8.705648594947489, "grad_norm": 8.894450187683105, "learning_rate": 9.129889298892989e-05, "loss": 0.050325775146484376, "step": 30670 }, { "epoch": 8.708487084870848, "grad_norm": 4.6478447914123535, "learning_rate": 9.129605449900653e-05, "loss": 0.06203482151031494, "step": 30680 }, { "epoch": 8.711325574794209, "grad_norm": 12.28982162475586, "learning_rate": 9.129321600908317e-05, "loss": 0.06148065328598022, "step": 30690 }, { "epoch": 8.71416406471757, "grad_norm": 8.56557559967041, "learning_rate": 9.129037751915982e-05, "loss": 0.048805588483810426, "step": 30700 }, { "epoch": 8.717002554640931, "grad_norm": 11.844701766967773, "learning_rate": 9.128753902923644e-05, "loss": 0.07599793076515197, "step": 30710 }, { "epoch": 8.719841044564292, "grad_norm": 9.818061828613281, "learning_rate": 9.12847005393131e-05, "loss": 0.09255142211914062, "step": 30720 }, { "epoch": 8.722679534487652, "grad_norm": 8.864899635314941, "learning_rate": 9.128186204938973e-05, "loss": 0.05062950253486633, "step": 30730 }, { "epoch": 8.725518024411013, "grad_norm": 4.842836380004883, "learning_rate": 9.127902355946637e-05, "loss": 0.05957203507423401, "step": 30740 }, { "epoch": 8.728356514334374, "grad_norm": 10.169377326965332, "learning_rate": 9.127618506954301e-05, "loss": 0.07847896218299866, "step": 30750 }, { "epoch": 8.731195004257735, "grad_norm": 10.235345840454102, "learning_rate": 9.127334657961965e-05, "loss": 0.07809728384017944, "step": 30760 }, { "epoch": 8.734033494181096, "grad_norm": 14.762968063354492, "learning_rate": 9.127050808969628e-05, "loss": 0.07696836590766906, "step": 30770 }, { "epoch": 8.736871984104457, "grad_norm": 6.025543689727783, "learning_rate": 9.126766959977292e-05, "loss": 0.06058669090270996, "step": 30780 }, { "epoch": 8.739710474027817, "grad_norm": 11.903536796569824, "learning_rate": 9.126483110984957e-05, "loss": 0.07239351868629455, "step": 30790 }, { "epoch": 8.742548963951178, "grad_norm": 12.460436820983887, "learning_rate": 9.12619926199262e-05, "loss": 0.07071493268013, "step": 30800 }, { "epoch": 8.745387453874539, "grad_norm": 5.232187271118164, "learning_rate": 9.125915413000284e-05, "loss": 0.05923362970352173, "step": 30810 }, { "epoch": 8.7482259437979, "grad_norm": 14.459244728088379, "learning_rate": 9.125631564007948e-05, "loss": 0.06732058525085449, "step": 30820 }, { "epoch": 8.751064433721261, "grad_norm": 3.6833598613739014, "learning_rate": 9.125347715015613e-05, "loss": 0.058967185020446775, "step": 30830 }, { "epoch": 8.75390292364462, "grad_norm": 10.895611763000488, "learning_rate": 9.125063866023275e-05, "loss": 0.07326056957244872, "step": 30840 }, { "epoch": 8.756741413567982, "grad_norm": 11.13486099243164, "learning_rate": 9.12478001703094e-05, "loss": 0.06982390880584717, "step": 30850 }, { "epoch": 8.759579903491343, "grad_norm": 14.711895942687988, "learning_rate": 9.124496168038604e-05, "loss": 0.05212066173553467, "step": 30860 }, { "epoch": 8.762418393414704, "grad_norm": 17.846214294433594, "learning_rate": 9.124212319046268e-05, "loss": 0.08571950197219849, "step": 30870 }, { "epoch": 8.765256883338065, "grad_norm": 12.61149787902832, "learning_rate": 9.123928470053932e-05, "loss": 0.0673592209815979, "step": 30880 }, { "epoch": 8.768095373261424, "grad_norm": 5.6938605308532715, "learning_rate": 9.123644621061596e-05, "loss": 0.06559733152389527, "step": 30890 }, { "epoch": 8.770933863184785, "grad_norm": 6.9619927406311035, "learning_rate": 9.123360772069259e-05, "loss": 0.0866067111492157, "step": 30900 }, { "epoch": 8.773772353108146, "grad_norm": 8.645709037780762, "learning_rate": 9.123076923076923e-05, "loss": 0.08687164783477783, "step": 30910 }, { "epoch": 8.776610843031508, "grad_norm": 5.143220901489258, "learning_rate": 9.122793074084588e-05, "loss": 0.057608401775360106, "step": 30920 }, { "epoch": 8.779449332954869, "grad_norm": 14.712035179138184, "learning_rate": 9.122509225092251e-05, "loss": 0.06908709406852723, "step": 30930 }, { "epoch": 8.782287822878228, "grad_norm": 13.615184783935547, "learning_rate": 9.122225376099915e-05, "loss": 0.04762253761291504, "step": 30940 }, { "epoch": 8.785126312801589, "grad_norm": 2.48268723487854, "learning_rate": 9.12194152710758e-05, "loss": 0.0639740526676178, "step": 30950 }, { "epoch": 8.78796480272495, "grad_norm": 19.646148681640625, "learning_rate": 9.121657678115242e-05, "loss": 0.07524076700210572, "step": 30960 }, { "epoch": 8.790803292648311, "grad_norm": 5.18089485168457, "learning_rate": 9.121373829122906e-05, "loss": 0.08438354134559631, "step": 30970 }, { "epoch": 8.793641782571672, "grad_norm": 5.1031060218811035, "learning_rate": 9.12108998013057e-05, "loss": 0.05910661220550537, "step": 30980 }, { "epoch": 8.796480272495032, "grad_norm": 3.8738019466400146, "learning_rate": 9.120806131138235e-05, "loss": 0.07147717475891113, "step": 30990 }, { "epoch": 8.799318762418393, "grad_norm": 5.0825042724609375, "learning_rate": 9.120522282145899e-05, "loss": 0.07504554390907288, "step": 31000 }, { "epoch": 8.799318762418393, "eval_accuracy": 0.9465886691676735, "eval_loss": 0.15738148987293243, "eval_runtime": 31.6307, "eval_samples_per_second": 497.206, "eval_steps_per_second": 7.777, "step": 31000 }, { "epoch": 8.802157252341754, "grad_norm": 10.514293670654297, "learning_rate": 9.120238433153563e-05, "loss": 0.06329176425933838, "step": 31010 }, { "epoch": 8.804995742265115, "grad_norm": 9.732636451721191, "learning_rate": 9.119954584161227e-05, "loss": 0.06165165901184082, "step": 31020 }, { "epoch": 8.807834232188476, "grad_norm": 10.218039512634277, "learning_rate": 9.11967073516889e-05, "loss": 0.07378641963005066, "step": 31030 }, { "epoch": 8.810672722111837, "grad_norm": 20.488651275634766, "learning_rate": 9.119386886176554e-05, "loss": 0.07996610403060914, "step": 31040 }, { "epoch": 8.813511212035197, "grad_norm": 10.727422714233398, "learning_rate": 9.119103037184218e-05, "loss": 0.07219599485397339, "step": 31050 }, { "epoch": 8.816349701958558, "grad_norm": 10.90174388885498, "learning_rate": 9.118819188191882e-05, "loss": 0.06459282040596008, "step": 31060 }, { "epoch": 8.819188191881919, "grad_norm": 9.382819175720215, "learning_rate": 9.118535339199546e-05, "loss": 0.07012121677398682, "step": 31070 }, { "epoch": 8.82202668180528, "grad_norm": 4.837339401245117, "learning_rate": 9.11825149020721e-05, "loss": 0.05027868151664734, "step": 31080 }, { "epoch": 8.824865171728641, "grad_norm": 4.266017913818359, "learning_rate": 9.117967641214873e-05, "loss": 0.04700014591217041, "step": 31090 }, { "epoch": 8.827703661652, "grad_norm": 10.60668659210205, "learning_rate": 9.117683792222538e-05, "loss": 0.061681056022644044, "step": 31100 }, { "epoch": 8.830542151575361, "grad_norm": 4.687035083770752, "learning_rate": 9.117399943230202e-05, "loss": 0.08623862266540527, "step": 31110 }, { "epoch": 8.833380641498723, "grad_norm": 9.520359992980957, "learning_rate": 9.117116094237866e-05, "loss": 0.048452964425086974, "step": 31120 }, { "epoch": 8.836219131422084, "grad_norm": 7.0932841300964355, "learning_rate": 9.11683224524553e-05, "loss": 0.062233710289001466, "step": 31130 }, { "epoch": 8.839057621345445, "grad_norm": 6.89607048034668, "learning_rate": 9.116548396253194e-05, "loss": 0.06079059839248657, "step": 31140 }, { "epoch": 8.841896111268806, "grad_norm": 10.693526268005371, "learning_rate": 9.116264547260858e-05, "loss": 0.05661168098449707, "step": 31150 }, { "epoch": 8.844734601192165, "grad_norm": 12.32718563079834, "learning_rate": 9.115980698268521e-05, "loss": 0.043204781413078305, "step": 31160 }, { "epoch": 8.847573091115526, "grad_norm": 4.099190711975098, "learning_rate": 9.115696849276185e-05, "loss": 0.06582169532775879, "step": 31170 }, { "epoch": 8.850411581038887, "grad_norm": 10.13891887664795, "learning_rate": 9.115413000283849e-05, "loss": 0.09330734014511108, "step": 31180 }, { "epoch": 8.853250070962249, "grad_norm": 4.434497356414795, "learning_rate": 9.115129151291513e-05, "loss": 0.05179306268692017, "step": 31190 }, { "epoch": 8.85608856088561, "grad_norm": 7.782890796661377, "learning_rate": 9.114845302299178e-05, "loss": 0.06201858520507812, "step": 31200 }, { "epoch": 8.858927050808969, "grad_norm": 11.122188568115234, "learning_rate": 9.114561453306842e-05, "loss": 0.05274918079376221, "step": 31210 }, { "epoch": 8.86176554073233, "grad_norm": 9.690204620361328, "learning_rate": 9.114277604314504e-05, "loss": 0.06978282928466797, "step": 31220 }, { "epoch": 8.864604030655691, "grad_norm": 14.059823036193848, "learning_rate": 9.113993755322169e-05, "loss": 0.053569084405899046, "step": 31230 }, { "epoch": 8.867442520579052, "grad_norm": 9.883621215820312, "learning_rate": 9.113709906329833e-05, "loss": 0.08523458242416382, "step": 31240 }, { "epoch": 8.870281010502413, "grad_norm": 9.064912796020508, "learning_rate": 9.113426057337497e-05, "loss": 0.054639339447021484, "step": 31250 }, { "epoch": 8.873119500425773, "grad_norm": 17.543582916259766, "learning_rate": 9.113142208345161e-05, "loss": 0.05767755508422852, "step": 31260 }, { "epoch": 8.875957990349134, "grad_norm": 12.58343505859375, "learning_rate": 9.112858359352825e-05, "loss": 0.05623639822006225, "step": 31270 }, { "epoch": 8.878796480272495, "grad_norm": 14.024517059326172, "learning_rate": 9.112574510360489e-05, "loss": 0.08230687975883484, "step": 31280 }, { "epoch": 8.881634970195856, "grad_norm": 14.335714340209961, "learning_rate": 9.112290661368152e-05, "loss": 0.07999399900436402, "step": 31290 }, { "epoch": 8.884473460119217, "grad_norm": 6.547296047210693, "learning_rate": 9.112006812375816e-05, "loss": 0.07681872844696044, "step": 31300 }, { "epoch": 8.887311950042577, "grad_norm": 12.506217002868652, "learning_rate": 9.11172296338348e-05, "loss": 0.06401631832122803, "step": 31310 }, { "epoch": 8.890150439965938, "grad_norm": 3.479335308074951, "learning_rate": 9.111439114391144e-05, "loss": 0.06286961436271668, "step": 31320 }, { "epoch": 8.892988929889299, "grad_norm": 9.801219940185547, "learning_rate": 9.111155265398809e-05, "loss": 0.08583763837814332, "step": 31330 }, { "epoch": 8.89582741981266, "grad_norm": 12.058196067810059, "learning_rate": 9.110871416406473e-05, "loss": 0.09592686891555786, "step": 31340 }, { "epoch": 8.898665909736021, "grad_norm": 11.869256019592285, "learning_rate": 9.110587567414136e-05, "loss": 0.07629110813140869, "step": 31350 }, { "epoch": 8.90150439965938, "grad_norm": 1.2793084383010864, "learning_rate": 9.1103037184218e-05, "loss": 0.059134817123413085, "step": 31360 }, { "epoch": 8.904342889582741, "grad_norm": 14.228399276733398, "learning_rate": 9.110019869429464e-05, "loss": 0.08088141679763794, "step": 31370 }, { "epoch": 8.907181379506103, "grad_norm": 17.807186126708984, "learning_rate": 9.109736020437128e-05, "loss": 0.06523163318634033, "step": 31380 }, { "epoch": 8.910019869429464, "grad_norm": 13.873000144958496, "learning_rate": 9.109452171444792e-05, "loss": 0.07417306303977966, "step": 31390 }, { "epoch": 8.912858359352825, "grad_norm": 8.632597923278809, "learning_rate": 9.109168322452456e-05, "loss": 0.06533147096633911, "step": 31400 }, { "epoch": 8.915696849276186, "grad_norm": 8.661585807800293, "learning_rate": 9.10888447346012e-05, "loss": 0.07017588019371032, "step": 31410 }, { "epoch": 8.918535339199545, "grad_norm": 14.436474800109863, "learning_rate": 9.108600624467783e-05, "loss": 0.07135186791419983, "step": 31420 }, { "epoch": 8.921373829122906, "grad_norm": 5.942589282989502, "learning_rate": 9.108316775475447e-05, "loss": 0.07056043148040772, "step": 31430 }, { "epoch": 8.924212319046267, "grad_norm": 8.080760955810547, "learning_rate": 9.108032926483111e-05, "loss": 0.04907607734203338, "step": 31440 }, { "epoch": 8.927050808969629, "grad_norm": 8.866616249084473, "learning_rate": 9.107749077490776e-05, "loss": 0.061613088846206664, "step": 31450 }, { "epoch": 8.92988929889299, "grad_norm": 4.63857889175415, "learning_rate": 9.10746522849844e-05, "loss": 0.06444783210754394, "step": 31460 }, { "epoch": 8.932727788816349, "grad_norm": 19.896678924560547, "learning_rate": 9.107181379506104e-05, "loss": 0.07473809123039246, "step": 31470 }, { "epoch": 8.93556627873971, "grad_norm": 7.841392993927002, "learning_rate": 9.106897530513767e-05, "loss": 0.053362196683883666, "step": 31480 }, { "epoch": 8.938404768663071, "grad_norm": 16.415979385375977, "learning_rate": 9.106613681521431e-05, "loss": 0.07507288455963135, "step": 31490 }, { "epoch": 8.941243258586432, "grad_norm": 6.388270378112793, "learning_rate": 9.106329832529095e-05, "loss": 0.07571802735328674, "step": 31500 }, { "epoch": 8.941243258586432, "eval_accuracy": 0.9469701786736187, "eval_loss": 0.1587066799402237, "eval_runtime": 31.7736, "eval_samples_per_second": 494.971, "eval_steps_per_second": 7.742, "step": 31500 }, { "epoch": 8.944081748509793, "grad_norm": 7.662382125854492, "learning_rate": 9.106045983536759e-05, "loss": 0.03651776909828186, "step": 31510 }, { "epoch": 8.946920238433153, "grad_norm": 14.817533493041992, "learning_rate": 9.105762134544423e-05, "loss": 0.07683525085449219, "step": 31520 }, { "epoch": 8.949758728356514, "grad_norm": 6.961455345153809, "learning_rate": 9.105478285552087e-05, "loss": 0.0702526330947876, "step": 31530 }, { "epoch": 8.952597218279875, "grad_norm": 11.14152717590332, "learning_rate": 9.105194436559751e-05, "loss": 0.06614710092544555, "step": 31540 }, { "epoch": 8.955435708203236, "grad_norm": 15.313032150268555, "learning_rate": 9.104910587567414e-05, "loss": 0.04896753430366516, "step": 31550 }, { "epoch": 8.958274198126597, "grad_norm": 7.498221397399902, "learning_rate": 9.104626738575078e-05, "loss": 0.04692138135433197, "step": 31560 }, { "epoch": 8.961112688049958, "grad_norm": 8.560870170593262, "learning_rate": 9.104342889582743e-05, "loss": 0.05697489380836487, "step": 31570 }, { "epoch": 8.963951177973318, "grad_norm": 5.503041744232178, "learning_rate": 9.104059040590405e-05, "loss": 0.08263733983039856, "step": 31580 }, { "epoch": 8.966789667896679, "grad_norm": 2.8306033611297607, "learning_rate": 9.103775191598071e-05, "loss": 0.061256974935531616, "step": 31590 }, { "epoch": 8.96962815782004, "grad_norm": 5.988136291503906, "learning_rate": 9.103491342605735e-05, "loss": 0.060560721158981326, "step": 31600 }, { "epoch": 8.972466647743401, "grad_norm": 3.7903780937194824, "learning_rate": 9.103207493613398e-05, "loss": 0.05420640707015991, "step": 31610 }, { "epoch": 8.975305137666762, "grad_norm": 3.0052037239074707, "learning_rate": 9.102923644621062e-05, "loss": 0.044920125603675844, "step": 31620 }, { "epoch": 8.978143627590121, "grad_norm": 4.551146030426025, "learning_rate": 9.102639795628726e-05, "loss": 0.06018396019935608, "step": 31630 }, { "epoch": 8.980982117513483, "grad_norm": 4.385807514190674, "learning_rate": 9.102384331535624e-05, "loss": 0.057430309057235715, "step": 31640 }, { "epoch": 8.983820607436844, "grad_norm": 9.01045036315918, "learning_rate": 9.102100482543288e-05, "loss": 0.07642671465873718, "step": 31650 }, { "epoch": 8.986659097360205, "grad_norm": 15.248885154724121, "learning_rate": 9.101816633550951e-05, "loss": 0.06820961236953735, "step": 31660 }, { "epoch": 8.989497587283566, "grad_norm": 6.19985818862915, "learning_rate": 9.101532784558615e-05, "loss": 0.04741819202899933, "step": 31670 }, { "epoch": 8.992336077206925, "grad_norm": 26.0809326171875, "learning_rate": 9.101248935566279e-05, "loss": 0.05679764747619629, "step": 31680 }, { "epoch": 8.995174567130286, "grad_norm": 13.292773246765137, "learning_rate": 9.100965086573943e-05, "loss": 0.06299090385437012, "step": 31690 }, { "epoch": 8.998013057053647, "grad_norm": 5.700288772583008, "learning_rate": 9.100681237581607e-05, "loss": 0.08798794746398926, "step": 31700 }, { "epoch": 9.000851546977009, "grad_norm": 9.686714172363281, "learning_rate": 9.100397388589272e-05, "loss": 0.04719998836517334, "step": 31710 }, { "epoch": 9.00369003690037, "grad_norm": 1.6230088472366333, "learning_rate": 9.100113539596934e-05, "loss": 0.0502514123916626, "step": 31720 }, { "epoch": 9.006528526823729, "grad_norm": 7.6409592628479, "learning_rate": 9.099829690604598e-05, "loss": 0.05189587473869324, "step": 31730 }, { "epoch": 9.00936701674709, "grad_norm": 14.42635726928711, "learning_rate": 9.099545841612263e-05, "loss": 0.06049224138259888, "step": 31740 }, { "epoch": 9.012205506670451, "grad_norm": 12.75652027130127, "learning_rate": 9.099261992619927e-05, "loss": 0.06303011775016784, "step": 31750 }, { "epoch": 9.015043996593812, "grad_norm": 12.73111343383789, "learning_rate": 9.09897814362759e-05, "loss": 0.05389610528945923, "step": 31760 }, { "epoch": 9.017882486517173, "grad_norm": 11.562397003173828, "learning_rate": 9.098694294635255e-05, "loss": 0.06323632597923279, "step": 31770 }, { "epoch": 9.020720976440534, "grad_norm": 8.386837005615234, "learning_rate": 9.098410445642919e-05, "loss": 0.03240850865840912, "step": 31780 }, { "epoch": 9.023559466363894, "grad_norm": 7.020870685577393, "learning_rate": 9.098126596650582e-05, "loss": 0.06059011220932007, "step": 31790 }, { "epoch": 9.026397956287255, "grad_norm": 12.727704048156738, "learning_rate": 9.097842747658246e-05, "loss": 0.035716494917869566, "step": 31800 }, { "epoch": 9.029236446210616, "grad_norm": 12.288534164428711, "learning_rate": 9.09755889866591e-05, "loss": 0.06254916787147521, "step": 31810 }, { "epoch": 9.032074936133977, "grad_norm": 7.2569403648376465, "learning_rate": 9.097275049673573e-05, "loss": 0.05126778483390808, "step": 31820 }, { "epoch": 9.034913426057338, "grad_norm": 8.454544067382812, "learning_rate": 9.096991200681239e-05, "loss": 0.042856672406196596, "step": 31830 }, { "epoch": 9.037751915980698, "grad_norm": 13.909622192382812, "learning_rate": 9.096707351688903e-05, "loss": 0.056902194023132326, "step": 31840 }, { "epoch": 9.040590405904059, "grad_norm": 11.164444923400879, "learning_rate": 9.096423502696565e-05, "loss": 0.05609207153320313, "step": 31850 }, { "epoch": 9.04342889582742, "grad_norm": 2.568427801132202, "learning_rate": 9.09613965370423e-05, "loss": 0.039089816808700564, "step": 31860 }, { "epoch": 9.046267385750781, "grad_norm": 7.224270820617676, "learning_rate": 9.095855804711894e-05, "loss": 0.050432974100112916, "step": 31870 }, { "epoch": 9.049105875674142, "grad_norm": 4.621863842010498, "learning_rate": 9.095571955719558e-05, "loss": 0.04566062092781067, "step": 31880 }, { "epoch": 9.051944365597501, "grad_norm": 4.847370147705078, "learning_rate": 9.09528810672722e-05, "loss": 0.03859710693359375, "step": 31890 }, { "epoch": 9.054782855520862, "grad_norm": 6.624361991882324, "learning_rate": 9.095004257734886e-05, "loss": 0.04116395115852356, "step": 31900 }, { "epoch": 9.057621345444224, "grad_norm": 13.439946174621582, "learning_rate": 9.09472040874255e-05, "loss": 0.04496969282627106, "step": 31910 }, { "epoch": 9.060459835367585, "grad_norm": 7.528670787811279, "learning_rate": 9.094436559750213e-05, "loss": 0.03843676149845123, "step": 31920 }, { "epoch": 9.063298325290946, "grad_norm": 8.01745319366455, "learning_rate": 9.094152710757877e-05, "loss": 0.059415769577026364, "step": 31930 }, { "epoch": 9.066136815214305, "grad_norm": 0.7322397828102112, "learning_rate": 9.093868861765541e-05, "loss": 0.04222564697265625, "step": 31940 }, { "epoch": 9.068975305137666, "grad_norm": 3.2487258911132812, "learning_rate": 9.093585012773204e-05, "loss": 0.04935858547687531, "step": 31950 }, { "epoch": 9.071813795061027, "grad_norm": 5.705401420593262, "learning_rate": 9.093301163780868e-05, "loss": 0.03522341847419739, "step": 31960 }, { "epoch": 9.074652284984388, "grad_norm": 12.268989562988281, "learning_rate": 9.093017314788534e-05, "loss": 0.0877863883972168, "step": 31970 }, { "epoch": 9.07749077490775, "grad_norm": 3.4812443256378174, "learning_rate": 9.092733465796197e-05, "loss": 0.04092869162559509, "step": 31980 }, { "epoch": 9.08032926483111, "grad_norm": 3.5843749046325684, "learning_rate": 9.09244961680386e-05, "loss": 0.047165191173553465, "step": 31990 }, { "epoch": 9.08316775475447, "grad_norm": 7.594471454620361, "learning_rate": 9.092165767811525e-05, "loss": 0.050229012966156006, "step": 32000 }, { "epoch": 9.08316775475447, "eval_accuracy": 0.9454441406498378, "eval_loss": 0.16055257618427277, "eval_runtime": 31.6102, "eval_samples_per_second": 497.529, "eval_steps_per_second": 7.782, "step": 32000 }, { "epoch": 9.086006244677831, "grad_norm": 4.322275638580322, "learning_rate": 9.091881918819189e-05, "loss": 0.06291774511337281, "step": 32010 }, { "epoch": 9.088844734601192, "grad_norm": 9.507974624633789, "learning_rate": 9.091598069826852e-05, "loss": 0.049620908498764035, "step": 32020 }, { "epoch": 9.091683224524553, "grad_norm": 3.4975383281707764, "learning_rate": 9.091314220834517e-05, "loss": 0.05312735438346863, "step": 32030 }, { "epoch": 9.094521714447914, "grad_norm": 8.572249412536621, "learning_rate": 9.091030371842181e-05, "loss": 0.06213139295578003, "step": 32040 }, { "epoch": 9.097360204371274, "grad_norm": 12.718893051147461, "learning_rate": 9.090746522849844e-05, "loss": 0.0554810881614685, "step": 32050 }, { "epoch": 9.100198694294635, "grad_norm": 5.842963218688965, "learning_rate": 9.090462673857508e-05, "loss": 0.05603500008583069, "step": 32060 }, { "epoch": 9.103037184217996, "grad_norm": 11.779271125793457, "learning_rate": 9.090178824865172e-05, "loss": 0.05315329432487488, "step": 32070 }, { "epoch": 9.105875674141357, "grad_norm": 14.351799964904785, "learning_rate": 9.089894975872835e-05, "loss": 0.05615080595016479, "step": 32080 }, { "epoch": 9.108714164064718, "grad_norm": 2.0403223037719727, "learning_rate": 9.089611126880499e-05, "loss": 0.03992740511894226, "step": 32090 }, { "epoch": 9.111552653988078, "grad_norm": 5.808372497558594, "learning_rate": 9.089327277888165e-05, "loss": 0.05110638737678528, "step": 32100 }, { "epoch": 9.114391143911439, "grad_norm": 5.363349437713623, "learning_rate": 9.089043428895828e-05, "loss": 0.044322311878204346, "step": 32110 }, { "epoch": 9.1172296338348, "grad_norm": 13.359721183776855, "learning_rate": 9.088759579903492e-05, "loss": 0.042077115178108214, "step": 32120 }, { "epoch": 9.120068123758161, "grad_norm": 10.45478630065918, "learning_rate": 9.088475730911156e-05, "loss": 0.05763462781906128, "step": 32130 }, { "epoch": 9.122906613681522, "grad_norm": 15.195608139038086, "learning_rate": 9.08819188191882e-05, "loss": 0.06402850151062012, "step": 32140 }, { "epoch": 9.125745103604881, "grad_norm": 11.2763671875, "learning_rate": 9.087908032926483e-05, "loss": 0.06520623564720154, "step": 32150 }, { "epoch": 9.128583593528242, "grad_norm": 6.463578701019287, "learning_rate": 9.087624183934147e-05, "loss": 0.0754274606704712, "step": 32160 }, { "epoch": 9.131422083451604, "grad_norm": 3.423412322998047, "learning_rate": 9.087340334941812e-05, "loss": 0.041135784983634946, "step": 32170 }, { "epoch": 9.134260573374965, "grad_norm": 10.44631576538086, "learning_rate": 9.087056485949475e-05, "loss": 0.05105924606323242, "step": 32180 }, { "epoch": 9.137099063298326, "grad_norm": 13.660996437072754, "learning_rate": 9.08677263695714e-05, "loss": 0.0788940131664276, "step": 32190 }, { "epoch": 9.139937553221687, "grad_norm": 8.275731086730957, "learning_rate": 9.086488787964803e-05, "loss": 0.04814446866512299, "step": 32200 }, { "epoch": 9.142776043145046, "grad_norm": 6.043083667755127, "learning_rate": 9.086204938972466e-05, "loss": 0.05335206389427185, "step": 32210 }, { "epoch": 9.145614533068407, "grad_norm": 14.26015567779541, "learning_rate": 9.08592108998013e-05, "loss": 0.04197232127189636, "step": 32220 }, { "epoch": 9.148453022991768, "grad_norm": 7.298044204711914, "learning_rate": 9.085637240987796e-05, "loss": 0.05922012329101563, "step": 32230 }, { "epoch": 9.15129151291513, "grad_norm": 8.097308158874512, "learning_rate": 9.085353391995459e-05, "loss": 0.052116990089416504, "step": 32240 }, { "epoch": 9.15413000283849, "grad_norm": 4.339701175689697, "learning_rate": 9.085069543003123e-05, "loss": 0.05344792604446411, "step": 32250 }, { "epoch": 9.15696849276185, "grad_norm": 6.495077133178711, "learning_rate": 9.084785694010787e-05, "loss": 0.055792659521102905, "step": 32260 }, { "epoch": 9.159806982685211, "grad_norm": 2.3581790924072266, "learning_rate": 9.084501845018451e-05, "loss": 0.051355397701263426, "step": 32270 }, { "epoch": 9.162645472608572, "grad_norm": 10.864691734313965, "learning_rate": 9.084217996026114e-05, "loss": 0.07544721364974975, "step": 32280 }, { "epoch": 9.165483962531933, "grad_norm": 3.968423366546631, "learning_rate": 9.083934147033778e-05, "loss": 0.047580179572105405, "step": 32290 }, { "epoch": 9.168322452455294, "grad_norm": 4.634256362915039, "learning_rate": 9.083650298041443e-05, "loss": 0.04713979363441467, "step": 32300 }, { "epoch": 9.171160942378654, "grad_norm": 7.158009052276611, "learning_rate": 9.083366449049106e-05, "loss": 0.07584148645401001, "step": 32310 }, { "epoch": 9.173999432302015, "grad_norm": 15.426690101623535, "learning_rate": 9.08308260005677e-05, "loss": 0.06314955353736877, "step": 32320 }, { "epoch": 9.176837922225376, "grad_norm": 3.2448484897613525, "learning_rate": 9.082798751064435e-05, "loss": 0.03707973659038544, "step": 32330 }, { "epoch": 9.179676412148737, "grad_norm": 3.8451483249664307, "learning_rate": 9.082514902072097e-05, "loss": 0.04627393186092377, "step": 32340 }, { "epoch": 9.182514902072098, "grad_norm": 5.8484907150268555, "learning_rate": 9.082231053079761e-05, "loss": 0.046934878826141356, "step": 32350 }, { "epoch": 9.18535339199546, "grad_norm": 6.43878173828125, "learning_rate": 9.081947204087426e-05, "loss": 0.04995456337928772, "step": 32360 }, { "epoch": 9.188191881918819, "grad_norm": 13.413664817810059, "learning_rate": 9.08166335509509e-05, "loss": 0.06470543146133423, "step": 32370 }, { "epoch": 9.19103037184218, "grad_norm": 5.617048740386963, "learning_rate": 9.081379506102754e-05, "loss": 0.03900338113307953, "step": 32380 }, { "epoch": 9.19386886176554, "grad_norm": 12.342789649963379, "learning_rate": 9.081095657110418e-05, "loss": 0.07401307821273803, "step": 32390 }, { "epoch": 9.196707351688902, "grad_norm": 4.099523067474365, "learning_rate": 9.080811808118082e-05, "loss": 0.05901659727096557, "step": 32400 }, { "epoch": 9.199545841612263, "grad_norm": 1.8025932312011719, "learning_rate": 9.080527959125745e-05, "loss": 0.040928953886032106, "step": 32410 }, { "epoch": 9.202384331535622, "grad_norm": 7.9925994873046875, "learning_rate": 9.080244110133409e-05, "loss": 0.04325757920742035, "step": 32420 }, { "epoch": 9.205222821458984, "grad_norm": 13.091001510620117, "learning_rate": 9.079960261141075e-05, "loss": 0.05394256114959717, "step": 32430 }, { "epoch": 9.208061311382345, "grad_norm": 9.377684593200684, "learning_rate": 9.079676412148737e-05, "loss": 0.05074498653411865, "step": 32440 }, { "epoch": 9.210899801305706, "grad_norm": 12.04058837890625, "learning_rate": 9.079392563156401e-05, "loss": 0.054376494884490964, "step": 32450 }, { "epoch": 9.213738291229067, "grad_norm": 8.228126525878906, "learning_rate": 9.079108714164066e-05, "loss": 0.0486117035150528, "step": 32460 }, { "epoch": 9.216576781152426, "grad_norm": 12.644847869873047, "learning_rate": 9.078824865171728e-05, "loss": 0.05557645559310913, "step": 32470 }, { "epoch": 9.219415271075787, "grad_norm": 7.676670074462891, "learning_rate": 9.078541016179393e-05, "loss": 0.03925705552101135, "step": 32480 }, { "epoch": 9.222253760999148, "grad_norm": 12.223360061645508, "learning_rate": 9.078257167187057e-05, "loss": 0.049240988492965695, "step": 32490 }, { "epoch": 9.22509225092251, "grad_norm": 4.789877891540527, "learning_rate": 9.077973318194721e-05, "loss": 0.05932843089103699, "step": 32500 }, { "epoch": 9.22509225092251, "eval_accuracy": 0.9498314999682076, "eval_loss": 0.15024346113204956, "eval_runtime": 31.2491, "eval_samples_per_second": 503.278, "eval_steps_per_second": 7.872, "step": 32500 }, { "epoch": 9.22793074084587, "grad_norm": 8.23260498046875, "learning_rate": 9.077689469202385e-05, "loss": 0.04186435043811798, "step": 32510 }, { "epoch": 9.23076923076923, "grad_norm": 6.626314640045166, "learning_rate": 9.077405620210049e-05, "loss": 0.045269107818603514, "step": 32520 }, { "epoch": 9.233607720692591, "grad_norm": 9.466409683227539, "learning_rate": 9.077121771217713e-05, "loss": 0.08306272625923157, "step": 32530 }, { "epoch": 9.236446210615952, "grad_norm": 9.16460132598877, "learning_rate": 9.076837922225376e-05, "loss": 0.04933281540870667, "step": 32540 }, { "epoch": 9.239284700539313, "grad_norm": 10.519789695739746, "learning_rate": 9.07655407323304e-05, "loss": 0.055536919832229616, "step": 32550 }, { "epoch": 9.242123190462674, "grad_norm": 7.917647838592529, "learning_rate": 9.076270224240704e-05, "loss": 0.031090694665908813, "step": 32560 }, { "epoch": 9.244961680386035, "grad_norm": 6.369583606719971, "learning_rate": 9.075986375248368e-05, "loss": 0.06728399395942689, "step": 32570 }, { "epoch": 9.247800170309395, "grad_norm": 6.490670204162598, "learning_rate": 9.075702526256033e-05, "loss": 0.06461179852485657, "step": 32580 }, { "epoch": 9.250638660232756, "grad_norm": 1.9340163469314575, "learning_rate": 9.075418677263697e-05, "loss": 0.058265691995620726, "step": 32590 }, { "epoch": 9.253477150156117, "grad_norm": 12.019217491149902, "learning_rate": 9.07513482827136e-05, "loss": 0.0459725558757782, "step": 32600 }, { "epoch": 9.256315640079478, "grad_norm": 8.567548751831055, "learning_rate": 9.074850979279024e-05, "loss": 0.06528127193450928, "step": 32610 }, { "epoch": 9.25915413000284, "grad_norm": 10.61620807647705, "learning_rate": 9.074567130286688e-05, "loss": 0.07091454863548279, "step": 32620 }, { "epoch": 9.261992619926199, "grad_norm": 3.3433566093444824, "learning_rate": 9.074283281294352e-05, "loss": 0.05981482863426209, "step": 32630 }, { "epoch": 9.26483110984956, "grad_norm": 12.406513214111328, "learning_rate": 9.073999432302016e-05, "loss": 0.059479260444641115, "step": 32640 }, { "epoch": 9.26766959977292, "grad_norm": 3.91577410697937, "learning_rate": 9.07371558330968e-05, "loss": 0.06922081112861633, "step": 32650 }, { "epoch": 9.270508089696282, "grad_norm": 5.703088283538818, "learning_rate": 9.073431734317343e-05, "loss": 0.048567506670951846, "step": 32660 }, { "epoch": 9.273346579619643, "grad_norm": 13.659531593322754, "learning_rate": 9.073147885325007e-05, "loss": 0.05960379838943482, "step": 32670 }, { "epoch": 9.276185069543002, "grad_norm": 10.7163724899292, "learning_rate": 9.072864036332671e-05, "loss": 0.04003342390060425, "step": 32680 }, { "epoch": 9.279023559466363, "grad_norm": 4.531045913696289, "learning_rate": 9.072580187340335e-05, "loss": 0.046465244889259336, "step": 32690 }, { "epoch": 9.281862049389725, "grad_norm": 2.041239023208618, "learning_rate": 9.072296338348e-05, "loss": 0.04149287343025208, "step": 32700 }, { "epoch": 9.284700539313086, "grad_norm": 11.627174377441406, "learning_rate": 9.072012489355664e-05, "loss": 0.05404342412948608, "step": 32710 }, { "epoch": 9.287539029236447, "grad_norm": 7.9445481300354, "learning_rate": 9.071728640363328e-05, "loss": 0.05792953372001648, "step": 32720 }, { "epoch": 9.290377519159806, "grad_norm": 6.924063682556152, "learning_rate": 9.07144479137099e-05, "loss": 0.07755460143089295, "step": 32730 }, { "epoch": 9.293216009083167, "grad_norm": 6.975043773651123, "learning_rate": 9.071160942378655e-05, "loss": 0.04379885494709015, "step": 32740 }, { "epoch": 9.296054499006528, "grad_norm": 15.474916458129883, "learning_rate": 9.070877093386319e-05, "loss": 0.06385596990585327, "step": 32750 }, { "epoch": 9.29889298892989, "grad_norm": 4.132717609405518, "learning_rate": 9.070593244393982e-05, "loss": 0.0586206316947937, "step": 32760 }, { "epoch": 9.30173147885325, "grad_norm": 20.231489181518555, "learning_rate": 9.070309395401647e-05, "loss": 0.06682396531105042, "step": 32770 }, { "epoch": 9.304569968776612, "grad_norm": 9.4744873046875, "learning_rate": 9.070025546409311e-05, "loss": 0.05048024654388428, "step": 32780 }, { "epoch": 9.307408458699971, "grad_norm": 2.6996655464172363, "learning_rate": 9.069741697416974e-05, "loss": 0.052340883016586306, "step": 32790 }, { "epoch": 9.310246948623332, "grad_norm": 11.197591781616211, "learning_rate": 9.069457848424638e-05, "loss": 0.04950667023658752, "step": 32800 }, { "epoch": 9.313085438546693, "grad_norm": 7.194085597991943, "learning_rate": 9.069173999432302e-05, "loss": 0.04353128969669342, "step": 32810 }, { "epoch": 9.315923928470054, "grad_norm": 10.58753490447998, "learning_rate": 9.068890150439966e-05, "loss": 0.05287941694259644, "step": 32820 }, { "epoch": 9.318762418393415, "grad_norm": 5.493806838989258, "learning_rate": 9.06860630144763e-05, "loss": 0.04841550588607788, "step": 32830 }, { "epoch": 9.321600908316775, "grad_norm": 10.897929191589355, "learning_rate": 9.068322452455295e-05, "loss": 0.058511865139007566, "step": 32840 }, { "epoch": 9.324439398240136, "grad_norm": 8.787210464477539, "learning_rate": 9.068038603462959e-05, "loss": 0.04149903357028961, "step": 32850 }, { "epoch": 9.327277888163497, "grad_norm": 6.68208122253418, "learning_rate": 9.067754754470622e-05, "loss": 0.05815345048904419, "step": 32860 }, { "epoch": 9.330116378086858, "grad_norm": 13.823956489562988, "learning_rate": 9.067470905478286e-05, "loss": 0.04482123851776123, "step": 32870 }, { "epoch": 9.33295486801022, "grad_norm": 8.48888874053955, "learning_rate": 9.06718705648595e-05, "loss": 0.06808430552482606, "step": 32880 }, { "epoch": 9.335793357933579, "grad_norm": 10.772937774658203, "learning_rate": 9.066903207493613e-05, "loss": 0.05200949907302856, "step": 32890 }, { "epoch": 9.33863184785694, "grad_norm": 1.882460117340088, "learning_rate": 9.066619358501278e-05, "loss": 0.034025412797927854, "step": 32900 }, { "epoch": 9.3414703377803, "grad_norm": 6.333077430725098, "learning_rate": 9.066335509508942e-05, "loss": 0.04800796806812287, "step": 32910 }, { "epoch": 9.344308827703662, "grad_norm": 2.586176633834839, "learning_rate": 9.066051660516605e-05, "loss": 0.05263393521308899, "step": 32920 }, { "epoch": 9.347147317627023, "grad_norm": 9.180237770080566, "learning_rate": 9.065767811524269e-05, "loss": 0.031053465604782105, "step": 32930 }, { "epoch": 9.349985807550382, "grad_norm": 8.368448257446289, "learning_rate": 9.065483962531933e-05, "loss": 0.04113053679466248, "step": 32940 }, { "epoch": 9.352824297473743, "grad_norm": 3.159412145614624, "learning_rate": 9.065200113539598e-05, "loss": 0.05956799983978271, "step": 32950 }, { "epoch": 9.355662787397105, "grad_norm": 19.962419509887695, "learning_rate": 9.064916264547262e-05, "loss": 0.07392004132270813, "step": 32960 }, { "epoch": 9.358501277320466, "grad_norm": 9.92551040649414, "learning_rate": 9.064632415554926e-05, "loss": 0.055903536081314084, "step": 32970 }, { "epoch": 9.361339767243827, "grad_norm": 8.83159065246582, "learning_rate": 9.06434856656259e-05, "loss": 0.045094051957130434, "step": 32980 }, { "epoch": 9.364178257167188, "grad_norm": 9.9266996383667, "learning_rate": 9.064064717570253e-05, "loss": 0.058740997314453126, "step": 32990 }, { "epoch": 9.367016747090547, "grad_norm": 9.501269340515137, "learning_rate": 9.063780868577917e-05, "loss": 0.04406949281692505, "step": 33000 }, { "epoch": 9.367016747090547, "eval_accuracy": 0.9465886691676735, "eval_loss": 0.1648065596818924, "eval_runtime": 31.3549, "eval_samples_per_second": 501.581, "eval_steps_per_second": 7.846, "step": 33000 }, { "epoch": 9.369855237013908, "grad_norm": 9.468525886535645, "learning_rate": 9.063497019585581e-05, "loss": 0.048831897974014285, "step": 33010 }, { "epoch": 9.37269372693727, "grad_norm": 7.1258625984191895, "learning_rate": 9.063213170593244e-05, "loss": 0.05858026742935181, "step": 33020 }, { "epoch": 9.37553221686063, "grad_norm": 10.655607223510742, "learning_rate": 9.062929321600909e-05, "loss": 0.05381351709365845, "step": 33030 }, { "epoch": 9.378370706783992, "grad_norm": 12.196785926818848, "learning_rate": 9.062645472608573e-05, "loss": 0.06687752604484558, "step": 33040 }, { "epoch": 9.381209196707351, "grad_norm": 5.432117462158203, "learning_rate": 9.062361623616236e-05, "loss": 0.06028173565864563, "step": 33050 }, { "epoch": 9.384047686630712, "grad_norm": 8.175498962402344, "learning_rate": 9.0620777746239e-05, "loss": 0.0691449522972107, "step": 33060 }, { "epoch": 9.386886176554073, "grad_norm": 13.365309715270996, "learning_rate": 9.061793925631564e-05, "loss": 0.07226872444152832, "step": 33070 }, { "epoch": 9.389724666477434, "grad_norm": 14.634222030639648, "learning_rate": 9.061510076639229e-05, "loss": 0.07034959197044373, "step": 33080 }, { "epoch": 9.392563156400795, "grad_norm": 3.979567766189575, "learning_rate": 9.061226227646891e-05, "loss": 0.05294480323791504, "step": 33090 }, { "epoch": 9.395401646324155, "grad_norm": 13.566909790039062, "learning_rate": 9.060942378654557e-05, "loss": 0.06107956767082214, "step": 33100 }, { "epoch": 9.398240136247516, "grad_norm": 3.6039998531341553, "learning_rate": 9.060658529662221e-05, "loss": 0.062139981985092164, "step": 33110 }, { "epoch": 9.401078626170877, "grad_norm": 11.181325912475586, "learning_rate": 9.060374680669884e-05, "loss": 0.059360653162002563, "step": 33120 }, { "epoch": 9.403917116094238, "grad_norm": 6.055405139923096, "learning_rate": 9.060090831677548e-05, "loss": 0.07335441112518311, "step": 33130 }, { "epoch": 9.4067556060176, "grad_norm": 4.620993614196777, "learning_rate": 9.059806982685212e-05, "loss": 0.07250437140464783, "step": 33140 }, { "epoch": 9.40959409594096, "grad_norm": 1.5898244380950928, "learning_rate": 9.059523133692875e-05, "loss": 0.0533543586730957, "step": 33150 }, { "epoch": 9.41243258586432, "grad_norm": 10.768118858337402, "learning_rate": 9.05923928470054e-05, "loss": 0.05850784778594971, "step": 33160 }, { "epoch": 9.41527107578768, "grad_norm": 7.327179908752441, "learning_rate": 9.058955435708204e-05, "loss": 0.043669885396957396, "step": 33170 }, { "epoch": 9.418109565711042, "grad_norm": 8.013859748840332, "learning_rate": 9.058671586715867e-05, "loss": 0.06741069555282593, "step": 33180 }, { "epoch": 9.420948055634403, "grad_norm": 9.66065502166748, "learning_rate": 9.058387737723531e-05, "loss": 0.07110794782638549, "step": 33190 }, { "epoch": 9.423786545557764, "grad_norm": 13.936553001403809, "learning_rate": 9.058103888731196e-05, "loss": 0.07287751436233521, "step": 33200 }, { "epoch": 9.426625035481123, "grad_norm": 7.313204288482666, "learning_rate": 9.05782003973886e-05, "loss": 0.05224173665046692, "step": 33210 }, { "epoch": 9.429463525404485, "grad_norm": 11.710868835449219, "learning_rate": 9.057536190746522e-05, "loss": 0.056077378988265994, "step": 33220 }, { "epoch": 9.432302015327846, "grad_norm": 1.254156470298767, "learning_rate": 9.057252341754188e-05, "loss": 0.032727751135826114, "step": 33230 }, { "epoch": 9.435140505251207, "grad_norm": 1.8861327171325684, "learning_rate": 9.056968492761852e-05, "loss": 0.05641490817070007, "step": 33240 }, { "epoch": 9.437978995174568, "grad_norm": 9.412788391113281, "learning_rate": 9.056684643769515e-05, "loss": 0.053233766555786134, "step": 33250 }, { "epoch": 9.440817485097927, "grad_norm": 2.569307804107666, "learning_rate": 9.056400794777179e-05, "loss": 0.05567408800125122, "step": 33260 }, { "epoch": 9.443655975021288, "grad_norm": 7.04046630859375, "learning_rate": 9.056116945784843e-05, "loss": 0.04117984473705292, "step": 33270 }, { "epoch": 9.44649446494465, "grad_norm": 13.325481414794922, "learning_rate": 9.055833096792506e-05, "loss": 0.06516222953796387, "step": 33280 }, { "epoch": 9.44933295486801, "grad_norm": 5.740192413330078, "learning_rate": 9.05554924780017e-05, "loss": 0.05689252018928528, "step": 33290 }, { "epoch": 9.452171444791372, "grad_norm": 12.770515441894531, "learning_rate": 9.055265398807836e-05, "loss": 0.07921002507209778, "step": 33300 }, { "epoch": 9.455009934714731, "grad_norm": 13.46126937866211, "learning_rate": 9.054981549815498e-05, "loss": 0.04767411649227142, "step": 33310 }, { "epoch": 9.457848424638092, "grad_norm": 5.489933013916016, "learning_rate": 9.054697700823162e-05, "loss": 0.047058111429214476, "step": 33320 }, { "epoch": 9.460686914561453, "grad_norm": 9.937371253967285, "learning_rate": 9.054413851830827e-05, "loss": 0.07372314929962158, "step": 33330 }, { "epoch": 9.463525404484814, "grad_norm": 9.115100860595703, "learning_rate": 9.054130002838491e-05, "loss": 0.0654799461364746, "step": 33340 }, { "epoch": 9.466363894408175, "grad_norm": 3.8474485874176025, "learning_rate": 9.053846153846154e-05, "loss": 0.04887437522411346, "step": 33350 }, { "epoch": 9.469202384331535, "grad_norm": 11.678000450134277, "learning_rate": 9.053562304853819e-05, "loss": 0.07052799463272094, "step": 33360 }, { "epoch": 9.472040874254896, "grad_norm": 16.68780517578125, "learning_rate": 9.053278455861483e-05, "loss": 0.04608502984046936, "step": 33370 }, { "epoch": 9.474879364178257, "grad_norm": 3.5634920597076416, "learning_rate": 9.052994606869146e-05, "loss": 0.0610964834690094, "step": 33380 }, { "epoch": 9.477717854101618, "grad_norm": 7.769752025604248, "learning_rate": 9.05271075787681e-05, "loss": 0.05623708963394165, "step": 33390 }, { "epoch": 9.48055634402498, "grad_norm": 14.619163513183594, "learning_rate": 9.052426908884474e-05, "loss": 0.05393269658088684, "step": 33400 }, { "epoch": 9.48339483394834, "grad_norm": 9.452289581298828, "learning_rate": 9.052143059892137e-05, "loss": 0.058954554796218875, "step": 33410 }, { "epoch": 9.4862333238717, "grad_norm": 3.0336191654205322, "learning_rate": 9.051859210899801e-05, "loss": 0.04946458339691162, "step": 33420 }, { "epoch": 9.48907181379506, "grad_norm": 9.975210189819336, "learning_rate": 9.051575361907467e-05, "loss": 0.07156325578689575, "step": 33430 }, { "epoch": 9.491910303718422, "grad_norm": 9.630119323730469, "learning_rate": 9.05129151291513e-05, "loss": 0.05123568773269653, "step": 33440 }, { "epoch": 9.494748793641783, "grad_norm": 6.102807998657227, "learning_rate": 9.051007663922794e-05, "loss": 0.05549406409263611, "step": 33450 }, { "epoch": 9.497587283565144, "grad_norm": 0.8345799446105957, "learning_rate": 9.050723814930458e-05, "loss": 0.04638853669166565, "step": 33460 }, { "epoch": 9.500425773488503, "grad_norm": 6.340814113616943, "learning_rate": 9.050439965938122e-05, "loss": 0.06161945462226868, "step": 33470 }, { "epoch": 9.503264263411864, "grad_norm": 8.755582809448242, "learning_rate": 9.050156116945785e-05, "loss": 0.05433828234672546, "step": 33480 }, { "epoch": 9.506102753335226, "grad_norm": 8.362161636352539, "learning_rate": 9.049872267953449e-05, "loss": 0.06090704798698425, "step": 33490 }, { "epoch": 9.508941243258587, "grad_norm": 6.977360248565674, "learning_rate": 9.049588418961113e-05, "loss": 0.05114209055900574, "step": 33500 }, { "epoch": 9.508941243258587, "eval_accuracy": 0.9557448973103579, "eval_loss": 0.13861849904060364, "eval_runtime": 31.7276, "eval_samples_per_second": 495.688, "eval_steps_per_second": 7.754, "step": 33500 }, { "epoch": 9.511779733181948, "grad_norm": 14.03275203704834, "learning_rate": 9.049304569968777e-05, "loss": 0.04922421872615814, "step": 33510 }, { "epoch": 9.514618223105309, "grad_norm": 10.364019393920898, "learning_rate": 9.049020720976441e-05, "loss": 0.06954495906829834, "step": 33520 }, { "epoch": 9.517456713028668, "grad_norm": 5.165099143981934, "learning_rate": 9.048736871984105e-05, "loss": 0.05066389441490173, "step": 33530 }, { "epoch": 9.52029520295203, "grad_norm": 12.195990562438965, "learning_rate": 9.048453022991768e-05, "loss": 0.06648446917533875, "step": 33540 }, { "epoch": 9.52313369287539, "grad_norm": 4.532296180725098, "learning_rate": 9.048169173999432e-05, "loss": 0.04216625392436981, "step": 33550 }, { "epoch": 9.525972182798752, "grad_norm": 16.543903350830078, "learning_rate": 9.047885325007098e-05, "loss": 0.0777590274810791, "step": 33560 }, { "epoch": 9.528810672722113, "grad_norm": 7.024462699890137, "learning_rate": 9.04760147601476e-05, "loss": 0.05962986946105957, "step": 33570 }, { "epoch": 9.531649162645472, "grad_norm": 8.659052848815918, "learning_rate": 9.047317627022425e-05, "loss": 0.03709257543087006, "step": 33580 }, { "epoch": 9.534487652568833, "grad_norm": 3.186353921890259, "learning_rate": 9.047033778030089e-05, "loss": 0.05533115863800049, "step": 33590 }, { "epoch": 9.537326142492194, "grad_norm": 5.910744667053223, "learning_rate": 9.046749929037752e-05, "loss": 0.05815193653106689, "step": 33600 }, { "epoch": 9.540164632415555, "grad_norm": 12.982107162475586, "learning_rate": 9.046466080045416e-05, "loss": 0.06229819655418396, "step": 33610 }, { "epoch": 9.543003122338916, "grad_norm": 5.328309059143066, "learning_rate": 9.04618223105308e-05, "loss": 0.05786202549934387, "step": 33620 }, { "epoch": 9.545841612262276, "grad_norm": 8.800302505493164, "learning_rate": 9.045898382060744e-05, "loss": 0.06661202311515808, "step": 33630 }, { "epoch": 9.548680102185637, "grad_norm": 2.952171802520752, "learning_rate": 9.045614533068408e-05, "loss": 0.041045427322387695, "step": 33640 }, { "epoch": 9.551518592108998, "grad_norm": 9.967278480529785, "learning_rate": 9.045330684076072e-05, "loss": 0.09409021735191345, "step": 33650 }, { "epoch": 9.55435708203236, "grad_norm": 8.845858573913574, "learning_rate": 9.045046835083736e-05, "loss": 0.07691789865493774, "step": 33660 }, { "epoch": 9.55719557195572, "grad_norm": 4.441326141357422, "learning_rate": 9.044762986091399e-05, "loss": 0.03699399530887604, "step": 33670 }, { "epoch": 9.56003406187908, "grad_norm": 15.421638488769531, "learning_rate": 9.044479137099063e-05, "loss": 0.05435764789581299, "step": 33680 }, { "epoch": 9.56287255180244, "grad_norm": 10.45661735534668, "learning_rate": 9.044195288106727e-05, "loss": 0.04014127254486084, "step": 33690 }, { "epoch": 9.565711041725802, "grad_norm": 12.669248580932617, "learning_rate": 9.043911439114392e-05, "loss": 0.058434033393859865, "step": 33700 }, { "epoch": 9.568549531649163, "grad_norm": 7.6707305908203125, "learning_rate": 9.043627590122056e-05, "loss": 0.0526405930519104, "step": 33710 }, { "epoch": 9.571388021572524, "grad_norm": 7.266836643218994, "learning_rate": 9.04334374112972e-05, "loss": 0.05215244293212891, "step": 33720 }, { "epoch": 9.574226511495883, "grad_norm": 6.0221357345581055, "learning_rate": 9.043059892137383e-05, "loss": 0.06672795414924622, "step": 33730 }, { "epoch": 9.577065001419244, "grad_norm": 26.31820297241211, "learning_rate": 9.042776043145047e-05, "loss": 0.08382744789123535, "step": 33740 }, { "epoch": 9.579903491342606, "grad_norm": 10.193788528442383, "learning_rate": 9.042492194152711e-05, "loss": 0.06574383974075318, "step": 33750 }, { "epoch": 9.582741981265967, "grad_norm": 9.864354133605957, "learning_rate": 9.042208345160375e-05, "loss": 0.08076583743095397, "step": 33760 }, { "epoch": 9.585580471189328, "grad_norm": 7.901949405670166, "learning_rate": 9.041924496168039e-05, "loss": 0.07552215456962585, "step": 33770 }, { "epoch": 9.588418961112689, "grad_norm": 12.871236801147461, "learning_rate": 9.041640647175703e-05, "loss": 0.05840908288955689, "step": 33780 }, { "epoch": 9.591257451036048, "grad_norm": 9.155179023742676, "learning_rate": 9.041356798183367e-05, "loss": 0.03758316934108734, "step": 33790 }, { "epoch": 9.59409594095941, "grad_norm": 3.980708122253418, "learning_rate": 9.04107294919103e-05, "loss": 0.04662624001502991, "step": 33800 }, { "epoch": 9.59693443088277, "grad_norm": 9.541356086730957, "learning_rate": 9.040789100198694e-05, "loss": 0.04631110429763794, "step": 33810 }, { "epoch": 9.599772920806132, "grad_norm": 8.988570213317871, "learning_rate": 9.040505251206359e-05, "loss": 0.0743104338645935, "step": 33820 }, { "epoch": 9.602611410729493, "grad_norm": 9.99904727935791, "learning_rate": 9.040221402214023e-05, "loss": 0.04648450016975403, "step": 33830 }, { "epoch": 9.605449900652852, "grad_norm": 9.594966888427734, "learning_rate": 9.039937553221687e-05, "loss": 0.05980173349380493, "step": 33840 }, { "epoch": 9.608288390576213, "grad_norm": 14.48553466796875, "learning_rate": 9.039653704229351e-05, "loss": 0.05360459089279175, "step": 33850 }, { "epoch": 9.611126880499574, "grad_norm": 9.922717094421387, "learning_rate": 9.039369855237014e-05, "loss": 0.06500089168548584, "step": 33860 }, { "epoch": 9.613965370422935, "grad_norm": 4.235474586486816, "learning_rate": 9.039114391143912e-05, "loss": 0.07516565322875976, "step": 33870 }, { "epoch": 9.616803860346296, "grad_norm": 11.98862361907959, "learning_rate": 9.038830542151576e-05, "loss": 0.05682286620140076, "step": 33880 }, { "epoch": 9.619642350269656, "grad_norm": 16.991994857788086, "learning_rate": 9.03854669315924e-05, "loss": 0.07232513427734374, "step": 33890 }, { "epoch": 9.622480840193017, "grad_norm": 7.825690746307373, "learning_rate": 9.038262844166904e-05, "loss": 0.11338208913803101, "step": 33900 }, { "epoch": 9.625319330116378, "grad_norm": 14.34385871887207, "learning_rate": 9.037978995174567e-05, "loss": 0.06633058786392212, "step": 33910 }, { "epoch": 9.628157820039739, "grad_norm": 14.932887077331543, "learning_rate": 9.037695146182231e-05, "loss": 0.056937789916992186, "step": 33920 }, { "epoch": 9.6309963099631, "grad_norm": 14.428411483764648, "learning_rate": 9.037411297189895e-05, "loss": 0.058716845512390134, "step": 33930 }, { "epoch": 9.633834799886461, "grad_norm": 10.574261665344238, "learning_rate": 9.037127448197559e-05, "loss": 0.0521108090877533, "step": 33940 }, { "epoch": 9.63667328980982, "grad_norm": 8.565613746643066, "learning_rate": 9.036843599205223e-05, "loss": 0.04207794368267059, "step": 33950 }, { "epoch": 9.639511779733182, "grad_norm": 12.022870063781738, "learning_rate": 9.036559750212888e-05, "loss": 0.045295464992523196, "step": 33960 }, { "epoch": 9.642350269656543, "grad_norm": 5.8100738525390625, "learning_rate": 9.036275901220552e-05, "loss": 0.0781801164150238, "step": 33970 }, { "epoch": 9.645188759579904, "grad_norm": 4.381354331970215, "learning_rate": 9.035992052228214e-05, "loss": 0.05108208060264587, "step": 33980 }, { "epoch": 9.648027249503265, "grad_norm": 5.99945068359375, "learning_rate": 9.035708203235879e-05, "loss": 0.044146662950515746, "step": 33990 }, { "epoch": 9.650865739426624, "grad_norm": 11.849239349365234, "learning_rate": 9.035424354243543e-05, "loss": 0.05025023818016052, "step": 34000 }, { "epoch": 9.650865739426624, "eval_accuracy": 0.947606027850194, "eval_loss": 0.15701857209205627, "eval_runtime": 31.2038, "eval_samples_per_second": 504.01, "eval_steps_per_second": 7.884, "step": 34000 }, { "epoch": 9.653704229349986, "grad_norm": 4.164856910705566, "learning_rate": 9.035140505251207e-05, "loss": 0.052230226993560794, "step": 34010 }, { "epoch": 9.656542719273347, "grad_norm": 10.305496215820312, "learning_rate": 9.034856656258871e-05, "loss": 0.06024667024612427, "step": 34020 }, { "epoch": 9.659381209196708, "grad_norm": 8.605240821838379, "learning_rate": 9.034572807266535e-05, "loss": 0.05980364680290222, "step": 34030 }, { "epoch": 9.662219699120069, "grad_norm": 2.591973304748535, "learning_rate": 9.034288958274198e-05, "loss": 0.05305702686309814, "step": 34040 }, { "epoch": 9.665058189043428, "grad_norm": 12.5418119430542, "learning_rate": 9.034005109281862e-05, "loss": 0.0470085084438324, "step": 34050 }, { "epoch": 9.66789667896679, "grad_norm": 7.019689083099365, "learning_rate": 9.033721260289526e-05, "loss": 0.043849459290504454, "step": 34060 }, { "epoch": 9.67073516889015, "grad_norm": 1.362231969833374, "learning_rate": 9.03343741129719e-05, "loss": 0.04370751082897186, "step": 34070 }, { "epoch": 9.673573658813511, "grad_norm": 5.060967922210693, "learning_rate": 9.033153562304855e-05, "loss": 0.04780924916267395, "step": 34080 }, { "epoch": 9.676412148736873, "grad_norm": 9.045281410217285, "learning_rate": 9.032869713312519e-05, "loss": 0.06363303661346435, "step": 34090 }, { "epoch": 9.679250638660232, "grad_norm": 12.295304298400879, "learning_rate": 9.032585864320183e-05, "loss": 0.0758301079273224, "step": 34100 }, { "epoch": 9.682089128583593, "grad_norm": 10.919517517089844, "learning_rate": 9.032302015327846e-05, "loss": 0.05505637526512146, "step": 34110 }, { "epoch": 9.684927618506954, "grad_norm": 3.8185107707977295, "learning_rate": 9.03201816633551e-05, "loss": 0.05205299258232117, "step": 34120 }, { "epoch": 9.687766108430315, "grad_norm": 5.767138481140137, "learning_rate": 9.031734317343174e-05, "loss": 0.044072318077087405, "step": 34130 }, { "epoch": 9.690604598353676, "grad_norm": 10.324020385742188, "learning_rate": 9.031450468350838e-05, "loss": 0.049072015285491946, "step": 34140 }, { "epoch": 9.693443088277036, "grad_norm": 3.8596479892730713, "learning_rate": 9.031166619358502e-05, "loss": 0.040961083769798276, "step": 34150 }, { "epoch": 9.696281578200397, "grad_norm": 6.289798736572266, "learning_rate": 9.030882770366166e-05, "loss": 0.06501933336257934, "step": 34160 }, { "epoch": 9.699120068123758, "grad_norm": 9.705649375915527, "learning_rate": 9.030598921373829e-05, "loss": 0.049810847640037535, "step": 34170 }, { "epoch": 9.701958558047119, "grad_norm": 6.887966156005859, "learning_rate": 9.030315072381493e-05, "loss": 0.047608527541160586, "step": 34180 }, { "epoch": 9.70479704797048, "grad_norm": 6.2285075187683105, "learning_rate": 9.030031223389157e-05, "loss": 0.07033423781394958, "step": 34190 }, { "epoch": 9.707635537893841, "grad_norm": 14.393875122070312, "learning_rate": 9.029747374396821e-05, "loss": 0.06578875184059144, "step": 34200 }, { "epoch": 9.7104740278172, "grad_norm": 11.480899810791016, "learning_rate": 9.029463525404486e-05, "loss": 0.04567984938621521, "step": 34210 }, { "epoch": 9.713312517740562, "grad_norm": 23.58464241027832, "learning_rate": 9.02917967641215e-05, "loss": 0.08886916041374207, "step": 34220 }, { "epoch": 9.716151007663923, "grad_norm": 1.8707438707351685, "learning_rate": 9.028895827419814e-05, "loss": 0.07297916412353515, "step": 34230 }, { "epoch": 9.718989497587284, "grad_norm": 9.00274658203125, "learning_rate": 9.028611978427477e-05, "loss": 0.03945229649543762, "step": 34240 }, { "epoch": 9.721827987510645, "grad_norm": 3.684415102005005, "learning_rate": 9.028328129435141e-05, "loss": 0.06324402689933777, "step": 34250 }, { "epoch": 9.724666477434004, "grad_norm": 10.152517318725586, "learning_rate": 9.028044280442805e-05, "loss": 0.05714346766471863, "step": 34260 }, { "epoch": 9.727504967357365, "grad_norm": 10.9435453414917, "learning_rate": 9.027760431450468e-05, "loss": 0.0816379427909851, "step": 34270 }, { "epoch": 9.730343457280727, "grad_norm": 7.825002193450928, "learning_rate": 9.027476582458133e-05, "loss": 0.07541795969009399, "step": 34280 }, { "epoch": 9.733181947204088, "grad_norm": 2.87214994430542, "learning_rate": 9.027192733465797e-05, "loss": 0.06760282516479492, "step": 34290 }, { "epoch": 9.736020437127449, "grad_norm": 6.221203327178955, "learning_rate": 9.02690888447346e-05, "loss": 0.0756977915763855, "step": 34300 }, { "epoch": 9.73885892705081, "grad_norm": 8.128471374511719, "learning_rate": 9.026625035481124e-05, "loss": 0.0395295262336731, "step": 34310 }, { "epoch": 9.74169741697417, "grad_norm": 9.556492805480957, "learning_rate": 9.026341186488788e-05, "loss": 0.03885287344455719, "step": 34320 }, { "epoch": 9.74453590689753, "grad_norm": 10.452277183532715, "learning_rate": 9.026057337496453e-05, "loss": 0.0708901047706604, "step": 34330 }, { "epoch": 9.747374396820891, "grad_norm": 1.3048969507217407, "learning_rate": 9.025773488504117e-05, "loss": 0.03583475351333618, "step": 34340 }, { "epoch": 9.750212886744253, "grad_norm": 15.01528263092041, "learning_rate": 9.025489639511781e-05, "loss": 0.054342222213745114, "step": 34350 }, { "epoch": 9.753051376667614, "grad_norm": 14.763592720031738, "learning_rate": 9.025205790519444e-05, "loss": 0.0733025074005127, "step": 34360 }, { "epoch": 9.755889866590973, "grad_norm": 7.0914692878723145, "learning_rate": 9.024921941527108e-05, "loss": 0.051036524772644046, "step": 34370 }, { "epoch": 9.758728356514334, "grad_norm": 5.289018630981445, "learning_rate": 9.024638092534772e-05, "loss": 0.043183648586273195, "step": 34380 }, { "epoch": 9.761566846437695, "grad_norm": 2.602764844894409, "learning_rate": 9.024354243542436e-05, "loss": 0.06717556118965148, "step": 34390 }, { "epoch": 9.764405336361056, "grad_norm": 12.55064582824707, "learning_rate": 9.024070394550099e-05, "loss": 0.05532287359237671, "step": 34400 }, { "epoch": 9.767243826284417, "grad_norm": 4.019501686096191, "learning_rate": 9.023786545557764e-05, "loss": 0.08732059597969055, "step": 34410 }, { "epoch": 9.770082316207777, "grad_norm": 12.261041641235352, "learning_rate": 9.023502696565428e-05, "loss": 0.061834710836410525, "step": 34420 }, { "epoch": 9.772920806131138, "grad_norm": 11.338926315307617, "learning_rate": 9.023218847573091e-05, "loss": 0.05416260957717896, "step": 34430 }, { "epoch": 9.775759296054499, "grad_norm": 11.139217376708984, "learning_rate": 9.022934998580755e-05, "loss": 0.06826131343841553, "step": 34440 }, { "epoch": 9.77859778597786, "grad_norm": 5.79235315322876, "learning_rate": 9.02265114958842e-05, "loss": 0.046767657995224, "step": 34450 }, { "epoch": 9.781436275901221, "grad_norm": 3.5053915977478027, "learning_rate": 9.022367300596082e-05, "loss": 0.06353358030319214, "step": 34460 }, { "epoch": 9.78427476582458, "grad_norm": 5.582108020782471, "learning_rate": 9.022083451603748e-05, "loss": 0.04567739963531494, "step": 34470 }, { "epoch": 9.787113255747942, "grad_norm": 12.241418838500977, "learning_rate": 9.021799602611412e-05, "loss": 0.06244375705718994, "step": 34480 }, { "epoch": 9.789951745671303, "grad_norm": 9.93967056274414, "learning_rate": 9.021515753619075e-05, "loss": 0.08247674703598022, "step": 34490 }, { "epoch": 9.792790235594664, "grad_norm": 2.7331008911132812, "learning_rate": 9.021231904626739e-05, "loss": 0.055683857202529906, "step": 34500 }, { "epoch": 9.792790235594664, "eval_accuracy": 0.9530107458510841, "eval_loss": 0.1437760889530182, "eval_runtime": 31.1699, "eval_samples_per_second": 504.558, "eval_steps_per_second": 7.892, "step": 34500 }, { "epoch": 9.795628725518025, "grad_norm": 6.881160259246826, "learning_rate": 9.020948055634403e-05, "loss": 0.06169273853302002, "step": 34510 }, { "epoch": 9.798467215441384, "grad_norm": 11.642535209655762, "learning_rate": 9.020664206642067e-05, "loss": 0.04656406939029693, "step": 34520 }, { "epoch": 9.801305705364745, "grad_norm": 4.442540645599365, "learning_rate": 9.02038035764973e-05, "loss": 0.07033843994140625, "step": 34530 }, { "epoch": 9.804144195288107, "grad_norm": 13.053991317749023, "learning_rate": 9.020096508657395e-05, "loss": 0.08287585377693177, "step": 34540 }, { "epoch": 9.806982685211468, "grad_norm": 10.460387229919434, "learning_rate": 9.01981265966506e-05, "loss": 0.07373381853103637, "step": 34550 }, { "epoch": 9.809821175134829, "grad_norm": 2.3229660987854004, "learning_rate": 9.019528810672722e-05, "loss": 0.05174373984336853, "step": 34560 }, { "epoch": 9.81265966505819, "grad_norm": 4.232043266296387, "learning_rate": 9.019244961680386e-05, "loss": 0.03481191396713257, "step": 34570 }, { "epoch": 9.81549815498155, "grad_norm": 3.6204004287719727, "learning_rate": 9.01896111268805e-05, "loss": 0.043971121311187744, "step": 34580 }, { "epoch": 9.81833664490491, "grad_norm": 7.5256476402282715, "learning_rate": 9.018677263695713e-05, "loss": 0.05240855813026428, "step": 34590 }, { "epoch": 9.821175134828271, "grad_norm": 7.201149940490723, "learning_rate": 9.018393414703377e-05, "loss": 0.06972816586494446, "step": 34600 }, { "epoch": 9.824013624751633, "grad_norm": 9.160229682922363, "learning_rate": 9.018109565711043e-05, "loss": 0.08697982430458069, "step": 34610 }, { "epoch": 9.826852114674994, "grad_norm": 7.5745439529418945, "learning_rate": 9.017825716718706e-05, "loss": 0.06619174480438232, "step": 34620 }, { "epoch": 9.829690604598353, "grad_norm": 7.200782299041748, "learning_rate": 9.01754186772637e-05, "loss": 0.08137998580932618, "step": 34630 }, { "epoch": 9.832529094521714, "grad_norm": 7.1459174156188965, "learning_rate": 9.017258018734034e-05, "loss": 0.044330033659935, "step": 34640 }, { "epoch": 9.835367584445075, "grad_norm": 6.897623538970947, "learning_rate": 9.016974169741698e-05, "loss": 0.05968307256698609, "step": 34650 }, { "epoch": 9.838206074368436, "grad_norm": 8.819377899169922, "learning_rate": 9.016690320749361e-05, "loss": 0.060765206813812256, "step": 34660 }, { "epoch": 9.841044564291797, "grad_norm": 8.2949857711792, "learning_rate": 9.016406471757026e-05, "loss": 0.06788620352745056, "step": 34670 }, { "epoch": 9.843883054215157, "grad_norm": 16.896696090698242, "learning_rate": 9.01612262276469e-05, "loss": 0.06423289775848388, "step": 34680 }, { "epoch": 9.846721544138518, "grad_norm": 4.395873546600342, "learning_rate": 9.015838773772353e-05, "loss": 0.042915630340576175, "step": 34690 }, { "epoch": 9.849560034061879, "grad_norm": 6.8880743980407715, "learning_rate": 9.015554924780017e-05, "loss": 0.04221827983856201, "step": 34700 }, { "epoch": 9.85239852398524, "grad_norm": 8.773017883300781, "learning_rate": 9.015271075787682e-05, "loss": 0.042918425798416135, "step": 34710 }, { "epoch": 9.855237013908601, "grad_norm": 10.875904083251953, "learning_rate": 9.014987226795344e-05, "loss": 0.050799238681793216, "step": 34720 }, { "epoch": 9.858075503831962, "grad_norm": 4.369656085968018, "learning_rate": 9.014703377803009e-05, "loss": 0.05311837196350098, "step": 34730 }, { "epoch": 9.860913993755322, "grad_norm": 14.646661758422852, "learning_rate": 9.014419528810674e-05, "loss": 0.06273683905601501, "step": 34740 }, { "epoch": 9.863752483678683, "grad_norm": 22.416555404663086, "learning_rate": 9.014135679818337e-05, "loss": 0.055133724212646486, "step": 34750 }, { "epoch": 9.866590973602044, "grad_norm": 10.24942684173584, "learning_rate": 9.013851830826001e-05, "loss": 0.0659708857536316, "step": 34760 }, { "epoch": 9.869429463525405, "grad_norm": 7.025540351867676, "learning_rate": 9.013567981833665e-05, "loss": 0.07185924053192139, "step": 34770 }, { "epoch": 9.872267953448766, "grad_norm": 3.1399707794189453, "learning_rate": 9.013284132841329e-05, "loss": 0.04979588687419891, "step": 34780 }, { "epoch": 9.875106443372125, "grad_norm": 7.0206685066223145, "learning_rate": 9.013000283848992e-05, "loss": 0.06861185431480407, "step": 34790 }, { "epoch": 9.877944933295487, "grad_norm": 8.200695037841797, "learning_rate": 9.012716434856656e-05, "loss": 0.05065483450889587, "step": 34800 }, { "epoch": 9.880783423218848, "grad_norm": 2.2723278999328613, "learning_rate": 9.012432585864322e-05, "loss": 0.03970271944999695, "step": 34810 }, { "epoch": 9.883621913142209, "grad_norm": 8.67282772064209, "learning_rate": 9.012148736871984e-05, "loss": 0.04657282829284668, "step": 34820 }, { "epoch": 9.88646040306557, "grad_norm": 16.092105865478516, "learning_rate": 9.011864887879649e-05, "loss": 0.08139899969100953, "step": 34830 }, { "epoch": 9.88929889298893, "grad_norm": 4.557160377502441, "learning_rate": 9.011581038887313e-05, "loss": 0.06661387681961059, "step": 34840 }, { "epoch": 9.89213738291229, "grad_norm": 4.081960678100586, "learning_rate": 9.011297189894975e-05, "loss": 0.06317877769470215, "step": 34850 }, { "epoch": 9.894975872835651, "grad_norm": 5.8050642013549805, "learning_rate": 9.01101334090264e-05, "loss": 0.0409137099981308, "step": 34860 }, { "epoch": 9.897814362759012, "grad_norm": 11.1065092086792, "learning_rate": 9.010729491910305e-05, "loss": 0.0826742947101593, "step": 34870 }, { "epoch": 9.900652852682374, "grad_norm": 9.089666366577148, "learning_rate": 9.010445642917968e-05, "loss": 0.05835459232330322, "step": 34880 }, { "epoch": 9.903491342605733, "grad_norm": 14.420373916625977, "learning_rate": 9.010161793925632e-05, "loss": 0.061403590440750125, "step": 34890 }, { "epoch": 9.906329832529094, "grad_norm": 15.15878677368164, "learning_rate": 9.009877944933296e-05, "loss": 0.08295650482177734, "step": 34900 }, { "epoch": 9.909168322452455, "grad_norm": 2.6207170486450195, "learning_rate": 9.00959409594096e-05, "loss": 0.05734787583351135, "step": 34910 }, { "epoch": 9.912006812375816, "grad_norm": 18.246538162231445, "learning_rate": 9.009310246948623e-05, "loss": 0.08594343662261963, "step": 34920 }, { "epoch": 9.914845302299177, "grad_norm": 3.96736478805542, "learning_rate": 9.009026397956287e-05, "loss": 0.05625451803207397, "step": 34930 }, { "epoch": 9.917683792222537, "grad_norm": 7.367128849029541, "learning_rate": 9.008742548963953e-05, "loss": 0.046605801582336424, "step": 34940 }, { "epoch": 9.920522282145898, "grad_norm": 3.4135327339172363, "learning_rate": 9.008458699971616e-05, "loss": 0.05121656060218811, "step": 34950 }, { "epoch": 9.923360772069259, "grad_norm": 3.933467149734497, "learning_rate": 9.00817485097928e-05, "loss": 0.08033983707427979, "step": 34960 }, { "epoch": 9.92619926199262, "grad_norm": 5.143579959869385, "learning_rate": 9.007891001986944e-05, "loss": 0.03440910875797272, "step": 34970 }, { "epoch": 9.929037751915981, "grad_norm": 4.858419895172119, "learning_rate": 9.007607152994607e-05, "loss": 0.05421479344367981, "step": 34980 }, { "epoch": 9.931876241839342, "grad_norm": 8.035731315612793, "learning_rate": 9.007323304002271e-05, "loss": 0.05538780689239502, "step": 34990 }, { "epoch": 9.934714731762702, "grad_norm": 7.565187931060791, "learning_rate": 9.007039455009935e-05, "loss": 0.0592837929725647, "step": 35000 }, { "epoch": 9.934714731762702, "eval_accuracy": 0.9512303681566733, "eval_loss": 0.14887449145317078, "eval_runtime": 31.7605, "eval_samples_per_second": 495.176, "eval_steps_per_second": 7.745, "step": 35000 }, { "epoch": 9.937553221686063, "grad_norm": 18.523094177246094, "learning_rate": 9.006755606017599e-05, "loss": 0.06304702758789063, "step": 35010 }, { "epoch": 9.940391711609424, "grad_norm": 9.168621063232422, "learning_rate": 9.006471757025263e-05, "loss": 0.05922250747680664, "step": 35020 }, { "epoch": 9.943230201532785, "grad_norm": 14.038727760314941, "learning_rate": 9.006187908032927e-05, "loss": 0.08694047331809998, "step": 35030 }, { "epoch": 9.946068691456146, "grad_norm": 7.897758960723877, "learning_rate": 9.005904059040591e-05, "loss": 0.039450731873512265, "step": 35040 }, { "epoch": 9.948907181379505, "grad_norm": 8.153043746948242, "learning_rate": 9.005620210048254e-05, "loss": 0.0521111786365509, "step": 35050 }, { "epoch": 9.951745671302866, "grad_norm": 6.396288871765137, "learning_rate": 9.005336361055918e-05, "loss": 0.05624227523803711, "step": 35060 }, { "epoch": 9.954584161226228, "grad_norm": 19.761207580566406, "learning_rate": 9.005052512063584e-05, "loss": 0.057583165168762204, "step": 35070 }, { "epoch": 9.957422651149589, "grad_norm": 15.971322059631348, "learning_rate": 9.004768663071247e-05, "loss": 0.06497880220413207, "step": 35080 }, { "epoch": 9.96026114107295, "grad_norm": 10.363218307495117, "learning_rate": 9.004484814078911e-05, "loss": 0.05719610452651978, "step": 35090 }, { "epoch": 9.96309963099631, "grad_norm": 16.19901466369629, "learning_rate": 9.004200965086575e-05, "loss": 0.07287636399269104, "step": 35100 }, { "epoch": 9.96593812091967, "grad_norm": 3.8646068572998047, "learning_rate": 9.003917116094238e-05, "loss": 0.058908873796463014, "step": 35110 }, { "epoch": 9.968776610843031, "grad_norm": 20.06281852722168, "learning_rate": 9.003633267101902e-05, "loss": 0.05985050201416016, "step": 35120 }, { "epoch": 9.971615100766392, "grad_norm": 8.061190605163574, "learning_rate": 9.003349418109566e-05, "loss": 0.05187458992004394, "step": 35130 }, { "epoch": 9.974453590689754, "grad_norm": 16.898475646972656, "learning_rate": 9.00306556911723e-05, "loss": 0.07484169006347656, "step": 35140 }, { "epoch": 9.977292080613115, "grad_norm": 5.978675365447998, "learning_rate": 9.002781720124894e-05, "loss": 0.07348761558532715, "step": 35150 }, { "epoch": 9.980130570536474, "grad_norm": 12.11555290222168, "learning_rate": 9.002497871132558e-05, "loss": 0.07540331482887268, "step": 35160 }, { "epoch": 9.982969060459835, "grad_norm": 10.992325782775879, "learning_rate": 9.002214022140222e-05, "loss": 0.06647417545318604, "step": 35170 }, { "epoch": 9.985807550383196, "grad_norm": 10.29089069366455, "learning_rate": 9.001930173147885e-05, "loss": 0.06358823776245118, "step": 35180 }, { "epoch": 9.988646040306557, "grad_norm": 4.416968822479248, "learning_rate": 9.00164632415555e-05, "loss": 0.06774464845657349, "step": 35190 }, { "epoch": 9.991484530229918, "grad_norm": 5.965823650360107, "learning_rate": 9.001362475163214e-05, "loss": 0.05627605319023132, "step": 35200 }, { "epoch": 9.994323020153278, "grad_norm": 4.1874165534973145, "learning_rate": 9.001078626170878e-05, "loss": 0.051260560750961304, "step": 35210 }, { "epoch": 9.997161510076639, "grad_norm": 8.402924537658691, "learning_rate": 9.000794777178542e-05, "loss": 0.056137198209762575, "step": 35220 }, { "epoch": 10.0, "grad_norm": 7.2380051612854, "learning_rate": 9.000510928186206e-05, "loss": 0.06442593336105347, "step": 35230 }, { "epoch": 10.002838489923361, "grad_norm": 7.25870418548584, "learning_rate": 9.000227079193869e-05, "loss": 0.054992109537124634, "step": 35240 }, { "epoch": 10.005676979846722, "grad_norm": 6.037044048309326, "learning_rate": 8.999943230201533e-05, "loss": 0.04472480118274689, "step": 35250 }, { "epoch": 10.008515469770082, "grad_norm": 6.1710638999938965, "learning_rate": 8.999659381209197e-05, "loss": 0.05034240484237671, "step": 35260 }, { "epoch": 10.011353959693443, "grad_norm": 9.642723083496094, "learning_rate": 8.999375532216861e-05, "loss": 0.05321880578994751, "step": 35270 }, { "epoch": 10.014192449616804, "grad_norm": 10.24276065826416, "learning_rate": 8.999091683224525e-05, "loss": 0.04606838226318359, "step": 35280 }, { "epoch": 10.017030939540165, "grad_norm": 7.279782772064209, "learning_rate": 8.99880783423219e-05, "loss": 0.054281812906265256, "step": 35290 }, { "epoch": 10.019869429463526, "grad_norm": 6.006746768951416, "learning_rate": 8.998523985239852e-05, "loss": 0.03670320808887482, "step": 35300 }, { "epoch": 10.022707919386885, "grad_norm": 3.8634235858917236, "learning_rate": 8.998240136247516e-05, "loss": 0.05234031081199646, "step": 35310 }, { "epoch": 10.025546409310246, "grad_norm": 12.542618751525879, "learning_rate": 8.99795628725518e-05, "loss": 0.04267399311065674, "step": 35320 }, { "epoch": 10.028384899233608, "grad_norm": 12.190305709838867, "learning_rate": 8.997672438262845e-05, "loss": 0.05247676372528076, "step": 35330 }, { "epoch": 10.031223389156969, "grad_norm": 7.946407318115234, "learning_rate": 8.997388589270509e-05, "loss": 0.03265722393989563, "step": 35340 }, { "epoch": 10.03406187908033, "grad_norm": 11.76549243927002, "learning_rate": 8.997104740278173e-05, "loss": 0.04685068130493164, "step": 35350 }, { "epoch": 10.03690036900369, "grad_norm": 19.568191528320312, "learning_rate": 8.996820891285837e-05, "loss": 0.08826239109039306, "step": 35360 }, { "epoch": 10.03973885892705, "grad_norm": 3.0280404090881348, "learning_rate": 8.9965370422935e-05, "loss": 0.03310412466526032, "step": 35370 }, { "epoch": 10.042577348850411, "grad_norm": 8.630297660827637, "learning_rate": 8.996253193301164e-05, "loss": 0.05217688083648682, "step": 35380 }, { "epoch": 10.045415838773772, "grad_norm": 1.595406174659729, "learning_rate": 8.995969344308828e-05, "loss": 0.04221091270446777, "step": 35390 }, { "epoch": 10.048254328697134, "grad_norm": 6.737777233123779, "learning_rate": 8.995685495316491e-05, "loss": 0.05780994892120361, "step": 35400 }, { "epoch": 10.051092818620495, "grad_norm": 7.514589786529541, "learning_rate": 8.995401646324156e-05, "loss": 0.04562924802303314, "step": 35410 }, { "epoch": 10.053931308543854, "grad_norm": 4.597402095794678, "learning_rate": 8.99511779733182e-05, "loss": 0.0336977481842041, "step": 35420 }, { "epoch": 10.056769798467215, "grad_norm": 18.800081253051758, "learning_rate": 8.994833948339483e-05, "loss": 0.0389517605304718, "step": 35430 }, { "epoch": 10.059608288390576, "grad_norm": 1.9848864078521729, "learning_rate": 8.994550099347147e-05, "loss": 0.04523916840553284, "step": 35440 }, { "epoch": 10.062446778313937, "grad_norm": 7.036876201629639, "learning_rate": 8.994266250354812e-05, "loss": 0.0344100296497345, "step": 35450 }, { "epoch": 10.065285268237298, "grad_norm": 13.14783763885498, "learning_rate": 8.993982401362476e-05, "loss": 0.03282588124275208, "step": 35460 }, { "epoch": 10.068123758160658, "grad_norm": 10.074634552001953, "learning_rate": 8.99369855237014e-05, "loss": 0.05955502986907959, "step": 35470 }, { "epoch": 10.070962248084019, "grad_norm": 4.729062557220459, "learning_rate": 8.993414703377804e-05, "loss": 0.05340811610221863, "step": 35480 }, { "epoch": 10.07380073800738, "grad_norm": 6.622889518737793, "learning_rate": 8.993130854385468e-05, "loss": 0.03359088003635406, "step": 35490 }, { "epoch": 10.076639227930741, "grad_norm": 2.5799474716186523, "learning_rate": 8.992847005393131e-05, "loss": 0.04123450219631195, "step": 35500 }, { "epoch": 10.076639227930741, "eval_accuracy": 0.9471609334265912, "eval_loss": 0.16008706390857697, "eval_runtime": 31.296, "eval_samples_per_second": 502.525, "eval_steps_per_second": 7.86, "step": 35500 }, { "epoch": 10.079477717854102, "grad_norm": 8.8908109664917, "learning_rate": 8.992563156400795e-05, "loss": 0.05573850274085999, "step": 35510 }, { "epoch": 10.082316207777462, "grad_norm": 9.230525970458984, "learning_rate": 8.992279307408459e-05, "loss": 0.06340209245681763, "step": 35520 }, { "epoch": 10.085154697700823, "grad_norm": 5.9668498039245605, "learning_rate": 8.991995458416122e-05, "loss": 0.05195220708847046, "step": 35530 }, { "epoch": 10.087993187624184, "grad_norm": 7.181271553039551, "learning_rate": 8.991711609423787e-05, "loss": 0.0380649745464325, "step": 35540 }, { "epoch": 10.090831677547545, "grad_norm": 8.197202682495117, "learning_rate": 8.991427760431452e-05, "loss": 0.037612175941467284, "step": 35550 }, { "epoch": 10.093670167470906, "grad_norm": 2.697593927383423, "learning_rate": 8.991143911439114e-05, "loss": 0.022783589363098145, "step": 35560 }, { "epoch": 10.096508657394267, "grad_norm": 3.909217596054077, "learning_rate": 8.990860062446778e-05, "loss": 0.03313631415367126, "step": 35570 }, { "epoch": 10.099347147317626, "grad_norm": 6.560413360595703, "learning_rate": 8.990576213454443e-05, "loss": 0.03892926871776581, "step": 35580 }, { "epoch": 10.102185637240988, "grad_norm": 8.605551719665527, "learning_rate": 8.990292364462107e-05, "loss": 0.051356267929077146, "step": 35590 }, { "epoch": 10.105024127164349, "grad_norm": 8.99337100982666, "learning_rate": 8.99000851546977e-05, "loss": 0.03490999937057495, "step": 35600 }, { "epoch": 10.10786261708771, "grad_norm": 19.017131805419922, "learning_rate": 8.989724666477435e-05, "loss": 0.09872019290924072, "step": 35610 }, { "epoch": 10.11070110701107, "grad_norm": 11.235498428344727, "learning_rate": 8.989440817485099e-05, "loss": 0.05722121000289917, "step": 35620 }, { "epoch": 10.11353959693443, "grad_norm": 5.047369480133057, "learning_rate": 8.989156968492762e-05, "loss": 0.0776980996131897, "step": 35630 }, { "epoch": 10.116378086857791, "grad_norm": 6.9297895431518555, "learning_rate": 8.988873119500426e-05, "loss": 0.060690116882324216, "step": 35640 }, { "epoch": 10.119216576781152, "grad_norm": 17.668964385986328, "learning_rate": 8.98858927050809e-05, "loss": 0.06002585887908936, "step": 35650 }, { "epoch": 10.122055066704513, "grad_norm": 9.181960105895996, "learning_rate": 8.988305421515753e-05, "loss": 0.035371404886245725, "step": 35660 }, { "epoch": 10.124893556627875, "grad_norm": 7.714477062225342, "learning_rate": 8.988021572523418e-05, "loss": 0.024712955951690672, "step": 35670 }, { "epoch": 10.127732046551234, "grad_norm": 6.619862079620361, "learning_rate": 8.987737723531083e-05, "loss": 0.054236209392547606, "step": 35680 }, { "epoch": 10.130570536474595, "grad_norm": 11.290130615234375, "learning_rate": 8.987453874538745e-05, "loss": 0.06832056641578674, "step": 35690 }, { "epoch": 10.133409026397956, "grad_norm": 4.3995842933654785, "learning_rate": 8.98717002554641e-05, "loss": 0.04756172299385071, "step": 35700 }, { "epoch": 10.136247516321317, "grad_norm": 17.909027099609375, "learning_rate": 8.986886176554074e-05, "loss": 0.037564057111740115, "step": 35710 }, { "epoch": 10.139086006244678, "grad_norm": 6.395310401916504, "learning_rate": 8.986602327561738e-05, "loss": 0.0656251609325409, "step": 35720 }, { "epoch": 10.141924496168038, "grad_norm": 5.638888359069824, "learning_rate": 8.9863184785694e-05, "loss": 0.022350901365280153, "step": 35730 }, { "epoch": 10.144762986091399, "grad_norm": 15.563482284545898, "learning_rate": 8.986034629577066e-05, "loss": 0.07064988613128662, "step": 35740 }, { "epoch": 10.14760147601476, "grad_norm": 8.048022270202637, "learning_rate": 8.98575078058473e-05, "loss": 0.07858047485351563, "step": 35750 }, { "epoch": 10.150439965938121, "grad_norm": 4.078849792480469, "learning_rate": 8.985466931592393e-05, "loss": 0.0566254198551178, "step": 35760 }, { "epoch": 10.153278455861482, "grad_norm": 4.1883544921875, "learning_rate": 8.985183082600057e-05, "loss": 0.046141821146011355, "step": 35770 }, { "epoch": 10.156116945784843, "grad_norm": 7.745772361755371, "learning_rate": 8.984899233607721e-05, "loss": 0.035416656732559205, "step": 35780 }, { "epoch": 10.158955435708203, "grad_norm": 5.770241737365723, "learning_rate": 8.984615384615384e-05, "loss": 0.04654979407787323, "step": 35790 }, { "epoch": 10.161793925631564, "grad_norm": 3.8276517391204834, "learning_rate": 8.98433153562305e-05, "loss": 0.05311836004257202, "step": 35800 }, { "epoch": 10.164632415554925, "grad_norm": 15.73356819152832, "learning_rate": 8.984047686630714e-05, "loss": 0.02972656488418579, "step": 35810 }, { "epoch": 10.167470905478286, "grad_norm": 5.667628288269043, "learning_rate": 8.983763837638376e-05, "loss": 0.06465369462966919, "step": 35820 }, { "epoch": 10.170309395401647, "grad_norm": 6.022791862487793, "learning_rate": 8.98347998864604e-05, "loss": 0.048763447999954225, "step": 35830 }, { "epoch": 10.173147885325006, "grad_norm": 2.321946144104004, "learning_rate": 8.983196139653705e-05, "loss": 0.04847998023033142, "step": 35840 }, { "epoch": 10.175986375248367, "grad_norm": 5.7691192626953125, "learning_rate": 8.982912290661369e-05, "loss": 0.04806318581104278, "step": 35850 }, { "epoch": 10.178824865171729, "grad_norm": 13.540083885192871, "learning_rate": 8.982628441669032e-05, "loss": 0.05916212201118469, "step": 35860 }, { "epoch": 10.18166335509509, "grad_norm": 4.886026859283447, "learning_rate": 8.982344592676697e-05, "loss": 0.03068886399269104, "step": 35870 }, { "epoch": 10.18450184501845, "grad_norm": 14.19399356842041, "learning_rate": 8.982060743684361e-05, "loss": 0.06252155303955079, "step": 35880 }, { "epoch": 10.18734033494181, "grad_norm": 7.2224225997924805, "learning_rate": 8.981776894692024e-05, "loss": 0.056982392072677614, "step": 35890 }, { "epoch": 10.190178824865171, "grad_norm": 13.160146713256836, "learning_rate": 8.981493045699688e-05, "loss": 0.07169893980026246, "step": 35900 }, { "epoch": 10.193017314788532, "grad_norm": 3.4034552574157715, "learning_rate": 8.981209196707352e-05, "loss": 0.04664395749568939, "step": 35910 }, { "epoch": 10.195855804711893, "grad_norm": 7.455689430236816, "learning_rate": 8.980925347715015e-05, "loss": 0.05753688216209411, "step": 35920 }, { "epoch": 10.198694294635255, "grad_norm": 6.141243934631348, "learning_rate": 8.980641498722679e-05, "loss": 0.052104413509368896, "step": 35930 }, { "epoch": 10.201532784558616, "grad_norm": 3.608466863632202, "learning_rate": 8.980357649730345e-05, "loss": 0.028675973415374756, "step": 35940 }, { "epoch": 10.204371274481975, "grad_norm": 11.292566299438477, "learning_rate": 8.980073800738008e-05, "loss": 0.034777671098709106, "step": 35950 }, { "epoch": 10.207209764405336, "grad_norm": 6.170752048492432, "learning_rate": 8.979789951745672e-05, "loss": 0.04363607168197632, "step": 35960 }, { "epoch": 10.210048254328697, "grad_norm": 0.47235235571861267, "learning_rate": 8.979506102753336e-05, "loss": 0.01717611700296402, "step": 35970 }, { "epoch": 10.212886744252058, "grad_norm": 8.509514808654785, "learning_rate": 8.979222253761e-05, "loss": 0.03820683658123016, "step": 35980 }, { "epoch": 10.21572523417542, "grad_norm": 10.440407752990723, "learning_rate": 8.978938404768663e-05, "loss": 0.058907926082611084, "step": 35990 }, { "epoch": 10.218563724098779, "grad_norm": 8.241033554077148, "learning_rate": 8.978654555776328e-05, "loss": 0.03713009059429169, "step": 36000 }, { "epoch": 10.218563724098779, "eval_accuracy": 0.9473516881795638, "eval_loss": 0.15866875648498535, "eval_runtime": 31.5073, "eval_samples_per_second": 499.154, "eval_steps_per_second": 7.808, "step": 36000 }, { "epoch": 10.22140221402214, "grad_norm": 5.251601696014404, "learning_rate": 8.978370706783992e-05, "loss": 0.04426538050174713, "step": 36010 }, { "epoch": 10.224240703945501, "grad_norm": 1.979737401008606, "learning_rate": 8.978086857791655e-05, "loss": 0.055144065618515016, "step": 36020 }, { "epoch": 10.227079193868862, "grad_norm": 7.218305587768555, "learning_rate": 8.977803008799319e-05, "loss": 0.046082299947738645, "step": 36030 }, { "epoch": 10.229917683792223, "grad_norm": 9.494359970092773, "learning_rate": 8.977519159806983e-05, "loss": 0.0413273423910141, "step": 36040 }, { "epoch": 10.232756173715583, "grad_norm": 4.212758541107178, "learning_rate": 8.977263695713881e-05, "loss": 0.03304300308227539, "step": 36050 }, { "epoch": 10.235594663638944, "grad_norm": 13.917865753173828, "learning_rate": 8.976979846721546e-05, "loss": 0.03857312798500061, "step": 36060 }, { "epoch": 10.238433153562305, "grad_norm": 11.222973823547363, "learning_rate": 8.976695997729208e-05, "loss": 0.05476447343826294, "step": 36070 }, { "epoch": 10.241271643485666, "grad_norm": 2.1426548957824707, "learning_rate": 8.976412148736873e-05, "loss": 0.035970038175582884, "step": 36080 }, { "epoch": 10.244110133409027, "grad_norm": 5.713494777679443, "learning_rate": 8.976128299744537e-05, "loss": 0.04338274598121643, "step": 36090 }, { "epoch": 10.246948623332386, "grad_norm": 7.571118354797363, "learning_rate": 8.9758444507522e-05, "loss": 0.054502475261688235, "step": 36100 }, { "epoch": 10.249787113255747, "grad_norm": 9.001779556274414, "learning_rate": 8.975560601759864e-05, "loss": 0.04046744704246521, "step": 36110 }, { "epoch": 10.252625603179109, "grad_norm": 6.142360687255859, "learning_rate": 8.975276752767529e-05, "loss": 0.04563030898571015, "step": 36120 }, { "epoch": 10.25546409310247, "grad_norm": 8.738130569458008, "learning_rate": 8.974992903775192e-05, "loss": 0.05603446960449219, "step": 36130 }, { "epoch": 10.25830258302583, "grad_norm": 18.529918670654297, "learning_rate": 8.974709054782856e-05, "loss": 0.0697174310684204, "step": 36140 }, { "epoch": 10.261141072949192, "grad_norm": 14.157784461975098, "learning_rate": 8.97442520579052e-05, "loss": 0.05651613473892212, "step": 36150 }, { "epoch": 10.263979562872551, "grad_norm": 9.717578887939453, "learning_rate": 8.974141356798184e-05, "loss": 0.0654645323753357, "step": 36160 }, { "epoch": 10.266818052795912, "grad_norm": 9.141207695007324, "learning_rate": 8.973857507805847e-05, "loss": 0.03184970915317535, "step": 36170 }, { "epoch": 10.269656542719273, "grad_norm": 5.090808391571045, "learning_rate": 8.973573658813513e-05, "loss": 0.06937923431396484, "step": 36180 }, { "epoch": 10.272495032642635, "grad_norm": 4.167090892791748, "learning_rate": 8.973289809821175e-05, "loss": 0.08139961957931519, "step": 36190 }, { "epoch": 10.275333522565996, "grad_norm": 4.789121627807617, "learning_rate": 8.97300596082884e-05, "loss": 0.03616894781589508, "step": 36200 }, { "epoch": 10.278172012489355, "grad_norm": 5.434744358062744, "learning_rate": 8.972722111836504e-05, "loss": 0.03950951099395752, "step": 36210 }, { "epoch": 10.281010502412716, "grad_norm": 14.839851379394531, "learning_rate": 8.972438262844168e-05, "loss": 0.05882408618927002, "step": 36220 }, { "epoch": 10.283848992336077, "grad_norm": 5.861428737640381, "learning_rate": 8.97215441385183e-05, "loss": 0.08029335141181945, "step": 36230 }, { "epoch": 10.286687482259438, "grad_norm": 1.1029629707336426, "learning_rate": 8.971870564859495e-05, "loss": 0.058646482229232785, "step": 36240 }, { "epoch": 10.2895259721828, "grad_norm": 9.69312858581543, "learning_rate": 8.97158671586716e-05, "loss": 0.05503484606742859, "step": 36250 }, { "epoch": 10.292364462106159, "grad_norm": 9.076212882995605, "learning_rate": 8.971302866874823e-05, "loss": 0.045270389318466185, "step": 36260 }, { "epoch": 10.29520295202952, "grad_norm": 2.113004446029663, "learning_rate": 8.971019017882487e-05, "loss": 0.045760995149612425, "step": 36270 }, { "epoch": 10.298041441952881, "grad_norm": 6.438479423522949, "learning_rate": 8.970735168890151e-05, "loss": 0.053906583786010744, "step": 36280 }, { "epoch": 10.300879931876242, "grad_norm": 4.5841145515441895, "learning_rate": 8.970451319897814e-05, "loss": 0.043227142095565795, "step": 36290 }, { "epoch": 10.303718421799603, "grad_norm": 4.7565999031066895, "learning_rate": 8.970167470905478e-05, "loss": 0.032406175136566163, "step": 36300 }, { "epoch": 10.306556911722964, "grad_norm": 5.3222174644470215, "learning_rate": 8.969883621913142e-05, "loss": 0.058787685632705686, "step": 36310 }, { "epoch": 10.309395401646324, "grad_norm": 14.744622230529785, "learning_rate": 8.969599772920806e-05, "loss": 0.04638729691505432, "step": 36320 }, { "epoch": 10.312233891569685, "grad_norm": 4.511398792266846, "learning_rate": 8.96931592392847e-05, "loss": 0.056706970930099486, "step": 36330 }, { "epoch": 10.315072381493046, "grad_norm": 13.197816848754883, "learning_rate": 8.969032074936135e-05, "loss": 0.041135966777801514, "step": 36340 }, { "epoch": 10.317910871416407, "grad_norm": 6.950282096862793, "learning_rate": 8.968748225943799e-05, "loss": 0.04915453195571899, "step": 36350 }, { "epoch": 10.320749361339768, "grad_norm": 6.648165702819824, "learning_rate": 8.968464376951462e-05, "loss": 0.058501708507537845, "step": 36360 }, { "epoch": 10.323587851263127, "grad_norm": 9.410599708557129, "learning_rate": 8.968180527959126e-05, "loss": 0.03711932003498077, "step": 36370 }, { "epoch": 10.326426341186489, "grad_norm": 3.2461369037628174, "learning_rate": 8.967896678966791e-05, "loss": 0.029022285342216493, "step": 36380 }, { "epoch": 10.32926483110985, "grad_norm": 4.180446624755859, "learning_rate": 8.967612829974454e-05, "loss": 0.05456423759460449, "step": 36390 }, { "epoch": 10.33210332103321, "grad_norm": 8.58168888092041, "learning_rate": 8.967328980982118e-05, "loss": 0.06622806787490845, "step": 36400 }, { "epoch": 10.334941810956572, "grad_norm": 6.193541526794434, "learning_rate": 8.967045131989782e-05, "loss": 0.04797664284706116, "step": 36410 }, { "epoch": 10.337780300879931, "grad_norm": 2.4929521083831787, "learning_rate": 8.966761282997445e-05, "loss": 0.03953470289707184, "step": 36420 }, { "epoch": 10.340618790803292, "grad_norm": 12.940290451049805, "learning_rate": 8.966477434005109e-05, "loss": 0.05653942227363586, "step": 36430 }, { "epoch": 10.343457280726653, "grad_norm": 13.67895221710205, "learning_rate": 8.966193585012773e-05, "loss": 0.05468284487724304, "step": 36440 }, { "epoch": 10.346295770650014, "grad_norm": 8.446161270141602, "learning_rate": 8.965909736020437e-05, "loss": 0.05774714946746826, "step": 36450 }, { "epoch": 10.349134260573376, "grad_norm": 9.774473190307617, "learning_rate": 8.965625887028102e-05, "loss": 0.058855897188186644, "step": 36460 }, { "epoch": 10.351972750496735, "grad_norm": 13.697847366333008, "learning_rate": 8.965342038035766e-05, "loss": 0.06780917644500732, "step": 36470 }, { "epoch": 10.354811240420096, "grad_norm": 12.0314359664917, "learning_rate": 8.96505818904343e-05, "loss": 0.0725091814994812, "step": 36480 }, { "epoch": 10.357649730343457, "grad_norm": 4.78488302230835, "learning_rate": 8.964774340051093e-05, "loss": 0.04848609566688537, "step": 36490 }, { "epoch": 10.360488220266818, "grad_norm": 1.0983777046203613, "learning_rate": 8.964490491058757e-05, "loss": 0.04272754490375519, "step": 36500 }, { "epoch": 10.360488220266818, "eval_accuracy": 0.9542824442042348, "eval_loss": 0.1382163166999817, "eval_runtime": 31.338, "eval_samples_per_second": 501.85, "eval_steps_per_second": 7.85, "step": 36500 }, { "epoch": 10.36332671019018, "grad_norm": 16.599246978759766, "learning_rate": 8.964206642066421e-05, "loss": 0.055419743061065674, "step": 36510 }, { "epoch": 10.366165200113539, "grad_norm": 8.60208511352539, "learning_rate": 8.963922793074085e-05, "loss": 0.04853387475013733, "step": 36520 }, { "epoch": 10.3690036900369, "grad_norm": 6.653528213500977, "learning_rate": 8.963638944081749e-05, "loss": 0.0412818044424057, "step": 36530 }, { "epoch": 10.371842179960261, "grad_norm": 8.607937812805176, "learning_rate": 8.963355095089413e-05, "loss": 0.03893500566482544, "step": 36540 }, { "epoch": 10.374680669883622, "grad_norm": 15.344191551208496, "learning_rate": 8.963071246097076e-05, "loss": 0.04885843992233276, "step": 36550 }, { "epoch": 10.377519159806983, "grad_norm": 9.874051094055176, "learning_rate": 8.96278739710474e-05, "loss": 0.037526220083236694, "step": 36560 }, { "epoch": 10.380357649730344, "grad_norm": 4.276268005371094, "learning_rate": 8.962503548112404e-05, "loss": 0.04496805965900421, "step": 36570 }, { "epoch": 10.383196139653704, "grad_norm": 11.307729721069336, "learning_rate": 8.962219699120069e-05, "loss": 0.055699515342712405, "step": 36580 }, { "epoch": 10.386034629577065, "grad_norm": 14.329275131225586, "learning_rate": 8.961935850127733e-05, "loss": 0.06453256011009216, "step": 36590 }, { "epoch": 10.388873119500426, "grad_norm": 5.845245838165283, "learning_rate": 8.961652001135397e-05, "loss": 0.04669055938720703, "step": 36600 }, { "epoch": 10.391711609423787, "grad_norm": 6.348171234130859, "learning_rate": 8.961368152143061e-05, "loss": 0.0633388638496399, "step": 36610 }, { "epoch": 10.394550099347148, "grad_norm": 3.777928113937378, "learning_rate": 8.961084303150724e-05, "loss": 0.040178346633911136, "step": 36620 }, { "epoch": 10.397388589270507, "grad_norm": 7.331879138946533, "learning_rate": 8.960800454158388e-05, "loss": 0.05863572359085083, "step": 36630 }, { "epoch": 10.400227079193868, "grad_norm": 1.6967625617980957, "learning_rate": 8.960516605166052e-05, "loss": 0.07237986922264099, "step": 36640 }, { "epoch": 10.40306556911723, "grad_norm": 0.5049591064453125, "learning_rate": 8.960232756173716e-05, "loss": 0.037475234270095824, "step": 36650 }, { "epoch": 10.40590405904059, "grad_norm": 6.233887672424316, "learning_rate": 8.95994890718138e-05, "loss": 0.04201467633247376, "step": 36660 }, { "epoch": 10.408742548963952, "grad_norm": 9.84954833984375, "learning_rate": 8.959665058189044e-05, "loss": 0.035579648613929746, "step": 36670 }, { "epoch": 10.411581038887311, "grad_norm": 3.7128124237060547, "learning_rate": 8.959381209196707e-05, "loss": 0.05249812006950379, "step": 36680 }, { "epoch": 10.414419528810672, "grad_norm": 3.413320541381836, "learning_rate": 8.959097360204371e-05, "loss": 0.02419271171092987, "step": 36690 }, { "epoch": 10.417258018734033, "grad_norm": 12.171339988708496, "learning_rate": 8.958813511212035e-05, "loss": 0.04807780981063843, "step": 36700 }, { "epoch": 10.420096508657394, "grad_norm": 5.273982048034668, "learning_rate": 8.9585296622197e-05, "loss": 0.03695886433124542, "step": 36710 }, { "epoch": 10.422934998580756, "grad_norm": 6.233440399169922, "learning_rate": 8.958245813227364e-05, "loss": 0.06202154755592346, "step": 36720 }, { "epoch": 10.425773488504117, "grad_norm": 8.094334602355957, "learning_rate": 8.957961964235028e-05, "loss": 0.05624317526817322, "step": 36730 }, { "epoch": 10.428611978427476, "grad_norm": 3.4567596912384033, "learning_rate": 8.957678115242692e-05, "loss": 0.03419476449489593, "step": 36740 }, { "epoch": 10.431450468350837, "grad_norm": 10.846534729003906, "learning_rate": 8.957394266250355e-05, "loss": 0.05165817141532898, "step": 36750 }, { "epoch": 10.434288958274198, "grad_norm": 8.145037651062012, "learning_rate": 8.957110417258019e-05, "loss": 0.04305214285850525, "step": 36760 }, { "epoch": 10.43712744819756, "grad_norm": 6.392567157745361, "learning_rate": 8.956826568265683e-05, "loss": 0.029481494426727296, "step": 36770 }, { "epoch": 10.43996593812092, "grad_norm": 9.604814529418945, "learning_rate": 8.956542719273347e-05, "loss": 0.04259648323059082, "step": 36780 }, { "epoch": 10.44280442804428, "grad_norm": 6.068155288696289, "learning_rate": 8.956258870281011e-05, "loss": 0.031088033318519594, "step": 36790 }, { "epoch": 10.44564291796764, "grad_norm": 7.184687614440918, "learning_rate": 8.955975021288675e-05, "loss": 0.03532496094703674, "step": 36800 }, { "epoch": 10.448481407891002, "grad_norm": 7.030819892883301, "learning_rate": 8.955691172296338e-05, "loss": 0.06580129861831666, "step": 36810 }, { "epoch": 10.451319897814363, "grad_norm": 11.161382675170898, "learning_rate": 8.955407323304002e-05, "loss": 0.05144480466842651, "step": 36820 }, { "epoch": 10.454158387737724, "grad_norm": 13.454534530639648, "learning_rate": 8.955123474311667e-05, "loss": 0.04106142222881317, "step": 36830 }, { "epoch": 10.456996877661084, "grad_norm": 9.999848365783691, "learning_rate": 8.954839625319331e-05, "loss": 0.06666446924209594, "step": 36840 }, { "epoch": 10.459835367584445, "grad_norm": 13.241418838500977, "learning_rate": 8.954555776326995e-05, "loss": 0.04435061812400818, "step": 36850 }, { "epoch": 10.462673857507806, "grad_norm": 1.3495501279830933, "learning_rate": 8.954271927334659e-05, "loss": 0.043979603052139285, "step": 36860 }, { "epoch": 10.465512347431167, "grad_norm": 9.35275650024414, "learning_rate": 8.953988078342323e-05, "loss": 0.03838358819484711, "step": 36870 }, { "epoch": 10.468350837354528, "grad_norm": 5.111002445220947, "learning_rate": 8.953704229349986e-05, "loss": 0.04560145139694214, "step": 36880 }, { "epoch": 10.471189327277887, "grad_norm": 12.216764450073242, "learning_rate": 8.95342038035765e-05, "loss": 0.07290319204330445, "step": 36890 }, { "epoch": 10.474027817201248, "grad_norm": 11.165630340576172, "learning_rate": 8.953136531365314e-05, "loss": 0.05220510363578797, "step": 36900 }, { "epoch": 10.47686630712461, "grad_norm": 8.722389221191406, "learning_rate": 8.952852682372977e-05, "loss": 0.07873188853263854, "step": 36910 }, { "epoch": 10.47970479704797, "grad_norm": 11.220128059387207, "learning_rate": 8.952568833380642e-05, "loss": 0.03465230166912079, "step": 36920 }, { "epoch": 10.482543286971332, "grad_norm": 3.550487756729126, "learning_rate": 8.952284984388307e-05, "loss": 0.07557478547096252, "step": 36930 }, { "epoch": 10.485381776894693, "grad_norm": 3.807497262954712, "learning_rate": 8.95200113539597e-05, "loss": 0.05049041509628296, "step": 36940 }, { "epoch": 10.488220266818052, "grad_norm": 6.9589738845825195, "learning_rate": 8.951717286403633e-05, "loss": 0.05697675347328186, "step": 36950 }, { "epoch": 10.491058756741413, "grad_norm": 4.297712326049805, "learning_rate": 8.951433437411298e-05, "loss": 0.03934766054153442, "step": 36960 }, { "epoch": 10.493897246664774, "grad_norm": 1.7308541536331177, "learning_rate": 8.951149588418962e-05, "loss": 0.03359519839286804, "step": 36970 }, { "epoch": 10.496735736588136, "grad_norm": 2.608450412750244, "learning_rate": 8.950865739426626e-05, "loss": 0.04380088448524475, "step": 36980 }, { "epoch": 10.499574226511497, "grad_norm": 8.706777572631836, "learning_rate": 8.95058189043429e-05, "loss": 0.044136333465576175, "step": 36990 }, { "epoch": 10.502412716434856, "grad_norm": 3.4056553840637207, "learning_rate": 8.950298041441953e-05, "loss": 0.030855458974838258, "step": 37000 }, { "epoch": 10.502412716434856, "eval_accuracy": 0.9511667832390157, "eval_loss": 0.1493266224861145, "eval_runtime": 31.6631, "eval_samples_per_second": 496.698, "eval_steps_per_second": 7.769, "step": 37000 }, { "epoch": 10.505251206358217, "grad_norm": 11.527070045471191, "learning_rate": 8.950014192449617e-05, "loss": 0.04167925119400025, "step": 37010 }, { "epoch": 10.508089696281578, "grad_norm": 2.684386730194092, "learning_rate": 8.949730343457281e-05, "loss": 0.0481109082698822, "step": 37020 }, { "epoch": 10.51092818620494, "grad_norm": 9.902045249938965, "learning_rate": 8.949446494464945e-05, "loss": 0.05797825455665588, "step": 37030 }, { "epoch": 10.5137666761283, "grad_norm": 7.688483715057373, "learning_rate": 8.949162645472608e-05, "loss": 0.05305147171020508, "step": 37040 }, { "epoch": 10.51660516605166, "grad_norm": 5.112579345703125, "learning_rate": 8.948878796480274e-05, "loss": 0.05579475164413452, "step": 37050 }, { "epoch": 10.51944365597502, "grad_norm": 3.4778544902801514, "learning_rate": 8.948594947487938e-05, "loss": 0.052108395099639895, "step": 37060 }, { "epoch": 10.522282145898382, "grad_norm": 7.331381320953369, "learning_rate": 8.9483110984956e-05, "loss": 0.04809304475784302, "step": 37070 }, { "epoch": 10.525120635821743, "grad_norm": 7.230174541473389, "learning_rate": 8.948027249503265e-05, "loss": 0.04410632848739624, "step": 37080 }, { "epoch": 10.527959125745104, "grad_norm": 11.53337287902832, "learning_rate": 8.947743400510929e-05, "loss": 0.060776710510253906, "step": 37090 }, { "epoch": 10.530797615668465, "grad_norm": 12.495294570922852, "learning_rate": 8.947459551518593e-05, "loss": 0.0499268114566803, "step": 37100 }, { "epoch": 10.533636105591825, "grad_norm": 3.009577751159668, "learning_rate": 8.947175702526256e-05, "loss": 0.04827150106430054, "step": 37110 }, { "epoch": 10.536474595515186, "grad_norm": 10.95189094543457, "learning_rate": 8.946891853533921e-05, "loss": 0.05228626728057861, "step": 37120 }, { "epoch": 10.539313085438547, "grad_norm": 11.486112594604492, "learning_rate": 8.946608004541584e-05, "loss": 0.05806214213371277, "step": 37130 }, { "epoch": 10.542151575361908, "grad_norm": 2.045602321624756, "learning_rate": 8.946324155549248e-05, "loss": 0.04174984395503998, "step": 37140 }, { "epoch": 10.544990065285269, "grad_norm": 5.676380634307861, "learning_rate": 8.946040306556912e-05, "loss": 0.040227028727531436, "step": 37150 }, { "epoch": 10.547828555208628, "grad_norm": 5.284129619598389, "learning_rate": 8.945756457564576e-05, "loss": 0.038356566429138185, "step": 37160 }, { "epoch": 10.55066704513199, "grad_norm": 9.811860084533691, "learning_rate": 8.945472608572239e-05, "loss": 0.054844117164611815, "step": 37170 }, { "epoch": 10.55350553505535, "grad_norm": 7.599334716796875, "learning_rate": 8.945188759579905e-05, "loss": 0.05273175835609436, "step": 37180 }, { "epoch": 10.556344024978712, "grad_norm": 2.7173471450805664, "learning_rate": 8.944904910587569e-05, "loss": 0.03449954092502594, "step": 37190 }, { "epoch": 10.559182514902073, "grad_norm": 12.445903778076172, "learning_rate": 8.944621061595232e-05, "loss": 0.05224841833114624, "step": 37200 }, { "epoch": 10.562021004825432, "grad_norm": 5.940929412841797, "learning_rate": 8.944337212602896e-05, "loss": 0.04023993611335754, "step": 37210 }, { "epoch": 10.564859494748793, "grad_norm": 5.430938720703125, "learning_rate": 8.94405336361056e-05, "loss": 0.045115295052528384, "step": 37220 }, { "epoch": 10.567697984672154, "grad_norm": 14.923076629638672, "learning_rate": 8.943769514618223e-05, "loss": 0.04161341786384583, "step": 37230 }, { "epoch": 10.570536474595515, "grad_norm": 5.093540191650391, "learning_rate": 8.943485665625887e-05, "loss": 0.05185024738311768, "step": 37240 }, { "epoch": 10.573374964518877, "grad_norm": 9.676848411560059, "learning_rate": 8.943201816633552e-05, "loss": 0.07991297841072083, "step": 37250 }, { "epoch": 10.576213454442236, "grad_norm": 21.847732543945312, "learning_rate": 8.942917967641215e-05, "loss": 0.05551596283912659, "step": 37260 }, { "epoch": 10.579051944365597, "grad_norm": 10.302873611450195, "learning_rate": 8.942634118648879e-05, "loss": 0.0576478362083435, "step": 37270 }, { "epoch": 10.581890434288958, "grad_norm": 8.793778419494629, "learning_rate": 8.942350269656543e-05, "loss": 0.05904110670089722, "step": 37280 }, { "epoch": 10.58472892421232, "grad_norm": 3.1477136611938477, "learning_rate": 8.942066420664207e-05, "loss": 0.055383825302124025, "step": 37290 }, { "epoch": 10.58756741413568, "grad_norm": 17.405223846435547, "learning_rate": 8.94178257167187e-05, "loss": 0.05748081803321838, "step": 37300 }, { "epoch": 10.59040590405904, "grad_norm": 1.784849762916565, "learning_rate": 8.941498722679534e-05, "loss": 0.03512507379055023, "step": 37310 }, { "epoch": 10.5932443939824, "grad_norm": 1.0856764316558838, "learning_rate": 8.9412148736872e-05, "loss": 0.05085453987121582, "step": 37320 }, { "epoch": 10.596082883905762, "grad_norm": 7.579006195068359, "learning_rate": 8.940931024694863e-05, "loss": 0.058287763595581056, "step": 37330 }, { "epoch": 10.598921373829123, "grad_norm": 10.341796875, "learning_rate": 8.940647175702527e-05, "loss": 0.033180254697799685, "step": 37340 }, { "epoch": 10.601759863752484, "grad_norm": 13.001203536987305, "learning_rate": 8.940363326710191e-05, "loss": 0.03567807376384735, "step": 37350 }, { "epoch": 10.604598353675845, "grad_norm": 4.889222145080566, "learning_rate": 8.940079477717854e-05, "loss": 0.042106378078460696, "step": 37360 }, { "epoch": 10.607436843599205, "grad_norm": 5.18829870223999, "learning_rate": 8.939795628725518e-05, "loss": 0.057581347227096555, "step": 37370 }, { "epoch": 10.610275333522566, "grad_norm": 3.69331955909729, "learning_rate": 8.939511779733183e-05, "loss": 0.06446216702461242, "step": 37380 }, { "epoch": 10.613113823445927, "grad_norm": 2.446038246154785, "learning_rate": 8.939227930740846e-05, "loss": 0.03915868997573853, "step": 37390 }, { "epoch": 10.615952313369288, "grad_norm": 5.8372650146484375, "learning_rate": 8.93894408174851e-05, "loss": 0.03436077833175659, "step": 37400 }, { "epoch": 10.618790803292649, "grad_norm": 5.311520576477051, "learning_rate": 8.938660232756174e-05, "loss": 0.04923179149627686, "step": 37410 }, { "epoch": 10.621629293216008, "grad_norm": 10.053256034851074, "learning_rate": 8.938376383763838e-05, "loss": 0.052911609411239624, "step": 37420 }, { "epoch": 10.62446778313937, "grad_norm": 10.347527503967285, "learning_rate": 8.938092534771501e-05, "loss": 0.06862825751304627, "step": 37430 }, { "epoch": 10.62730627306273, "grad_norm": 2.2873501777648926, "learning_rate": 8.937808685779165e-05, "loss": 0.05170561671257019, "step": 37440 }, { "epoch": 10.630144762986092, "grad_norm": 3.918123245239258, "learning_rate": 8.937524836786831e-05, "loss": 0.0604759693145752, "step": 37450 }, { "epoch": 10.632983252909453, "grad_norm": 2.1962473392486572, "learning_rate": 8.937240987794494e-05, "loss": 0.08560603260993957, "step": 37460 }, { "epoch": 10.635821742832812, "grad_norm": 2.565603494644165, "learning_rate": 8.936957138802158e-05, "loss": 0.038162344694137575, "step": 37470 }, { "epoch": 10.638660232756173, "grad_norm": 17.403154373168945, "learning_rate": 8.936673289809822e-05, "loss": 0.040880581736564635, "step": 37480 }, { "epoch": 10.641498722679534, "grad_norm": 8.736148834228516, "learning_rate": 8.936389440817485e-05, "loss": 0.040731039643287656, "step": 37490 }, { "epoch": 10.644337212602895, "grad_norm": 6.361547946929932, "learning_rate": 8.936105591825149e-05, "loss": 0.036026608943939206, "step": 37500 }, { "epoch": 10.644337212602895, "eval_accuracy": 0.9541552743689197, "eval_loss": 0.13262097537517548, "eval_runtime": 31.3432, "eval_samples_per_second": 501.767, "eval_steps_per_second": 7.849, "step": 37500 }, { "epoch": 10.647175702526257, "grad_norm": 10.978135108947754, "learning_rate": 8.935821742832814e-05, "loss": 0.06716124415397644, "step": 37510 }, { "epoch": 10.650014192449618, "grad_norm": 4.695350170135498, "learning_rate": 8.935537893840477e-05, "loss": 0.05710494518280029, "step": 37520 }, { "epoch": 10.652852682372977, "grad_norm": 12.999497413635254, "learning_rate": 8.935254044848141e-05, "loss": 0.04939869046211243, "step": 37530 }, { "epoch": 10.655691172296338, "grad_norm": 4.029967308044434, "learning_rate": 8.934970195855805e-05, "loss": 0.045660588145256045, "step": 37540 }, { "epoch": 10.6585296622197, "grad_norm": 13.028033256530762, "learning_rate": 8.93468634686347e-05, "loss": 0.046709710359573366, "step": 37550 }, { "epoch": 10.66136815214306, "grad_norm": 11.557087898254395, "learning_rate": 8.934402497871132e-05, "loss": 0.07236909866333008, "step": 37560 }, { "epoch": 10.664206642066421, "grad_norm": 10.125807762145996, "learning_rate": 8.934118648878796e-05, "loss": 0.0551980197429657, "step": 37570 }, { "epoch": 10.66704513198978, "grad_norm": 14.962666511535645, "learning_rate": 8.933834799886462e-05, "loss": 0.04768535494804382, "step": 37580 }, { "epoch": 10.669883621913142, "grad_norm": 1.9633394479751587, "learning_rate": 8.933550950894125e-05, "loss": 0.03601632714271545, "step": 37590 }, { "epoch": 10.672722111836503, "grad_norm": 4.98323392868042, "learning_rate": 8.933267101901789e-05, "loss": 0.04608268141746521, "step": 37600 }, { "epoch": 10.675560601759864, "grad_norm": 11.655495643615723, "learning_rate": 8.932983252909453e-05, "loss": 0.05340247750282288, "step": 37610 }, { "epoch": 10.678399091683225, "grad_norm": 11.320283889770508, "learning_rate": 8.932699403917116e-05, "loss": 0.0428974986076355, "step": 37620 }, { "epoch": 10.681237581606585, "grad_norm": 3.8176426887512207, "learning_rate": 8.93241555492478e-05, "loss": 0.03072204887866974, "step": 37630 }, { "epoch": 10.684076071529946, "grad_norm": 15.627106666564941, "learning_rate": 8.932131705932444e-05, "loss": 0.05828758478164673, "step": 37640 }, { "epoch": 10.686914561453307, "grad_norm": 7.288362979888916, "learning_rate": 8.931847856940108e-05, "loss": 0.04542043209075928, "step": 37650 }, { "epoch": 10.689753051376668, "grad_norm": 11.484195709228516, "learning_rate": 8.931564007947772e-05, "loss": 0.0534932017326355, "step": 37660 }, { "epoch": 10.692591541300029, "grad_norm": 7.502041816711426, "learning_rate": 8.931280158955436e-05, "loss": 0.06448942422866821, "step": 37670 }, { "epoch": 10.695430031223388, "grad_norm": 7.892602920532227, "learning_rate": 8.9309963099631e-05, "loss": 0.053426170349121095, "step": 37680 }, { "epoch": 10.69826852114675, "grad_norm": 2.4152629375457764, "learning_rate": 8.930712460970763e-05, "loss": 0.043140605092048645, "step": 37690 }, { "epoch": 10.70110701107011, "grad_norm": 4.495814323425293, "learning_rate": 8.930428611978428e-05, "loss": 0.058317774534225465, "step": 37700 }, { "epoch": 10.703945500993472, "grad_norm": 7.092718601226807, "learning_rate": 8.930144762986093e-05, "loss": 0.04303047060966492, "step": 37710 }, { "epoch": 10.706783990916833, "grad_norm": 9.289896011352539, "learning_rate": 8.929860913993756e-05, "loss": 0.04771975576877594, "step": 37720 }, { "epoch": 10.709622480840192, "grad_norm": 8.999591827392578, "learning_rate": 8.92957706500142e-05, "loss": 0.0611985445022583, "step": 37730 }, { "epoch": 10.712460970763553, "grad_norm": 6.30220365524292, "learning_rate": 8.929293216009084e-05, "loss": 0.054555898904800414, "step": 37740 }, { "epoch": 10.715299460686914, "grad_norm": 7.283388614654541, "learning_rate": 8.929009367016747e-05, "loss": 0.0529313862323761, "step": 37750 }, { "epoch": 10.718137950610275, "grad_norm": 12.678055763244629, "learning_rate": 8.928725518024411e-05, "loss": 0.05128545761108398, "step": 37760 }, { "epoch": 10.720976440533637, "grad_norm": 7.386247634887695, "learning_rate": 8.928441669032075e-05, "loss": 0.03549208641052246, "step": 37770 }, { "epoch": 10.723814930456998, "grad_norm": 7.320062160491943, "learning_rate": 8.928157820039739e-05, "loss": 0.049683955311775205, "step": 37780 }, { "epoch": 10.726653420380357, "grad_norm": 7.607080936431885, "learning_rate": 8.927873971047403e-05, "loss": 0.0352347731590271, "step": 37790 }, { "epoch": 10.729491910303718, "grad_norm": 12.08772087097168, "learning_rate": 8.927590122055068e-05, "loss": 0.03941766023635864, "step": 37800 }, { "epoch": 10.73233040022708, "grad_norm": 4.185817241668701, "learning_rate": 8.927306273062732e-05, "loss": 0.0489218533039093, "step": 37810 }, { "epoch": 10.73516889015044, "grad_norm": 10.718232154846191, "learning_rate": 8.927022424070394e-05, "loss": 0.04533816874027252, "step": 37820 }, { "epoch": 10.738007380073801, "grad_norm": 11.412670135498047, "learning_rate": 8.926738575078059e-05, "loss": 0.05357496738433838, "step": 37830 }, { "epoch": 10.74084586999716, "grad_norm": 12.72150707244873, "learning_rate": 8.926454726085723e-05, "loss": 0.08674684166908264, "step": 37840 }, { "epoch": 10.743684359920522, "grad_norm": 4.283827781677246, "learning_rate": 8.926170877093387e-05, "loss": 0.0715071678161621, "step": 37850 }, { "epoch": 10.746522849843883, "grad_norm": 7.868562698364258, "learning_rate": 8.925887028101051e-05, "loss": 0.06395405530929565, "step": 37860 }, { "epoch": 10.749361339767244, "grad_norm": 2.7977356910705566, "learning_rate": 8.925603179108715e-05, "loss": 0.05910104513168335, "step": 37870 }, { "epoch": 10.752199829690605, "grad_norm": 8.621735572814941, "learning_rate": 8.925319330116378e-05, "loss": 0.06608824729919434, "step": 37880 }, { "epoch": 10.755038319613966, "grad_norm": 11.802695274353027, "learning_rate": 8.925035481124042e-05, "loss": 0.06655054688453674, "step": 37890 }, { "epoch": 10.757876809537326, "grad_norm": 0.9506636261940002, "learning_rate": 8.924751632131706e-05, "loss": 0.045469969511032104, "step": 37900 }, { "epoch": 10.760715299460687, "grad_norm": 9.88681411743164, "learning_rate": 8.92446778313937e-05, "loss": 0.04759877920150757, "step": 37910 }, { "epoch": 10.763553789384048, "grad_norm": 8.221586227416992, "learning_rate": 8.924183934147034e-05, "loss": 0.0642558753490448, "step": 37920 }, { "epoch": 10.766392279307409, "grad_norm": 4.3333234786987305, "learning_rate": 8.923900085154699e-05, "loss": 0.036686384677886964, "step": 37930 }, { "epoch": 10.76923076923077, "grad_norm": 11.051621437072754, "learning_rate": 8.923616236162361e-05, "loss": 0.037858805060386656, "step": 37940 }, { "epoch": 10.77206925915413, "grad_norm": 5.9207963943481445, "learning_rate": 8.923332387170026e-05, "loss": 0.04645982384681702, "step": 37950 }, { "epoch": 10.77490774907749, "grad_norm": 1.5580244064331055, "learning_rate": 8.92304853817769e-05, "loss": 0.041862720251083375, "step": 37960 }, { "epoch": 10.777746239000852, "grad_norm": 3.5849783420562744, "learning_rate": 8.922764689185354e-05, "loss": 0.04566018283367157, "step": 37970 }, { "epoch": 10.780584728924213, "grad_norm": 3.8744287490844727, "learning_rate": 8.922480840193018e-05, "loss": 0.045349889993667604, "step": 37980 }, { "epoch": 10.783423218847574, "grad_norm": 15.801223754882812, "learning_rate": 8.922196991200682e-05, "loss": 0.06152545213699341, "step": 37990 }, { "epoch": 10.786261708770933, "grad_norm": 4.121667385101318, "learning_rate": 8.921913142208346e-05, "loss": 0.0529982328414917, "step": 38000 }, { "epoch": 10.786261708770933, "eval_accuracy": 0.954791123545495, "eval_loss": 0.13791421055793762, "eval_runtime": 31.4972, "eval_samples_per_second": 499.314, "eval_steps_per_second": 7.81, "step": 38000 }, { "epoch": 10.789100198694294, "grad_norm": 8.214282989501953, "learning_rate": 8.921629293216009e-05, "loss": 0.05908235311508179, "step": 38010 }, { "epoch": 10.791938688617655, "grad_norm": 8.53370189666748, "learning_rate": 8.921345444223673e-05, "loss": 0.05178715586662293, "step": 38020 }, { "epoch": 10.794777178541016, "grad_norm": 13.354255676269531, "learning_rate": 8.921061595231337e-05, "loss": 0.05319744348526001, "step": 38030 }, { "epoch": 10.797615668464378, "grad_norm": 7.935073375701904, "learning_rate": 8.920777746239e-05, "loss": 0.04854918122291565, "step": 38040 }, { "epoch": 10.800454158387737, "grad_norm": 9.971450805664062, "learning_rate": 8.920493897246666e-05, "loss": 0.03835484087467193, "step": 38050 }, { "epoch": 10.803292648311098, "grad_norm": 3.2206015586853027, "learning_rate": 8.92021004825433e-05, "loss": 0.03390050232410431, "step": 38060 }, { "epoch": 10.80613113823446, "grad_norm": 5.002132415771484, "learning_rate": 8.919926199261992e-05, "loss": 0.06402709484100341, "step": 38070 }, { "epoch": 10.80896962815782, "grad_norm": 4.769713401794434, "learning_rate": 8.919642350269657e-05, "loss": 0.055002915859222415, "step": 38080 }, { "epoch": 10.811808118081181, "grad_norm": 12.758962631225586, "learning_rate": 8.919358501277321e-05, "loss": 0.04941333532333374, "step": 38090 }, { "epoch": 10.81464660800454, "grad_norm": 12.254110336303711, "learning_rate": 8.919074652284985e-05, "loss": 0.058733904361724855, "step": 38100 }, { "epoch": 10.817485097927902, "grad_norm": 10.186753273010254, "learning_rate": 8.918790803292649e-05, "loss": 0.039628738164901735, "step": 38110 }, { "epoch": 10.820323587851263, "grad_norm": 8.08736801147461, "learning_rate": 8.918506954300313e-05, "loss": 0.04123604893684387, "step": 38120 }, { "epoch": 10.823162077774624, "grad_norm": 6.497982501983643, "learning_rate": 8.918223105307977e-05, "loss": 0.06079375743865967, "step": 38130 }, { "epoch": 10.826000567697985, "grad_norm": 1.8209428787231445, "learning_rate": 8.91793925631564e-05, "loss": 0.031149780750274657, "step": 38140 }, { "epoch": 10.828839057621346, "grad_norm": 3.366511821746826, "learning_rate": 8.917655407323304e-05, "loss": 0.042412635684013364, "step": 38150 }, { "epoch": 10.831677547544706, "grad_norm": 3.608931303024292, "learning_rate": 8.917371558330968e-05, "loss": 0.045526671409606936, "step": 38160 }, { "epoch": 10.834516037468067, "grad_norm": 5.949339389801025, "learning_rate": 8.917087709338631e-05, "loss": 0.05724095106124878, "step": 38170 }, { "epoch": 10.837354527391428, "grad_norm": 6.7733049392700195, "learning_rate": 8.916803860346297e-05, "loss": 0.047005188465118405, "step": 38180 }, { "epoch": 10.840193017314789, "grad_norm": 25.709814071655273, "learning_rate": 8.916520011353961e-05, "loss": 0.08772512674331664, "step": 38190 }, { "epoch": 10.84303150723815, "grad_norm": 16.664751052856445, "learning_rate": 8.916236162361624e-05, "loss": 0.06233923435211182, "step": 38200 }, { "epoch": 10.84586999716151, "grad_norm": 5.98714017868042, "learning_rate": 8.915980698268522e-05, "loss": 0.06928134560585023, "step": 38210 }, { "epoch": 10.84870848708487, "grad_norm": 10.891114234924316, "learning_rate": 8.915696849276184e-05, "loss": 0.053616517782211305, "step": 38220 }, { "epoch": 10.851546977008232, "grad_norm": 3.9036974906921387, "learning_rate": 8.91541300028385e-05, "loss": 0.04729513525962829, "step": 38230 }, { "epoch": 10.854385466931593, "grad_norm": 13.764676094055176, "learning_rate": 8.915129151291514e-05, "loss": 0.05080227851867676, "step": 38240 }, { "epoch": 10.857223956854954, "grad_norm": 7.141643524169922, "learning_rate": 8.914845302299177e-05, "loss": 0.03850345313549042, "step": 38250 }, { "epoch": 10.860062446778313, "grad_norm": 6.551845073699951, "learning_rate": 8.914561453306841e-05, "loss": 0.044964417815208435, "step": 38260 }, { "epoch": 10.862900936701674, "grad_norm": 8.506628036499023, "learning_rate": 8.914277604314505e-05, "loss": 0.04852698743343353, "step": 38270 }, { "epoch": 10.865739426625035, "grad_norm": 4.149714469909668, "learning_rate": 8.913993755322169e-05, "loss": 0.04149608314037323, "step": 38280 }, { "epoch": 10.868577916548396, "grad_norm": 8.39784049987793, "learning_rate": 8.913709906329833e-05, "loss": 0.037533077597618106, "step": 38290 }, { "epoch": 10.871416406471758, "grad_norm": 5.319234848022461, "learning_rate": 8.913426057337497e-05, "loss": 0.038730254769325255, "step": 38300 }, { "epoch": 10.874254896395119, "grad_norm": 5.00916051864624, "learning_rate": 8.913142208345162e-05, "loss": 0.060316479206085204, "step": 38310 }, { "epoch": 10.877093386318478, "grad_norm": 9.620858192443848, "learning_rate": 8.912858359352824e-05, "loss": 0.043903243541717527, "step": 38320 }, { "epoch": 10.879931876241839, "grad_norm": 3.082885503768921, "learning_rate": 8.912574510360489e-05, "loss": 0.04673202931880951, "step": 38330 }, { "epoch": 10.8827703661652, "grad_norm": 2.890110969543457, "learning_rate": 8.912290661368153e-05, "loss": 0.04546935260295868, "step": 38340 }, { "epoch": 10.885608856088561, "grad_norm": 2.106387138366699, "learning_rate": 8.912006812375815e-05, "loss": 0.05234392285346985, "step": 38350 }, { "epoch": 10.888447346011922, "grad_norm": 10.52157974243164, "learning_rate": 8.911722963383481e-05, "loss": 0.044599348306655885, "step": 38360 }, { "epoch": 10.891285835935282, "grad_norm": 11.754111289978027, "learning_rate": 8.911439114391145e-05, "loss": 0.05273027420043945, "step": 38370 }, { "epoch": 10.894124325858643, "grad_norm": 7.142884731292725, "learning_rate": 8.911155265398808e-05, "loss": 0.05217101573944092, "step": 38380 }, { "epoch": 10.896962815782004, "grad_norm": 5.7373738288879395, "learning_rate": 8.910871416406472e-05, "loss": 0.041763636469841006, "step": 38390 }, { "epoch": 10.899801305705365, "grad_norm": 9.567590713500977, "learning_rate": 8.910587567414136e-05, "loss": 0.05150666832923889, "step": 38400 }, { "epoch": 10.902639795628726, "grad_norm": 10.401607513427734, "learning_rate": 8.9103037184218e-05, "loss": 0.050959455966949466, "step": 38410 }, { "epoch": 10.905478285552086, "grad_norm": 10.096796035766602, "learning_rate": 8.910019869429463e-05, "loss": 0.04512452483177185, "step": 38420 }, { "epoch": 10.908316775475447, "grad_norm": 4.9429931640625, "learning_rate": 8.909736020437129e-05, "loss": 0.04739703834056854, "step": 38430 }, { "epoch": 10.911155265398808, "grad_norm": 5.128699779510498, "learning_rate": 8.909452171444793e-05, "loss": 0.04005731642246246, "step": 38440 }, { "epoch": 10.913993755322169, "grad_norm": 11.468729019165039, "learning_rate": 8.909168322452455e-05, "loss": 0.05342747569084168, "step": 38450 }, { "epoch": 10.91683224524553, "grad_norm": 8.957573890686035, "learning_rate": 8.90888447346012e-05, "loss": 0.06143091320991516, "step": 38460 }, { "epoch": 10.91967073516889, "grad_norm": 10.171913146972656, "learning_rate": 8.908600624467784e-05, "loss": 0.051012343168258666, "step": 38470 }, { "epoch": 10.92250922509225, "grad_norm": 8.428825378417969, "learning_rate": 8.908316775475446e-05, "loss": 0.06785122156143189, "step": 38480 }, { "epoch": 10.925347715015612, "grad_norm": 8.689305305480957, "learning_rate": 8.908032926483112e-05, "loss": 0.059849923849105834, "step": 38490 }, { "epoch": 10.928186204938973, "grad_norm": 11.59469223022461, "learning_rate": 8.907749077490776e-05, "loss": 0.03448027074337005, "step": 38500 }, { "epoch": 10.928186204938973, "eval_accuracy": 0.9539645196159471, "eval_loss": 0.1362266093492508, "eval_runtime": 32.0371, "eval_samples_per_second": 490.899, "eval_steps_per_second": 7.679, "step": 38500 }, { "epoch": 10.931024694862334, "grad_norm": 12.533304214477539, "learning_rate": 8.907465228498439e-05, "loss": 0.06095793843269348, "step": 38510 }, { "epoch": 10.933863184785693, "grad_norm": 5.1569600105285645, "learning_rate": 8.907181379506103e-05, "loss": 0.04762200117111206, "step": 38520 }, { "epoch": 10.936701674709054, "grad_norm": 5.558366298675537, "learning_rate": 8.906897530513767e-05, "loss": 0.030347022414207458, "step": 38530 }, { "epoch": 10.939540164632415, "grad_norm": 1.4949798583984375, "learning_rate": 8.906613681521431e-05, "loss": 0.09603907465934754, "step": 38540 }, { "epoch": 10.942378654555776, "grad_norm": 4.198401927947998, "learning_rate": 8.906329832529094e-05, "loss": 0.03959104716777802, "step": 38550 }, { "epoch": 10.945217144479138, "grad_norm": 3.2333035469055176, "learning_rate": 8.90604598353676e-05, "loss": 0.06963757276535035, "step": 38560 }, { "epoch": 10.948055634402499, "grad_norm": 15.018879890441895, "learning_rate": 8.905762134544424e-05, "loss": 0.06735700368881226, "step": 38570 }, { "epoch": 10.950894124325858, "grad_norm": 1.3846895694732666, "learning_rate": 8.905478285552087e-05, "loss": 0.04106098413467407, "step": 38580 }, { "epoch": 10.953732614249219, "grad_norm": 7.6418890953063965, "learning_rate": 8.90519443655975e-05, "loss": 0.052422887086868285, "step": 38590 }, { "epoch": 10.95657110417258, "grad_norm": 7.431676387786865, "learning_rate": 8.904910587567415e-05, "loss": 0.057167887687683105, "step": 38600 }, { "epoch": 10.959409594095941, "grad_norm": 6.120459079742432, "learning_rate": 8.904626738575078e-05, "loss": 0.04599092602729797, "step": 38610 }, { "epoch": 10.962248084019302, "grad_norm": 3.009070873260498, "learning_rate": 8.904342889582742e-05, "loss": 0.03604561984539032, "step": 38620 }, { "epoch": 10.965086573942662, "grad_norm": 7.108126163482666, "learning_rate": 8.904059040590407e-05, "loss": 0.07322196960449219, "step": 38630 }, { "epoch": 10.967925063866023, "grad_norm": 6.833071708679199, "learning_rate": 8.90377519159807e-05, "loss": 0.034294241666793825, "step": 38640 }, { "epoch": 10.970763553789384, "grad_norm": 2.084627389907837, "learning_rate": 8.903491342605734e-05, "loss": 0.05035883784294128, "step": 38650 }, { "epoch": 10.973602043712745, "grad_norm": 7.055487632751465, "learning_rate": 8.903207493613398e-05, "loss": 0.030414706468582152, "step": 38660 }, { "epoch": 10.976440533636106, "grad_norm": 8.627908706665039, "learning_rate": 8.902923644621062e-05, "loss": 0.06591638922691345, "step": 38670 }, { "epoch": 10.979279023559467, "grad_norm": 4.9932403564453125, "learning_rate": 8.902639795628725e-05, "loss": 0.05395441055297852, "step": 38680 }, { "epoch": 10.982117513482827, "grad_norm": 15.169208526611328, "learning_rate": 8.90235594663639e-05, "loss": 0.05914019346237183, "step": 38690 }, { "epoch": 10.984956003406188, "grad_norm": 10.65866756439209, "learning_rate": 8.902072097644055e-05, "loss": 0.0431813508272171, "step": 38700 }, { "epoch": 10.987794493329549, "grad_norm": 20.115015029907227, "learning_rate": 8.901788248651718e-05, "loss": 0.08140392899513245, "step": 38710 }, { "epoch": 10.99063298325291, "grad_norm": 1.754321813583374, "learning_rate": 8.901504399659382e-05, "loss": 0.05766031742095947, "step": 38720 }, { "epoch": 10.993471473176271, "grad_norm": 3.1317625045776367, "learning_rate": 8.901220550667046e-05, "loss": 0.05524242520332336, "step": 38730 }, { "epoch": 10.99630996309963, "grad_norm": 3.33754301071167, "learning_rate": 8.900936701674709e-05, "loss": 0.02870774269104004, "step": 38740 }, { "epoch": 10.999148453022991, "grad_norm": 8.899383544921875, "learning_rate": 8.900652852682373e-05, "loss": 0.05789498686790466, "step": 38750 }, { "epoch": 11.001986942946353, "grad_norm": 14.599570274353027, "learning_rate": 8.900369003690038e-05, "loss": 0.06009508371353149, "step": 38760 }, { "epoch": 11.004825432869714, "grad_norm": 11.534079551696777, "learning_rate": 8.900085154697701e-05, "loss": 0.044374477863311765, "step": 38770 }, { "epoch": 11.007663922793075, "grad_norm": 6.6945576667785645, "learning_rate": 8.899801305705365e-05, "loss": 0.026109185814857484, "step": 38780 }, { "epoch": 11.010502412716434, "grad_norm": 5.227405071258545, "learning_rate": 8.89951745671303e-05, "loss": 0.02248488962650299, "step": 38790 }, { "epoch": 11.013340902639795, "grad_norm": 6.267428398132324, "learning_rate": 8.899233607720693e-05, "loss": 0.042588543891906736, "step": 38800 }, { "epoch": 11.016179392563156, "grad_norm": 10.655970573425293, "learning_rate": 8.898949758728356e-05, "loss": 0.05288074016571045, "step": 38810 }, { "epoch": 11.019017882486517, "grad_norm": 10.031039237976074, "learning_rate": 8.89866590973602e-05, "loss": 0.051262885332107544, "step": 38820 }, { "epoch": 11.021856372409879, "grad_norm": 3.097400426864624, "learning_rate": 8.898382060743685e-05, "loss": 0.05224065780639649, "step": 38830 }, { "epoch": 11.024694862333238, "grad_norm": 4.993659019470215, "learning_rate": 8.898098211751349e-05, "loss": 0.051405346393585204, "step": 38840 }, { "epoch": 11.027533352256599, "grad_norm": 6.353282928466797, "learning_rate": 8.897814362759013e-05, "loss": 0.03370220959186554, "step": 38850 }, { "epoch": 11.03037184217996, "grad_norm": 2.966917037963867, "learning_rate": 8.897530513766677e-05, "loss": 0.048281505703926086, "step": 38860 }, { "epoch": 11.033210332103321, "grad_norm": 21.087085723876953, "learning_rate": 8.89724666477434e-05, "loss": 0.041472843289375304, "step": 38870 }, { "epoch": 11.036048822026682, "grad_norm": 2.9310221672058105, "learning_rate": 8.896962815782004e-05, "loss": 0.03393231630325318, "step": 38880 }, { "epoch": 11.038887311950042, "grad_norm": 2.4367799758911133, "learning_rate": 8.89667896678967e-05, "loss": 0.0396744042634964, "step": 38890 }, { "epoch": 11.041725801873403, "grad_norm": 13.638497352600098, "learning_rate": 8.896395117797332e-05, "loss": 0.028379836678504945, "step": 38900 }, { "epoch": 11.044564291796764, "grad_norm": 2.574260711669922, "learning_rate": 8.896111268804996e-05, "loss": 0.04455780684947967, "step": 38910 }, { "epoch": 11.047402781720125, "grad_norm": 5.962393760681152, "learning_rate": 8.89582741981266e-05, "loss": 0.04642617702484131, "step": 38920 }, { "epoch": 11.050241271643486, "grad_norm": 0.777250349521637, "learning_rate": 8.895543570820323e-05, "loss": 0.033685031533241275, "step": 38930 }, { "epoch": 11.053079761566847, "grad_norm": 7.838201522827148, "learning_rate": 8.895259721827987e-05, "loss": 0.04278995990753174, "step": 38940 }, { "epoch": 11.055918251490207, "grad_norm": 2.831878185272217, "learning_rate": 8.894975872835651e-05, "loss": 0.03615584969520569, "step": 38950 }, { "epoch": 11.058756741413568, "grad_norm": 4.757225036621094, "learning_rate": 8.894692023843316e-05, "loss": 0.03551706671714783, "step": 38960 }, { "epoch": 11.061595231336929, "grad_norm": 5.683297634124756, "learning_rate": 8.89440817485098e-05, "loss": 0.039518555998802184, "step": 38970 }, { "epoch": 11.06443372126029, "grad_norm": 10.178922653198242, "learning_rate": 8.894124325858644e-05, "loss": 0.039110153913497925, "step": 38980 }, { "epoch": 11.067272211183651, "grad_norm": 1.9131416082382202, "learning_rate": 8.893840476866308e-05, "loss": 0.039188307523727414, "step": 38990 }, { "epoch": 11.07011070110701, "grad_norm": 17.690940856933594, "learning_rate": 8.893556627873971e-05, "loss": 0.059947621822357175, "step": 39000 }, { "epoch": 11.07011070110701, "eval_accuracy": 0.9514847078273033, "eval_loss": 0.14660021662712097, "eval_runtime": 31.1761, "eval_samples_per_second": 504.457, "eval_steps_per_second": 7.891, "step": 39000 }, { "epoch": 11.072949191030371, "grad_norm": 5.929471492767334, "learning_rate": 8.893272778881635e-05, "loss": 0.027347517013549805, "step": 39010 }, { "epoch": 11.075787680953733, "grad_norm": 6.718734264373779, "learning_rate": 8.892988929889299e-05, "loss": 0.056528902053833006, "step": 39020 }, { "epoch": 11.078626170877094, "grad_norm": 6.625723361968994, "learning_rate": 8.892705080896963e-05, "loss": 0.06551033854484559, "step": 39030 }, { "epoch": 11.081464660800455, "grad_norm": 3.869439125061035, "learning_rate": 8.892421231904627e-05, "loss": 0.039598909020423886, "step": 39040 }, { "epoch": 11.084303150723814, "grad_norm": 7.520358562469482, "learning_rate": 8.892137382912291e-05, "loss": 0.03328187465667724, "step": 39050 }, { "epoch": 11.087141640647175, "grad_norm": 2.2077038288116455, "learning_rate": 8.891853533919954e-05, "loss": 0.027495452761650087, "step": 39060 }, { "epoch": 11.089980130570536, "grad_norm": 5.891507625579834, "learning_rate": 8.891569684927618e-05, "loss": 0.030188310146331786, "step": 39070 }, { "epoch": 11.092818620493897, "grad_norm": 6.687758445739746, "learning_rate": 8.891285835935283e-05, "loss": 0.04934812784194946, "step": 39080 }, { "epoch": 11.095657110417259, "grad_norm": 9.199897766113281, "learning_rate": 8.891001986942947e-05, "loss": 0.042330962419509885, "step": 39090 }, { "epoch": 11.09849560034062, "grad_norm": 8.42823600769043, "learning_rate": 8.890718137950611e-05, "loss": 0.041123732924461365, "step": 39100 }, { "epoch": 11.101334090263979, "grad_norm": 7.519351959228516, "learning_rate": 8.890434288958275e-05, "loss": 0.04811866879463196, "step": 39110 }, { "epoch": 11.10417258018734, "grad_norm": 10.867979049682617, "learning_rate": 8.890150439965939e-05, "loss": 0.021816276013851166, "step": 39120 }, { "epoch": 11.107011070110701, "grad_norm": 11.573338508605957, "learning_rate": 8.889866590973602e-05, "loss": 0.0337225079536438, "step": 39130 }, { "epoch": 11.109849560034062, "grad_norm": 5.146324634552002, "learning_rate": 8.889582741981266e-05, "loss": 0.042096379399299624, "step": 39140 }, { "epoch": 11.112688049957423, "grad_norm": 12.381686210632324, "learning_rate": 8.88929889298893e-05, "loss": 0.035431957244873045, "step": 39150 }, { "epoch": 11.115526539880783, "grad_norm": 6.369025707244873, "learning_rate": 8.889015043996594e-05, "loss": 0.04335471987724304, "step": 39160 }, { "epoch": 11.118365029804144, "grad_norm": 1.6466095447540283, "learning_rate": 8.888731195004258e-05, "loss": 0.04059185683727264, "step": 39170 }, { "epoch": 11.121203519727505, "grad_norm": 5.768196105957031, "learning_rate": 8.888447346011923e-05, "loss": 0.037748593091964724, "step": 39180 }, { "epoch": 11.124042009650866, "grad_norm": 8.6279935836792, "learning_rate": 8.888163497019585e-05, "loss": 0.0365769624710083, "step": 39190 }, { "epoch": 11.126880499574227, "grad_norm": 11.250873565673828, "learning_rate": 8.88787964802725e-05, "loss": 0.04417133927345276, "step": 39200 }, { "epoch": 11.129718989497587, "grad_norm": 3.655747652053833, "learning_rate": 8.887595799034914e-05, "loss": 0.03420442938804626, "step": 39210 }, { "epoch": 11.132557479420948, "grad_norm": 1.911678433418274, "learning_rate": 8.887311950042578e-05, "loss": 0.04320805966854095, "step": 39220 }, { "epoch": 11.135395969344309, "grad_norm": 6.267022132873535, "learning_rate": 8.887028101050242e-05, "loss": 0.04816638827323914, "step": 39230 }, { "epoch": 11.13823445926767, "grad_norm": 5.782435417175293, "learning_rate": 8.886744252057906e-05, "loss": 0.04777747094631195, "step": 39240 }, { "epoch": 11.141072949191031, "grad_norm": 4.721435546875, "learning_rate": 8.88646040306557e-05, "loss": 0.03958534300327301, "step": 39250 }, { "epoch": 11.14391143911439, "grad_norm": 1.6922602653503418, "learning_rate": 8.886176554073233e-05, "loss": 0.03485212028026581, "step": 39260 }, { "epoch": 11.146749929037751, "grad_norm": 10.722707748413086, "learning_rate": 8.885892705080897e-05, "loss": 0.052032476663589476, "step": 39270 }, { "epoch": 11.149588418961113, "grad_norm": 11.775294303894043, "learning_rate": 8.885608856088561e-05, "loss": 0.028667646646499633, "step": 39280 }, { "epoch": 11.152426908884474, "grad_norm": 6.354204177856445, "learning_rate": 8.885325007096225e-05, "loss": 0.037648916244506836, "step": 39290 }, { "epoch": 11.155265398807835, "grad_norm": 13.509031295776367, "learning_rate": 8.88504115810389e-05, "loss": 0.04364720582962036, "step": 39300 }, { "epoch": 11.158103888731196, "grad_norm": 7.607676982879639, "learning_rate": 8.884757309111554e-05, "loss": 0.04972715973854065, "step": 39310 }, { "epoch": 11.160942378654555, "grad_norm": 11.344837188720703, "learning_rate": 8.884473460119216e-05, "loss": 0.05181915760040283, "step": 39320 }, { "epoch": 11.163780868577916, "grad_norm": 5.426929473876953, "learning_rate": 8.88418961112688e-05, "loss": 0.048836493492126466, "step": 39330 }, { "epoch": 11.166619358501277, "grad_norm": 1.4606432914733887, "learning_rate": 8.883905762134545e-05, "loss": 0.04444275200366974, "step": 39340 }, { "epoch": 11.169457848424639, "grad_norm": 5.54641580581665, "learning_rate": 8.883621913142209e-05, "loss": 0.04066909849643707, "step": 39350 }, { "epoch": 11.172296338348, "grad_norm": 7.7750773429870605, "learning_rate": 8.883338064149873e-05, "loss": 0.036909368634223935, "step": 39360 }, { "epoch": 11.175134828271359, "grad_norm": 9.151082992553711, "learning_rate": 8.883054215157537e-05, "loss": 0.036677679419517516, "step": 39370 }, { "epoch": 11.17797331819472, "grad_norm": 10.54290771484375, "learning_rate": 8.882770366165201e-05, "loss": 0.031220394372940063, "step": 39380 }, { "epoch": 11.180811808118081, "grad_norm": 2.6449501514434814, "learning_rate": 8.882486517172864e-05, "loss": 0.03766760528087616, "step": 39390 }, { "epoch": 11.183650298041442, "grad_norm": 6.63143253326416, "learning_rate": 8.882202668180528e-05, "loss": 0.0867281973361969, "step": 39400 }, { "epoch": 11.186488787964803, "grad_norm": 1.8736786842346191, "learning_rate": 8.881918819188192e-05, "loss": 0.04634508788585663, "step": 39410 }, { "epoch": 11.189327277888163, "grad_norm": 3.681365728378296, "learning_rate": 8.881634970195856e-05, "loss": 0.04647490978240967, "step": 39420 }, { "epoch": 11.192165767811524, "grad_norm": 8.258210182189941, "learning_rate": 8.88135112120352e-05, "loss": 0.03525558114051819, "step": 39430 }, { "epoch": 11.195004257734885, "grad_norm": 10.567083358764648, "learning_rate": 8.881067272211185e-05, "loss": 0.040261727571487424, "step": 39440 }, { "epoch": 11.197842747658246, "grad_norm": 1.8331791162490845, "learning_rate": 8.880783423218848e-05, "loss": 0.033540600538253786, "step": 39450 }, { "epoch": 11.200681237581607, "grad_norm": 2.7237627506256104, "learning_rate": 8.880499574226512e-05, "loss": 0.024020782113075255, "step": 39460 }, { "epoch": 11.203519727504966, "grad_norm": 3.1561853885650635, "learning_rate": 8.880215725234176e-05, "loss": 0.05169380307197571, "step": 39470 }, { "epoch": 11.206358217428328, "grad_norm": 17.208776473999023, "learning_rate": 8.87993187624184e-05, "loss": 0.06630215644836426, "step": 39480 }, { "epoch": 11.209196707351689, "grad_norm": 9.18853759765625, "learning_rate": 8.879648027249504e-05, "loss": 0.06524783372879028, "step": 39490 }, { "epoch": 11.21203519727505, "grad_norm": 8.582340240478516, "learning_rate": 8.879364178257168e-05, "loss": 0.03788497149944305, "step": 39500 }, { "epoch": 11.21203519727505, "eval_accuracy": 0.952756406180454, "eval_loss": 0.14106468856334686, "eval_runtime": 31.4128, "eval_samples_per_second": 500.655, "eval_steps_per_second": 7.831, "step": 39500 }, { "epoch": 11.214873687198411, "grad_norm": 2.1199471950531006, "learning_rate": 8.879080329264832e-05, "loss": 0.028016036748886107, "step": 39510 }, { "epoch": 11.217712177121772, "grad_norm": 7.91051721572876, "learning_rate": 8.878796480272495e-05, "loss": 0.03719348311424255, "step": 39520 }, { "epoch": 11.220550667045131, "grad_norm": 6.829957008361816, "learning_rate": 8.878512631280159e-05, "loss": 0.041284403204917906, "step": 39530 }, { "epoch": 11.223389156968492, "grad_norm": 6.979713439941406, "learning_rate": 8.878228782287823e-05, "loss": 0.033257138729095456, "step": 39540 }, { "epoch": 11.226227646891854, "grad_norm": 2.0938971042633057, "learning_rate": 8.877944933295486e-05, "loss": 0.04780513346195221, "step": 39550 }, { "epoch": 11.229066136815215, "grad_norm": 7.281595706939697, "learning_rate": 8.877661084303152e-05, "loss": 0.0397993266582489, "step": 39560 }, { "epoch": 11.231904626738576, "grad_norm": 3.8252663612365723, "learning_rate": 8.877377235310816e-05, "loss": 0.03965983390808105, "step": 39570 }, { "epoch": 11.234743116661935, "grad_norm": 13.741844177246094, "learning_rate": 8.877093386318479e-05, "loss": 0.03729905784130096, "step": 39580 }, { "epoch": 11.237581606585296, "grad_norm": 2.891458511352539, "learning_rate": 8.876809537326143e-05, "loss": 0.052103960514068605, "step": 39590 }, { "epoch": 11.240420096508657, "grad_norm": 5.4181671142578125, "learning_rate": 8.876525688333807e-05, "loss": 0.02109394073486328, "step": 39600 }, { "epoch": 11.243258586432018, "grad_norm": 7.526214599609375, "learning_rate": 8.876241839341471e-05, "loss": 0.02676447629928589, "step": 39610 }, { "epoch": 11.24609707635538, "grad_norm": 7.1598992347717285, "learning_rate": 8.875957990349135e-05, "loss": 0.044556647539138794, "step": 39620 }, { "epoch": 11.248935566278739, "grad_norm": 6.817886829376221, "learning_rate": 8.875674141356799e-05, "loss": 0.05024838447570801, "step": 39630 }, { "epoch": 11.2517740562021, "grad_norm": 11.896966934204102, "learning_rate": 8.875390292364463e-05, "loss": 0.03999115824699402, "step": 39640 }, { "epoch": 11.254612546125461, "grad_norm": 4.5129714012146, "learning_rate": 8.875106443372126e-05, "loss": 0.036044588685035704, "step": 39650 }, { "epoch": 11.257451036048822, "grad_norm": 4.467544078826904, "learning_rate": 8.87482259437979e-05, "loss": 0.04666455984115601, "step": 39660 }, { "epoch": 11.260289525972183, "grad_norm": 8.925542831420898, "learning_rate": 8.874538745387454e-05, "loss": 0.03652244806289673, "step": 39670 }, { "epoch": 11.263128015895543, "grad_norm": 9.0923433303833, "learning_rate": 8.874254896395117e-05, "loss": 0.03240195512771606, "step": 39680 }, { "epoch": 11.265966505818904, "grad_norm": 6.623198986053467, "learning_rate": 8.873971047402783e-05, "loss": 0.043272939324378965, "step": 39690 }, { "epoch": 11.268804995742265, "grad_norm": 2.8783748149871826, "learning_rate": 8.873687198410447e-05, "loss": 0.040272146463394165, "step": 39700 }, { "epoch": 11.271643485665626, "grad_norm": 6.466062068939209, "learning_rate": 8.87340334941811e-05, "loss": 0.03759041130542755, "step": 39710 }, { "epoch": 11.274481975588987, "grad_norm": 3.5749056339263916, "learning_rate": 8.873119500425774e-05, "loss": 0.043566498160362246, "step": 39720 }, { "epoch": 11.277320465512348, "grad_norm": 7.263559341430664, "learning_rate": 8.872835651433438e-05, "loss": 0.04249974191188812, "step": 39730 }, { "epoch": 11.280158955435708, "grad_norm": 11.27505111694336, "learning_rate": 8.872551802441102e-05, "loss": 0.037914568185806276, "step": 39740 }, { "epoch": 11.282997445359069, "grad_norm": 2.521247625350952, "learning_rate": 8.872267953448765e-05, "loss": 0.03940242528915405, "step": 39750 }, { "epoch": 11.28583593528243, "grad_norm": 1.8530833721160889, "learning_rate": 8.87198410445643e-05, "loss": 0.03865052461624145, "step": 39760 }, { "epoch": 11.288674425205791, "grad_norm": 5.247794151306152, "learning_rate": 8.871700255464093e-05, "loss": 0.036801236867904666, "step": 39770 }, { "epoch": 11.291512915129152, "grad_norm": 13.172710418701172, "learning_rate": 8.871416406471757e-05, "loss": 0.04191643297672272, "step": 39780 }, { "epoch": 11.294351405052511, "grad_norm": 3.5339529514312744, "learning_rate": 8.871132557479421e-05, "loss": 0.04157689809799194, "step": 39790 }, { "epoch": 11.297189894975872, "grad_norm": 2.7554502487182617, "learning_rate": 8.870848708487086e-05, "loss": 0.04689006805419922, "step": 39800 }, { "epoch": 11.300028384899234, "grad_norm": 11.842961311340332, "learning_rate": 8.870564859494748e-05, "loss": 0.04804868698120117, "step": 39810 }, { "epoch": 11.302866874822595, "grad_norm": 2.761676549911499, "learning_rate": 8.870281010502414e-05, "loss": 0.03384347856044769, "step": 39820 }, { "epoch": 11.305705364745956, "grad_norm": 6.327510833740234, "learning_rate": 8.869997161510078e-05, "loss": 0.042056670784950255, "step": 39830 }, { "epoch": 11.308543854669315, "grad_norm": 9.295647621154785, "learning_rate": 8.869713312517741e-05, "loss": 0.047100627422332765, "step": 39840 }, { "epoch": 11.311382344592676, "grad_norm": 5.03857421875, "learning_rate": 8.869429463525405e-05, "loss": 0.05792197585105896, "step": 39850 }, { "epoch": 11.314220834516037, "grad_norm": 5.200779438018799, "learning_rate": 8.869145614533069e-05, "loss": 0.02069302201271057, "step": 39860 }, { "epoch": 11.317059324439398, "grad_norm": 0.8214617967605591, "learning_rate": 8.868861765540732e-05, "loss": 0.04381552934646606, "step": 39870 }, { "epoch": 11.31989781436276, "grad_norm": 13.484682083129883, "learning_rate": 8.868577916548396e-05, "loss": 0.04238225817680359, "step": 39880 }, { "epoch": 11.32273630428612, "grad_norm": 11.78823184967041, "learning_rate": 8.868294067556061e-05, "loss": 0.041246157884597776, "step": 39890 }, { "epoch": 11.32557479420948, "grad_norm": 10.334183692932129, "learning_rate": 8.868010218563724e-05, "loss": 0.04436834454536438, "step": 39900 }, { "epoch": 11.328413284132841, "grad_norm": 8.74250316619873, "learning_rate": 8.867726369571388e-05, "loss": 0.04008035361766815, "step": 39910 }, { "epoch": 11.331251774056202, "grad_norm": 4.13367223739624, "learning_rate": 8.867442520579052e-05, "loss": 0.05199337601661682, "step": 39920 }, { "epoch": 11.334090263979563, "grad_norm": 17.059772491455078, "learning_rate": 8.867158671586717e-05, "loss": 0.04543254673480988, "step": 39930 }, { "epoch": 11.336928753902924, "grad_norm": 6.4024553298950195, "learning_rate": 8.86687482259438e-05, "loss": 0.039739423990249635, "step": 39940 }, { "epoch": 11.339767243826284, "grad_norm": 5.212245464324951, "learning_rate": 8.866590973602044e-05, "loss": 0.04636736512184143, "step": 39950 }, { "epoch": 11.342605733749645, "grad_norm": 17.039974212646484, "learning_rate": 8.866307124609709e-05, "loss": 0.06416319012641906, "step": 39960 }, { "epoch": 11.345444223673006, "grad_norm": 2.626824378967285, "learning_rate": 8.866023275617372e-05, "loss": 0.04492620229721069, "step": 39970 }, { "epoch": 11.348282713596367, "grad_norm": 7.594601154327393, "learning_rate": 8.865739426625036e-05, "loss": 0.028285348415374757, "step": 39980 }, { "epoch": 11.351121203519728, "grad_norm": 2.9567835330963135, "learning_rate": 8.8654555776327e-05, "loss": 0.05564141273498535, "step": 39990 }, { "epoch": 11.353959693443088, "grad_norm": 4.25455379486084, "learning_rate": 8.865171728640363e-05, "loss": 0.07398279905319213, "step": 40000 }, { "epoch": 11.353959693443088, "eval_accuracy": 0.9532015006040567, "eval_loss": 0.13763917982578278, "eval_runtime": 31.1135, "eval_samples_per_second": 505.472, "eval_steps_per_second": 7.907, "step": 40000 }, { "epoch": 11.356798183366449, "grad_norm": 4.23692512512207, "learning_rate": 8.864887879648027e-05, "loss": 0.03264350891113281, "step": 40010 }, { "epoch": 11.35963667328981, "grad_norm": 8.874746322631836, "learning_rate": 8.864604030655692e-05, "loss": 0.05038106441497803, "step": 40020 }, { "epoch": 11.36247516321317, "grad_norm": 1.852181077003479, "learning_rate": 8.864320181663355e-05, "loss": 0.030090701580047608, "step": 40030 }, { "epoch": 11.365313653136532, "grad_norm": 3.475825309753418, "learning_rate": 8.86403633267102e-05, "loss": 0.059383833408355714, "step": 40040 }, { "epoch": 11.368152143059891, "grad_norm": 8.16132926940918, "learning_rate": 8.863752483678684e-05, "loss": 0.03293721675872803, "step": 40050 }, { "epoch": 11.370990632983252, "grad_norm": 13.255269050598145, "learning_rate": 8.863468634686348e-05, "loss": 0.05026310086250305, "step": 40060 }, { "epoch": 11.373829122906614, "grad_norm": 10.392302513122559, "learning_rate": 8.86318478569401e-05, "loss": 0.04383312463760376, "step": 40070 }, { "epoch": 11.376667612829975, "grad_norm": 8.054374694824219, "learning_rate": 8.862900936701675e-05, "loss": 0.028009480237960814, "step": 40080 }, { "epoch": 11.379506102753336, "grad_norm": 7.8724541664123535, "learning_rate": 8.86261708770934e-05, "loss": 0.04316805005073547, "step": 40090 }, { "epoch": 11.382344592676697, "grad_norm": 4.537086486816406, "learning_rate": 8.862333238717003e-05, "loss": 0.030184608697891236, "step": 40100 }, { "epoch": 11.385183082600056, "grad_norm": 4.153220176696777, "learning_rate": 8.862049389724667e-05, "loss": 0.03144936561584473, "step": 40110 }, { "epoch": 11.388021572523417, "grad_norm": 2.012098550796509, "learning_rate": 8.861765540732331e-05, "loss": 0.03236268162727356, "step": 40120 }, { "epoch": 11.390860062446778, "grad_norm": 5.7497758865356445, "learning_rate": 8.861481691739994e-05, "loss": 0.02789943218231201, "step": 40130 }, { "epoch": 11.39369855237014, "grad_norm": 14.024553298950195, "learning_rate": 8.861197842747658e-05, "loss": 0.02832912802696228, "step": 40140 }, { "epoch": 11.3965370422935, "grad_norm": 8.662565231323242, "learning_rate": 8.860913993755322e-05, "loss": 0.035794562101364134, "step": 40150 }, { "epoch": 11.39937553221686, "grad_norm": 13.299927711486816, "learning_rate": 8.860630144762986e-05, "loss": 0.051793599128723146, "step": 40160 }, { "epoch": 11.402214022140221, "grad_norm": 3.974324941635132, "learning_rate": 8.86034629577065e-05, "loss": 0.03746732771396637, "step": 40170 }, { "epoch": 11.405052512063582, "grad_norm": 4.349684715270996, "learning_rate": 8.860062446778315e-05, "loss": 0.0361139714717865, "step": 40180 }, { "epoch": 11.407891001986943, "grad_norm": 6.070065021514893, "learning_rate": 8.859778597785979e-05, "loss": 0.034117507934570315, "step": 40190 }, { "epoch": 11.410729491910304, "grad_norm": 11.619451522827148, "learning_rate": 8.859494748793642e-05, "loss": 0.046603921055793765, "step": 40200 }, { "epoch": 11.413567981833664, "grad_norm": 5.385000705718994, "learning_rate": 8.859210899801306e-05, "loss": 0.04721275269985199, "step": 40210 }, { "epoch": 11.416406471757025, "grad_norm": 13.089354515075684, "learning_rate": 8.858927050808971e-05, "loss": 0.05168584585189819, "step": 40220 }, { "epoch": 11.419244961680386, "grad_norm": 12.567547798156738, "learning_rate": 8.858643201816634e-05, "loss": 0.04480060338973999, "step": 40230 }, { "epoch": 11.422083451603747, "grad_norm": 12.628218650817871, "learning_rate": 8.858359352824298e-05, "loss": 0.07853772640228271, "step": 40240 }, { "epoch": 11.424921941527108, "grad_norm": 9.429924964904785, "learning_rate": 8.858075503831962e-05, "loss": 0.0535646915435791, "step": 40250 }, { "epoch": 11.427760431450467, "grad_norm": 1.943774700164795, "learning_rate": 8.857791654839625e-05, "loss": 0.018581156432628632, "step": 40260 }, { "epoch": 11.430598921373829, "grad_norm": 5.487278938293457, "learning_rate": 8.857507805847289e-05, "loss": 0.05353713035583496, "step": 40270 }, { "epoch": 11.43343741129719, "grad_norm": 11.646852493286133, "learning_rate": 8.857223956854953e-05, "loss": 0.027602463960647583, "step": 40280 }, { "epoch": 11.43627590122055, "grad_norm": 11.33411693572998, "learning_rate": 8.856940107862617e-05, "loss": 0.04334660172462464, "step": 40290 }, { "epoch": 11.439114391143912, "grad_norm": 12.542768478393555, "learning_rate": 8.856656258870282e-05, "loss": 0.04223451018333435, "step": 40300 }, { "epoch": 11.441952881067273, "grad_norm": 6.214544296264648, "learning_rate": 8.856372409877946e-05, "loss": 0.04474785327911377, "step": 40310 }, { "epoch": 11.444791370990632, "grad_norm": 8.552118301391602, "learning_rate": 8.85608856088561e-05, "loss": 0.03829832077026367, "step": 40320 }, { "epoch": 11.447629860913993, "grad_norm": 3.506162405014038, "learning_rate": 8.855804711893273e-05, "loss": 0.03281140923500061, "step": 40330 }, { "epoch": 11.450468350837355, "grad_norm": 6.62778902053833, "learning_rate": 8.855520862900937e-05, "loss": 0.049500495195388794, "step": 40340 }, { "epoch": 11.453306840760716, "grad_norm": 5.414072036743164, "learning_rate": 8.855237013908601e-05, "loss": 0.03758763372898102, "step": 40350 }, { "epoch": 11.456145330684077, "grad_norm": 9.07391357421875, "learning_rate": 8.854953164916265e-05, "loss": 0.0444775402545929, "step": 40360 }, { "epoch": 11.458983820607436, "grad_norm": 5.526808738708496, "learning_rate": 8.854669315923929e-05, "loss": 0.03314472734928131, "step": 40370 }, { "epoch": 11.461822310530797, "grad_norm": 4.515206813812256, "learning_rate": 8.854385466931593e-05, "loss": 0.0357508659362793, "step": 40380 }, { "epoch": 11.464660800454158, "grad_norm": 17.794885635375977, "learning_rate": 8.854101617939256e-05, "loss": 0.040084469318389895, "step": 40390 }, { "epoch": 11.46749929037752, "grad_norm": 7.860543727874756, "learning_rate": 8.85381776894692e-05, "loss": 0.03683890998363495, "step": 40400 }, { "epoch": 11.47033778030088, "grad_norm": 2.1493349075317383, "learning_rate": 8.853533919954584e-05, "loss": 0.030355399847030638, "step": 40410 }, { "epoch": 11.47317627022424, "grad_norm": 2.1995296478271484, "learning_rate": 8.853250070962249e-05, "loss": 0.042979204654693605, "step": 40420 }, { "epoch": 11.476014760147601, "grad_norm": 12.870260238647461, "learning_rate": 8.852966221969913e-05, "loss": 0.053621649742126465, "step": 40430 }, { "epoch": 11.478853250070962, "grad_norm": 5.2994537353515625, "learning_rate": 8.852682372977577e-05, "loss": 0.042656701803207395, "step": 40440 }, { "epoch": 11.481691739994323, "grad_norm": 6.095134258270264, "learning_rate": 8.852398523985241e-05, "loss": 0.05943422317504883, "step": 40450 }, { "epoch": 11.484530229917684, "grad_norm": 10.917078971862793, "learning_rate": 8.852114674992904e-05, "loss": 0.04059193134307861, "step": 40460 }, { "epoch": 11.487368719841044, "grad_norm": 2.4780454635620117, "learning_rate": 8.851830826000568e-05, "loss": 0.03488376438617706, "step": 40470 }, { "epoch": 11.490207209764405, "grad_norm": 4.229562759399414, "learning_rate": 8.851546977008232e-05, "loss": 0.04805220365524292, "step": 40480 }, { "epoch": 11.493045699687766, "grad_norm": 7.648486137390137, "learning_rate": 8.851263128015896e-05, "loss": 0.037777215242385864, "step": 40490 }, { "epoch": 11.495884189611127, "grad_norm": 5.515402793884277, "learning_rate": 8.85097927902356e-05, "loss": 0.04882913529872894, "step": 40500 }, { "epoch": 11.495884189611127, "eval_accuracy": 0.9565715012399059, "eval_loss": 0.13253000378608704, "eval_runtime": 31.5222, "eval_samples_per_second": 498.918, "eval_steps_per_second": 7.804, "step": 40500 }, { "epoch": 11.498722679534488, "grad_norm": 12.569120407104492, "learning_rate": 8.850723814930457e-05, "loss": 0.06314729452133179, "step": 40510 }, { "epoch": 11.50156116945785, "grad_norm": 8.53803539276123, "learning_rate": 8.850439965938121e-05, "loss": 0.03850083947181702, "step": 40520 }, { "epoch": 11.504399659381209, "grad_norm": 5.943691730499268, "learning_rate": 8.850156116945785e-05, "loss": 0.05651187300682068, "step": 40530 }, { "epoch": 11.50723814930457, "grad_norm": 1.117270827293396, "learning_rate": 8.849872267953449e-05, "loss": 0.02911505401134491, "step": 40540 }, { "epoch": 11.51007663922793, "grad_norm": 0.5039180517196655, "learning_rate": 8.849588418961113e-05, "loss": 0.039834386110305785, "step": 40550 }, { "epoch": 11.512915129151292, "grad_norm": 1.643454909324646, "learning_rate": 8.849304569968778e-05, "loss": 0.04547125399112702, "step": 40560 }, { "epoch": 11.515753619074653, "grad_norm": 3.3448643684387207, "learning_rate": 8.84902072097644e-05, "loss": 0.03437337875366211, "step": 40570 }, { "epoch": 11.518592108998012, "grad_norm": 6.541136264801025, "learning_rate": 8.848736871984105e-05, "loss": 0.06071277260780335, "step": 40580 }, { "epoch": 11.521430598921373, "grad_norm": 7.1211466789245605, "learning_rate": 8.848453022991769e-05, "loss": 0.032107898592948915, "step": 40590 }, { "epoch": 11.524269088844735, "grad_norm": 10.989923477172852, "learning_rate": 8.848169173999433e-05, "loss": 0.0668204665184021, "step": 40600 }, { "epoch": 11.527107578768096, "grad_norm": 1.7694687843322754, "learning_rate": 8.847885325007097e-05, "loss": 0.07757980227470399, "step": 40610 }, { "epoch": 11.529946068691457, "grad_norm": 13.304360389709473, "learning_rate": 8.847601476014761e-05, "loss": 0.06403954029083252, "step": 40620 }, { "epoch": 11.532784558614816, "grad_norm": 8.41611099243164, "learning_rate": 8.847317627022424e-05, "loss": 0.07346206307411193, "step": 40630 }, { "epoch": 11.535623048538177, "grad_norm": 19.83196258544922, "learning_rate": 8.847033778030088e-05, "loss": 0.06302956342697144, "step": 40640 }, { "epoch": 11.538461538461538, "grad_norm": 1.5962491035461426, "learning_rate": 8.846749929037752e-05, "loss": 0.0424712061882019, "step": 40650 }, { "epoch": 11.5413000283849, "grad_norm": 4.162332057952881, "learning_rate": 8.846466080045416e-05, "loss": 0.05826054215431213, "step": 40660 }, { "epoch": 11.54413851830826, "grad_norm": 0.9698114991188049, "learning_rate": 8.84618223105308e-05, "loss": 0.04148352444171906, "step": 40670 }, { "epoch": 11.546977008231622, "grad_norm": 7.476339817047119, "learning_rate": 8.845898382060745e-05, "loss": 0.027741429209709168, "step": 40680 }, { "epoch": 11.549815498154981, "grad_norm": 4.600419998168945, "learning_rate": 8.845614533068409e-05, "loss": 0.02867766618728638, "step": 40690 }, { "epoch": 11.552653988078342, "grad_norm": 6.975192546844482, "learning_rate": 8.845330684076071e-05, "loss": 0.050774085521698, "step": 40700 }, { "epoch": 11.555492478001703, "grad_norm": 3.077288866043091, "learning_rate": 8.845046835083736e-05, "loss": 0.03793132603168488, "step": 40710 }, { "epoch": 11.558330967925064, "grad_norm": 12.623274803161621, "learning_rate": 8.8447629860914e-05, "loss": 0.049743345379829405, "step": 40720 }, { "epoch": 11.561169457848425, "grad_norm": 15.091360092163086, "learning_rate": 8.844479137099062e-05, "loss": 0.07584220170974731, "step": 40730 }, { "epoch": 11.564007947771785, "grad_norm": 10.169875144958496, "learning_rate": 8.844195288106728e-05, "loss": 0.04955956935882568, "step": 40740 }, { "epoch": 11.566846437695146, "grad_norm": 5.2755303382873535, "learning_rate": 8.843911439114392e-05, "loss": 0.034739372134208676, "step": 40750 }, { "epoch": 11.569684927618507, "grad_norm": 8.641897201538086, "learning_rate": 8.843627590122055e-05, "loss": 0.052517431974411014, "step": 40760 }, { "epoch": 11.572523417541868, "grad_norm": 4.512448310852051, "learning_rate": 8.843343741129719e-05, "loss": 0.061132752895355226, "step": 40770 }, { "epoch": 11.57536190746523, "grad_norm": 4.899333953857422, "learning_rate": 8.843059892137383e-05, "loss": 0.04364375174045563, "step": 40780 }, { "epoch": 11.578200397388589, "grad_norm": 10.793277740478516, "learning_rate": 8.842776043145047e-05, "loss": 0.048547714948654175, "step": 40790 }, { "epoch": 11.58103888731195, "grad_norm": 6.0333781242370605, "learning_rate": 8.842492194152711e-05, "loss": 0.032700303196907046, "step": 40800 }, { "epoch": 11.58387737723531, "grad_norm": 12.72266674041748, "learning_rate": 8.842208345160376e-05, "loss": 0.05314317345619202, "step": 40810 }, { "epoch": 11.586715867158672, "grad_norm": 1.3179000616073608, "learning_rate": 8.84192449616804e-05, "loss": 0.048350965976715087, "step": 40820 }, { "epoch": 11.589554357082033, "grad_norm": 6.280195713043213, "learning_rate": 8.841640647175703e-05, "loss": 0.04212665855884552, "step": 40830 }, { "epoch": 11.592392847005392, "grad_norm": 1.0754423141479492, "learning_rate": 8.841356798183367e-05, "loss": 0.05024091601371765, "step": 40840 }, { "epoch": 11.595231336928753, "grad_norm": 8.706792831420898, "learning_rate": 8.841072949191031e-05, "loss": 0.048531919717788696, "step": 40850 }, { "epoch": 11.598069826852115, "grad_norm": 11.477270126342773, "learning_rate": 8.840789100198694e-05, "loss": 0.0477634072303772, "step": 40860 }, { "epoch": 11.600908316775476, "grad_norm": 3.2627789974212646, "learning_rate": 8.840505251206359e-05, "loss": 0.057295024394989014, "step": 40870 }, { "epoch": 11.603746806698837, "grad_norm": 8.127674102783203, "learning_rate": 8.840221402214023e-05, "loss": 0.049380600452423096, "step": 40880 }, { "epoch": 11.606585296622196, "grad_norm": 11.47251033782959, "learning_rate": 8.839937553221686e-05, "loss": 0.052211332321166995, "step": 40890 }, { "epoch": 11.609423786545557, "grad_norm": 1.2437288761138916, "learning_rate": 8.83965370422935e-05, "loss": 0.0263483464717865, "step": 40900 }, { "epoch": 11.612262276468918, "grad_norm": 13.604060173034668, "learning_rate": 8.839369855237014e-05, "loss": 0.057996582984924314, "step": 40910 }, { "epoch": 11.61510076639228, "grad_norm": 11.502900123596191, "learning_rate": 8.839086006244678e-05, "loss": 0.03979346752166748, "step": 40920 }, { "epoch": 11.61793925631564, "grad_norm": 8.272281646728516, "learning_rate": 8.838802157252343e-05, "loss": 0.06818183660507202, "step": 40930 }, { "epoch": 11.620777746239002, "grad_norm": 12.573554039001465, "learning_rate": 8.838518308260007e-05, "loss": 0.04096316695213318, "step": 40940 }, { "epoch": 11.623616236162361, "grad_norm": 12.9218111038208, "learning_rate": 8.838234459267671e-05, "loss": 0.06615672111511231, "step": 40950 }, { "epoch": 11.626454726085722, "grad_norm": 12.730849266052246, "learning_rate": 8.837950610275334e-05, "loss": 0.06989012360572815, "step": 40960 }, { "epoch": 11.629293216009083, "grad_norm": 9.000524520874023, "learning_rate": 8.837666761282998e-05, "loss": 0.09680927991867065, "step": 40970 }, { "epoch": 11.632131705932444, "grad_norm": 5.213916778564453, "learning_rate": 8.837382912290662e-05, "loss": 0.061992645263671875, "step": 40980 }, { "epoch": 11.634970195855805, "grad_norm": 7.030336856842041, "learning_rate": 8.837099063298325e-05, "loss": 0.05134835243225098, "step": 40990 }, { "epoch": 11.637808685779165, "grad_norm": 1.3521445989608765, "learning_rate": 8.83681521430599e-05, "loss": 0.03942259848117828, "step": 41000 }, { "epoch": 11.637808685779165, "eval_accuracy": 0.9580975392636867, "eval_loss": 0.13014930486679077, "eval_runtime": 31.3658, "eval_samples_per_second": 501.407, "eval_steps_per_second": 7.843, "step": 41000 }, { "epoch": 11.640647175702526, "grad_norm": 3.281205415725708, "learning_rate": 8.836531365313654e-05, "loss": 0.03922618329524994, "step": 41010 }, { "epoch": 11.643485665625887, "grad_norm": 7.914754867553711, "learning_rate": 8.836247516321317e-05, "loss": 0.05260990858078003, "step": 41020 }, { "epoch": 11.646324155549248, "grad_norm": 3.968221664428711, "learning_rate": 8.835963667328981e-05, "loss": 0.0357495129108429, "step": 41030 }, { "epoch": 11.64916264547261, "grad_norm": 1.4710693359375, "learning_rate": 8.835679818336645e-05, "loss": 0.03923320174217224, "step": 41040 }, { "epoch": 11.65200113539597, "grad_norm": 4.021718978881836, "learning_rate": 8.83539596934431e-05, "loss": 0.03728710114955902, "step": 41050 }, { "epoch": 11.65483962531933, "grad_norm": 9.911172866821289, "learning_rate": 8.835112120351972e-05, "loss": 0.04307613372802734, "step": 41060 }, { "epoch": 11.65767811524269, "grad_norm": 3.696253538131714, "learning_rate": 8.834828271359638e-05, "loss": 0.04925021827220917, "step": 41070 }, { "epoch": 11.660516605166052, "grad_norm": 2.0492725372314453, "learning_rate": 8.834544422367302e-05, "loss": 0.041888803243637085, "step": 41080 }, { "epoch": 11.663355095089413, "grad_norm": 1.6307481527328491, "learning_rate": 8.834260573374965e-05, "loss": 0.042088651657104494, "step": 41090 }, { "epoch": 11.666193585012774, "grad_norm": 2.7641148567199707, "learning_rate": 8.833976724382629e-05, "loss": 0.06680274605751038, "step": 41100 }, { "epoch": 11.669032074936133, "grad_norm": 4.430089950561523, "learning_rate": 8.833692875390293e-05, "loss": 0.039932504296302795, "step": 41110 }, { "epoch": 11.671870564859494, "grad_norm": 5.572601318359375, "learning_rate": 8.833409026397956e-05, "loss": 0.0400811493396759, "step": 41120 }, { "epoch": 11.674709054782856, "grad_norm": 3.174543857574463, "learning_rate": 8.833125177405621e-05, "loss": 0.05859325528144836, "step": 41130 }, { "epoch": 11.677547544706217, "grad_norm": 0.4484679400920868, "learning_rate": 8.832841328413285e-05, "loss": 0.024863974750041963, "step": 41140 }, { "epoch": 11.680386034629578, "grad_norm": 8.200668334960938, "learning_rate": 8.832557479420948e-05, "loss": 0.04697154462337494, "step": 41150 }, { "epoch": 11.683224524552937, "grad_norm": 15.55133056640625, "learning_rate": 8.832273630428612e-05, "loss": 0.04557651579380036, "step": 41160 }, { "epoch": 11.686063014476298, "grad_norm": 3.048593521118164, "learning_rate": 8.831989781436276e-05, "loss": 0.04560634195804596, "step": 41170 }, { "epoch": 11.68890150439966, "grad_norm": 3.9336376190185547, "learning_rate": 8.83170593244394e-05, "loss": 0.06830593347549438, "step": 41180 }, { "epoch": 11.69173999432302, "grad_norm": 8.1607027053833, "learning_rate": 8.831422083451603e-05, "loss": 0.029866716265678404, "step": 41190 }, { "epoch": 11.694578484246382, "grad_norm": 1.0305315256118774, "learning_rate": 8.831138234459269e-05, "loss": 0.04140089750289917, "step": 41200 }, { "epoch": 11.697416974169741, "grad_norm": 2.2033443450927734, "learning_rate": 8.830854385466933e-05, "loss": 0.044012248516082764, "step": 41210 }, { "epoch": 11.700255464093102, "grad_norm": 8.77370548248291, "learning_rate": 8.830570536474596e-05, "loss": 0.045134469866752625, "step": 41220 }, { "epoch": 11.703093954016463, "grad_norm": 12.965953826904297, "learning_rate": 8.83028668748226e-05, "loss": 0.0631374478340149, "step": 41230 }, { "epoch": 11.705932443939824, "grad_norm": 2.009962797164917, "learning_rate": 8.830002838489924e-05, "loss": 0.03572629988193512, "step": 41240 }, { "epoch": 11.708770933863185, "grad_norm": 7.408824920654297, "learning_rate": 8.829718989497587e-05, "loss": 0.04673230350017547, "step": 41250 }, { "epoch": 11.711609423786545, "grad_norm": 3.6747851371765137, "learning_rate": 8.829435140505251e-05, "loss": 0.050879156589508055, "step": 41260 }, { "epoch": 11.714447913709906, "grad_norm": 4.7804059982299805, "learning_rate": 8.829151291512916e-05, "loss": 0.06859890222549439, "step": 41270 }, { "epoch": 11.717286403633267, "grad_norm": 12.197978973388672, "learning_rate": 8.828867442520579e-05, "loss": 0.06282455325126649, "step": 41280 }, { "epoch": 11.720124893556628, "grad_norm": 4.496255874633789, "learning_rate": 8.828583593528243e-05, "loss": 0.06147288680076599, "step": 41290 }, { "epoch": 11.72296338347999, "grad_norm": 6.720599174499512, "learning_rate": 8.828299744535907e-05, "loss": 0.041345641016960144, "step": 41300 }, { "epoch": 11.72580187340335, "grad_norm": 9.309507369995117, "learning_rate": 8.828015895543572e-05, "loss": 0.04281518161296845, "step": 41310 }, { "epoch": 11.72864036332671, "grad_norm": 11.473611831665039, "learning_rate": 8.827732046551234e-05, "loss": 0.04009150862693787, "step": 41320 }, { "epoch": 11.73147885325007, "grad_norm": 14.777087211608887, "learning_rate": 8.8274481975589e-05, "loss": 0.04862431287765503, "step": 41330 }, { "epoch": 11.734317343173432, "grad_norm": 5.718709468841553, "learning_rate": 8.827164348566564e-05, "loss": 0.03550182282924652, "step": 41340 }, { "epoch": 11.737155833096793, "grad_norm": 9.201991081237793, "learning_rate": 8.826880499574227e-05, "loss": 0.03538917601108551, "step": 41350 }, { "epoch": 11.739994323020154, "grad_norm": 3.1091489791870117, "learning_rate": 8.826596650581891e-05, "loss": 0.02286338061094284, "step": 41360 }, { "epoch": 11.742832812943513, "grad_norm": 5.587309837341309, "learning_rate": 8.826312801589555e-05, "loss": 0.04504280686378479, "step": 41370 }, { "epoch": 11.745671302866874, "grad_norm": 6.159370422363281, "learning_rate": 8.826028952597218e-05, "loss": 0.035818624496459964, "step": 41380 }, { "epoch": 11.748509792790236, "grad_norm": 6.3836517333984375, "learning_rate": 8.825745103604882e-05, "loss": 0.029144224524497987, "step": 41390 }, { "epoch": 11.751348282713597, "grad_norm": 3.7704317569732666, "learning_rate": 8.825461254612548e-05, "loss": 0.04859644770622253, "step": 41400 }, { "epoch": 11.754186772636958, "grad_norm": 5.243749618530273, "learning_rate": 8.82517740562021e-05, "loss": 0.05567106008529663, "step": 41410 }, { "epoch": 11.757025262560317, "grad_norm": 5.640360355377197, "learning_rate": 8.824893556627874e-05, "loss": 0.03157995641231537, "step": 41420 }, { "epoch": 11.759863752483678, "grad_norm": 5.861602306365967, "learning_rate": 8.824609707635539e-05, "loss": 0.052859604358673096, "step": 41430 }, { "epoch": 11.76270224240704, "grad_norm": 2.414142608642578, "learning_rate": 8.824325858643203e-05, "loss": 0.032310235500335696, "step": 41440 }, { "epoch": 11.7655407323304, "grad_norm": 3.531975030899048, "learning_rate": 8.824042009650865e-05, "loss": 0.04497153759002685, "step": 41450 }, { "epoch": 11.768379222253762, "grad_norm": 12.987493515014648, "learning_rate": 8.82375816065853e-05, "loss": 0.039786535501480105, "step": 41460 }, { "epoch": 11.771217712177123, "grad_norm": 5.657522678375244, "learning_rate": 8.823474311666194e-05, "loss": 0.04915342330932617, "step": 41470 }, { "epoch": 11.774056202100482, "grad_norm": 4.03167200088501, "learning_rate": 8.823190462673858e-05, "loss": 0.04671434760093689, "step": 41480 }, { "epoch": 11.776894692023843, "grad_norm": 4.126925945281982, "learning_rate": 8.822906613681522e-05, "loss": 0.041422408819198606, "step": 41490 }, { "epoch": 11.779733181947204, "grad_norm": 6.271815776824951, "learning_rate": 8.822622764689186e-05, "loss": 0.03411697447299957, "step": 41500 }, { "epoch": 11.779733181947204, "eval_accuracy": 0.9564443314045908, "eval_loss": 0.1370164006948471, "eval_runtime": 31.626, "eval_samples_per_second": 497.28, "eval_steps_per_second": 7.778, "step": 41500 }, { "epoch": 11.782571671870565, "grad_norm": 10.734105110168457, "learning_rate": 8.822338915696849e-05, "loss": 0.04735274910926819, "step": 41510 }, { "epoch": 11.785410161793926, "grad_norm": 7.924331188201904, "learning_rate": 8.822055066704513e-05, "loss": 0.03922942280769348, "step": 41520 }, { "epoch": 11.788248651717286, "grad_norm": 4.027884006500244, "learning_rate": 8.821771217712179e-05, "loss": 0.026624178886413573, "step": 41530 }, { "epoch": 11.791087141640647, "grad_norm": 8.639642715454102, "learning_rate": 8.821487368719841e-05, "loss": 0.03481380939483643, "step": 41540 }, { "epoch": 11.793925631564008, "grad_norm": 1.7185767889022827, "learning_rate": 8.821203519727506e-05, "loss": 0.03559336364269257, "step": 41550 }, { "epoch": 11.796764121487369, "grad_norm": 3.059953451156616, "learning_rate": 8.82091967073517e-05, "loss": 0.031420886516571045, "step": 41560 }, { "epoch": 11.79960261141073, "grad_norm": 11.784826278686523, "learning_rate": 8.820635821742832e-05, "loss": 0.05998876690864563, "step": 41570 }, { "epoch": 11.80244110133409, "grad_norm": 13.273045539855957, "learning_rate": 8.820351972750497e-05, "loss": 0.04332319796085358, "step": 41580 }, { "epoch": 11.80527959125745, "grad_norm": 7.318718910217285, "learning_rate": 8.820068123758161e-05, "loss": 0.06102036833763123, "step": 41590 }, { "epoch": 11.808118081180812, "grad_norm": 5.058545112609863, "learning_rate": 8.819784274765825e-05, "loss": 0.04044685959815979, "step": 41600 }, { "epoch": 11.810956571104173, "grad_norm": 7.207066059112549, "learning_rate": 8.819500425773489e-05, "loss": 0.03486702144145966, "step": 41610 }, { "epoch": 11.813795061027534, "grad_norm": 4.909163475036621, "learning_rate": 8.819216576781153e-05, "loss": 0.03199976980686188, "step": 41620 }, { "epoch": 11.816633550950893, "grad_norm": 2.056788921356201, "learning_rate": 8.818932727788817e-05, "loss": 0.03631947040557861, "step": 41630 }, { "epoch": 11.819472040874254, "grad_norm": 3.98041033744812, "learning_rate": 8.81864887879648e-05, "loss": 0.049060821533203125, "step": 41640 }, { "epoch": 11.822310530797616, "grad_norm": 3.0290110111236572, "learning_rate": 8.818365029804144e-05, "loss": 0.03193089067935943, "step": 41650 }, { "epoch": 11.825149020720977, "grad_norm": 2.7639479637145996, "learning_rate": 8.818081180811808e-05, "loss": 0.03945748507976532, "step": 41660 }, { "epoch": 11.827987510644338, "grad_norm": 12.770966529846191, "learning_rate": 8.817797331819472e-05, "loss": 0.04247860610485077, "step": 41670 }, { "epoch": 11.830826000567697, "grad_norm": 12.497370719909668, "learning_rate": 8.817513482827137e-05, "loss": 0.05721154808998108, "step": 41680 }, { "epoch": 11.833664490491058, "grad_norm": 5.532436370849609, "learning_rate": 8.817229633834801e-05, "loss": 0.04636166989803314, "step": 41690 }, { "epoch": 11.83650298041442, "grad_norm": 6.073328971862793, "learning_rate": 8.816945784842464e-05, "loss": 0.03356616199016571, "step": 41700 }, { "epoch": 11.83934147033778, "grad_norm": 4.38300085067749, "learning_rate": 8.816661935850128e-05, "loss": 0.053462475538253784, "step": 41710 }, { "epoch": 11.842179960261142, "grad_norm": 7.321256637573242, "learning_rate": 8.816378086857792e-05, "loss": 0.03292208909988403, "step": 41720 }, { "epoch": 11.845018450184503, "grad_norm": 8.405853271484375, "learning_rate": 8.816094237865456e-05, "loss": 0.03312258422374725, "step": 41730 }, { "epoch": 11.847856940107862, "grad_norm": 9.193829536437988, "learning_rate": 8.81581038887312e-05, "loss": 0.038640400767326354, "step": 41740 }, { "epoch": 11.850695430031223, "grad_norm": 6.199357032775879, "learning_rate": 8.815526539880784e-05, "loss": 0.037906485795974734, "step": 41750 }, { "epoch": 11.853533919954584, "grad_norm": 2.5867607593536377, "learning_rate": 8.815242690888448e-05, "loss": 0.04049636423587799, "step": 41760 }, { "epoch": 11.856372409877945, "grad_norm": 5.802853107452393, "learning_rate": 8.814958841896111e-05, "loss": 0.033687397837638855, "step": 41770 }, { "epoch": 11.859210899801306, "grad_norm": 10.755009651184082, "learning_rate": 8.814674992903775e-05, "loss": 0.047056376934051514, "step": 41780 }, { "epoch": 11.862049389724666, "grad_norm": 11.590856552124023, "learning_rate": 8.81439114391144e-05, "loss": 0.048236900568008424, "step": 41790 }, { "epoch": 11.864887879648027, "grad_norm": 5.509455680847168, "learning_rate": 8.814107294919104e-05, "loss": 0.04306095242500305, "step": 41800 }, { "epoch": 11.867726369571388, "grad_norm": 6.582401752471924, "learning_rate": 8.813823445926768e-05, "loss": 0.047679051756858826, "step": 41810 }, { "epoch": 11.870564859494749, "grad_norm": 9.859996795654297, "learning_rate": 8.813539596934432e-05, "loss": 0.03740607500076294, "step": 41820 }, { "epoch": 11.87340334941811, "grad_norm": 12.171548843383789, "learning_rate": 8.813255747942095e-05, "loss": 0.03052902817726135, "step": 41830 }, { "epoch": 11.876241839341471, "grad_norm": 2.2306880950927734, "learning_rate": 8.812971898949759e-05, "loss": 0.05683172345161438, "step": 41840 }, { "epoch": 11.87908032926483, "grad_norm": 12.165205001831055, "learning_rate": 8.812688049957423e-05, "loss": 0.05342947244644165, "step": 41850 }, { "epoch": 11.881918819188192, "grad_norm": 6.478231906890869, "learning_rate": 8.812404200965087e-05, "loss": 0.03707284927368164, "step": 41860 }, { "epoch": 11.884757309111553, "grad_norm": 22.78431510925293, "learning_rate": 8.812120351972751e-05, "loss": 0.05215840339660645, "step": 41870 }, { "epoch": 11.887595799034914, "grad_norm": 3.137918472290039, "learning_rate": 8.811836502980415e-05, "loss": 0.06916093230247497, "step": 41880 }, { "epoch": 11.890434288958275, "grad_norm": 8.645134925842285, "learning_rate": 8.81155265398808e-05, "loss": 0.04642211198806763, "step": 41890 }, { "epoch": 11.893272778881634, "grad_norm": 1.8387553691864014, "learning_rate": 8.811268804995742e-05, "loss": 0.0411649614572525, "step": 41900 }, { "epoch": 11.896111268804995, "grad_norm": 4.195104598999023, "learning_rate": 8.810984956003406e-05, "loss": 0.04706032574176788, "step": 41910 }, { "epoch": 11.898949758728357, "grad_norm": 4.468024253845215, "learning_rate": 8.81070110701107e-05, "loss": 0.04212497770786285, "step": 41920 }, { "epoch": 11.901788248651718, "grad_norm": 9.088099479675293, "learning_rate": 8.810417258018735e-05, "loss": 0.04539592862129212, "step": 41930 }, { "epoch": 11.904626738575079, "grad_norm": 7.755095958709717, "learning_rate": 8.810133409026399e-05, "loss": 0.040537679195404054, "step": 41940 }, { "epoch": 11.907465228498438, "grad_norm": 4.191405773162842, "learning_rate": 8.809849560034063e-05, "loss": 0.018843682110309602, "step": 41950 }, { "epoch": 11.9103037184218, "grad_norm": 1.374987006187439, "learning_rate": 8.809565711041726e-05, "loss": 0.043079504370689393, "step": 41960 }, { "epoch": 11.91314220834516, "grad_norm": 14.031201362609863, "learning_rate": 8.80928186204939e-05, "loss": 0.05393848419189453, "step": 41970 }, { "epoch": 11.915980698268521, "grad_norm": 14.838302612304688, "learning_rate": 8.808998013057054e-05, "loss": 0.050819283723831175, "step": 41980 }, { "epoch": 11.918819188191883, "grad_norm": 3.2832798957824707, "learning_rate": 8.808714164064718e-05, "loss": 0.020282982289791106, "step": 41990 }, { "epoch": 11.921657678115242, "grad_norm": 3.9569833278656006, "learning_rate": 8.808430315072382e-05, "loss": 0.017566296458244323, "step": 42000 }, { "epoch": 11.921657678115242, "eval_accuracy": 0.9522477268391938, "eval_loss": 0.14945776760578156, "eval_runtime": 31.1494, "eval_samples_per_second": 504.889, "eval_steps_per_second": 7.897, "step": 42000 }, { "epoch": 11.924496168038603, "grad_norm": 11.897820472717285, "learning_rate": 8.808146466080046e-05, "loss": 0.07478679418563842, "step": 42010 }, { "epoch": 11.927334657961964, "grad_norm": 6.896853923797607, "learning_rate": 8.80786261708771e-05, "loss": 0.0601233720779419, "step": 42020 }, { "epoch": 11.930173147885325, "grad_norm": 6.358935832977295, "learning_rate": 8.807578768095373e-05, "loss": 0.03607468008995056, "step": 42030 }, { "epoch": 11.933011637808686, "grad_norm": 2.915968656539917, "learning_rate": 8.807294919103037e-05, "loss": 0.025422203540802, "step": 42040 }, { "epoch": 11.935850127732046, "grad_norm": 22.498268127441406, "learning_rate": 8.807011070110702e-05, "loss": 0.07519700527191162, "step": 42050 }, { "epoch": 11.938688617655407, "grad_norm": 9.091548919677734, "learning_rate": 8.806727221118364e-05, "loss": 0.03408772349357605, "step": 42060 }, { "epoch": 11.941527107578768, "grad_norm": 12.65857219696045, "learning_rate": 8.80644337212603e-05, "loss": 0.06372144222259521, "step": 42070 }, { "epoch": 11.944365597502129, "grad_norm": 4.213757038116455, "learning_rate": 8.806159523133694e-05, "loss": 0.05281946063041687, "step": 42080 }, { "epoch": 11.94720408742549, "grad_norm": 4.296651840209961, "learning_rate": 8.805875674141357e-05, "loss": 0.03249916732311249, "step": 42090 }, { "epoch": 11.950042577348851, "grad_norm": 7.227084159851074, "learning_rate": 8.805591825149021e-05, "loss": 0.05526970028877258, "step": 42100 }, { "epoch": 11.95288106727221, "grad_norm": 2.970989465713501, "learning_rate": 8.805307976156685e-05, "loss": 0.08181440234184265, "step": 42110 }, { "epoch": 11.955719557195572, "grad_norm": 8.614218711853027, "learning_rate": 8.805024127164349e-05, "loss": 0.06890835165977478, "step": 42120 }, { "epoch": 11.958558047118933, "grad_norm": 11.872515678405762, "learning_rate": 8.804740278172013e-05, "loss": 0.04381870925426483, "step": 42130 }, { "epoch": 11.961396537042294, "grad_norm": 11.55391788482666, "learning_rate": 8.804456429179677e-05, "loss": 0.04326084852218628, "step": 42140 }, { "epoch": 11.964235026965655, "grad_norm": 3.883270025253296, "learning_rate": 8.804172580187342e-05, "loss": 0.05156497955322266, "step": 42150 }, { "epoch": 11.967073516889014, "grad_norm": 8.038182258605957, "learning_rate": 8.803888731195004e-05, "loss": 0.048931282758712766, "step": 42160 }, { "epoch": 11.969912006812375, "grad_norm": 11.797504425048828, "learning_rate": 8.803604882202668e-05, "loss": 0.047080081701278684, "step": 42170 }, { "epoch": 11.972750496735737, "grad_norm": 9.18109130859375, "learning_rate": 8.803321033210333e-05, "loss": 0.041204416751861574, "step": 42180 }, { "epoch": 11.975588986659098, "grad_norm": 6.056558609008789, "learning_rate": 8.803037184217995e-05, "loss": 0.059931010007858276, "step": 42190 }, { "epoch": 11.978427476582459, "grad_norm": 1.8931242227554321, "learning_rate": 8.802753335225661e-05, "loss": 0.031217482686042786, "step": 42200 }, { "epoch": 11.981265966505818, "grad_norm": 10.000553131103516, "learning_rate": 8.802469486233325e-05, "loss": 0.0313848614692688, "step": 42210 }, { "epoch": 11.98410445642918, "grad_norm": 17.2817325592041, "learning_rate": 8.802185637240988e-05, "loss": 0.03470008373260498, "step": 42220 }, { "epoch": 11.98694294635254, "grad_norm": 7.7406697273254395, "learning_rate": 8.801901788248652e-05, "loss": 0.03568956553936005, "step": 42230 }, { "epoch": 11.989781436275901, "grad_norm": 11.62606143951416, "learning_rate": 8.801617939256316e-05, "loss": 0.04242848157882691, "step": 42240 }, { "epoch": 11.992619926199263, "grad_norm": 12.246201515197754, "learning_rate": 8.80133409026398e-05, "loss": 0.04232434034347534, "step": 42250 }, { "epoch": 11.995458416122624, "grad_norm": 13.124924659729004, "learning_rate": 8.801050241271644e-05, "loss": 0.04008290469646454, "step": 42260 }, { "epoch": 11.998296906045983, "grad_norm": 4.904374599456787, "learning_rate": 8.800766392279308e-05, "loss": 0.033311459422111514, "step": 42270 }, { "epoch": 12.001135395969344, "grad_norm": 2.361665725708008, "learning_rate": 8.800482543286973e-05, "loss": 0.04348172843456268, "step": 42280 }, { "epoch": 12.003973885892705, "grad_norm": 6.2063984870910645, "learning_rate": 8.800198694294635e-05, "loss": 0.03905256688594818, "step": 42290 }, { "epoch": 12.006812375816066, "grad_norm": 15.71562671661377, "learning_rate": 8.7999148453023e-05, "loss": 0.04348371624946594, "step": 42300 }, { "epoch": 12.009650865739427, "grad_norm": 17.343069076538086, "learning_rate": 8.799630996309964e-05, "loss": 0.045898044109344484, "step": 42310 }, { "epoch": 12.012489355662787, "grad_norm": 3.102987766265869, "learning_rate": 8.799347147317626e-05, "loss": 0.035368016362190245, "step": 42320 }, { "epoch": 12.015327845586148, "grad_norm": 6.192180156707764, "learning_rate": 8.799063298325292e-05, "loss": 0.033758658170700076, "step": 42330 }, { "epoch": 12.018166335509509, "grad_norm": 3.8070037364959717, "learning_rate": 8.798779449332956e-05, "loss": 0.05028794407844543, "step": 42340 }, { "epoch": 12.02100482543287, "grad_norm": 2.517406463623047, "learning_rate": 8.798495600340619e-05, "loss": 0.03581957519054413, "step": 42350 }, { "epoch": 12.023843315356231, "grad_norm": 1.1406595706939697, "learning_rate": 8.798211751348283e-05, "loss": 0.01943010836839676, "step": 42360 }, { "epoch": 12.02668180527959, "grad_norm": 7.439162254333496, "learning_rate": 8.797927902355947e-05, "loss": 0.035764312744140624, "step": 42370 }, { "epoch": 12.029520295202952, "grad_norm": 7.404397010803223, "learning_rate": 8.797644053363611e-05, "loss": 0.03981121182441712, "step": 42380 }, { "epoch": 12.032358785126313, "grad_norm": 6.782740116119385, "learning_rate": 8.797360204371274e-05, "loss": 0.03376849591732025, "step": 42390 }, { "epoch": 12.035197275049674, "grad_norm": 9.032066345214844, "learning_rate": 8.79707635537894e-05, "loss": 0.03204431533813477, "step": 42400 }, { "epoch": 12.038035764973035, "grad_norm": 9.95860767364502, "learning_rate": 8.796792506386602e-05, "loss": 0.05169352889060974, "step": 42410 }, { "epoch": 12.040874254896394, "grad_norm": 10.233536720275879, "learning_rate": 8.796508657394266e-05, "loss": 0.03769796788692474, "step": 42420 }, { "epoch": 12.043712744819755, "grad_norm": 7.656820297241211, "learning_rate": 8.79622480840193e-05, "loss": 0.042575728893280027, "step": 42430 }, { "epoch": 12.046551234743117, "grad_norm": 0.9790419936180115, "learning_rate": 8.795940959409595e-05, "loss": 0.03717527687549591, "step": 42440 }, { "epoch": 12.049389724666478, "grad_norm": 5.362203598022461, "learning_rate": 8.795657110417258e-05, "loss": 0.023480720818042755, "step": 42450 }, { "epoch": 12.052228214589839, "grad_norm": 12.407505989074707, "learning_rate": 8.795373261424923e-05, "loss": 0.04921396374702454, "step": 42460 }, { "epoch": 12.0550667045132, "grad_norm": 4.909878730773926, "learning_rate": 8.795089412432587e-05, "loss": 0.03264098167419434, "step": 42470 }, { "epoch": 12.05790519443656, "grad_norm": 5.828683376312256, "learning_rate": 8.79480556344025e-05, "loss": 0.038746589422225954, "step": 42480 }, { "epoch": 12.06074368435992, "grad_norm": 5.357588291168213, "learning_rate": 8.794521714447914e-05, "loss": 0.03934710025787354, "step": 42490 }, { "epoch": 12.063582174283281, "grad_norm": 5.996522426605225, "learning_rate": 8.794237865455578e-05, "loss": 0.03904659152030945, "step": 42500 }, { "epoch": 12.063582174283281, "eval_accuracy": 0.9554269727220703, "eval_loss": 0.13257147371768951, "eval_runtime": 31.2505, "eval_samples_per_second": 503.256, "eval_steps_per_second": 7.872, "step": 42500 }, { "epoch": 12.066420664206642, "grad_norm": 4.982382297515869, "learning_rate": 8.793954016463241e-05, "loss": 0.03837008774280548, "step": 42510 }, { "epoch": 12.069259154130004, "grad_norm": 2.4558770656585693, "learning_rate": 8.793670167470905e-05, "loss": 0.037520408630371094, "step": 42520 }, { "epoch": 12.072097644053363, "grad_norm": 4.890630722045898, "learning_rate": 8.79338631847857e-05, "loss": 0.03202327191829681, "step": 42530 }, { "epoch": 12.074936133976724, "grad_norm": 1.131028413772583, "learning_rate": 8.793102469486233e-05, "loss": 0.020473235845565797, "step": 42540 }, { "epoch": 12.077774623900085, "grad_norm": 7.33089542388916, "learning_rate": 8.792818620493898e-05, "loss": 0.02888813614845276, "step": 42550 }, { "epoch": 12.080613113823446, "grad_norm": 8.652800559997559, "learning_rate": 8.792534771501562e-05, "loss": 0.02965686023235321, "step": 42560 }, { "epoch": 12.083451603746807, "grad_norm": 14.127680778503418, "learning_rate": 8.792250922509226e-05, "loss": 0.04270358085632324, "step": 42570 }, { "epoch": 12.086290093670167, "grad_norm": 1.539968490600586, "learning_rate": 8.791967073516889e-05, "loss": 0.037129050493240355, "step": 42580 }, { "epoch": 12.089128583593528, "grad_norm": 9.812592506408691, "learning_rate": 8.791683224524553e-05, "loss": 0.03342490792274475, "step": 42590 }, { "epoch": 12.091967073516889, "grad_norm": 7.126936912536621, "learning_rate": 8.791399375532218e-05, "loss": 0.029442226886749266, "step": 42600 }, { "epoch": 12.09480556344025, "grad_norm": 3.3653762340545654, "learning_rate": 8.791115526539881e-05, "loss": 0.029621991515159606, "step": 42610 }, { "epoch": 12.097644053363611, "grad_norm": 14.050369262695312, "learning_rate": 8.790831677547545e-05, "loss": 0.04310985803604126, "step": 42620 }, { "epoch": 12.10048254328697, "grad_norm": 6.579164981842041, "learning_rate": 8.790547828555209e-05, "loss": 0.030458983778953553, "step": 42630 }, { "epoch": 12.103321033210332, "grad_norm": 4.941464900970459, "learning_rate": 8.790263979562872e-05, "loss": 0.03447927534580231, "step": 42640 }, { "epoch": 12.106159523133693, "grad_norm": 10.615941047668457, "learning_rate": 8.789980130570536e-05, "loss": 0.0419352263212204, "step": 42650 }, { "epoch": 12.108998013057054, "grad_norm": 4.031155586242676, "learning_rate": 8.789696281578202e-05, "loss": 0.03987049460411072, "step": 42660 }, { "epoch": 12.111836502980415, "grad_norm": 1.352059245109558, "learning_rate": 8.789412432585865e-05, "loss": 0.03157951831817627, "step": 42670 }, { "epoch": 12.114674992903776, "grad_norm": 11.661965370178223, "learning_rate": 8.789128583593529e-05, "loss": 0.03795190453529358, "step": 42680 }, { "epoch": 12.117513482827135, "grad_norm": 1.8685001134872437, "learning_rate": 8.788844734601193e-05, "loss": 0.04514520764350891, "step": 42690 }, { "epoch": 12.120351972750496, "grad_norm": 5.572317600250244, "learning_rate": 8.788560885608857e-05, "loss": 0.048702746629714966, "step": 42700 }, { "epoch": 12.123190462673858, "grad_norm": 10.726156234741211, "learning_rate": 8.78827703661652e-05, "loss": 0.05781365633010864, "step": 42710 }, { "epoch": 12.126028952597219, "grad_norm": 7.369020938873291, "learning_rate": 8.787993187624184e-05, "loss": 0.05057991743087768, "step": 42720 }, { "epoch": 12.12886744252058, "grad_norm": 0.9827678203582764, "learning_rate": 8.78770933863185e-05, "loss": 0.024762935936450958, "step": 42730 }, { "epoch": 12.13170593244394, "grad_norm": 12.68351936340332, "learning_rate": 8.787425489639512e-05, "loss": 0.047663050889968875, "step": 42740 }, { "epoch": 12.1345444223673, "grad_norm": 13.245437622070312, "learning_rate": 8.787141640647176e-05, "loss": 0.04732508659362793, "step": 42750 }, { "epoch": 12.137382912290661, "grad_norm": 9.696499824523926, "learning_rate": 8.78685779165484e-05, "loss": 0.03219702243804932, "step": 42760 }, { "epoch": 12.140221402214022, "grad_norm": 6.578033924102783, "learning_rate": 8.786573942662503e-05, "loss": 0.02564353048801422, "step": 42770 }, { "epoch": 12.143059892137384, "grad_norm": 5.737947940826416, "learning_rate": 8.786290093670167e-05, "loss": 0.033503282070159915, "step": 42780 }, { "epoch": 12.145898382060743, "grad_norm": 9.456143379211426, "learning_rate": 8.786006244677831e-05, "loss": 0.03979947566986084, "step": 42790 }, { "epoch": 12.148736871984104, "grad_norm": 6.267122745513916, "learning_rate": 8.785722395685496e-05, "loss": 0.03283730149269104, "step": 42800 }, { "epoch": 12.151575361907465, "grad_norm": 7.741136074066162, "learning_rate": 8.78543854669316e-05, "loss": 0.031019583344459534, "step": 42810 }, { "epoch": 12.154413851830826, "grad_norm": 6.468270778656006, "learning_rate": 8.785154697700824e-05, "loss": 0.03639858365058899, "step": 42820 }, { "epoch": 12.157252341754187, "grad_norm": 10.36785888671875, "learning_rate": 8.784870848708488e-05, "loss": 0.037794700264930724, "step": 42830 }, { "epoch": 12.160090831677547, "grad_norm": 8.132712364196777, "learning_rate": 8.784586999716151e-05, "loss": 0.023357495665550232, "step": 42840 }, { "epoch": 12.162929321600908, "grad_norm": 1.8190107345581055, "learning_rate": 8.784303150723815e-05, "loss": 0.03351306617259979, "step": 42850 }, { "epoch": 12.165767811524269, "grad_norm": 1.7640764713287354, "learning_rate": 8.78401930173148e-05, "loss": 0.034797403216362, "step": 42860 }, { "epoch": 12.16860630144763, "grad_norm": 7.038665294647217, "learning_rate": 8.783735452739143e-05, "loss": 0.044648760557174684, "step": 42870 }, { "epoch": 12.171444791370991, "grad_norm": 8.174056053161621, "learning_rate": 8.783451603746807e-05, "loss": 0.033867499232292174, "step": 42880 }, { "epoch": 12.174283281294352, "grad_norm": 20.226119995117188, "learning_rate": 8.783167754754471e-05, "loss": 0.039821818470954895, "step": 42890 }, { "epoch": 12.177121771217712, "grad_norm": 7.061811923980713, "learning_rate": 8.782883905762134e-05, "loss": 0.030632388591766358, "step": 42900 }, { "epoch": 12.179960261141073, "grad_norm": 8.081908226013184, "learning_rate": 8.782600056769798e-05, "loss": 0.018213525414466858, "step": 42910 }, { "epoch": 12.182798751064434, "grad_norm": 3.5805306434631348, "learning_rate": 8.782316207777463e-05, "loss": 0.05772478580474853, "step": 42920 }, { "epoch": 12.185637240987795, "grad_norm": 5.696958065032959, "learning_rate": 8.782032358785127e-05, "loss": 0.022018913924694062, "step": 42930 }, { "epoch": 12.188475730911156, "grad_norm": 1.5079044103622437, "learning_rate": 8.781748509792791e-05, "loss": 0.024490922689437866, "step": 42940 }, { "epoch": 12.191314220834515, "grad_norm": 3.882221221923828, "learning_rate": 8.781464660800455e-05, "loss": 0.03649272918701172, "step": 42950 }, { "epoch": 12.194152710757876, "grad_norm": 4.7321882247924805, "learning_rate": 8.781180811808119e-05, "loss": 0.02632758319377899, "step": 42960 }, { "epoch": 12.196991200681238, "grad_norm": 6.319700241088867, "learning_rate": 8.780896962815782e-05, "loss": 0.02812599241733551, "step": 42970 }, { "epoch": 12.199829690604599, "grad_norm": 11.42015552520752, "learning_rate": 8.780613113823446e-05, "loss": 0.032302433252334596, "step": 42980 }, { "epoch": 12.20266818052796, "grad_norm": 5.789360523223877, "learning_rate": 8.78032926483111e-05, "loss": 0.024473686516284943, "step": 42990 }, { "epoch": 12.205506670451319, "grad_norm": 6.161420822143555, "learning_rate": 8.780045415838774e-05, "loss": 0.035588255524635314, "step": 43000 }, { "epoch": 12.205506670451319, "eval_accuracy": 0.9591784828638646, "eval_loss": 0.12536734342575073, "eval_runtime": 31.8222, "eval_samples_per_second": 494.215, "eval_steps_per_second": 7.73, "step": 43000 }, { "epoch": 12.20834516037468, "grad_norm": 4.745807647705078, "learning_rate": 8.779761566846438e-05, "loss": 0.02419144958257675, "step": 43010 }, { "epoch": 12.211183650298041, "grad_norm": 9.065601348876953, "learning_rate": 8.779477717854103e-05, "loss": 0.028294992446899415, "step": 43020 }, { "epoch": 12.214022140221402, "grad_norm": 0.9984960556030273, "learning_rate": 8.779193868861765e-05, "loss": 0.016124072670936584, "step": 43030 }, { "epoch": 12.216860630144764, "grad_norm": 9.727514266967773, "learning_rate": 8.77891001986943e-05, "loss": 0.028460302948951723, "step": 43040 }, { "epoch": 12.219699120068125, "grad_norm": 4.6544647216796875, "learning_rate": 8.778626170877094e-05, "loss": 0.015810102224349976, "step": 43050 }, { "epoch": 12.222537609991484, "grad_norm": 14.09741497039795, "learning_rate": 8.778342321884758e-05, "loss": 0.030832844972610473, "step": 43060 }, { "epoch": 12.225376099914845, "grad_norm": 1.9660956859588623, "learning_rate": 8.778058472892422e-05, "loss": 0.021106334030628206, "step": 43070 }, { "epoch": 12.228214589838206, "grad_norm": 6.233950138092041, "learning_rate": 8.777774623900086e-05, "loss": 0.030867934226989746, "step": 43080 }, { "epoch": 12.231053079761567, "grad_norm": 8.817118644714355, "learning_rate": 8.77749077490775e-05, "loss": 0.029362088441848753, "step": 43090 }, { "epoch": 12.233891569684928, "grad_norm": 2.01446270942688, "learning_rate": 8.777206925915413e-05, "loss": 0.0267075777053833, "step": 43100 }, { "epoch": 12.236730059608288, "grad_norm": 2.2910454273223877, "learning_rate": 8.776951461822311e-05, "loss": 0.028422459959983826, "step": 43110 }, { "epoch": 12.239568549531649, "grad_norm": 7.2257280349731445, "learning_rate": 8.776667612829975e-05, "loss": 0.05786190032958984, "step": 43120 }, { "epoch": 12.24240703945501, "grad_norm": 5.866209030151367, "learning_rate": 8.776383763837639e-05, "loss": 0.08099673986434937, "step": 43130 }, { "epoch": 12.245245529378371, "grad_norm": 4.997642993927002, "learning_rate": 8.776099914845303e-05, "loss": 0.032355567812919615, "step": 43140 }, { "epoch": 12.248084019301732, "grad_norm": 3.9322164058685303, "learning_rate": 8.775816065852966e-05, "loss": 0.028098559379577635, "step": 43150 }, { "epoch": 12.250922509225092, "grad_norm": 0.49283063411712646, "learning_rate": 8.77553221686063e-05, "loss": 0.020551927387714386, "step": 43160 }, { "epoch": 12.253760999148453, "grad_norm": 2.4942538738250732, "learning_rate": 8.775248367868294e-05, "loss": 0.03668495118618011, "step": 43170 }, { "epoch": 12.256599489071814, "grad_norm": 5.253053188323975, "learning_rate": 8.774964518875959e-05, "loss": 0.03662307262420654, "step": 43180 }, { "epoch": 12.259437978995175, "grad_norm": 5.2050275802612305, "learning_rate": 8.774680669883623e-05, "loss": 0.027386596798896788, "step": 43190 }, { "epoch": 12.262276468918536, "grad_norm": 2.7373571395874023, "learning_rate": 8.774396820891287e-05, "loss": 0.0306460440158844, "step": 43200 }, { "epoch": 12.265114958841895, "grad_norm": 4.104968070983887, "learning_rate": 8.77411297189895e-05, "loss": 0.03437112867832184, "step": 43210 }, { "epoch": 12.267953448765256, "grad_norm": 10.098012924194336, "learning_rate": 8.773829122906614e-05, "loss": 0.039316299557685855, "step": 43220 }, { "epoch": 12.270791938688618, "grad_norm": 8.756053924560547, "learning_rate": 8.773545273914278e-05, "loss": 0.048076283931732175, "step": 43230 }, { "epoch": 12.273630428611979, "grad_norm": 5.571841239929199, "learning_rate": 8.773261424921942e-05, "loss": 0.0457059919834137, "step": 43240 }, { "epoch": 12.27646891853534, "grad_norm": 6.984152793884277, "learning_rate": 8.772977575929606e-05, "loss": 0.04278313517570496, "step": 43250 }, { "epoch": 12.279307408458699, "grad_norm": 4.612057209014893, "learning_rate": 8.77269372693727e-05, "loss": 0.03646177053451538, "step": 43260 }, { "epoch": 12.28214589838206, "grad_norm": 10.539977073669434, "learning_rate": 8.772409877944934e-05, "loss": 0.028658172488212584, "step": 43270 }, { "epoch": 12.284984388305421, "grad_norm": 5.533712387084961, "learning_rate": 8.772126028952597e-05, "loss": 0.032504862546920775, "step": 43280 }, { "epoch": 12.287822878228782, "grad_norm": 3.1030781269073486, "learning_rate": 8.771842179960261e-05, "loss": 0.04522564709186554, "step": 43290 }, { "epoch": 12.290661368152143, "grad_norm": 5.694429874420166, "learning_rate": 8.771558330967925e-05, "loss": 0.027199763059616088, "step": 43300 }, { "epoch": 12.293499858075505, "grad_norm": 3.403394937515259, "learning_rate": 8.77127448197559e-05, "loss": 0.03828282952308655, "step": 43310 }, { "epoch": 12.296338347998864, "grad_norm": 4.743762493133545, "learning_rate": 8.770990632983254e-05, "loss": 0.05068618655204773, "step": 43320 }, { "epoch": 12.299176837922225, "grad_norm": 5.177332878112793, "learning_rate": 8.770706783990918e-05, "loss": 0.03021160066127777, "step": 43330 }, { "epoch": 12.302015327845586, "grad_norm": 1.2973488569259644, "learning_rate": 8.77042293499858e-05, "loss": 0.024166953563690186, "step": 43340 }, { "epoch": 12.304853817768947, "grad_norm": 10.911638259887695, "learning_rate": 8.770139086006245e-05, "loss": 0.04314410984516144, "step": 43350 }, { "epoch": 12.307692307692308, "grad_norm": 5.359420299530029, "learning_rate": 8.769855237013909e-05, "loss": 0.05115126371383667, "step": 43360 }, { "epoch": 12.310530797615668, "grad_norm": 9.825884819030762, "learning_rate": 8.769571388021573e-05, "loss": 0.037463635206222534, "step": 43370 }, { "epoch": 12.313369287539029, "grad_norm": 8.118050575256348, "learning_rate": 8.769287539029237e-05, "loss": 0.02044713944196701, "step": 43380 }, { "epoch": 12.31620777746239, "grad_norm": 5.115602016448975, "learning_rate": 8.769003690036901e-05, "loss": 0.03220890462398529, "step": 43390 }, { "epoch": 12.319046267385751, "grad_norm": 6.095489978790283, "learning_rate": 8.768719841044564e-05, "loss": 0.03606012463569641, "step": 43400 }, { "epoch": 12.321884757309112, "grad_norm": 1.2136930227279663, "learning_rate": 8.768435992052228e-05, "loss": 0.022545641660690306, "step": 43410 }, { "epoch": 12.324723247232471, "grad_norm": 3.3116722106933594, "learning_rate": 8.768152143059892e-05, "loss": 0.046520963311195374, "step": 43420 }, { "epoch": 12.327561737155833, "grad_norm": 5.2578444480896, "learning_rate": 8.767868294067557e-05, "loss": 0.02567618191242218, "step": 43430 }, { "epoch": 12.330400227079194, "grad_norm": 7.1209211349487305, "learning_rate": 8.767584445075221e-05, "loss": 0.03235772252082825, "step": 43440 }, { "epoch": 12.333238717002555, "grad_norm": 8.388750076293945, "learning_rate": 8.767300596082885e-05, "loss": 0.03658254742622376, "step": 43450 }, { "epoch": 12.336077206925916, "grad_norm": 6.390434265136719, "learning_rate": 8.767016747090549e-05, "loss": 0.04859638214111328, "step": 43460 }, { "epoch": 12.338915696849277, "grad_norm": 4.703362464904785, "learning_rate": 8.766732898098212e-05, "loss": 0.056396156549453735, "step": 43470 }, { "epoch": 12.341754186772636, "grad_norm": 6.984062671661377, "learning_rate": 8.766449049105876e-05, "loss": 0.047119522094726564, "step": 43480 }, { "epoch": 12.344592676695997, "grad_norm": 16.24080467224121, "learning_rate": 8.76616520011354e-05, "loss": 0.059197747707366945, "step": 43490 }, { "epoch": 12.347431166619359, "grad_norm": 4.961618900299072, "learning_rate": 8.765881351121203e-05, "loss": 0.043620502948760985, "step": 43500 }, { "epoch": 12.347431166619359, "eval_accuracy": 0.9575252750047689, "eval_loss": 0.12631969153881073, "eval_runtime": 32.0467, "eval_samples_per_second": 490.753, "eval_steps_per_second": 7.676, "step": 43500 }, { "epoch": 12.35026965654272, "grad_norm": 8.303855895996094, "learning_rate": 8.765597502128868e-05, "loss": 0.04292536377906799, "step": 43510 }, { "epoch": 12.35310814646608, "grad_norm": 3.528038501739502, "learning_rate": 8.765313653136532e-05, "loss": 0.025262534618377686, "step": 43520 }, { "epoch": 12.35594663638944, "grad_norm": 7.5579047203063965, "learning_rate": 8.765029804144195e-05, "loss": 0.037899512052536014, "step": 43530 }, { "epoch": 12.358785126312801, "grad_norm": 2.860135555267334, "learning_rate": 8.76474595515186e-05, "loss": 0.020976607501506806, "step": 43540 }, { "epoch": 12.361623616236162, "grad_norm": 10.304487228393555, "learning_rate": 8.764462106159523e-05, "loss": 0.021430324018001556, "step": 43550 }, { "epoch": 12.364462106159523, "grad_norm": 1.565558910369873, "learning_rate": 8.764178257167188e-05, "loss": 0.032142537832260135, "step": 43560 }, { "epoch": 12.367300596082885, "grad_norm": 3.965823173522949, "learning_rate": 8.76389440817485e-05, "loss": 0.03533589839935303, "step": 43570 }, { "epoch": 12.370139086006244, "grad_norm": 7.955080509185791, "learning_rate": 8.763610559182516e-05, "loss": 0.02849397957324982, "step": 43580 }, { "epoch": 12.372977575929605, "grad_norm": 3.1058242321014404, "learning_rate": 8.76332671019018e-05, "loss": 0.045594796538352966, "step": 43590 }, { "epoch": 12.375816065852966, "grad_norm": 9.424407958984375, "learning_rate": 8.763042861197843e-05, "loss": 0.027894610166549684, "step": 43600 }, { "epoch": 12.378654555776327, "grad_norm": 13.528703689575195, "learning_rate": 8.762759012205507e-05, "loss": 0.043479698896408084, "step": 43610 }, { "epoch": 12.381493045699688, "grad_norm": 3.8311986923217773, "learning_rate": 8.762475163213171e-05, "loss": 0.06389501690864563, "step": 43620 }, { "epoch": 12.384331535623048, "grad_norm": 5.446621417999268, "learning_rate": 8.762191314220834e-05, "loss": 0.05128917694091797, "step": 43630 }, { "epoch": 12.387170025546409, "grad_norm": 10.516417503356934, "learning_rate": 8.7619074652285e-05, "loss": 0.033826780319213864, "step": 43640 }, { "epoch": 12.39000851546977, "grad_norm": 9.974024772644043, "learning_rate": 8.761623616236164e-05, "loss": 0.027305030822753908, "step": 43650 }, { "epoch": 12.392847005393131, "grad_norm": 1.2486107349395752, "learning_rate": 8.761339767243826e-05, "loss": 0.031165832281112672, "step": 43660 }, { "epoch": 12.395685495316492, "grad_norm": 15.140544891357422, "learning_rate": 8.76105591825149e-05, "loss": 0.037294122576713565, "step": 43670 }, { "epoch": 12.398523985239853, "grad_norm": 1.334664225578308, "learning_rate": 8.760772069259155e-05, "loss": 0.03070785999298096, "step": 43680 }, { "epoch": 12.401362475163213, "grad_norm": 4.582605361938477, "learning_rate": 8.760488220266819e-05, "loss": 0.033212360739707944, "step": 43690 }, { "epoch": 12.404200965086574, "grad_norm": 4.622889995574951, "learning_rate": 8.760204371274481e-05, "loss": 0.025003960728645323, "step": 43700 }, { "epoch": 12.407039455009935, "grad_norm": 4.53710412979126, "learning_rate": 8.759920522282147e-05, "loss": 0.037190794944763184, "step": 43710 }, { "epoch": 12.409877944933296, "grad_norm": 14.40041732788086, "learning_rate": 8.759636673289811e-05, "loss": 0.03887277543544769, "step": 43720 }, { "epoch": 12.412716434856657, "grad_norm": 4.566652774810791, "learning_rate": 8.759352824297474e-05, "loss": 0.036245569586753845, "step": 43730 }, { "epoch": 12.415554924780016, "grad_norm": 1.31491219997406, "learning_rate": 8.759068975305138e-05, "loss": 0.04045813083648682, "step": 43740 }, { "epoch": 12.418393414703377, "grad_norm": 6.651790142059326, "learning_rate": 8.758785126312802e-05, "loss": 0.04301515221595764, "step": 43750 }, { "epoch": 12.421231904626739, "grad_norm": 0.6239034533500671, "learning_rate": 8.758501277320465e-05, "loss": 0.02968602180480957, "step": 43760 }, { "epoch": 12.4240703945501, "grad_norm": 14.26272964477539, "learning_rate": 8.758217428328129e-05, "loss": 0.050833702087402344, "step": 43770 }, { "epoch": 12.42690888447346, "grad_norm": 4.808349609375, "learning_rate": 8.757933579335795e-05, "loss": 0.04993874728679657, "step": 43780 }, { "epoch": 12.42974737439682, "grad_norm": 0.792005717754364, "learning_rate": 8.757649730343457e-05, "loss": 0.053781908750534055, "step": 43790 }, { "epoch": 12.432585864320181, "grad_norm": 15.564403533935547, "learning_rate": 8.757365881351122e-05, "loss": 0.031227922439575194, "step": 43800 }, { "epoch": 12.435424354243542, "grad_norm": 5.584966659545898, "learning_rate": 8.757082032358786e-05, "loss": 0.04057415425777435, "step": 43810 }, { "epoch": 12.438262844166903, "grad_norm": 10.339362144470215, "learning_rate": 8.75679818336645e-05, "loss": 0.047411096096038816, "step": 43820 }, { "epoch": 12.441101334090265, "grad_norm": 1.389992117881775, "learning_rate": 8.756514334374113e-05, "loss": 0.03012396991252899, "step": 43830 }, { "epoch": 12.443939824013626, "grad_norm": 12.567704200744629, "learning_rate": 8.756230485381778e-05, "loss": 0.036432310938835144, "step": 43840 }, { "epoch": 12.446778313936985, "grad_norm": 1.846071481704712, "learning_rate": 8.755946636389442e-05, "loss": 0.041059836745262146, "step": 43850 }, { "epoch": 12.449616803860346, "grad_norm": 1.4687913656234741, "learning_rate": 8.755662787397105e-05, "loss": 0.03178577125072479, "step": 43860 }, { "epoch": 12.452455293783707, "grad_norm": 8.927151679992676, "learning_rate": 8.755378938404769e-05, "loss": 0.05755612254142761, "step": 43870 }, { "epoch": 12.455293783707068, "grad_norm": 8.224190711975098, "learning_rate": 8.755095089412433e-05, "loss": 0.030533698201179505, "step": 43880 }, { "epoch": 12.45813227363043, "grad_norm": 3.2042739391326904, "learning_rate": 8.754811240420096e-05, "loss": 0.03339183330535889, "step": 43890 }, { "epoch": 12.460970763553789, "grad_norm": 0.938050389289856, "learning_rate": 8.75452739142776e-05, "loss": 0.03771317601203918, "step": 43900 }, { "epoch": 12.46380925347715, "grad_norm": 5.056128025054932, "learning_rate": 8.754243542435426e-05, "loss": 0.021968071162700654, "step": 43910 }, { "epoch": 12.466647743400511, "grad_norm": 2.449493169784546, "learning_rate": 8.753959693443088e-05, "loss": 0.021292699873447417, "step": 43920 }, { "epoch": 12.469486233323872, "grad_norm": 6.865640163421631, "learning_rate": 8.753675844450753e-05, "loss": 0.03274143636226654, "step": 43930 }, { "epoch": 12.472324723247233, "grad_norm": 6.458326816558838, "learning_rate": 8.753391995458417e-05, "loss": 0.02600388526916504, "step": 43940 }, { "epoch": 12.475163213170593, "grad_norm": 4.321523666381836, "learning_rate": 8.753108146466081e-05, "loss": 0.03519765734672546, "step": 43950 }, { "epoch": 12.478001703093954, "grad_norm": 3.655992269515991, "learning_rate": 8.752824297473744e-05, "loss": 0.02357748746871948, "step": 43960 }, { "epoch": 12.480840193017315, "grad_norm": 9.547161102294922, "learning_rate": 8.752540448481409e-05, "loss": 0.04760206341743469, "step": 43970 }, { "epoch": 12.483678682940676, "grad_norm": 7.882644176483154, "learning_rate": 8.752256599489073e-05, "loss": 0.06730462312698364, "step": 43980 }, { "epoch": 12.486517172864037, "grad_norm": 6.823720455169678, "learning_rate": 8.751972750496736e-05, "loss": 0.04396010637283325, "step": 43990 }, { "epoch": 12.489355662787396, "grad_norm": 0.9623273015022278, "learning_rate": 8.7516889015044e-05, "loss": 0.03538389205932617, "step": 44000 }, { "epoch": 12.489355662787396, "eval_accuracy": 0.9582882940166593, "eval_loss": 0.12650801241397858, "eval_runtime": 31.6181, "eval_samples_per_second": 497.405, "eval_steps_per_second": 7.78, "step": 44000 }, { "epoch": 12.492194152710757, "grad_norm": 14.151564598083496, "learning_rate": 8.751405052512064e-05, "loss": 0.043718361854553224, "step": 44010 }, { "epoch": 12.495032642634119, "grad_norm": 3.9960615634918213, "learning_rate": 8.751121203519727e-05, "loss": 0.04656648337841034, "step": 44020 }, { "epoch": 12.49787113255748, "grad_norm": 9.379133224487305, "learning_rate": 8.750837354527391e-05, "loss": 0.04604141414165497, "step": 44030 }, { "epoch": 12.50070962248084, "grad_norm": 10.999762535095215, "learning_rate": 8.750553505535057e-05, "loss": 0.06433385610580444, "step": 44040 }, { "epoch": 12.5035481124042, "grad_norm": 2.8640451431274414, "learning_rate": 8.75026965654272e-05, "loss": 0.05977180600166321, "step": 44050 }, { "epoch": 12.506386602327561, "grad_norm": 10.317655563354492, "learning_rate": 8.749985807550384e-05, "loss": 0.032905089855194095, "step": 44060 }, { "epoch": 12.509225092250922, "grad_norm": 7.531612873077393, "learning_rate": 8.749701958558048e-05, "loss": 0.0447359174489975, "step": 44070 }, { "epoch": 12.512063582174283, "grad_norm": 3.5741655826568604, "learning_rate": 8.749418109565712e-05, "loss": 0.0609171986579895, "step": 44080 }, { "epoch": 12.514902072097644, "grad_norm": 5.538876533508301, "learning_rate": 8.749134260573375e-05, "loss": 0.027841529250144957, "step": 44090 }, { "epoch": 12.517740562021006, "grad_norm": 2.073808431625366, "learning_rate": 8.748850411581039e-05, "loss": 0.038188064098358156, "step": 44100 }, { "epoch": 12.520579051944365, "grad_norm": 8.731266975402832, "learning_rate": 8.748566562588703e-05, "loss": 0.038576528429985046, "step": 44110 }, { "epoch": 12.523417541867726, "grad_norm": 18.402067184448242, "learning_rate": 8.748282713596367e-05, "loss": 0.043379539251327516, "step": 44120 }, { "epoch": 12.526256031791087, "grad_norm": 5.526108741760254, "learning_rate": 8.747998864604031e-05, "loss": 0.023209093511104582, "step": 44130 }, { "epoch": 12.529094521714448, "grad_norm": 11.312538146972656, "learning_rate": 8.747715015611695e-05, "loss": 0.02301383316516876, "step": 44140 }, { "epoch": 12.53193301163781, "grad_norm": 10.428009986877441, "learning_rate": 8.747431166619358e-05, "loss": 0.043978333473205566, "step": 44150 }, { "epoch": 12.534771501561169, "grad_norm": 10.228684425354004, "learning_rate": 8.747147317627022e-05, "loss": 0.035221797227859494, "step": 44160 }, { "epoch": 12.53760999148453, "grad_norm": 8.798989295959473, "learning_rate": 8.746863468634688e-05, "loss": 0.031769979000091556, "step": 44170 }, { "epoch": 12.540448481407891, "grad_norm": 6.162079334259033, "learning_rate": 8.74657961964235e-05, "loss": 0.027561104297637938, "step": 44180 }, { "epoch": 12.543286971331252, "grad_norm": 14.738848686218262, "learning_rate": 8.746295770650015e-05, "loss": 0.03986902236938476, "step": 44190 }, { "epoch": 12.546125461254613, "grad_norm": 7.156569480895996, "learning_rate": 8.746011921657679e-05, "loss": 0.04217462539672852, "step": 44200 }, { "epoch": 12.548963951177974, "grad_norm": 4.159841060638428, "learning_rate": 8.745728072665342e-05, "loss": 0.035502883791923526, "step": 44210 }, { "epoch": 12.551802441101334, "grad_norm": 7.1332316398620605, "learning_rate": 8.745444223673006e-05, "loss": 0.03097446858882904, "step": 44220 }, { "epoch": 12.554640931024695, "grad_norm": 8.165454864501953, "learning_rate": 8.74516037468067e-05, "loss": 0.04084550738334656, "step": 44230 }, { "epoch": 12.557479420948056, "grad_norm": 1.4880213737487793, "learning_rate": 8.744876525688334e-05, "loss": 0.026027166843414308, "step": 44240 }, { "epoch": 12.560317910871417, "grad_norm": 7.585758686065674, "learning_rate": 8.744592676695998e-05, "loss": 0.05606292486190796, "step": 44250 }, { "epoch": 12.563156400794778, "grad_norm": 14.649521827697754, "learning_rate": 8.744308827703662e-05, "loss": 0.03574174642562866, "step": 44260 }, { "epoch": 12.565994890718137, "grad_norm": 8.295636177062988, "learning_rate": 8.744024978711326e-05, "loss": 0.025662416219711305, "step": 44270 }, { "epoch": 12.568833380641498, "grad_norm": 6.584290981292725, "learning_rate": 8.743741129718989e-05, "loss": 0.045034149289131166, "step": 44280 }, { "epoch": 12.57167187056486, "grad_norm": 7.3886799812316895, "learning_rate": 8.743457280726653e-05, "loss": 0.039062914252281186, "step": 44290 }, { "epoch": 12.57451036048822, "grad_norm": 1.0931612253189087, "learning_rate": 8.743173431734318e-05, "loss": 0.026321253180503844, "step": 44300 }, { "epoch": 12.577348850411582, "grad_norm": 3.9734034538269043, "learning_rate": 8.742889582741982e-05, "loss": 0.030106019973754884, "step": 44310 }, { "epoch": 12.580187340334941, "grad_norm": 3.5984418392181396, "learning_rate": 8.742605733749646e-05, "loss": 0.038176172971725465, "step": 44320 }, { "epoch": 12.583025830258302, "grad_norm": 3.420219898223877, "learning_rate": 8.74232188475731e-05, "loss": 0.04225446283817291, "step": 44330 }, { "epoch": 12.585864320181663, "grad_norm": 6.645458221435547, "learning_rate": 8.742038035764973e-05, "loss": 0.04775756597518921, "step": 44340 }, { "epoch": 12.588702810105024, "grad_norm": 14.90196418762207, "learning_rate": 8.741754186772637e-05, "loss": 0.03470243215560913, "step": 44350 }, { "epoch": 12.591541300028386, "grad_norm": 6.199538230895996, "learning_rate": 8.741470337780301e-05, "loss": 0.028724318742752074, "step": 44360 }, { "epoch": 12.594379789951745, "grad_norm": 11.46839714050293, "learning_rate": 8.741186488787965e-05, "loss": 0.028799358010292053, "step": 44370 }, { "epoch": 12.597218279875106, "grad_norm": 6.085451126098633, "learning_rate": 8.740902639795629e-05, "loss": 0.044294270873069766, "step": 44380 }, { "epoch": 12.600056769798467, "grad_norm": 9.302290916442871, "learning_rate": 8.740618790803293e-05, "loss": 0.04389075338840485, "step": 44390 }, { "epoch": 12.602895259721828, "grad_norm": 9.839166641235352, "learning_rate": 8.740334941810958e-05, "loss": 0.04611421227455139, "step": 44400 }, { "epoch": 12.60573374964519, "grad_norm": 2.907285213470459, "learning_rate": 8.74005109281862e-05, "loss": 0.02291063964366913, "step": 44410 }, { "epoch": 12.608572239568549, "grad_norm": 15.603127479553223, "learning_rate": 8.739767243826284e-05, "loss": 0.050103431940078734, "step": 44420 }, { "epoch": 12.61141072949191, "grad_norm": 7.325162887573242, "learning_rate": 8.739483394833949e-05, "loss": 0.04359988272190094, "step": 44430 }, { "epoch": 12.61424921941527, "grad_norm": 5.135088920593262, "learning_rate": 8.739199545841613e-05, "loss": 0.02586413323879242, "step": 44440 }, { "epoch": 12.617087709338632, "grad_norm": 6.717268466949463, "learning_rate": 8.738915696849277e-05, "loss": 0.025001877546310426, "step": 44450 }, { "epoch": 12.619926199261993, "grad_norm": 5.10106897354126, "learning_rate": 8.738631847856941e-05, "loss": 0.033975788950920106, "step": 44460 }, { "epoch": 12.622764689185354, "grad_norm": 4.140856742858887, "learning_rate": 8.738347998864604e-05, "loss": 0.033056697249412535, "step": 44470 }, { "epoch": 12.625603179108714, "grad_norm": 6.44667911529541, "learning_rate": 8.738064149872268e-05, "loss": 0.036567941308021545, "step": 44480 }, { "epoch": 12.628441669032075, "grad_norm": 8.409709930419922, "learning_rate": 8.737780300879932e-05, "loss": 0.04436020851135254, "step": 44490 }, { "epoch": 12.631280158955436, "grad_norm": 7.989817142486572, "learning_rate": 8.737496451887596e-05, "loss": 0.05788024663925171, "step": 44500 }, { "epoch": 12.631280158955436, "eval_accuracy": 0.9514847078273033, "eval_loss": 0.15090706944465637, "eval_runtime": 31.5217, "eval_samples_per_second": 498.927, "eval_steps_per_second": 7.804, "step": 44500 }, { "epoch": 12.634118648878797, "grad_norm": 5.888620376586914, "learning_rate": 8.73721260289526e-05, "loss": 0.033754372596740724, "step": 44510 }, { "epoch": 12.636957138802158, "grad_norm": 13.206843376159668, "learning_rate": 8.736928753902924e-05, "loss": 0.051671010255813596, "step": 44520 }, { "epoch": 12.639795628725517, "grad_norm": 17.667720794677734, "learning_rate": 8.736644904910589e-05, "loss": 0.0664360761642456, "step": 44530 }, { "epoch": 12.642634118648878, "grad_norm": 5.94382905960083, "learning_rate": 8.736361055918251e-05, "loss": 0.046749609708786014, "step": 44540 }, { "epoch": 12.64547260857224, "grad_norm": 3.479749917984009, "learning_rate": 8.736077206925916e-05, "loss": 0.06297379732131958, "step": 44550 }, { "epoch": 12.6483110984956, "grad_norm": 2.689229726791382, "learning_rate": 8.73579335793358e-05, "loss": 0.05441311001777649, "step": 44560 }, { "epoch": 12.651149588418962, "grad_norm": 8.93832015991211, "learning_rate": 8.735509508941244e-05, "loss": 0.07238125801086426, "step": 44570 }, { "epoch": 12.653988078342321, "grad_norm": 1.7631980180740356, "learning_rate": 8.735225659948908e-05, "loss": 0.0652324378490448, "step": 44580 }, { "epoch": 12.656826568265682, "grad_norm": 12.822599411010742, "learning_rate": 8.734941810956572e-05, "loss": 0.062204664945602416, "step": 44590 }, { "epoch": 12.659665058189043, "grad_norm": 10.897828102111816, "learning_rate": 8.734657961964235e-05, "loss": 0.0740848183631897, "step": 44600 }, { "epoch": 12.662503548112404, "grad_norm": 10.319339752197266, "learning_rate": 8.734374112971899e-05, "loss": 0.04726337492465973, "step": 44610 }, { "epoch": 12.665342038035766, "grad_norm": 13.167956352233887, "learning_rate": 8.734090263979563e-05, "loss": 0.052084213495254515, "step": 44620 }, { "epoch": 12.668180527959127, "grad_norm": 12.470789909362793, "learning_rate": 8.733806414987227e-05, "loss": 0.03885383605957031, "step": 44630 }, { "epoch": 12.671019017882486, "grad_norm": 3.0231542587280273, "learning_rate": 8.733522565994891e-05, "loss": 0.02193387597799301, "step": 44640 }, { "epoch": 12.673857507805847, "grad_norm": 6.86521577835083, "learning_rate": 8.733238717002556e-05, "loss": 0.04313365817070007, "step": 44650 }, { "epoch": 12.676695997729208, "grad_norm": 6.823424339294434, "learning_rate": 8.73295486801022e-05, "loss": 0.04115325808525085, "step": 44660 }, { "epoch": 12.67953448765257, "grad_norm": 5.839014530181885, "learning_rate": 8.732671019017882e-05, "loss": 0.028002548217773437, "step": 44670 }, { "epoch": 12.68237297757593, "grad_norm": 3.50873064994812, "learning_rate": 8.732387170025547e-05, "loss": 0.04054447710514068, "step": 44680 }, { "epoch": 12.68521146749929, "grad_norm": 10.432472229003906, "learning_rate": 8.732103321033211e-05, "loss": 0.05989189743995667, "step": 44690 }, { "epoch": 12.68804995742265, "grad_norm": 3.8197381496429443, "learning_rate": 8.731819472040874e-05, "loss": 0.07042637467384338, "step": 44700 }, { "epoch": 12.690888447346012, "grad_norm": 18.913007736206055, "learning_rate": 8.731535623048539e-05, "loss": 0.03881549537181854, "step": 44710 }, { "epoch": 12.693726937269373, "grad_norm": 7.585214138031006, "learning_rate": 8.731251774056203e-05, "loss": 0.03703373670578003, "step": 44720 }, { "epoch": 12.696565427192734, "grad_norm": 10.077887535095215, "learning_rate": 8.730967925063866e-05, "loss": 0.04344882369041443, "step": 44730 }, { "epoch": 12.699403917116094, "grad_norm": 4.145294666290283, "learning_rate": 8.73068407607153e-05, "loss": 0.03734053671360016, "step": 44740 }, { "epoch": 12.702242407039455, "grad_norm": 13.83118724822998, "learning_rate": 8.730400227079194e-05, "loss": 0.03142139911651611, "step": 44750 }, { "epoch": 12.705080896962816, "grad_norm": 7.415091514587402, "learning_rate": 8.730116378086858e-05, "loss": 0.04637177884578705, "step": 44760 }, { "epoch": 12.707919386886177, "grad_norm": 2.674288511276245, "learning_rate": 8.729832529094523e-05, "loss": 0.036681899428367616, "step": 44770 }, { "epoch": 12.710757876809538, "grad_norm": 10.82300853729248, "learning_rate": 8.729548680102187e-05, "loss": 0.04347237348556519, "step": 44780 }, { "epoch": 12.713596366732897, "grad_norm": 2.6379778385162354, "learning_rate": 8.729264831109851e-05, "loss": 0.03218862116336822, "step": 44790 }, { "epoch": 12.716434856656258, "grad_norm": 2.0847623348236084, "learning_rate": 8.728980982117514e-05, "loss": 0.026503151655197142, "step": 44800 }, { "epoch": 12.71927334657962, "grad_norm": 10.228126525878906, "learning_rate": 8.728697133125178e-05, "loss": 0.03470951318740845, "step": 44810 }, { "epoch": 12.72211183650298, "grad_norm": 2.4183998107910156, "learning_rate": 8.728413284132842e-05, "loss": 0.04810204803943634, "step": 44820 }, { "epoch": 12.724950326426342, "grad_norm": 4.971852779388428, "learning_rate": 8.728129435140505e-05, "loss": 0.027870452404022215, "step": 44830 }, { "epoch": 12.727788816349701, "grad_norm": 2.172386884689331, "learning_rate": 8.72784558614817e-05, "loss": 0.052520227432250974, "step": 44840 }, { "epoch": 12.730627306273062, "grad_norm": 9.188197135925293, "learning_rate": 8.727561737155834e-05, "loss": 0.03296519219875336, "step": 44850 }, { "epoch": 12.733465796196423, "grad_norm": 12.632057189941406, "learning_rate": 8.727277888163497e-05, "loss": 0.0348456084728241, "step": 44860 }, { "epoch": 12.736304286119784, "grad_norm": 3.0140838623046875, "learning_rate": 8.726994039171161e-05, "loss": 0.040859383344650266, "step": 44870 }, { "epoch": 12.739142776043145, "grad_norm": 6.8895769119262695, "learning_rate": 8.726710190178825e-05, "loss": 0.03838675320148468, "step": 44880 }, { "epoch": 12.741981265966507, "grad_norm": 3.9290289878845215, "learning_rate": 8.72642634118649e-05, "loss": 0.043990492820739746, "step": 44890 }, { "epoch": 12.744819755889866, "grad_norm": 9.954632759094238, "learning_rate": 8.726142492194152e-05, "loss": 0.04762568473815918, "step": 44900 }, { "epoch": 12.747658245813227, "grad_norm": 12.930060386657715, "learning_rate": 8.725858643201818e-05, "loss": 0.04320020079612732, "step": 44910 }, { "epoch": 12.750496735736588, "grad_norm": 1.8095616102218628, "learning_rate": 8.725574794209482e-05, "loss": 0.0823807418346405, "step": 44920 }, { "epoch": 12.75333522565995, "grad_norm": 5.546041965484619, "learning_rate": 8.725290945217145e-05, "loss": 0.0340006411075592, "step": 44930 }, { "epoch": 12.75617371558331, "grad_norm": 8.760685920715332, "learning_rate": 8.725007096224809e-05, "loss": 0.02766079604625702, "step": 44940 }, { "epoch": 12.75901220550667, "grad_norm": 4.659869194030762, "learning_rate": 8.724723247232473e-05, "loss": 0.034121465682983396, "step": 44950 }, { "epoch": 12.76185069543003, "grad_norm": 7.385076999664307, "learning_rate": 8.724439398240136e-05, "loss": 0.04381527006626129, "step": 44960 }, { "epoch": 12.764689185353392, "grad_norm": 12.8385009765625, "learning_rate": 8.724155549247801e-05, "loss": 0.03534000217914581, "step": 44970 }, { "epoch": 12.767527675276753, "grad_norm": 5.491786956787109, "learning_rate": 8.723871700255465e-05, "loss": 0.03210706412792206, "step": 44980 }, { "epoch": 12.770366165200114, "grad_norm": 9.597651481628418, "learning_rate": 8.723587851263128e-05, "loss": 0.05301475524902344, "step": 44990 }, { "epoch": 12.773204655123475, "grad_norm": 9.693765640258789, "learning_rate": 8.723304002270792e-05, "loss": 0.047547003626823424, "step": 45000 }, { "epoch": 12.773204655123475, "eval_accuracy": 0.9508488586507281, "eval_loss": 0.1479937732219696, "eval_runtime": 31.7114, "eval_samples_per_second": 495.942, "eval_steps_per_second": 7.757, "step": 45000 }, { "epoch": 12.776043145046835, "grad_norm": 4.376029014587402, "learning_rate": 8.723020153278456e-05, "loss": 0.04740391373634338, "step": 45010 }, { "epoch": 12.778881634970196, "grad_norm": 3.5137078762054443, "learning_rate": 8.72273630428612e-05, "loss": 0.054237252473831175, "step": 45020 }, { "epoch": 12.781720124893557, "grad_norm": 4.819000244140625, "learning_rate": 8.722452455293783e-05, "loss": 0.04695221185684204, "step": 45030 }, { "epoch": 12.784558614816918, "grad_norm": 11.225417137145996, "learning_rate": 8.722168606301449e-05, "loss": 0.030767554044723512, "step": 45040 }, { "epoch": 12.787397104740279, "grad_norm": 2.993690252304077, "learning_rate": 8.721884757309112e-05, "loss": 0.04000074565410614, "step": 45050 }, { "epoch": 12.790235594663638, "grad_norm": 2.7759902477264404, "learning_rate": 8.721600908316776e-05, "loss": 0.061340874433517455, "step": 45060 }, { "epoch": 12.793074084587, "grad_norm": 12.015202522277832, "learning_rate": 8.72131705932444e-05, "loss": 0.06872159838676453, "step": 45070 }, { "epoch": 12.79591257451036, "grad_norm": 13.68930435180664, "learning_rate": 8.721033210332104e-05, "loss": 0.04538925886154175, "step": 45080 }, { "epoch": 12.798751064433722, "grad_norm": 16.635292053222656, "learning_rate": 8.720749361339767e-05, "loss": 0.08499961495399475, "step": 45090 }, { "epoch": 12.801589554357083, "grad_norm": 1.5720669031143188, "learning_rate": 8.720465512347431e-05, "loss": 0.07012171745300293, "step": 45100 }, { "epoch": 12.804428044280442, "grad_norm": 1.3354732990264893, "learning_rate": 8.720181663355096e-05, "loss": 0.025925347208976747, "step": 45110 }, { "epoch": 12.807266534203803, "grad_norm": 4.402794361114502, "learning_rate": 8.719897814362759e-05, "loss": 0.03301866352558136, "step": 45120 }, { "epoch": 12.810105024127164, "grad_norm": 1.6817296743392944, "learning_rate": 8.719613965370423e-05, "loss": 0.032939016819000244, "step": 45130 }, { "epoch": 12.812943514050525, "grad_norm": 14.497953414916992, "learning_rate": 8.719330116378087e-05, "loss": 0.04649432897567749, "step": 45140 }, { "epoch": 12.815782003973887, "grad_norm": 7.468667030334473, "learning_rate": 8.71904626738575e-05, "loss": 0.028683766722679138, "step": 45150 }, { "epoch": 12.818620493897246, "grad_norm": 6.453930854797363, "learning_rate": 8.718762418393414e-05, "loss": 0.06180890202522278, "step": 45160 }, { "epoch": 12.821458983820607, "grad_norm": 0.8599632382392883, "learning_rate": 8.71847856940108e-05, "loss": 0.02617986798286438, "step": 45170 }, { "epoch": 12.824297473743968, "grad_norm": 7.782673358917236, "learning_rate": 8.718194720408743e-05, "loss": 0.04447529911994934, "step": 45180 }, { "epoch": 12.82713596366733, "grad_norm": 3.7080676555633545, "learning_rate": 8.717910871416407e-05, "loss": 0.04249391853809357, "step": 45190 }, { "epoch": 12.82997445359069, "grad_norm": 1.1791934967041016, "learning_rate": 8.717627022424071e-05, "loss": 0.02329118549823761, "step": 45200 }, { "epoch": 12.83281294351405, "grad_norm": 9.215568542480469, "learning_rate": 8.717343173431735e-05, "loss": 0.03221278488636017, "step": 45210 }, { "epoch": 12.83565143343741, "grad_norm": 2.603409767150879, "learning_rate": 8.717059324439398e-05, "loss": 0.033879855275154115, "step": 45220 }, { "epoch": 12.838489923360772, "grad_norm": 11.32485294342041, "learning_rate": 8.716775475447062e-05, "loss": 0.041959017515182495, "step": 45230 }, { "epoch": 12.841328413284133, "grad_norm": 6.9285807609558105, "learning_rate": 8.716491626454727e-05, "loss": 0.03080936670303345, "step": 45240 }, { "epoch": 12.844166903207494, "grad_norm": 0.6139256358146667, "learning_rate": 8.71620777746239e-05, "loss": 0.04797856211662292, "step": 45250 }, { "epoch": 12.847005393130855, "grad_norm": 6.193779945373535, "learning_rate": 8.715923928470054e-05, "loss": 0.03794891238212585, "step": 45260 }, { "epoch": 12.849843883054215, "grad_norm": 9.690537452697754, "learning_rate": 8.715640079477719e-05, "loss": 0.044877105951309205, "step": 45270 }, { "epoch": 12.852682372977576, "grad_norm": 9.704392433166504, "learning_rate": 8.715356230485381e-05, "loss": 0.03184947371482849, "step": 45280 }, { "epoch": 12.855520862900937, "grad_norm": 5.39846658706665, "learning_rate": 8.715072381493045e-05, "loss": 0.04482846856117249, "step": 45290 }, { "epoch": 12.858359352824298, "grad_norm": 3.099754810333252, "learning_rate": 8.714788532500711e-05, "loss": 0.03206309378147125, "step": 45300 }, { "epoch": 12.861197842747659, "grad_norm": 3.275723934173584, "learning_rate": 8.714504683508374e-05, "loss": 0.04932217299938202, "step": 45310 }, { "epoch": 12.864036332671018, "grad_norm": 5.542270660400391, "learning_rate": 8.714220834516038e-05, "loss": 0.025083592534065245, "step": 45320 }, { "epoch": 12.86687482259438, "grad_norm": 16.415597915649414, "learning_rate": 8.713936985523702e-05, "loss": 0.05791771411895752, "step": 45330 }, { "epoch": 12.86971331251774, "grad_norm": 11.34880542755127, "learning_rate": 8.713653136531366e-05, "loss": 0.036676087975502016, "step": 45340 }, { "epoch": 12.872551802441102, "grad_norm": 6.134761810302734, "learning_rate": 8.713369287539029e-05, "loss": 0.03904784321784973, "step": 45350 }, { "epoch": 12.875390292364463, "grad_norm": 8.232940673828125, "learning_rate": 8.713085438546693e-05, "loss": 0.044435751438140866, "step": 45360 }, { "epoch": 12.878228782287822, "grad_norm": 13.713446617126465, "learning_rate": 8.712801589554359e-05, "loss": 0.0583861231803894, "step": 45370 }, { "epoch": 12.881067272211183, "grad_norm": 12.268309593200684, "learning_rate": 8.712517740562021e-05, "loss": 0.0572661817073822, "step": 45380 }, { "epoch": 12.883905762134544, "grad_norm": 1.7122182846069336, "learning_rate": 8.712233891569685e-05, "loss": 0.04821889400482178, "step": 45390 }, { "epoch": 12.886744252057905, "grad_norm": 9.639969825744629, "learning_rate": 8.71195004257735e-05, "loss": 0.04056214690208435, "step": 45400 }, { "epoch": 12.889582741981267, "grad_norm": 3.87674617767334, "learning_rate": 8.711666193585012e-05, "loss": 0.015631312131881715, "step": 45410 }, { "epoch": 12.892421231904628, "grad_norm": 5.597787857055664, "learning_rate": 8.711382344592677e-05, "loss": 0.042028391361236574, "step": 45420 }, { "epoch": 12.895259721827987, "grad_norm": 5.725367546081543, "learning_rate": 8.711098495600341e-05, "loss": 0.025868400931358337, "step": 45430 }, { "epoch": 12.898098211751348, "grad_norm": 1.6748632192611694, "learning_rate": 8.710814646608005e-05, "loss": 0.05726107954978943, "step": 45440 }, { "epoch": 12.90093670167471, "grad_norm": 7.724864482879639, "learning_rate": 8.710530797615669e-05, "loss": 0.028647717833518983, "step": 45450 }, { "epoch": 12.90377519159807, "grad_norm": 6.866396903991699, "learning_rate": 8.710246948623333e-05, "loss": 0.04249510169029236, "step": 45460 }, { "epoch": 12.906613681521431, "grad_norm": 3.0799500942230225, "learning_rate": 8.709963099630997e-05, "loss": 0.05019757151603699, "step": 45470 }, { "epoch": 12.90945217144479, "grad_norm": 3.3673219680786133, "learning_rate": 8.70967925063866e-05, "loss": 0.03434927761554718, "step": 45480 }, { "epoch": 12.912290661368152, "grad_norm": 1.4446579217910767, "learning_rate": 8.709395401646324e-05, "loss": 0.03518008589744568, "step": 45490 }, { "epoch": 12.915129151291513, "grad_norm": 11.165140151977539, "learning_rate": 8.70911155265399e-05, "loss": 0.03347097933292389, "step": 45500 }, { "epoch": 12.915129151291513, "eval_accuracy": 0.9593056526991798, "eval_loss": 0.1263011395931244, "eval_runtime": 31.3449, "eval_samples_per_second": 501.741, "eval_steps_per_second": 7.848, "step": 45500 }, { "epoch": 12.917967641214874, "grad_norm": 3.674799680709839, "learning_rate": 8.708856088560886e-05, "loss": 0.06423990130424499, "step": 45510 }, { "epoch": 12.920806131138235, "grad_norm": 6.9470062255859375, "learning_rate": 8.70857223956855e-05, "loss": 0.032919925451278684, "step": 45520 }, { "epoch": 12.923644621061595, "grad_norm": 0.6750406622886658, "learning_rate": 8.708288390576213e-05, "loss": 0.027924519777297974, "step": 45530 }, { "epoch": 12.926483110984956, "grad_norm": 2.5715408325195312, "learning_rate": 8.708004541583877e-05, "loss": 0.03935595750808716, "step": 45540 }, { "epoch": 12.929321600908317, "grad_norm": 5.05348539352417, "learning_rate": 8.707720692591543e-05, "loss": 0.0449282169342041, "step": 45550 }, { "epoch": 12.932160090831678, "grad_norm": 5.564386367797852, "learning_rate": 8.707436843599206e-05, "loss": 0.03764061629772186, "step": 45560 }, { "epoch": 12.934998580755039, "grad_norm": 21.975465774536133, "learning_rate": 8.70715299460687e-05, "loss": 0.052472901344299314, "step": 45570 }, { "epoch": 12.937837070678398, "grad_norm": 11.615373611450195, "learning_rate": 8.706869145614534e-05, "loss": 0.04490956962108612, "step": 45580 }, { "epoch": 12.94067556060176, "grad_norm": 5.858719348907471, "learning_rate": 8.706585296622197e-05, "loss": 0.038946935534477235, "step": 45590 }, { "epoch": 12.94351405052512, "grad_norm": 10.555013656616211, "learning_rate": 8.706301447629861e-05, "loss": 0.04604542851448059, "step": 45600 }, { "epoch": 12.946352540448482, "grad_norm": 12.783527374267578, "learning_rate": 8.706017598637525e-05, "loss": 0.04718633890151978, "step": 45610 }, { "epoch": 12.949191030371843, "grad_norm": 7.371231555938721, "learning_rate": 8.705733749645189e-05, "loss": 0.05113707780838013, "step": 45620 }, { "epoch": 12.952029520295202, "grad_norm": 7.177131652832031, "learning_rate": 8.705449900652853e-05, "loss": 0.029880782961845397, "step": 45630 }, { "epoch": 12.954868010218563, "grad_norm": 6.594571113586426, "learning_rate": 8.705166051660517e-05, "loss": 0.04537354707717896, "step": 45640 }, { "epoch": 12.957706500141924, "grad_norm": 3.1812551021575928, "learning_rate": 8.704882202668181e-05, "loss": 0.05719947814941406, "step": 45650 }, { "epoch": 12.960544990065285, "grad_norm": 7.866095066070557, "learning_rate": 8.704598353675844e-05, "loss": 0.0355503648519516, "step": 45660 }, { "epoch": 12.963383479988646, "grad_norm": 17.68269157409668, "learning_rate": 8.704314504683508e-05, "loss": 0.03570097386837005, "step": 45670 }, { "epoch": 12.966221969912008, "grad_norm": 1.6464214324951172, "learning_rate": 8.704030655691174e-05, "loss": 0.04330264925956726, "step": 45680 }, { "epoch": 12.969060459835367, "grad_norm": 1.3372726440429688, "learning_rate": 8.703746806698837e-05, "loss": 0.018665172159671783, "step": 45690 }, { "epoch": 12.971898949758728, "grad_norm": 10.106221199035645, "learning_rate": 8.703462957706501e-05, "loss": 0.031829485297203065, "step": 45700 }, { "epoch": 12.97473743968209, "grad_norm": 13.196824073791504, "learning_rate": 8.703179108714165e-05, "loss": 0.03434259295463562, "step": 45710 }, { "epoch": 12.97757592960545, "grad_norm": 3.910924196243286, "learning_rate": 8.702895259721828e-05, "loss": 0.05792022347450256, "step": 45720 }, { "epoch": 12.980414419528811, "grad_norm": 17.774368286132812, "learning_rate": 8.702611410729492e-05, "loss": 0.07785875797271728, "step": 45730 }, { "epoch": 12.98325290945217, "grad_norm": 8.901176452636719, "learning_rate": 8.702327561737156e-05, "loss": 0.037799912691116336, "step": 45740 }, { "epoch": 12.986091399375532, "grad_norm": 13.765517234802246, "learning_rate": 8.70204371274482e-05, "loss": 0.043500500917434695, "step": 45750 }, { "epoch": 12.988929889298893, "grad_norm": 7.7782745361328125, "learning_rate": 8.701759863752484e-05, "loss": 0.044987809658050534, "step": 45760 }, { "epoch": 12.991768379222254, "grad_norm": 13.591089248657227, "learning_rate": 8.701476014760148e-05, "loss": 0.03518059253692627, "step": 45770 }, { "epoch": 12.994606869145615, "grad_norm": 4.035161972045898, "learning_rate": 8.701192165767813e-05, "loss": 0.028557729721069337, "step": 45780 }, { "epoch": 12.997445359068976, "grad_norm": 4.575911521911621, "learning_rate": 8.700908316775475e-05, "loss": 0.020898987352848054, "step": 45790 }, { "epoch": 13.000283848992336, "grad_norm": 8.753043174743652, "learning_rate": 8.70062446778314e-05, "loss": 0.025048828125, "step": 45800 }, { "epoch": 13.003122338915697, "grad_norm": 3.86434268951416, "learning_rate": 8.700340618790804e-05, "loss": 0.030737340450286865, "step": 45810 }, { "epoch": 13.005960828839058, "grad_norm": 1.5299665927886963, "learning_rate": 8.700056769798468e-05, "loss": 0.019111914932727812, "step": 45820 }, { "epoch": 13.008799318762419, "grad_norm": 8.415980339050293, "learning_rate": 8.699772920806132e-05, "loss": 0.03076198399066925, "step": 45830 }, { "epoch": 13.01163780868578, "grad_norm": 11.649621963500977, "learning_rate": 8.699489071813796e-05, "loss": 0.03850152194499969, "step": 45840 }, { "epoch": 13.01447629860914, "grad_norm": 11.341511726379395, "learning_rate": 8.699205222821459e-05, "loss": 0.024037735164165498, "step": 45850 }, { "epoch": 13.0173147885325, "grad_norm": 6.691775798797607, "learning_rate": 8.698921373829123e-05, "loss": 0.022889155149459838, "step": 45860 }, { "epoch": 13.020153278455862, "grad_norm": 5.299643039703369, "learning_rate": 8.698637524836787e-05, "loss": 0.02528354525566101, "step": 45870 }, { "epoch": 13.022991768379223, "grad_norm": 5.357428073883057, "learning_rate": 8.698353675844451e-05, "loss": 0.030365380644798278, "step": 45880 }, { "epoch": 13.025830258302584, "grad_norm": 17.28185272216797, "learning_rate": 8.698069826852115e-05, "loss": 0.032807016372680665, "step": 45890 }, { "epoch": 13.028668748225943, "grad_norm": 10.43936824798584, "learning_rate": 8.69778597785978e-05, "loss": 0.03723643124103546, "step": 45900 }, { "epoch": 13.031507238149304, "grad_norm": 3.7319672107696533, "learning_rate": 8.697502128867444e-05, "loss": 0.03306630849838257, "step": 45910 }, { "epoch": 13.034345728072665, "grad_norm": 3.9782063961029053, "learning_rate": 8.697218279875106e-05, "loss": 0.016803576052188872, "step": 45920 }, { "epoch": 13.037184217996026, "grad_norm": 4.513489723205566, "learning_rate": 8.69693443088277e-05, "loss": 0.037658748030662534, "step": 45930 }, { "epoch": 13.040022707919388, "grad_norm": 5.186512470245361, "learning_rate": 8.696650581890435e-05, "loss": 0.023856021463871002, "step": 45940 }, { "epoch": 13.042861197842747, "grad_norm": 2.8630008697509766, "learning_rate": 8.696366732898099e-05, "loss": 0.029960185289382935, "step": 45950 }, { "epoch": 13.045699687766108, "grad_norm": 5.441481113433838, "learning_rate": 8.696082883905763e-05, "loss": 0.031141310930252075, "step": 45960 }, { "epoch": 13.048538177689469, "grad_norm": 3.809981107711792, "learning_rate": 8.695799034913427e-05, "loss": 0.035477432608604434, "step": 45970 }, { "epoch": 13.05137666761283, "grad_norm": 1.3065361976623535, "learning_rate": 8.69551518592109e-05, "loss": 0.01905675381422043, "step": 45980 }, { "epoch": 13.054215157536191, "grad_norm": 11.69388484954834, "learning_rate": 8.695231336928754e-05, "loss": 0.04160212874412537, "step": 45990 }, { "epoch": 13.05705364745955, "grad_norm": 9.860189437866211, "learning_rate": 8.694947487936418e-05, "loss": 0.03494201600551605, "step": 46000 }, { "epoch": 13.05705364745955, "eval_accuracy": 0.9600050867934126, "eval_loss": 0.12102089822292328, "eval_runtime": 31.4676, "eval_samples_per_second": 499.785, "eval_steps_per_second": 7.818, "step": 46000 }, { "epoch": 13.059892137382912, "grad_norm": 13.886919975280762, "learning_rate": 8.694663638944082e-05, "loss": 0.037542781233787535, "step": 46010 }, { "epoch": 13.062730627306273, "grad_norm": 2.0131142139434814, "learning_rate": 8.694379789951746e-05, "loss": 0.016535870730876923, "step": 46020 }, { "epoch": 13.065569117229634, "grad_norm": 7.188528537750244, "learning_rate": 8.69409594095941e-05, "loss": 0.027523231506347657, "step": 46030 }, { "epoch": 13.068407607152995, "grad_norm": 4.929324626922607, "learning_rate": 8.693812091967073e-05, "loss": 0.023347745835781097, "step": 46040 }, { "epoch": 13.071246097076356, "grad_norm": 10.391345024108887, "learning_rate": 8.693528242974738e-05, "loss": 0.026271986961364745, "step": 46050 }, { "epoch": 13.074084586999716, "grad_norm": 9.589155197143555, "learning_rate": 8.693244393982402e-05, "loss": 0.0391389787197113, "step": 46060 }, { "epoch": 13.076923076923077, "grad_norm": 5.854538440704346, "learning_rate": 8.692960544990066e-05, "loss": 0.02978052794933319, "step": 46070 }, { "epoch": 13.079761566846438, "grad_norm": 1.6398125886917114, "learning_rate": 8.69267669599773e-05, "loss": 0.02386342138051987, "step": 46080 }, { "epoch": 13.082600056769799, "grad_norm": 8.533074378967285, "learning_rate": 8.692392847005394e-05, "loss": 0.04604070782661438, "step": 46090 }, { "epoch": 13.08543854669316, "grad_norm": 8.954219818115234, "learning_rate": 8.692108998013058e-05, "loss": 0.03578729331493378, "step": 46100 }, { "epoch": 13.08827703661652, "grad_norm": 6.10044002532959, "learning_rate": 8.691825149020721e-05, "loss": 0.027187839150428772, "step": 46110 }, { "epoch": 13.09111552653988, "grad_norm": 10.100062370300293, "learning_rate": 8.691541300028385e-05, "loss": 0.021938730776309968, "step": 46120 }, { "epoch": 13.093954016463242, "grad_norm": 1.8479245901107788, "learning_rate": 8.691257451036049e-05, "loss": 0.04361025094985962, "step": 46130 }, { "epoch": 13.096792506386603, "grad_norm": 3.1001205444335938, "learning_rate": 8.690973602043712e-05, "loss": 0.0469401091337204, "step": 46140 }, { "epoch": 13.099630996309964, "grad_norm": 11.211882591247559, "learning_rate": 8.690689753051378e-05, "loss": 0.041463214159011844, "step": 46150 }, { "epoch": 13.102469486233323, "grad_norm": 9.55882740020752, "learning_rate": 8.690405904059042e-05, "loss": 0.029076778888702394, "step": 46160 }, { "epoch": 13.105307976156684, "grad_norm": 2.4896345138549805, "learning_rate": 8.690122055066704e-05, "loss": 0.03126168251037598, "step": 46170 }, { "epoch": 13.108146466080045, "grad_norm": 4.450166702270508, "learning_rate": 8.689838206074369e-05, "loss": 0.029432645440101622, "step": 46180 }, { "epoch": 13.110984956003406, "grad_norm": 7.018934726715088, "learning_rate": 8.689554357082033e-05, "loss": 0.024624615907669067, "step": 46190 }, { "epoch": 13.113823445926768, "grad_norm": 4.068532943725586, "learning_rate": 8.689270508089697e-05, "loss": 0.017405125498771667, "step": 46200 }, { "epoch": 13.116661935850127, "grad_norm": 24.175397872924805, "learning_rate": 8.68898665909736e-05, "loss": 0.03965692520141602, "step": 46210 }, { "epoch": 13.119500425773488, "grad_norm": 3.6043851375579834, "learning_rate": 8.688702810105025e-05, "loss": 0.03555071651935578, "step": 46220 }, { "epoch": 13.122338915696849, "grad_norm": 6.531484603881836, "learning_rate": 8.688418961112689e-05, "loss": 0.0271712988615036, "step": 46230 }, { "epoch": 13.12517740562021, "grad_norm": 10.330556869506836, "learning_rate": 8.688135112120352e-05, "loss": 0.036047860980033875, "step": 46240 }, { "epoch": 13.128015895543571, "grad_norm": 7.164229869842529, "learning_rate": 8.687851263128016e-05, "loss": 0.03753184676170349, "step": 46250 }, { "epoch": 13.130854385466932, "grad_norm": 9.894229888916016, "learning_rate": 8.68756741413568e-05, "loss": 0.044302555918693545, "step": 46260 }, { "epoch": 13.133692875390292, "grad_norm": 9.697216033935547, "learning_rate": 8.687283565143343e-05, "loss": 0.03176181614398956, "step": 46270 }, { "epoch": 13.136531365313653, "grad_norm": 12.86292552947998, "learning_rate": 8.686999716151009e-05, "loss": 0.027163279056549073, "step": 46280 }, { "epoch": 13.139369855237014, "grad_norm": 15.37137508392334, "learning_rate": 8.686715867158673e-05, "loss": 0.05186057090759277, "step": 46290 }, { "epoch": 13.142208345160375, "grad_norm": 5.393747329711914, "learning_rate": 8.686432018166336e-05, "loss": 0.03587070107460022, "step": 46300 }, { "epoch": 13.145046835083736, "grad_norm": 7.149489402770996, "learning_rate": 8.686148169174e-05, "loss": 0.03435201644897461, "step": 46310 }, { "epoch": 13.147885325007096, "grad_norm": 6.243102073669434, "learning_rate": 8.685864320181664e-05, "loss": 0.03733316957950592, "step": 46320 }, { "epoch": 13.150723814930457, "grad_norm": 10.496167182922363, "learning_rate": 8.685580471189328e-05, "loss": 0.05221510529518127, "step": 46330 }, { "epoch": 13.153562304853818, "grad_norm": 13.295903205871582, "learning_rate": 8.685296622196991e-05, "loss": 0.03955149352550506, "step": 46340 }, { "epoch": 13.156400794777179, "grad_norm": 6.255700588226318, "learning_rate": 8.685012773204656e-05, "loss": 0.03965249061584473, "step": 46350 }, { "epoch": 13.15923928470054, "grad_norm": 2.5287551879882812, "learning_rate": 8.68472892421232e-05, "loss": 0.03453528881072998, "step": 46360 }, { "epoch": 13.1620777746239, "grad_norm": 5.715973854064941, "learning_rate": 8.684445075219983e-05, "loss": 0.025282251834869384, "step": 46370 }, { "epoch": 13.16491626454726, "grad_norm": 7.935683250427246, "learning_rate": 8.684161226227647e-05, "loss": 0.033587193489074706, "step": 46380 }, { "epoch": 13.167754754470621, "grad_norm": 8.875100135803223, "learning_rate": 8.683877377235311e-05, "loss": 0.059543246030807497, "step": 46390 }, { "epoch": 13.170593244393983, "grad_norm": 2.1696767807006836, "learning_rate": 8.683593528242974e-05, "loss": 0.015411739051342011, "step": 46400 }, { "epoch": 13.173431734317344, "grad_norm": 8.039100646972656, "learning_rate": 8.683309679250638e-05, "loss": 0.026804310083389283, "step": 46410 }, { "epoch": 13.176270224240703, "grad_norm": 1.1770687103271484, "learning_rate": 8.683025830258304e-05, "loss": 0.029194334149360658, "step": 46420 }, { "epoch": 13.179108714164064, "grad_norm": 16.008943557739258, "learning_rate": 8.682741981265967e-05, "loss": 0.03047935664653778, "step": 46430 }, { "epoch": 13.181947204087425, "grad_norm": 6.653017520904541, "learning_rate": 8.682458132273631e-05, "loss": 0.03581974804401398, "step": 46440 }, { "epoch": 13.184785694010786, "grad_norm": 3.258111000061035, "learning_rate": 8.682174283281295e-05, "loss": 0.0355497270822525, "step": 46450 }, { "epoch": 13.187624183934147, "grad_norm": 4.663342475891113, "learning_rate": 8.681890434288959e-05, "loss": 0.02163380980491638, "step": 46460 }, { "epoch": 13.190462673857509, "grad_norm": 9.256166458129883, "learning_rate": 8.681606585296622e-05, "loss": 0.015953955054283143, "step": 46470 }, { "epoch": 13.193301163780868, "grad_norm": 9.308165550231934, "learning_rate": 8.681322736304287e-05, "loss": 0.021913789212703705, "step": 46480 }, { "epoch": 13.196139653704229, "grad_norm": 5.865832805633545, "learning_rate": 8.681038887311951e-05, "loss": 0.021701508760452272, "step": 46490 }, { "epoch": 13.19897814362759, "grad_norm": 3.9808759689331055, "learning_rate": 8.680755038319614e-05, "loss": 0.032222169637680056, "step": 46500 }, { "epoch": 13.19897814362759, "eval_accuracy": 0.9576524448400839, "eval_loss": 0.13142837584018707, "eval_runtime": 31.4104, "eval_samples_per_second": 500.693, "eval_steps_per_second": 7.832, "step": 46500 }, { "epoch": 13.201816633550951, "grad_norm": 4.899045467376709, "learning_rate": 8.680471189327278e-05, "loss": 0.048612141609191896, "step": 46510 }, { "epoch": 13.204655123474312, "grad_norm": 5.104890823364258, "learning_rate": 8.680187340334942e-05, "loss": 0.02812759280204773, "step": 46520 }, { "epoch": 13.207493613397672, "grad_norm": 0.8021270036697388, "learning_rate": 8.679903491342605e-05, "loss": 0.014854462444782257, "step": 46530 }, { "epoch": 13.210332103321033, "grad_norm": 2.0054574012756348, "learning_rate": 8.67961964235027e-05, "loss": 0.035123175382614134, "step": 46540 }, { "epoch": 13.213170593244394, "grad_norm": 14.444690704345703, "learning_rate": 8.679335793357935e-05, "loss": 0.03248676061630249, "step": 46550 }, { "epoch": 13.216009083167755, "grad_norm": 10.37210464477539, "learning_rate": 8.679051944365598e-05, "loss": 0.032960107922554015, "step": 46560 }, { "epoch": 13.218847573091116, "grad_norm": 8.02227783203125, "learning_rate": 8.678768095373262e-05, "loss": 0.025931930541992186, "step": 46570 }, { "epoch": 13.221686063014475, "grad_norm": 1.6116859912872314, "learning_rate": 8.678484246380926e-05, "loss": 0.025424587726593017, "step": 46580 }, { "epoch": 13.224524552937837, "grad_norm": 14.00739574432373, "learning_rate": 8.67820039738859e-05, "loss": 0.036745718121528624, "step": 46590 }, { "epoch": 13.227363042861198, "grad_norm": 3.6275362968444824, "learning_rate": 8.677916548396253e-05, "loss": 0.03769190013408661, "step": 46600 }, { "epoch": 13.230201532784559, "grad_norm": 7.274720191955566, "learning_rate": 8.677632699403917e-05, "loss": 0.017800626158714295, "step": 46610 }, { "epoch": 13.23304002270792, "grad_norm": 20.06517791748047, "learning_rate": 8.677348850411583e-05, "loss": 0.06622192859649659, "step": 46620 }, { "epoch": 13.235878512631281, "grad_norm": 9.146626472473145, "learning_rate": 8.677065001419245e-05, "loss": 0.022119684517383574, "step": 46630 }, { "epoch": 13.23871700255464, "grad_norm": 10.979401588439941, "learning_rate": 8.67678115242691e-05, "loss": 0.033708488941192626, "step": 46640 }, { "epoch": 13.241555492478001, "grad_norm": 8.480527877807617, "learning_rate": 8.676497303434574e-05, "loss": 0.031891119480133054, "step": 46650 }, { "epoch": 13.244393982401363, "grad_norm": 3.1861956119537354, "learning_rate": 8.676213454442236e-05, "loss": 0.02654237449169159, "step": 46660 }, { "epoch": 13.247232472324724, "grad_norm": 1.1386229991912842, "learning_rate": 8.6759296054499e-05, "loss": 0.029223036766052247, "step": 46670 }, { "epoch": 13.250070962248085, "grad_norm": 8.858532905578613, "learning_rate": 8.675645756457566e-05, "loss": 0.03646268248558045, "step": 46680 }, { "epoch": 13.252909452171444, "grad_norm": 7.6891632080078125, "learning_rate": 8.675361907465229e-05, "loss": 0.04000713527202606, "step": 46690 }, { "epoch": 13.255747942094805, "grad_norm": 8.544937133789062, "learning_rate": 8.675078058472893e-05, "loss": 0.04307003319263458, "step": 46700 }, { "epoch": 13.258586432018166, "grad_norm": 4.729654788970947, "learning_rate": 8.674794209480557e-05, "loss": 0.0224888414144516, "step": 46710 }, { "epoch": 13.261424921941527, "grad_norm": 10.592219352722168, "learning_rate": 8.674510360488221e-05, "loss": 0.017426466941833495, "step": 46720 }, { "epoch": 13.264263411864889, "grad_norm": 12.848734855651855, "learning_rate": 8.674226511495884e-05, "loss": 0.02888982892036438, "step": 46730 }, { "epoch": 13.267101901788248, "grad_norm": 5.484411716461182, "learning_rate": 8.673942662503548e-05, "loss": 0.03522112965583801, "step": 46740 }, { "epoch": 13.269940391711609, "grad_norm": 12.630756378173828, "learning_rate": 8.673658813511214e-05, "loss": 0.04130620062351227, "step": 46750 }, { "epoch": 13.27277888163497, "grad_norm": 8.3544921875, "learning_rate": 8.673374964518876e-05, "loss": 0.0466865062713623, "step": 46760 }, { "epoch": 13.275617371558331, "grad_norm": 3.942180633544922, "learning_rate": 8.67309111552654e-05, "loss": 0.03530599772930145, "step": 46770 }, { "epoch": 13.278455861481692, "grad_norm": 12.378708839416504, "learning_rate": 8.672807266534205e-05, "loss": 0.052664142847061154, "step": 46780 }, { "epoch": 13.281294351405052, "grad_norm": 3.9495184421539307, "learning_rate": 8.672523417541867e-05, "loss": 0.031207001209259032, "step": 46790 }, { "epoch": 13.284132841328413, "grad_norm": 0.7871926426887512, "learning_rate": 8.672239568549532e-05, "loss": 0.01120012104511261, "step": 46800 }, { "epoch": 13.286971331251774, "grad_norm": 2.352360963821411, "learning_rate": 8.671955719557196e-05, "loss": 0.02702072560787201, "step": 46810 }, { "epoch": 13.289809821175135, "grad_norm": 2.5599679946899414, "learning_rate": 8.67167187056486e-05, "loss": 0.04142013490200043, "step": 46820 }, { "epoch": 13.292648311098496, "grad_norm": 2.9239768981933594, "learning_rate": 8.671388021572524e-05, "loss": 0.022765138745307924, "step": 46830 }, { "epoch": 13.295486801021857, "grad_norm": 4.48128604888916, "learning_rate": 8.671104172580188e-05, "loss": 0.04166404902935028, "step": 46840 }, { "epoch": 13.298325290945217, "grad_norm": 6.070557117462158, "learning_rate": 8.670820323587852e-05, "loss": 0.047522330284118654, "step": 46850 }, { "epoch": 13.301163780868578, "grad_norm": 3.784759521484375, "learning_rate": 8.670536474595515e-05, "loss": 0.025988921523094177, "step": 46860 }, { "epoch": 13.304002270791939, "grad_norm": 2.189690351486206, "learning_rate": 8.670252625603179e-05, "loss": 0.030152735114097596, "step": 46870 }, { "epoch": 13.3068407607153, "grad_norm": 6.351118087768555, "learning_rate": 8.669968776610843e-05, "loss": 0.02525656819343567, "step": 46880 }, { "epoch": 13.309679250638661, "grad_norm": 4.462566375732422, "learning_rate": 8.669684927618507e-05, "loss": 0.036944061517715454, "step": 46890 }, { "epoch": 13.31251774056202, "grad_norm": 1.909093976020813, "learning_rate": 8.669401078626172e-05, "loss": 0.03044966757297516, "step": 46900 }, { "epoch": 13.315356230485381, "grad_norm": 9.567749977111816, "learning_rate": 8.669117229633836e-05, "loss": 0.0394818127155304, "step": 46910 }, { "epoch": 13.318194720408743, "grad_norm": 5.714660167694092, "learning_rate": 8.668833380641498e-05, "loss": 0.03719258904457092, "step": 46920 }, { "epoch": 13.321033210332104, "grad_norm": 10.4080810546875, "learning_rate": 8.668549531649163e-05, "loss": 0.04888079762458801, "step": 46930 }, { "epoch": 13.323871700255465, "grad_norm": 15.858975410461426, "learning_rate": 8.668265682656827e-05, "loss": 0.05152977108955383, "step": 46940 }, { "epoch": 13.326710190178824, "grad_norm": 5.539859294891357, "learning_rate": 8.667981833664491e-05, "loss": 0.029546111822128296, "step": 46950 }, { "epoch": 13.329548680102185, "grad_norm": 7.2826457023620605, "learning_rate": 8.667697984672155e-05, "loss": 0.03621213436126709, "step": 46960 }, { "epoch": 13.332387170025546, "grad_norm": 4.723855972290039, "learning_rate": 8.667414135679819e-05, "loss": 0.03703057169914246, "step": 46970 }, { "epoch": 13.335225659948907, "grad_norm": 9.292047500610352, "learning_rate": 8.667130286687482e-05, "loss": 0.035714578628540036, "step": 46980 }, { "epoch": 13.338064149872269, "grad_norm": 8.979728698730469, "learning_rate": 8.666846437695146e-05, "loss": 0.034203764796257016, "step": 46990 }, { "epoch": 13.34090263979563, "grad_norm": 1.1390995979309082, "learning_rate": 8.66656258870281e-05, "loss": 0.033992066979408264, "step": 47000 }, { "epoch": 13.34090263979563, "eval_accuracy": 0.9556813123927005, "eval_loss": 0.1364302933216095, "eval_runtime": 31.5685, "eval_samples_per_second": 498.186, "eval_steps_per_second": 7.793, "step": 47000 }, { "epoch": 13.343741129718989, "grad_norm": 1.6108620166778564, "learning_rate": 8.666278739710474e-05, "loss": 0.019977061450481413, "step": 47010 }, { "epoch": 13.34657961964235, "grad_norm": 3.0914905071258545, "learning_rate": 8.665994890718139e-05, "loss": 0.026553791761398316, "step": 47020 }, { "epoch": 13.349418109565711, "grad_norm": 2.484858989715576, "learning_rate": 8.665711041725803e-05, "loss": 0.01819160133600235, "step": 47030 }, { "epoch": 13.352256599489072, "grad_norm": 5.5316548347473145, "learning_rate": 8.665427192733467e-05, "loss": 0.029444748163223268, "step": 47040 }, { "epoch": 13.355095089412433, "grad_norm": 1.544081449508667, "learning_rate": 8.66514334374113e-05, "loss": 0.01586762219667435, "step": 47050 }, { "epoch": 13.357933579335793, "grad_norm": 6.189842700958252, "learning_rate": 8.664859494748794e-05, "loss": 0.040950334072113036, "step": 47060 }, { "epoch": 13.360772069259154, "grad_norm": 2.857926845550537, "learning_rate": 8.664575645756458e-05, "loss": 0.019795121252536775, "step": 47070 }, { "epoch": 13.363610559182515, "grad_norm": 8.21424674987793, "learning_rate": 8.664291796764122e-05, "loss": 0.05254107117652893, "step": 47080 }, { "epoch": 13.366449049105876, "grad_norm": 3.3173604011535645, "learning_rate": 8.664007947771786e-05, "loss": 0.048231422901153564, "step": 47090 }, { "epoch": 13.369287539029237, "grad_norm": 12.414484977722168, "learning_rate": 8.66372409877945e-05, "loss": 0.025853991508483887, "step": 47100 }, { "epoch": 13.372126028952596, "grad_norm": 8.269749641418457, "learning_rate": 8.663440249787113e-05, "loss": 0.043690598011016844, "step": 47110 }, { "epoch": 13.374964518875958, "grad_norm": 3.5122714042663574, "learning_rate": 8.663156400794777e-05, "loss": 0.038815534114837645, "step": 47120 }, { "epoch": 13.377803008799319, "grad_norm": 8.742683410644531, "learning_rate": 8.662872551802441e-05, "loss": 0.0601883590221405, "step": 47130 }, { "epoch": 13.38064149872268, "grad_norm": 1.2642333507537842, "learning_rate": 8.662588702810105e-05, "loss": 0.034104183316230774, "step": 47140 }, { "epoch": 13.383479988646041, "grad_norm": 3.2773542404174805, "learning_rate": 8.66230485381777e-05, "loss": 0.025511911511421202, "step": 47150 }, { "epoch": 13.3863184785694, "grad_norm": 16.95147705078125, "learning_rate": 8.662021004825434e-05, "loss": 0.03688313961029053, "step": 47160 }, { "epoch": 13.389156968492761, "grad_norm": 3.6043238639831543, "learning_rate": 8.661737155833098e-05, "loss": 0.034344923496246335, "step": 47170 }, { "epoch": 13.391995458416122, "grad_norm": 9.516392707824707, "learning_rate": 8.66145330684076e-05, "loss": 0.0326772004365921, "step": 47180 }, { "epoch": 13.394833948339484, "grad_norm": 3.5813658237457275, "learning_rate": 8.661169457848425e-05, "loss": 0.04920532703399658, "step": 47190 }, { "epoch": 13.397672438262845, "grad_norm": 12.189434051513672, "learning_rate": 8.660885608856089e-05, "loss": 0.039902889728546144, "step": 47200 }, { "epoch": 13.400510928186204, "grad_norm": 1.0895549058914185, "learning_rate": 8.660601759863753e-05, "loss": 0.011589239537715911, "step": 47210 }, { "epoch": 13.403349418109565, "grad_norm": 3.7724554538726807, "learning_rate": 8.660317910871417e-05, "loss": 0.02462998479604721, "step": 47220 }, { "epoch": 13.406187908032926, "grad_norm": 12.197464942932129, "learning_rate": 8.660034061879081e-05, "loss": 0.01989622414112091, "step": 47230 }, { "epoch": 13.409026397956287, "grad_norm": 10.303778648376465, "learning_rate": 8.659750212886744e-05, "loss": 0.03702112436294556, "step": 47240 }, { "epoch": 13.411864887879648, "grad_norm": 25.24805450439453, "learning_rate": 8.659466363894408e-05, "loss": 0.05495012998580932, "step": 47250 }, { "epoch": 13.41470337780301, "grad_norm": 10.352737426757812, "learning_rate": 8.659182514902072e-05, "loss": 0.04449354410171509, "step": 47260 }, { "epoch": 13.417541867726369, "grad_norm": 6.472108364105225, "learning_rate": 8.658898665909737e-05, "loss": 0.037041071057319644, "step": 47270 }, { "epoch": 13.42038035764973, "grad_norm": 4.82477331161499, "learning_rate": 8.6586148169174e-05, "loss": 0.027723467350006102, "step": 47280 }, { "epoch": 13.423218847573091, "grad_norm": 10.74465274810791, "learning_rate": 8.658330967925065e-05, "loss": 0.04451736807823181, "step": 47290 }, { "epoch": 13.426057337496452, "grad_norm": 9.49945068359375, "learning_rate": 8.658047118932729e-05, "loss": 0.03110710084438324, "step": 47300 }, { "epoch": 13.428895827419813, "grad_norm": 1.6850651502609253, "learning_rate": 8.657763269940392e-05, "loss": 0.02335614264011383, "step": 47310 }, { "epoch": 13.431734317343173, "grad_norm": 0.6513549089431763, "learning_rate": 8.657479420948056e-05, "loss": 0.027879971265792846, "step": 47320 }, { "epoch": 13.434572807266534, "grad_norm": 4.218456745147705, "learning_rate": 8.65719557195572e-05, "loss": 0.03128032088279724, "step": 47330 }, { "epoch": 13.437411297189895, "grad_norm": 2.6938302516937256, "learning_rate": 8.656911722963383e-05, "loss": 0.01789194643497467, "step": 47340 }, { "epoch": 13.440249787113256, "grad_norm": 4.033883571624756, "learning_rate": 8.656627873971048e-05, "loss": 0.019952358305454256, "step": 47350 }, { "epoch": 13.443088277036617, "grad_norm": 4.329926490783691, "learning_rate": 8.656344024978712e-05, "loss": 0.032864853739738464, "step": 47360 }, { "epoch": 13.445926766959976, "grad_norm": 7.312324523925781, "learning_rate": 8.656060175986375e-05, "loss": 0.03324364721775055, "step": 47370 }, { "epoch": 13.448765256883338, "grad_norm": 1.3990533351898193, "learning_rate": 8.65577632699404e-05, "loss": 0.029722386598587038, "step": 47380 }, { "epoch": 13.451603746806699, "grad_norm": 11.081133842468262, "learning_rate": 8.655492478001703e-05, "loss": 0.024282051622867583, "step": 47390 }, { "epoch": 13.45444223673006, "grad_norm": 5.652640342712402, "learning_rate": 8.655208629009368e-05, "loss": 0.035066595673561095, "step": 47400 }, { "epoch": 13.457280726653421, "grad_norm": 1.3877437114715576, "learning_rate": 8.654924780017032e-05, "loss": 0.04085328876972198, "step": 47410 }, { "epoch": 13.460119216576782, "grad_norm": 6.15867805480957, "learning_rate": 8.654640931024696e-05, "loss": 0.029024744033813478, "step": 47420 }, { "epoch": 13.462957706500141, "grad_norm": 3.4995944499969482, "learning_rate": 8.65435708203236e-05, "loss": 0.03490034639835358, "step": 47430 }, { "epoch": 13.465796196423502, "grad_norm": 12.462900161743164, "learning_rate": 8.654073233040023e-05, "loss": 0.04476391971111297, "step": 47440 }, { "epoch": 13.468634686346864, "grad_norm": 2.4970486164093018, "learning_rate": 8.653789384047687e-05, "loss": 0.03142915666103363, "step": 47450 }, { "epoch": 13.471473176270225, "grad_norm": 9.125443458557129, "learning_rate": 8.653505535055351e-05, "loss": 0.04043779075145722, "step": 47460 }, { "epoch": 13.474311666193586, "grad_norm": 0.5085991621017456, "learning_rate": 8.653221686063014e-05, "loss": 0.022030943632125856, "step": 47470 }, { "epoch": 13.477150156116945, "grad_norm": 7.457637310028076, "learning_rate": 8.65293783707068e-05, "loss": 0.01750267595052719, "step": 47480 }, { "epoch": 13.479988646040306, "grad_norm": 8.2711181640625, "learning_rate": 8.652653988078343e-05, "loss": 0.028965687751770018, "step": 47490 }, { "epoch": 13.482827135963667, "grad_norm": 8.768735885620117, "learning_rate": 8.652370139086006e-05, "loss": 0.04551350176334381, "step": 47500 }, { "epoch": 13.482827135963667, "eval_accuracy": 0.9582247090990017, "eval_loss": 0.1277717500925064, "eval_runtime": 31.6797, "eval_samples_per_second": 496.437, "eval_steps_per_second": 7.765, "step": 47500 }, { "epoch": 13.485665625887028, "grad_norm": 6.081392288208008, "learning_rate": 8.65208629009367e-05, "loss": 0.026894816756248476, "step": 47510 }, { "epoch": 13.48850411581039, "grad_norm": 9.793679237365723, "learning_rate": 8.651802441101335e-05, "loss": 0.02709413170814514, "step": 47520 }, { "epoch": 13.491342605733749, "grad_norm": 6.559260368347168, "learning_rate": 8.651518592108999e-05, "loss": 0.030215150117874144, "step": 47530 }, { "epoch": 13.49418109565711, "grad_norm": 6.889903545379639, "learning_rate": 8.651234743116661e-05, "loss": 0.040194612741470334, "step": 47540 }, { "epoch": 13.497019585580471, "grad_norm": 3.6317713260650635, "learning_rate": 8.650950894124327e-05, "loss": 0.038753736019134524, "step": 47550 }, { "epoch": 13.499858075503832, "grad_norm": 8.623577117919922, "learning_rate": 8.650667045131991e-05, "loss": 0.030288875102996826, "step": 47560 }, { "epoch": 13.502696565427193, "grad_norm": 7.882750988006592, "learning_rate": 8.650383196139654e-05, "loss": 0.03433360159397125, "step": 47570 }, { "epoch": 13.505535055350553, "grad_norm": 5.83099889755249, "learning_rate": 8.650099347147318e-05, "loss": 0.025126606225967407, "step": 47580 }, { "epoch": 13.508373545273914, "grad_norm": 16.601478576660156, "learning_rate": 8.649815498154982e-05, "loss": 0.03801619708538055, "step": 47590 }, { "epoch": 13.511212035197275, "grad_norm": 3.1400492191314697, "learning_rate": 8.649531649162645e-05, "loss": 0.02400256395339966, "step": 47600 }, { "epoch": 13.514050525120636, "grad_norm": 12.873468399047852, "learning_rate": 8.64924780017031e-05, "loss": 0.03426647186279297, "step": 47610 }, { "epoch": 13.516889015043997, "grad_norm": 1.4552162885665894, "learning_rate": 8.648963951177975e-05, "loss": 0.04324913024902344, "step": 47620 }, { "epoch": 13.519727504967356, "grad_norm": 5.839138507843018, "learning_rate": 8.648680102185637e-05, "loss": 0.02866055369377136, "step": 47630 }, { "epoch": 13.522565994890718, "grad_norm": 2.0074400901794434, "learning_rate": 8.648396253193301e-05, "loss": 0.044433844089508054, "step": 47640 }, { "epoch": 13.525404484814079, "grad_norm": 2.4012770652770996, "learning_rate": 8.648112404200966e-05, "loss": 0.031976637244224546, "step": 47650 }, { "epoch": 13.52824297473744, "grad_norm": 0.5516537427902222, "learning_rate": 8.647856940107864e-05, "loss": 0.032358679175376895, "step": 47660 }, { "epoch": 13.5310814646608, "grad_norm": 10.947912216186523, "learning_rate": 8.647573091115528e-05, "loss": 0.029296010732650757, "step": 47670 }, { "epoch": 13.533919954584162, "grad_norm": 4.325680255889893, "learning_rate": 8.64728924212319e-05, "loss": 0.03215175569057464, "step": 47680 }, { "epoch": 13.536758444507521, "grad_norm": 3.7877259254455566, "learning_rate": 8.647005393130855e-05, "loss": 0.021947005391120912, "step": 47690 }, { "epoch": 13.539596934430882, "grad_norm": 3.5999224185943604, "learning_rate": 8.646721544138519e-05, "loss": 0.028984886407852174, "step": 47700 }, { "epoch": 13.542435424354244, "grad_norm": 2.117652416229248, "learning_rate": 8.646437695146183e-05, "loss": 0.03015979528427124, "step": 47710 }, { "epoch": 13.545273914277605, "grad_norm": 6.415585994720459, "learning_rate": 8.646153846153846e-05, "loss": 0.02594228684902191, "step": 47720 }, { "epoch": 13.548112404200966, "grad_norm": 11.240501403808594, "learning_rate": 8.645869997161511e-05, "loss": 0.0444490909576416, "step": 47730 }, { "epoch": 13.550950894124325, "grad_norm": 4.122629642486572, "learning_rate": 8.645586148169174e-05, "loss": 0.027328258752822875, "step": 47740 }, { "epoch": 13.553789384047686, "grad_norm": 5.013462066650391, "learning_rate": 8.645302299176838e-05, "loss": 0.04120333194732666, "step": 47750 }, { "epoch": 13.556627873971047, "grad_norm": 0.9704323410987854, "learning_rate": 8.645018450184502e-05, "loss": 0.03185535073280334, "step": 47760 }, { "epoch": 13.559466363894408, "grad_norm": 11.820596694946289, "learning_rate": 8.644734601192166e-05, "loss": 0.025168603658676146, "step": 47770 }, { "epoch": 13.56230485381777, "grad_norm": 9.895241737365723, "learning_rate": 8.644450752199829e-05, "loss": 0.027022379636764526, "step": 47780 }, { "epoch": 13.56514334374113, "grad_norm": 9.648585319519043, "learning_rate": 8.644166903207495e-05, "loss": 0.04760119318962097, "step": 47790 }, { "epoch": 13.56798183366449, "grad_norm": 5.825608253479004, "learning_rate": 8.643883054215159e-05, "loss": 0.027638736367225646, "step": 47800 }, { "epoch": 13.570820323587851, "grad_norm": 3.1733558177948, "learning_rate": 8.643599205222822e-05, "loss": 0.01781163513660431, "step": 47810 }, { "epoch": 13.573658813511212, "grad_norm": 20.244632720947266, "learning_rate": 8.643315356230486e-05, "loss": 0.053033411502838135, "step": 47820 }, { "epoch": 13.576497303434573, "grad_norm": 3.861985683441162, "learning_rate": 8.64303150723815e-05, "loss": 0.040492516756057736, "step": 47830 }, { "epoch": 13.579335793357934, "grad_norm": 6.913362979888916, "learning_rate": 8.642747658245813e-05, "loss": 0.02677871584892273, "step": 47840 }, { "epoch": 13.582174283281294, "grad_norm": 4.416360378265381, "learning_rate": 8.642463809253477e-05, "loss": 0.02400323748588562, "step": 47850 }, { "epoch": 13.585012773204655, "grad_norm": 14.091269493103027, "learning_rate": 8.642179960261142e-05, "loss": 0.06146126985549927, "step": 47860 }, { "epoch": 13.587851263128016, "grad_norm": 9.991366386413574, "learning_rate": 8.641896111268805e-05, "loss": 0.04355473220348358, "step": 47870 }, { "epoch": 13.590689753051377, "grad_norm": 8.972782135009766, "learning_rate": 8.641612262276469e-05, "loss": 0.029344624280929564, "step": 47880 }, { "epoch": 13.593528242974738, "grad_norm": 7.045830726623535, "learning_rate": 8.641328413284133e-05, "loss": 0.04973292350769043, "step": 47890 }, { "epoch": 13.596366732898097, "grad_norm": 8.405793190002441, "learning_rate": 8.641044564291797e-05, "loss": 0.04243167638778687, "step": 47900 }, { "epoch": 13.599205222821459, "grad_norm": 5.957789421081543, "learning_rate": 8.64076071529946e-05, "loss": 0.02167646735906601, "step": 47910 }, { "epoch": 13.60204371274482, "grad_norm": 3.9297266006469727, "learning_rate": 8.640476866307124e-05, "loss": 0.041967970132827756, "step": 47920 }, { "epoch": 13.60488220266818, "grad_norm": 5.658904075622559, "learning_rate": 8.64019301731479e-05, "loss": 0.046526464819908145, "step": 47930 }, { "epoch": 13.607720692591542, "grad_norm": 3.7970118522644043, "learning_rate": 8.639909168322453e-05, "loss": 0.03723128139972687, "step": 47940 }, { "epoch": 13.610559182514901, "grad_norm": 4.04428768157959, "learning_rate": 8.639625319330117e-05, "loss": 0.03817231059074402, "step": 47950 }, { "epoch": 13.613397672438262, "grad_norm": 7.314342975616455, "learning_rate": 8.639341470337781e-05, "loss": 0.040563181042671204, "step": 47960 }, { "epoch": 13.616236162361623, "grad_norm": 4.703126430511475, "learning_rate": 8.639057621345444e-05, "loss": 0.042730242013931274, "step": 47970 }, { "epoch": 13.619074652284985, "grad_norm": 16.723527908325195, "learning_rate": 8.638773772353108e-05, "loss": 0.044729626178741454, "step": 47980 }, { "epoch": 13.621913142208346, "grad_norm": 8.587444305419922, "learning_rate": 8.638489923360773e-05, "loss": 0.036767077445983884, "step": 47990 }, { "epoch": 13.624751632131705, "grad_norm": 7.164139747619629, "learning_rate": 8.638206074368436e-05, "loss": 0.03381766676902771, "step": 48000 }, { "epoch": 13.624751632131705, "eval_accuracy": 0.9483690468620843, "eval_loss": 0.15916083753108978, "eval_runtime": 31.362, "eval_samples_per_second": 501.467, "eval_steps_per_second": 7.844, "step": 48000 }, { "epoch": 13.627590122055066, "grad_norm": 7.926647186279297, "learning_rate": 8.6379222253761e-05, "loss": 0.047017800807952884, "step": 48010 }, { "epoch": 13.630428611978427, "grad_norm": 5.58323335647583, "learning_rate": 8.637638376383764e-05, "loss": 0.020823168754577636, "step": 48020 }, { "epoch": 13.633267101901788, "grad_norm": 5.785126686096191, "learning_rate": 8.637354527391429e-05, "loss": 0.026754921674728392, "step": 48030 }, { "epoch": 13.63610559182515, "grad_norm": 4.36375093460083, "learning_rate": 8.637070678399091e-05, "loss": 0.026675713062286378, "step": 48040 }, { "epoch": 13.63894408174851, "grad_norm": 6.520670413970947, "learning_rate": 8.636786829406755e-05, "loss": 0.027671533823013305, "step": 48050 }, { "epoch": 13.64178257167187, "grad_norm": 9.967583656311035, "learning_rate": 8.636502980414421e-05, "loss": 0.037085187435150144, "step": 48060 }, { "epoch": 13.644621061595231, "grad_norm": 1.4123072624206543, "learning_rate": 8.636219131422084e-05, "loss": 0.03883474767208099, "step": 48070 }, { "epoch": 13.647459551518592, "grad_norm": 4.982833385467529, "learning_rate": 8.635935282429748e-05, "loss": 0.0395208865404129, "step": 48080 }, { "epoch": 13.650298041441953, "grad_norm": 11.74831771850586, "learning_rate": 8.635651433437412e-05, "loss": 0.04091322720050812, "step": 48090 }, { "epoch": 13.653136531365314, "grad_norm": 13.438674926757812, "learning_rate": 8.635367584445075e-05, "loss": 0.04151826798915863, "step": 48100 }, { "epoch": 13.655975021288674, "grad_norm": 6.555180549621582, "learning_rate": 8.635083735452739e-05, "loss": 0.052014333009719846, "step": 48110 }, { "epoch": 13.658813511212035, "grad_norm": 1.3131600618362427, "learning_rate": 8.634799886460403e-05, "loss": 0.05837852954864502, "step": 48120 }, { "epoch": 13.661652001135396, "grad_norm": 4.496669769287109, "learning_rate": 8.634516037468067e-05, "loss": 0.03982115089893341, "step": 48130 }, { "epoch": 13.664490491058757, "grad_norm": 6.10742712020874, "learning_rate": 8.634232188475731e-05, "loss": 0.043477493524551394, "step": 48140 }, { "epoch": 13.667328980982118, "grad_norm": 11.731135368347168, "learning_rate": 8.633948339483396e-05, "loss": 0.0458399623632431, "step": 48150 }, { "epoch": 13.670167470905477, "grad_norm": 5.114226341247559, "learning_rate": 8.63366449049106e-05, "loss": 0.029850488901138304, "step": 48160 }, { "epoch": 13.673005960828839, "grad_norm": 10.64398193359375, "learning_rate": 8.633380641498722e-05, "loss": 0.03504253327846527, "step": 48170 }, { "epoch": 13.6758444507522, "grad_norm": 10.427469253540039, "learning_rate": 8.633096792506387e-05, "loss": 0.04113469421863556, "step": 48180 }, { "epoch": 13.67868294067556, "grad_norm": 1.5694217681884766, "learning_rate": 8.632812943514052e-05, "loss": 0.033141395449638365, "step": 48190 }, { "epoch": 13.681521430598922, "grad_norm": 3.8541409969329834, "learning_rate": 8.632529094521715e-05, "loss": 0.032296416163444516, "step": 48200 }, { "epoch": 13.684359920522283, "grad_norm": 3.6853768825531006, "learning_rate": 8.632245245529379e-05, "loss": 0.04020425975322724, "step": 48210 }, { "epoch": 13.687198410445642, "grad_norm": 7.272515773773193, "learning_rate": 8.631961396537043e-05, "loss": 0.022356095910072326, "step": 48220 }, { "epoch": 13.690036900369003, "grad_norm": 0.4939178228378296, "learning_rate": 8.631677547544706e-05, "loss": 0.013650459051132203, "step": 48230 }, { "epoch": 13.692875390292365, "grad_norm": 5.019110679626465, "learning_rate": 8.63139369855237e-05, "loss": 0.049863725900650024, "step": 48240 }, { "epoch": 13.695713880215726, "grad_norm": 1.272499442100525, "learning_rate": 8.631109849560034e-05, "loss": 0.01859838664531708, "step": 48250 }, { "epoch": 13.698552370139087, "grad_norm": 3.420999526977539, "learning_rate": 8.630826000567698e-05, "loss": 0.022597207129001616, "step": 48260 }, { "epoch": 13.701390860062446, "grad_norm": 4.212942600250244, "learning_rate": 8.630542151575362e-05, "loss": 0.03907855451107025, "step": 48270 }, { "epoch": 13.704229349985807, "grad_norm": 13.82310676574707, "learning_rate": 8.630258302583027e-05, "loss": 0.052293556928634646, "step": 48280 }, { "epoch": 13.707067839909168, "grad_norm": 8.72931957244873, "learning_rate": 8.629974453590691e-05, "loss": 0.03531932830810547, "step": 48290 }, { "epoch": 13.70990632983253, "grad_norm": 4.780879974365234, "learning_rate": 8.629690604598354e-05, "loss": 0.023269952833652498, "step": 48300 }, { "epoch": 13.71274481975589, "grad_norm": 4.586771011352539, "learning_rate": 8.629406755606018e-05, "loss": 0.026098227500915526, "step": 48310 }, { "epoch": 13.71558330967925, "grad_norm": 2.3212790489196777, "learning_rate": 8.629122906613682e-05, "loss": 0.019361671805381776, "step": 48320 }, { "epoch": 13.718421799602611, "grad_norm": 9.564801216125488, "learning_rate": 8.628839057621346e-05, "loss": 0.057374054193496705, "step": 48330 }, { "epoch": 13.721260289525972, "grad_norm": 1.632908821105957, "learning_rate": 8.62855520862901e-05, "loss": 0.03448108434677124, "step": 48340 }, { "epoch": 13.724098779449333, "grad_norm": 1.9769229888916016, "learning_rate": 8.628271359636674e-05, "loss": 0.03277253210544586, "step": 48350 }, { "epoch": 13.726937269372694, "grad_norm": 2.660634756088257, "learning_rate": 8.627987510644337e-05, "loss": 0.030680391192436218, "step": 48360 }, { "epoch": 13.729775759296054, "grad_norm": 3.069620132446289, "learning_rate": 8.627703661652001e-05, "loss": 0.03792406022548676, "step": 48370 }, { "epoch": 13.732614249219415, "grad_norm": 3.5685558319091797, "learning_rate": 8.627419812659665e-05, "loss": 0.052587497234344485, "step": 48380 }, { "epoch": 13.735452739142776, "grad_norm": 5.610739707946777, "learning_rate": 8.62713596366733e-05, "loss": 0.028223451972007752, "step": 48390 }, { "epoch": 13.738291229066137, "grad_norm": 5.543591022491455, "learning_rate": 8.626852114674994e-05, "loss": 0.03342098891735077, "step": 48400 }, { "epoch": 13.741129718989498, "grad_norm": 7.765977382659912, "learning_rate": 8.626568265682658e-05, "loss": 0.03520514965057373, "step": 48410 }, { "epoch": 13.743968208912857, "grad_norm": 8.167157173156738, "learning_rate": 8.626284416690322e-05, "loss": 0.021751371026039124, "step": 48420 }, { "epoch": 13.746806698836219, "grad_norm": 11.61294937133789, "learning_rate": 8.626000567697985e-05, "loss": 0.04853320717811584, "step": 48430 }, { "epoch": 13.74964518875958, "grad_norm": 7.700771808624268, "learning_rate": 8.625716718705649e-05, "loss": 0.0263394832611084, "step": 48440 }, { "epoch": 13.75248367868294, "grad_norm": 13.254837989807129, "learning_rate": 8.625432869713313e-05, "loss": 0.03906325101852417, "step": 48450 }, { "epoch": 13.755322168606302, "grad_norm": 1.32351553440094, "learning_rate": 8.625149020720977e-05, "loss": 0.02241494506597519, "step": 48460 }, { "epoch": 13.758160658529663, "grad_norm": 2.231417417526245, "learning_rate": 8.624865171728641e-05, "loss": 0.03027019202709198, "step": 48470 }, { "epoch": 13.760999148453022, "grad_norm": 11.51838207244873, "learning_rate": 8.624581322736305e-05, "loss": 0.0392301082611084, "step": 48480 }, { "epoch": 13.763837638376383, "grad_norm": 6.8642354011535645, "learning_rate": 8.624297473743968e-05, "loss": 0.03570234477519989, "step": 48490 }, { "epoch": 13.766676128299745, "grad_norm": 12.227685928344727, "learning_rate": 8.624013624751632e-05, "loss": 0.03676712810993195, "step": 48500 }, { "epoch": 13.766676128299745, "eval_accuracy": 0.9556813123927005, "eval_loss": 0.13903740048408508, "eval_runtime": 31.1746, "eval_samples_per_second": 504.481, "eval_steps_per_second": 7.891, "step": 48500 }, { "epoch": 13.769514618223106, "grad_norm": 8.302947044372559, "learning_rate": 8.623729775759296e-05, "loss": 0.03121452033519745, "step": 48510 }, { "epoch": 13.772353108146467, "grad_norm": 0.5118089318275452, "learning_rate": 8.62344592676696e-05, "loss": 0.026290327310562134, "step": 48520 }, { "epoch": 13.775191598069826, "grad_norm": 8.581632614135742, "learning_rate": 8.623162077774625e-05, "loss": 0.029920867085456847, "step": 48530 }, { "epoch": 13.778030087993187, "grad_norm": 17.793174743652344, "learning_rate": 8.622878228782289e-05, "loss": 0.04763144850730896, "step": 48540 }, { "epoch": 13.780868577916548, "grad_norm": 5.864516258239746, "learning_rate": 8.622594379789953e-05, "loss": 0.03731409013271332, "step": 48550 }, { "epoch": 13.78370706783991, "grad_norm": 4.590426921844482, "learning_rate": 8.622310530797616e-05, "loss": 0.052971386909484865, "step": 48560 }, { "epoch": 13.78654555776327, "grad_norm": 7.735920429229736, "learning_rate": 8.62202668180528e-05, "loss": 0.029651179909706116, "step": 48570 }, { "epoch": 13.789384047686632, "grad_norm": 3.522223711013794, "learning_rate": 8.621742832812944e-05, "loss": 0.03489167392253876, "step": 48580 }, { "epoch": 13.792222537609991, "grad_norm": 7.527494430541992, "learning_rate": 8.621458983820608e-05, "loss": 0.05272212624549866, "step": 48590 }, { "epoch": 13.795061027533352, "grad_norm": 6.009422779083252, "learning_rate": 8.621175134828272e-05, "loss": 0.03795459270477295, "step": 48600 }, { "epoch": 13.797899517456713, "grad_norm": 2.8332602977752686, "learning_rate": 8.620891285835936e-05, "loss": 0.04317569136619568, "step": 48610 }, { "epoch": 13.800738007380074, "grad_norm": 6.636512756347656, "learning_rate": 8.620607436843599e-05, "loss": 0.0366673469543457, "step": 48620 }, { "epoch": 13.803576497303435, "grad_norm": 7.454467296600342, "learning_rate": 8.620323587851263e-05, "loss": 0.04124805927276611, "step": 48630 }, { "epoch": 13.806414987226795, "grad_norm": 3.7960667610168457, "learning_rate": 8.620039738858927e-05, "loss": 0.033261549472808835, "step": 48640 }, { "epoch": 13.809253477150156, "grad_norm": 2.6478302478790283, "learning_rate": 8.619755889866592e-05, "loss": 0.027139192819595336, "step": 48650 }, { "epoch": 13.812091967073517, "grad_norm": 6.87192964553833, "learning_rate": 8.619472040874256e-05, "loss": 0.0389889806509018, "step": 48660 }, { "epoch": 13.814930456996878, "grad_norm": 1.3228923082351685, "learning_rate": 8.61918819188192e-05, "loss": 0.04917533099651337, "step": 48670 }, { "epoch": 13.81776894692024, "grad_norm": 2.538217067718506, "learning_rate": 8.618904342889583e-05, "loss": 0.03401821255683899, "step": 48680 }, { "epoch": 13.820607436843598, "grad_norm": 6.82718563079834, "learning_rate": 8.618620493897247e-05, "loss": 0.0350192666053772, "step": 48690 }, { "epoch": 13.82344592676696, "grad_norm": 5.149806499481201, "learning_rate": 8.618336644904911e-05, "loss": 0.03552314043045044, "step": 48700 }, { "epoch": 13.82628441669032, "grad_norm": 8.446120262145996, "learning_rate": 8.618052795912575e-05, "loss": 0.05527779459953308, "step": 48710 }, { "epoch": 13.829122906613682, "grad_norm": 2.4945015907287598, "learning_rate": 8.617768946920239e-05, "loss": 0.021887503564357758, "step": 48720 }, { "epoch": 13.831961396537043, "grad_norm": 7.55051326751709, "learning_rate": 8.617485097927903e-05, "loss": 0.051308661699295044, "step": 48730 }, { "epoch": 13.834799886460402, "grad_norm": 11.434135437011719, "learning_rate": 8.617201248935567e-05, "loss": 0.03981300890445709, "step": 48740 }, { "epoch": 13.837638376383763, "grad_norm": 5.199482440948486, "learning_rate": 8.61691739994323e-05, "loss": 0.04285101294517517, "step": 48750 }, { "epoch": 13.840476866307124, "grad_norm": 2.992975950241089, "learning_rate": 8.616633550950894e-05, "loss": 0.03893276751041412, "step": 48760 }, { "epoch": 13.843315356230486, "grad_norm": 1.883261799812317, "learning_rate": 8.616349701958558e-05, "loss": 0.03400767743587494, "step": 48770 }, { "epoch": 13.846153846153847, "grad_norm": 13.214120864868164, "learning_rate": 8.616065852966221e-05, "loss": 0.03298504948616028, "step": 48780 }, { "epoch": 13.848992336077206, "grad_norm": 6.457499980926514, "learning_rate": 8.615782003973887e-05, "loss": 0.024342995882034302, "step": 48790 }, { "epoch": 13.851830826000567, "grad_norm": 4.725510120391846, "learning_rate": 8.615498154981551e-05, "loss": 0.044960802793502806, "step": 48800 }, { "epoch": 13.854669315923928, "grad_norm": 12.056913375854492, "learning_rate": 8.615214305989214e-05, "loss": 0.03085973858833313, "step": 48810 }, { "epoch": 13.85750780584729, "grad_norm": 1.6123108863830566, "learning_rate": 8.614930456996878e-05, "loss": 0.042486336827278134, "step": 48820 }, { "epoch": 13.86034629577065, "grad_norm": 10.329974174499512, "learning_rate": 8.614646608004542e-05, "loss": 0.028568169474601744, "step": 48830 }, { "epoch": 13.863184785694012, "grad_norm": 5.0873703956604, "learning_rate": 8.614362759012206e-05, "loss": 0.03121526539325714, "step": 48840 }, { "epoch": 13.866023275617371, "grad_norm": 8.78348445892334, "learning_rate": 8.614078910019869e-05, "loss": 0.03692586123943329, "step": 48850 }, { "epoch": 13.868861765540732, "grad_norm": 2.368893623352051, "learning_rate": 8.613795061027534e-05, "loss": 0.03682179450988769, "step": 48860 }, { "epoch": 13.871700255464093, "grad_norm": 3.834256172180176, "learning_rate": 8.613511212035199e-05, "loss": 0.042053830623626706, "step": 48870 }, { "epoch": 13.874538745387454, "grad_norm": 8.786736488342285, "learning_rate": 8.613227363042861e-05, "loss": 0.030211985111236572, "step": 48880 }, { "epoch": 13.877377235310815, "grad_norm": 6.813754558563232, "learning_rate": 8.612943514050525e-05, "loss": 0.04961362779140473, "step": 48890 }, { "epoch": 13.880215725234175, "grad_norm": 1.4872814416885376, "learning_rate": 8.61265966505819e-05, "loss": 0.03151536285877228, "step": 48900 }, { "epoch": 13.883054215157536, "grad_norm": 0.777333676815033, "learning_rate": 8.612375816065852e-05, "loss": 0.03926765620708465, "step": 48910 }, { "epoch": 13.885892705080897, "grad_norm": 11.57472038269043, "learning_rate": 8.612091967073518e-05, "loss": 0.04946111738681793, "step": 48920 }, { "epoch": 13.888731195004258, "grad_norm": 1.3169654607772827, "learning_rate": 8.611808118081182e-05, "loss": 0.05031781792640686, "step": 48930 }, { "epoch": 13.89156968492762, "grad_norm": 4.004415035247803, "learning_rate": 8.611524269088845e-05, "loss": 0.031546711921691895, "step": 48940 }, { "epoch": 13.894408174850978, "grad_norm": 1.268197774887085, "learning_rate": 8.611240420096509e-05, "loss": 0.034222853183746335, "step": 48950 }, { "epoch": 13.89724666477434, "grad_norm": 5.52115535736084, "learning_rate": 8.610956571104173e-05, "loss": 0.03151540458202362, "step": 48960 }, { "epoch": 13.9000851546977, "grad_norm": 1.7569721937179565, "learning_rate": 8.610672722111837e-05, "loss": 0.023706762492656706, "step": 48970 }, { "epoch": 13.902923644621062, "grad_norm": 6.592088222503662, "learning_rate": 8.6103888731195e-05, "loss": 0.057257479429244994, "step": 48980 }, { "epoch": 13.905762134544423, "grad_norm": 11.381346702575684, "learning_rate": 8.610105024127165e-05, "loss": 0.03457716703414917, "step": 48990 }, { "epoch": 13.908600624467784, "grad_norm": 3.538055658340454, "learning_rate": 8.60982117513483e-05, "loss": 0.037588676810264586, "step": 49000 }, { "epoch": 13.908600624467784, "eval_accuracy": 0.9500222547211802, "eval_loss": 0.1497591733932495, "eval_runtime": 31.5303, "eval_samples_per_second": 498.79, "eval_steps_per_second": 7.802, "step": 49000 }, { "epoch": 13.911439114391143, "grad_norm": 8.341750144958496, "learning_rate": 8.609537326142492e-05, "loss": 0.024016308784484863, "step": 49010 }, { "epoch": 13.914277604314504, "grad_norm": 3.7513880729675293, "learning_rate": 8.609253477150156e-05, "loss": 0.03671912550926208, "step": 49020 }, { "epoch": 13.917116094237866, "grad_norm": 7.933117389678955, "learning_rate": 8.60896962815782e-05, "loss": 0.05676103830337524, "step": 49030 }, { "epoch": 13.919954584161227, "grad_norm": 3.312819004058838, "learning_rate": 8.608685779165483e-05, "loss": 0.041959112882614134, "step": 49040 }, { "epoch": 13.922793074084588, "grad_norm": 2.7314186096191406, "learning_rate": 8.608401930173148e-05, "loss": 0.047778987884521486, "step": 49050 }, { "epoch": 13.925631564007947, "grad_norm": 9.731554985046387, "learning_rate": 8.608118081180813e-05, "loss": 0.043270617723464966, "step": 49060 }, { "epoch": 13.928470053931308, "grad_norm": 4.183832168579102, "learning_rate": 8.607834232188476e-05, "loss": 0.03043203353881836, "step": 49070 }, { "epoch": 13.93130854385467, "grad_norm": 6.057960510253906, "learning_rate": 8.60755038319614e-05, "loss": 0.026605433225631712, "step": 49080 }, { "epoch": 13.93414703377803, "grad_norm": 9.82526969909668, "learning_rate": 8.607266534203804e-05, "loss": 0.02724425494670868, "step": 49090 }, { "epoch": 13.936985523701392, "grad_norm": 1.08476984500885, "learning_rate": 8.606982685211468e-05, "loss": 0.020269884169101714, "step": 49100 }, { "epoch": 13.93982401362475, "grad_norm": 9.197391510009766, "learning_rate": 8.606698836219131e-05, "loss": 0.01588790714740753, "step": 49110 }, { "epoch": 13.942662503548112, "grad_norm": 3.962165594100952, "learning_rate": 8.606414987226797e-05, "loss": 0.018375831842422485, "step": 49120 }, { "epoch": 13.945500993471473, "grad_norm": 10.083041191101074, "learning_rate": 8.60613113823446e-05, "loss": 0.0471290111541748, "step": 49130 }, { "epoch": 13.948339483394834, "grad_norm": 7.544643878936768, "learning_rate": 8.605847289242123e-05, "loss": 0.03300432562828064, "step": 49140 }, { "epoch": 13.951177973318195, "grad_norm": 3.965888500213623, "learning_rate": 8.605563440249788e-05, "loss": 0.04498538374900818, "step": 49150 }, { "epoch": 13.954016463241555, "grad_norm": 2.322057008743286, "learning_rate": 8.605279591257452e-05, "loss": 0.028949755430221557, "step": 49160 }, { "epoch": 13.956854953164916, "grad_norm": 3.3506157398223877, "learning_rate": 8.604995742265114e-05, "loss": 0.02183050811290741, "step": 49170 }, { "epoch": 13.959693443088277, "grad_norm": 9.87710189819336, "learning_rate": 8.604711893272779e-05, "loss": 0.034973537921905516, "step": 49180 }, { "epoch": 13.962531933011638, "grad_norm": 0.7107105851173401, "learning_rate": 8.604428044280444e-05, "loss": 0.037807163596153257, "step": 49190 }, { "epoch": 13.965370422934999, "grad_norm": 0.5313303470611572, "learning_rate": 8.604144195288107e-05, "loss": 0.03104567527770996, "step": 49200 }, { "epoch": 13.968208912858358, "grad_norm": 11.003992080688477, "learning_rate": 8.603860346295771e-05, "loss": 0.023176269233226778, "step": 49210 }, { "epoch": 13.97104740278172, "grad_norm": 7.429601669311523, "learning_rate": 8.603576497303435e-05, "loss": 0.04297060966491699, "step": 49220 }, { "epoch": 13.97388589270508, "grad_norm": 4.123441696166992, "learning_rate": 8.603292648311099e-05, "loss": 0.04354064166545868, "step": 49230 }, { "epoch": 13.976724382628442, "grad_norm": 15.48652458190918, "learning_rate": 8.603008799318762e-05, "loss": 0.03613204956054687, "step": 49240 }, { "epoch": 13.979562872551803, "grad_norm": 8.313751220703125, "learning_rate": 8.602724950326426e-05, "loss": 0.036995112895965576, "step": 49250 }, { "epoch": 13.982401362475164, "grad_norm": 6.673551559448242, "learning_rate": 8.602441101334092e-05, "loss": 0.04898467361927032, "step": 49260 }, { "epoch": 13.985239852398523, "grad_norm": 12.521001815795898, "learning_rate": 8.602157252341755e-05, "loss": 0.04510078430175781, "step": 49270 }, { "epoch": 13.988078342321884, "grad_norm": 3.74715518951416, "learning_rate": 8.601873403349419e-05, "loss": 0.04523438215255737, "step": 49280 }, { "epoch": 13.990916832245246, "grad_norm": 4.469878196716309, "learning_rate": 8.601589554357083e-05, "loss": 0.023627981543540955, "step": 49290 }, { "epoch": 13.993755322168607, "grad_norm": 10.40145492553711, "learning_rate": 8.601305705364746e-05, "loss": 0.024804672598838805, "step": 49300 }, { "epoch": 13.996593812091968, "grad_norm": 11.39846420288086, "learning_rate": 8.60102185637241e-05, "loss": 0.04401984214782715, "step": 49310 }, { "epoch": 13.999432302015327, "grad_norm": 11.098444938659668, "learning_rate": 8.600738007380075e-05, "loss": 0.02608799934387207, "step": 49320 }, { "epoch": 14.002270791938688, "grad_norm": 15.914974212646484, "learning_rate": 8.600454158387738e-05, "loss": 0.03237663507461548, "step": 49330 }, { "epoch": 14.00510928186205, "grad_norm": 14.43706226348877, "learning_rate": 8.600170309395402e-05, "loss": 0.031493523716926576, "step": 49340 }, { "epoch": 14.00794777178541, "grad_norm": 10.505023002624512, "learning_rate": 8.599886460403066e-05, "loss": 0.034197115898132326, "step": 49350 }, { "epoch": 14.010786261708772, "grad_norm": 15.057801246643066, "learning_rate": 8.59960261141073e-05, "loss": 0.044925516843795775, "step": 49360 }, { "epoch": 14.01362475163213, "grad_norm": 10.624711036682129, "learning_rate": 8.599318762418393e-05, "loss": 0.027485841512680055, "step": 49370 }, { "epoch": 14.016463241555492, "grad_norm": 0.39117997884750366, "learning_rate": 8.599034913426057e-05, "loss": 0.02868073582649231, "step": 49380 }, { "epoch": 14.019301731478853, "grad_norm": 4.588068962097168, "learning_rate": 8.598751064433723e-05, "loss": 0.0261732816696167, "step": 49390 }, { "epoch": 14.022140221402214, "grad_norm": 3.7235233783721924, "learning_rate": 8.598467215441386e-05, "loss": 0.028425517678260803, "step": 49400 }, { "epoch": 14.024978711325575, "grad_norm": 8.271585464477539, "learning_rate": 8.59818336644905e-05, "loss": 0.019010022282600403, "step": 49410 }, { "epoch": 14.027817201248936, "grad_norm": 1.4294496774673462, "learning_rate": 8.597899517456714e-05, "loss": 0.03500679433345795, "step": 49420 }, { "epoch": 14.030655691172296, "grad_norm": 9.961371421813965, "learning_rate": 8.597615668464377e-05, "loss": 0.041388750076293945, "step": 49430 }, { "epoch": 14.033494181095657, "grad_norm": 4.820285797119141, "learning_rate": 8.597331819472041e-05, "loss": 0.022803471982479097, "step": 49440 }, { "epoch": 14.036332671019018, "grad_norm": 0.7603951692581177, "learning_rate": 8.597047970479705e-05, "loss": 0.02021336704492569, "step": 49450 }, { "epoch": 14.039171160942379, "grad_norm": 11.177534103393555, "learning_rate": 8.596764121487369e-05, "loss": 0.03464284539222717, "step": 49460 }, { "epoch": 14.04200965086574, "grad_norm": 16.69522476196289, "learning_rate": 8.596480272495033e-05, "loss": 0.042182856798172, "step": 49470 }, { "epoch": 14.0448481407891, "grad_norm": 15.361966133117676, "learning_rate": 8.596196423502697e-05, "loss": 0.03753726482391358, "step": 49480 }, { "epoch": 14.04768663071246, "grad_norm": 7.312578201293945, "learning_rate": 8.595912574510361e-05, "loss": 0.04405333399772644, "step": 49490 }, { "epoch": 14.050525120635822, "grad_norm": 5.694483757019043, "learning_rate": 8.595628725518024e-05, "loss": 0.018757201731204987, "step": 49500 }, { "epoch": 14.050525120635822, "eval_accuracy": 0.9628664080880015, "eval_loss": 0.11576772481203079, "eval_runtime": 31.4364, "eval_samples_per_second": 500.279, "eval_steps_per_second": 7.825, "step": 49500 }, { "epoch": 14.053363610559183, "grad_norm": 1.6199156045913696, "learning_rate": 8.595344876525688e-05, "loss": 0.02848471999168396, "step": 49510 }, { "epoch": 14.056202100482544, "grad_norm": 5.574838638305664, "learning_rate": 8.595061027533353e-05, "loss": 0.02811778783798218, "step": 49520 }, { "epoch": 14.059040590405903, "grad_norm": 2.209955930709839, "learning_rate": 8.594777178541017e-05, "loss": 0.028638684749603273, "step": 49530 }, { "epoch": 14.061879080329264, "grad_norm": 2.9624717235565186, "learning_rate": 8.594493329548681e-05, "loss": 0.0230121374130249, "step": 49540 }, { "epoch": 14.064717570252625, "grad_norm": 7.450040340423584, "learning_rate": 8.594209480556345e-05, "loss": 0.035555633902549746, "step": 49550 }, { "epoch": 14.067556060175987, "grad_norm": 6.268996715545654, "learning_rate": 8.593925631564008e-05, "loss": 0.031611236929893496, "step": 49560 }, { "epoch": 14.070394550099348, "grad_norm": 5.829379081726074, "learning_rate": 8.593641782571672e-05, "loss": 0.04754213988780975, "step": 49570 }, { "epoch": 14.073233040022707, "grad_norm": 12.128203392028809, "learning_rate": 8.593357933579336e-05, "loss": 0.040772438049316406, "step": 49580 }, { "epoch": 14.076071529946068, "grad_norm": 1.4006669521331787, "learning_rate": 8.593074084587e-05, "loss": 0.02982007563114166, "step": 49590 }, { "epoch": 14.07891001986943, "grad_norm": 4.344081401824951, "learning_rate": 8.592790235594664e-05, "loss": 0.015107987821102143, "step": 49600 }, { "epoch": 14.08174850979279, "grad_norm": 4.288851737976074, "learning_rate": 8.592506386602328e-05, "loss": 0.02266688793897629, "step": 49610 }, { "epoch": 14.084586999716151, "grad_norm": 9.014312744140625, "learning_rate": 8.592222537609991e-05, "loss": 0.04199548959732056, "step": 49620 }, { "epoch": 14.087425489639513, "grad_norm": 6.336841106414795, "learning_rate": 8.591938688617655e-05, "loss": 0.03385908305644989, "step": 49630 }, { "epoch": 14.090263979562872, "grad_norm": 4.437830924987793, "learning_rate": 8.59165483962532e-05, "loss": 0.03252928256988526, "step": 49640 }, { "epoch": 14.093102469486233, "grad_norm": 2.8051395416259766, "learning_rate": 8.591370990632984e-05, "loss": 0.02804200053215027, "step": 49650 }, { "epoch": 14.095940959409594, "grad_norm": 8.211349487304688, "learning_rate": 8.591087141640648e-05, "loss": 0.029553130269050598, "step": 49660 }, { "epoch": 14.098779449332955, "grad_norm": 6.522387981414795, "learning_rate": 8.590803292648312e-05, "loss": 0.041052433848381045, "step": 49670 }, { "epoch": 14.101617939256316, "grad_norm": 1.68451988697052, "learning_rate": 8.590519443655976e-05, "loss": 0.03737498223781586, "step": 49680 }, { "epoch": 14.104456429179676, "grad_norm": 0.94016432762146, "learning_rate": 8.590235594663639e-05, "loss": 0.024851275980472563, "step": 49690 }, { "epoch": 14.107294919103037, "grad_norm": 4.330970764160156, "learning_rate": 8.589951745671303e-05, "loss": 0.01902497559785843, "step": 49700 }, { "epoch": 14.110133409026398, "grad_norm": 9.700041770935059, "learning_rate": 8.589667896678967e-05, "loss": 0.01783207356929779, "step": 49710 }, { "epoch": 14.112971898949759, "grad_norm": 1.798171043395996, "learning_rate": 8.589384047686631e-05, "loss": 0.030052945017814636, "step": 49720 }, { "epoch": 14.11581038887312, "grad_norm": 0.37163740396499634, "learning_rate": 8.589100198694295e-05, "loss": 0.03718210458755493, "step": 49730 }, { "epoch": 14.11864887879648, "grad_norm": 1.8951197862625122, "learning_rate": 8.58881634970196e-05, "loss": 0.02399684488773346, "step": 49740 }, { "epoch": 14.12148736871984, "grad_norm": 5.541714668273926, "learning_rate": 8.588532500709622e-05, "loss": 0.03576144576072693, "step": 49750 }, { "epoch": 14.124325858643202, "grad_norm": 0.4724043905735016, "learning_rate": 8.588248651717286e-05, "loss": 0.025749751925468446, "step": 49760 }, { "epoch": 14.127164348566563, "grad_norm": 5.300692081451416, "learning_rate": 8.58796480272495e-05, "loss": 0.017452608048915862, "step": 49770 }, { "epoch": 14.130002838489924, "grad_norm": 0.691306471824646, "learning_rate": 8.587680953732615e-05, "loss": 0.014005993306636811, "step": 49780 }, { "epoch": 14.132841328413285, "grad_norm": 10.76020336151123, "learning_rate": 8.587397104740279e-05, "loss": 0.041131556034088135, "step": 49790 }, { "epoch": 14.135679818336644, "grad_norm": 3.684041976928711, "learning_rate": 8.587113255747943e-05, "loss": 0.023859602212905884, "step": 49800 }, { "epoch": 14.138518308260005, "grad_norm": 6.936976909637451, "learning_rate": 8.586829406755607e-05, "loss": 0.015928187966346742, "step": 49810 }, { "epoch": 14.141356798183367, "grad_norm": 6.7339091300964355, "learning_rate": 8.58654555776327e-05, "loss": 0.023181793093681336, "step": 49820 }, { "epoch": 14.144195288106728, "grad_norm": 9.926058769226074, "learning_rate": 8.586261708770934e-05, "loss": 0.031194770336151124, "step": 49830 }, { "epoch": 14.147033778030089, "grad_norm": 3.7394731044769287, "learning_rate": 8.585977859778598e-05, "loss": 0.03421114981174469, "step": 49840 }, { "epoch": 14.149872267953448, "grad_norm": 10.771072387695312, "learning_rate": 8.585694010786261e-05, "loss": 0.023525959253311156, "step": 49850 }, { "epoch": 14.15271075787681, "grad_norm": 3.9890058040618896, "learning_rate": 8.585410161793926e-05, "loss": 0.04136554598808288, "step": 49860 }, { "epoch": 14.15554924780017, "grad_norm": 3.086162567138672, "learning_rate": 8.58512631280159e-05, "loss": 0.033159708976745604, "step": 49870 }, { "epoch": 14.158387737723531, "grad_norm": 2.5958006381988525, "learning_rate": 8.584842463809253e-05, "loss": 0.03257180154323578, "step": 49880 }, { "epoch": 14.161226227646893, "grad_norm": 1.4766089916229248, "learning_rate": 8.584558614816917e-05, "loss": 0.025836512446403503, "step": 49890 }, { "epoch": 14.164064717570252, "grad_norm": 12.105684280395508, "learning_rate": 8.584274765824582e-05, "loss": 0.02869676351547241, "step": 49900 }, { "epoch": 14.166903207493613, "grad_norm": 6.916757583618164, "learning_rate": 8.583990916832246e-05, "loss": 0.01802630126476288, "step": 49910 }, { "epoch": 14.169741697416974, "grad_norm": 4.512959003448486, "learning_rate": 8.58370706783991e-05, "loss": 0.01660865545272827, "step": 49920 }, { "epoch": 14.172580187340335, "grad_norm": 6.5449981689453125, "learning_rate": 8.583423218847574e-05, "loss": 0.023651117086410524, "step": 49930 }, { "epoch": 14.175418677263696, "grad_norm": 1.095157265663147, "learning_rate": 8.583139369855238e-05, "loss": 0.027622419595718383, "step": 49940 }, { "epoch": 14.178257167187056, "grad_norm": 4.747420310974121, "learning_rate": 8.582855520862901e-05, "loss": 0.03191403746604919, "step": 49950 }, { "epoch": 14.181095657110417, "grad_norm": 2.007878065109253, "learning_rate": 8.582571671870565e-05, "loss": 0.04082639217376709, "step": 49960 }, { "epoch": 14.183934147033778, "grad_norm": 18.160432815551758, "learning_rate": 8.582287822878229e-05, "loss": 0.025690150260925294, "step": 49970 }, { "epoch": 14.186772636957139, "grad_norm": 4.28290319442749, "learning_rate": 8.582003973885892e-05, "loss": 0.02678082585334778, "step": 49980 }, { "epoch": 14.1896111268805, "grad_norm": 11.701045036315918, "learning_rate": 8.581720124893558e-05, "loss": 0.04468222260475159, "step": 49990 }, { "epoch": 14.192449616803861, "grad_norm": 7.335849761962891, "learning_rate": 8.581436275901222e-05, "loss": 0.027931097149848937, "step": 50000 }, { "epoch": 14.192449616803861, "eval_accuracy": 0.9580975392636867, "eval_loss": 0.12964147329330444, "eval_runtime": 31.689, "eval_samples_per_second": 496.293, "eval_steps_per_second": 7.763, "step": 50000 }, { "epoch": 14.19528810672722, "grad_norm": 3.745678663253784, "learning_rate": 8.581152426908884e-05, "loss": 0.01775626391172409, "step": 50010 }, { "epoch": 14.198126596650582, "grad_norm": 3.5942907333374023, "learning_rate": 8.580868577916549e-05, "loss": 0.01917008012533188, "step": 50020 }, { "epoch": 14.200965086573943, "grad_norm": 12.183575630187988, "learning_rate": 8.580584728924213e-05, "loss": 0.031027162075042726, "step": 50030 }, { "epoch": 14.203803576497304, "grad_norm": 1.2091981172561646, "learning_rate": 8.580300879931877e-05, "loss": 0.00841534286737442, "step": 50040 }, { "epoch": 14.206642066420665, "grad_norm": 11.461080551147461, "learning_rate": 8.580017030939541e-05, "loss": 0.017339488863945006, "step": 50050 }, { "epoch": 14.209480556344024, "grad_norm": 5.73613166809082, "learning_rate": 8.579733181947205e-05, "loss": 0.032194784283638, "step": 50060 }, { "epoch": 14.212319046267385, "grad_norm": 3.822580099105835, "learning_rate": 8.579449332954869e-05, "loss": 0.03832382559776306, "step": 50070 }, { "epoch": 14.215157536190747, "grad_norm": 6.792261123657227, "learning_rate": 8.579165483962532e-05, "loss": 0.02406049370765686, "step": 50080 }, { "epoch": 14.217996026114108, "grad_norm": 6.343382358551025, "learning_rate": 8.578881634970196e-05, "loss": 0.021263013780117034, "step": 50090 }, { "epoch": 14.220834516037469, "grad_norm": 4.553318977355957, "learning_rate": 8.57859778597786e-05, "loss": 0.02685590982437134, "step": 50100 }, { "epoch": 14.223673005960828, "grad_norm": 7.7648844718933105, "learning_rate": 8.578313936985523e-05, "loss": 0.02674403488636017, "step": 50110 }, { "epoch": 14.22651149588419, "grad_norm": 1.3327938318252563, "learning_rate": 8.578030087993189e-05, "loss": 0.03879241645336151, "step": 50120 }, { "epoch": 14.22934998580755, "grad_norm": 4.96824312210083, "learning_rate": 8.577746239000853e-05, "loss": 0.017473468184471132, "step": 50130 }, { "epoch": 14.232188475730911, "grad_norm": 4.059240818023682, "learning_rate": 8.577462390008515e-05, "loss": 0.019765456020832062, "step": 50140 }, { "epoch": 14.235026965654273, "grad_norm": 1.62040114402771, "learning_rate": 8.57717854101618e-05, "loss": 0.029139450192451476, "step": 50150 }, { "epoch": 14.237865455577632, "grad_norm": 13.126379013061523, "learning_rate": 8.576894692023844e-05, "loss": 0.03385429084300995, "step": 50160 }, { "epoch": 14.240703945500993, "grad_norm": 18.521568298339844, "learning_rate": 8.576610843031508e-05, "loss": 0.032480287551879886, "step": 50170 }, { "epoch": 14.243542435424354, "grad_norm": 1.7817473411560059, "learning_rate": 8.576326994039171e-05, "loss": 0.03137780725955963, "step": 50180 }, { "epoch": 14.246380925347715, "grad_norm": 2.8078908920288086, "learning_rate": 8.576043145046836e-05, "loss": 0.029249498248100282, "step": 50190 }, { "epoch": 14.249219415271076, "grad_norm": 0.9359973669052124, "learning_rate": 8.5757592960545e-05, "loss": 0.02133913040161133, "step": 50200 }, { "epoch": 14.252057905194437, "grad_norm": 8.117232322692871, "learning_rate": 8.575475447062163e-05, "loss": 0.03519972562789917, "step": 50210 }, { "epoch": 14.254896395117797, "grad_norm": 10.54971694946289, "learning_rate": 8.575191598069827e-05, "loss": 0.041789859533309937, "step": 50220 }, { "epoch": 14.257734885041158, "grad_norm": 9.854226112365723, "learning_rate": 8.574907749077491e-05, "loss": 0.026445215940475462, "step": 50230 }, { "epoch": 14.260573374964519, "grad_norm": 8.530778884887695, "learning_rate": 8.574623900085154e-05, "loss": 0.025764849781990052, "step": 50240 }, { "epoch": 14.26341186488788, "grad_norm": 7.195922374725342, "learning_rate": 8.57434005109282e-05, "loss": 0.022102949023246766, "step": 50250 }, { "epoch": 14.266250354811241, "grad_norm": 5.125481128692627, "learning_rate": 8.574056202100484e-05, "loss": 0.03041970729827881, "step": 50260 }, { "epoch": 14.2690888447346, "grad_norm": 12.072976112365723, "learning_rate": 8.573772353108147e-05, "loss": 0.043194171786308286, "step": 50270 }, { "epoch": 14.271927334657962, "grad_norm": 0.5271238684654236, "learning_rate": 8.573488504115811e-05, "loss": 0.021397794783115386, "step": 50280 }, { "epoch": 14.274765824581323, "grad_norm": 7.865839958190918, "learning_rate": 8.573204655123475e-05, "loss": 0.015236221253871918, "step": 50290 }, { "epoch": 14.277604314504684, "grad_norm": 3.6244921684265137, "learning_rate": 8.572920806131139e-05, "loss": 0.02463509887456894, "step": 50300 }, { "epoch": 14.280442804428045, "grad_norm": 17.5566463470459, "learning_rate": 8.572636957138802e-05, "loss": 0.058706659078598025, "step": 50310 }, { "epoch": 14.283281294351404, "grad_norm": 2.5470423698425293, "learning_rate": 8.572353108146467e-05, "loss": 0.027238631248474122, "step": 50320 }, { "epoch": 14.286119784274765, "grad_norm": 14.131685256958008, "learning_rate": 8.572069259154131e-05, "loss": 0.02821727395057678, "step": 50330 }, { "epoch": 14.288958274198126, "grad_norm": 9.750001907348633, "learning_rate": 8.571785410161794e-05, "loss": 0.0328499972820282, "step": 50340 }, { "epoch": 14.291796764121488, "grad_norm": 6.270340919494629, "learning_rate": 8.571501561169458e-05, "loss": 0.042112496495246884, "step": 50350 }, { "epoch": 14.294635254044849, "grad_norm": 0.8371354341506958, "learning_rate": 8.571217712177122e-05, "loss": 0.018076685070991517, "step": 50360 }, { "epoch": 14.297473743968208, "grad_norm": 8.482423782348633, "learning_rate": 8.570933863184785e-05, "loss": 0.02399584650993347, "step": 50370 }, { "epoch": 14.30031223389157, "grad_norm": 18.69969367980957, "learning_rate": 8.57065001419245e-05, "loss": 0.02841760218143463, "step": 50380 }, { "epoch": 14.30315072381493, "grad_norm": 1.6346039772033691, "learning_rate": 8.570366165200115e-05, "loss": 0.021412229537963866, "step": 50390 }, { "epoch": 14.305989213738291, "grad_norm": 12.068750381469727, "learning_rate": 8.570082316207778e-05, "loss": 0.03126122355461121, "step": 50400 }, { "epoch": 14.308827703661652, "grad_norm": 12.208056449890137, "learning_rate": 8.569798467215442e-05, "loss": 0.03616892397403717, "step": 50410 }, { "epoch": 14.311666193585014, "grad_norm": 6.67946195602417, "learning_rate": 8.569514618223106e-05, "loss": 0.029627156257629395, "step": 50420 }, { "epoch": 14.314504683508373, "grad_norm": 3.8765785694122314, "learning_rate": 8.56923076923077e-05, "loss": 0.032170730829238894, "step": 50430 }, { "epoch": 14.317343173431734, "grad_norm": 10.998615264892578, "learning_rate": 8.568946920238433e-05, "loss": 0.05253680944442749, "step": 50440 }, { "epoch": 14.320181663355095, "grad_norm": 0.28679242730140686, "learning_rate": 8.568663071246098e-05, "loss": 0.01667790859937668, "step": 50450 }, { "epoch": 14.323020153278456, "grad_norm": 1.6101802587509155, "learning_rate": 8.568379222253761e-05, "loss": 0.05359309911727905, "step": 50460 }, { "epoch": 14.325858643201817, "grad_norm": 0.8244699239730835, "learning_rate": 8.568095373261425e-05, "loss": 0.02687566578388214, "step": 50470 }, { "epoch": 14.328697133125177, "grad_norm": 15.569474220275879, "learning_rate": 8.56781152426909e-05, "loss": 0.023008763790130615, "step": 50480 }, { "epoch": 14.331535623048538, "grad_norm": 3.032259225845337, "learning_rate": 8.567527675276754e-05, "loss": 0.03624162673950195, "step": 50490 }, { "epoch": 14.334374112971899, "grad_norm": 14.733590126037598, "learning_rate": 8.567272211183652e-05, "loss": 0.08000153303146362, "step": 50500 }, { "epoch": 14.334374112971899, "eval_accuracy": 0.9556177274750429, "eval_loss": 0.1320401430130005, "eval_runtime": 31.5257, "eval_samples_per_second": 498.862, "eval_steps_per_second": 7.803, "step": 50500 }, { "epoch": 14.33721260289526, "grad_norm": 4.095160007476807, "learning_rate": 8.566988362191314e-05, "loss": 0.013829153776168824, "step": 50510 }, { "epoch": 14.340051092818621, "grad_norm": 5.885178565979004, "learning_rate": 8.566704513198978e-05, "loss": 0.019609487056732176, "step": 50520 }, { "epoch": 14.34288958274198, "grad_norm": 1.6446106433868408, "learning_rate": 8.566420664206643e-05, "loss": 0.029877638816833495, "step": 50530 }, { "epoch": 14.345728072665342, "grad_norm": 1.0699890851974487, "learning_rate": 8.566136815214307e-05, "loss": 0.02531459629535675, "step": 50540 }, { "epoch": 14.348566562588703, "grad_norm": 2.2033169269561768, "learning_rate": 8.56585296622197e-05, "loss": 0.027001941204071046, "step": 50550 }, { "epoch": 14.351405052512064, "grad_norm": 17.86330795288086, "learning_rate": 8.565569117229634e-05, "loss": 0.04457869231700897, "step": 50560 }, { "epoch": 14.354243542435425, "grad_norm": 7.273069381713867, "learning_rate": 8.565285268237299e-05, "loss": 0.04515500962734222, "step": 50570 }, { "epoch": 14.357082032358786, "grad_norm": 2.293480396270752, "learning_rate": 8.565001419244962e-05, "loss": 0.02725212275981903, "step": 50580 }, { "epoch": 14.359920522282145, "grad_norm": 6.626486301422119, "learning_rate": 8.564717570252626e-05, "loss": 0.03408137559890747, "step": 50590 }, { "epoch": 14.362759012205506, "grad_norm": 12.899566650390625, "learning_rate": 8.56443372126029e-05, "loss": 0.03901633620262146, "step": 50600 }, { "epoch": 14.365597502128868, "grad_norm": 5.514700412750244, "learning_rate": 8.564149872267953e-05, "loss": 0.0323235034942627, "step": 50610 }, { "epoch": 14.368435992052229, "grad_norm": 0.6008784174919128, "learning_rate": 8.563866023275617e-05, "loss": 0.020973995327949524, "step": 50620 }, { "epoch": 14.37127448197559, "grad_norm": 8.451521873474121, "learning_rate": 8.563582174283283e-05, "loss": 0.0300495445728302, "step": 50630 }, { "epoch": 14.374112971898949, "grad_norm": 4.776422500610352, "learning_rate": 8.563298325290945e-05, "loss": 0.03752357959747314, "step": 50640 }, { "epoch": 14.37695146182231, "grad_norm": 8.96488094329834, "learning_rate": 8.56301447629861e-05, "loss": 0.04631670713424683, "step": 50650 }, { "epoch": 14.379789951745671, "grad_norm": 12.579450607299805, "learning_rate": 8.562730627306274e-05, "loss": 0.03514888882637024, "step": 50660 }, { "epoch": 14.382628441669032, "grad_norm": 6.835667610168457, "learning_rate": 8.562446778313938e-05, "loss": 0.031214013695716858, "step": 50670 }, { "epoch": 14.385466931592394, "grad_norm": 10.591228485107422, "learning_rate": 8.5621629293216e-05, "loss": 0.045433837175369265, "step": 50680 }, { "epoch": 14.388305421515753, "grad_norm": 4.52405309677124, "learning_rate": 8.561879080329265e-05, "loss": 0.014749705791473389, "step": 50690 }, { "epoch": 14.391143911439114, "grad_norm": 5.818645477294922, "learning_rate": 8.56159523133693e-05, "loss": 0.030107370018959044, "step": 50700 }, { "epoch": 14.393982401362475, "grad_norm": 13.904401779174805, "learning_rate": 8.561311382344593e-05, "loss": 0.039076763391494754, "step": 50710 }, { "epoch": 14.396820891285836, "grad_norm": 6.508571624755859, "learning_rate": 8.561027533352257e-05, "loss": 0.06043229103088379, "step": 50720 }, { "epoch": 14.399659381209197, "grad_norm": 5.219316482543945, "learning_rate": 8.560743684359921e-05, "loss": 0.02476014941930771, "step": 50730 }, { "epoch": 14.402497871132557, "grad_norm": 12.049295425415039, "learning_rate": 8.560459835367584e-05, "loss": 0.02449103593826294, "step": 50740 }, { "epoch": 14.405336361055918, "grad_norm": 5.143094062805176, "learning_rate": 8.560175986375248e-05, "loss": 0.019943231344223024, "step": 50750 }, { "epoch": 14.408174850979279, "grad_norm": 14.276522636413574, "learning_rate": 8.559892137382912e-05, "loss": 0.034413126111030576, "step": 50760 }, { "epoch": 14.41101334090264, "grad_norm": 14.80979061126709, "learning_rate": 8.559608288390576e-05, "loss": 0.03124769628047943, "step": 50770 }, { "epoch": 14.413851830826001, "grad_norm": 13.902080535888672, "learning_rate": 8.55932443939824e-05, "loss": 0.03595017194747925, "step": 50780 }, { "epoch": 14.416690320749362, "grad_norm": 4.783239364624023, "learning_rate": 8.559040590405905e-05, "loss": 0.018854564428329466, "step": 50790 }, { "epoch": 14.419528810672722, "grad_norm": 11.942090034484863, "learning_rate": 8.558756741413569e-05, "loss": 0.046934318542480466, "step": 50800 }, { "epoch": 14.422367300596083, "grad_norm": 6.899314880371094, "learning_rate": 8.558472892421232e-05, "loss": 0.020881219208240508, "step": 50810 }, { "epoch": 14.425205790519444, "grad_norm": 1.4761505126953125, "learning_rate": 8.558189043428896e-05, "loss": 0.04355973303318024, "step": 50820 }, { "epoch": 14.428044280442805, "grad_norm": 9.936765670776367, "learning_rate": 8.557905194436561e-05, "loss": 0.025480246543884276, "step": 50830 }, { "epoch": 14.430882770366166, "grad_norm": 7.196719646453857, "learning_rate": 8.557621345444224e-05, "loss": 0.051013869047164914, "step": 50840 }, { "epoch": 14.433721260289525, "grad_norm": 7.83380651473999, "learning_rate": 8.557337496451888e-05, "loss": 0.026437598466873168, "step": 50850 }, { "epoch": 14.436559750212886, "grad_norm": 2.26269793510437, "learning_rate": 8.557053647459552e-05, "loss": 0.029989293217658995, "step": 50860 }, { "epoch": 14.439398240136248, "grad_norm": 1.4525343179702759, "learning_rate": 8.556769798467215e-05, "loss": 0.033813050389289855, "step": 50870 }, { "epoch": 14.442236730059609, "grad_norm": 8.670069694519043, "learning_rate": 8.556485949474879e-05, "loss": 0.03728064000606537, "step": 50880 }, { "epoch": 14.44507521998297, "grad_norm": 7.66602897644043, "learning_rate": 8.556202100482543e-05, "loss": 0.047035437822341916, "step": 50890 }, { "epoch": 14.447913709906329, "grad_norm": 0.837008535861969, "learning_rate": 8.555918251490208e-05, "loss": 0.025792202353477477, "step": 50900 }, { "epoch": 14.45075219982969, "grad_norm": 0.4187609553337097, "learning_rate": 8.555634402497872e-05, "loss": 0.032220259308815, "step": 50910 }, { "epoch": 14.453590689753051, "grad_norm": 3.26013445854187, "learning_rate": 8.555350553505536e-05, "loss": 0.034147176146507266, "step": 50920 }, { "epoch": 14.456429179676412, "grad_norm": 14.086835861206055, "learning_rate": 8.5550667045132e-05, "loss": 0.023838698863983154, "step": 50930 }, { "epoch": 14.459267669599773, "grad_norm": 2.3969225883483887, "learning_rate": 8.554782855520863e-05, "loss": 0.025146037340164185, "step": 50940 }, { "epoch": 14.462106159523135, "grad_norm": 1.6130523681640625, "learning_rate": 8.554499006528527e-05, "loss": 0.02198980748653412, "step": 50950 }, { "epoch": 14.464944649446494, "grad_norm": 2.830883502960205, "learning_rate": 8.554215157536191e-05, "loss": 0.02476453483104706, "step": 50960 }, { "epoch": 14.467783139369855, "grad_norm": 13.475018501281738, "learning_rate": 8.553931308543855e-05, "loss": 0.034607219696044925, "step": 50970 }, { "epoch": 14.470621629293216, "grad_norm": 2.6382336616516113, "learning_rate": 8.553647459551519e-05, "loss": 0.016006499528884888, "step": 50980 }, { "epoch": 14.473460119216577, "grad_norm": 1.152177333831787, "learning_rate": 8.553363610559183e-05, "loss": 0.037303170561790465, "step": 50990 }, { "epoch": 14.476298609139938, "grad_norm": 0.9025798439979553, "learning_rate": 8.553079761566846e-05, "loss": 0.03282157778739929, "step": 51000 }, { "epoch": 14.476298609139938, "eval_accuracy": 0.9616582946525084, "eval_loss": 0.11275384575128555, "eval_runtime": 31.983, "eval_samples_per_second": 491.729, "eval_steps_per_second": 7.692, "step": 51000 }, { "epoch": 14.479137099063298, "grad_norm": 4.254899501800537, "learning_rate": 8.55279591257451e-05, "loss": 0.0281696081161499, "step": 51010 }, { "epoch": 14.481975588986659, "grad_norm": 3.0414316654205322, "learning_rate": 8.552512063582174e-05, "loss": 0.02458951771259308, "step": 51020 }, { "epoch": 14.48481407891002, "grad_norm": 3.1042959690093994, "learning_rate": 8.552228214589839e-05, "loss": 0.02192099541425705, "step": 51030 }, { "epoch": 14.487652568833381, "grad_norm": 0.6739271283149719, "learning_rate": 8.551944365597503e-05, "loss": 0.02606382668018341, "step": 51040 }, { "epoch": 14.490491058756742, "grad_norm": 19.049753189086914, "learning_rate": 8.551660516605167e-05, "loss": 0.02903587222099304, "step": 51050 }, { "epoch": 14.493329548680101, "grad_norm": 12.988877296447754, "learning_rate": 8.551376667612831e-05, "loss": 0.030290287733078004, "step": 51060 }, { "epoch": 14.496168038603463, "grad_norm": 19.155982971191406, "learning_rate": 8.551092818620494e-05, "loss": 0.05030482411384583, "step": 51070 }, { "epoch": 14.499006528526824, "grad_norm": 14.54448413848877, "learning_rate": 8.550808969628158e-05, "loss": 0.03043851852416992, "step": 51080 }, { "epoch": 14.501845018450185, "grad_norm": 3.08514666557312, "learning_rate": 8.550525120635822e-05, "loss": 0.02116159498691559, "step": 51090 }, { "epoch": 14.504683508373546, "grad_norm": 10.747303009033203, "learning_rate": 8.550241271643486e-05, "loss": 0.05740382671356201, "step": 51100 }, { "epoch": 14.507521998296905, "grad_norm": 10.09619426727295, "learning_rate": 8.54995742265115e-05, "loss": 0.043869960308074954, "step": 51110 }, { "epoch": 14.510360488220266, "grad_norm": 5.170281887054443, "learning_rate": 8.549673573658814e-05, "loss": 0.05161537528038025, "step": 51120 }, { "epoch": 14.513198978143627, "grad_norm": 2.299725294113159, "learning_rate": 8.549389724666477e-05, "loss": 0.037432721257209776, "step": 51130 }, { "epoch": 14.516037468066989, "grad_norm": 0.9760419130325317, "learning_rate": 8.549105875674141e-05, "loss": 0.022002269327640534, "step": 51140 }, { "epoch": 14.51887595799035, "grad_norm": 4.733566761016846, "learning_rate": 8.548822026681806e-05, "loss": 0.017149904370307924, "step": 51150 }, { "epoch": 14.521714447913709, "grad_norm": 5.653111457824707, "learning_rate": 8.54853817768947e-05, "loss": 0.019655078649520874, "step": 51160 }, { "epoch": 14.52455293783707, "grad_norm": 3.013505220413208, "learning_rate": 8.548254328697134e-05, "loss": 0.038420644402503965, "step": 51170 }, { "epoch": 14.527391427760431, "grad_norm": 6.026484966278076, "learning_rate": 8.547970479704798e-05, "loss": 0.03479229509830475, "step": 51180 }, { "epoch": 14.530229917683792, "grad_norm": 9.362969398498535, "learning_rate": 8.547686630712462e-05, "loss": 0.02241542041301727, "step": 51190 }, { "epoch": 14.533068407607153, "grad_norm": 2.119218587875366, "learning_rate": 8.547402781720125e-05, "loss": 0.030665215849876405, "step": 51200 }, { "epoch": 14.535906897530515, "grad_norm": 3.773319959640503, "learning_rate": 8.547118932727789e-05, "loss": 0.0242320716381073, "step": 51210 }, { "epoch": 14.538745387453874, "grad_norm": 12.859066009521484, "learning_rate": 8.546835083735453e-05, "loss": 0.04029773473739624, "step": 51220 }, { "epoch": 14.541583877377235, "grad_norm": 8.100462913513184, "learning_rate": 8.546551234743117e-05, "loss": 0.028128573298454286, "step": 51230 }, { "epoch": 14.544422367300596, "grad_norm": 6.918891429901123, "learning_rate": 8.546267385750781e-05, "loss": 0.035101976990699765, "step": 51240 }, { "epoch": 14.547260857223957, "grad_norm": 12.470224380493164, "learning_rate": 8.545983536758446e-05, "loss": 0.04570910930633545, "step": 51250 }, { "epoch": 14.550099347147318, "grad_norm": 11.28945255279541, "learning_rate": 8.545699687766108e-05, "loss": 0.057882821559906004, "step": 51260 }, { "epoch": 14.552937837070678, "grad_norm": 7.4758405685424805, "learning_rate": 8.545415838773772e-05, "loss": 0.017592470347881316, "step": 51270 }, { "epoch": 14.555776326994039, "grad_norm": 3.6670124530792236, "learning_rate": 8.545131989781437e-05, "loss": 0.01690032333135605, "step": 51280 }, { "epoch": 14.5586148169174, "grad_norm": 6.950485706329346, "learning_rate": 8.544848140789101e-05, "loss": 0.02661212682723999, "step": 51290 }, { "epoch": 14.561453306840761, "grad_norm": 12.661957740783691, "learning_rate": 8.544564291796765e-05, "loss": 0.04555579125881195, "step": 51300 }, { "epoch": 14.564291796764122, "grad_norm": 2.024977445602417, "learning_rate": 8.544280442804429e-05, "loss": 0.03390985727310181, "step": 51310 }, { "epoch": 14.567130286687481, "grad_norm": 2.6837615966796875, "learning_rate": 8.543996593812092e-05, "loss": 0.021190357208251954, "step": 51320 }, { "epoch": 14.569968776610843, "grad_norm": 7.549190521240234, "learning_rate": 8.543712744819756e-05, "loss": 0.04251736998558044, "step": 51330 }, { "epoch": 14.572807266534204, "grad_norm": 8.728250503540039, "learning_rate": 8.54342889582742e-05, "loss": 0.024599093198776244, "step": 51340 }, { "epoch": 14.575645756457565, "grad_norm": 0.6689748167991638, "learning_rate": 8.543145046835084e-05, "loss": 0.029670712351799012, "step": 51350 }, { "epoch": 14.578484246380926, "grad_norm": 8.635516166687012, "learning_rate": 8.542861197842747e-05, "loss": 0.037997931241989136, "step": 51360 }, { "epoch": 14.581322736304287, "grad_norm": 1.4663504362106323, "learning_rate": 8.542577348850413e-05, "loss": 0.020631946623325348, "step": 51370 }, { "epoch": 14.584161226227646, "grad_norm": 1.7536824941635132, "learning_rate": 8.542293499858077e-05, "loss": 0.023515698313713074, "step": 51380 }, { "epoch": 14.586999716151007, "grad_norm": 3.2846157550811768, "learning_rate": 8.54200965086574e-05, "loss": 0.031092366576194762, "step": 51390 }, { "epoch": 14.589838206074369, "grad_norm": 4.611798286437988, "learning_rate": 8.541725801873404e-05, "loss": 0.027603542804718016, "step": 51400 }, { "epoch": 14.59267669599773, "grad_norm": 2.8955039978027344, "learning_rate": 8.541441952881068e-05, "loss": 0.027316337823867796, "step": 51410 }, { "epoch": 14.59551518592109, "grad_norm": 1.4328004121780396, "learning_rate": 8.54115810388873e-05, "loss": 0.010398831963539124, "step": 51420 }, { "epoch": 14.59835367584445, "grad_norm": 6.431270599365234, "learning_rate": 8.540874254896396e-05, "loss": 0.020113934576511384, "step": 51430 }, { "epoch": 14.601192165767811, "grad_norm": 8.832716941833496, "learning_rate": 8.54059040590406e-05, "loss": 0.03382132351398468, "step": 51440 }, { "epoch": 14.604030655691172, "grad_norm": 7.1022233963012695, "learning_rate": 8.540306556911723e-05, "loss": 0.04306955337524414, "step": 51450 }, { "epoch": 14.606869145614533, "grad_norm": 8.230582237243652, "learning_rate": 8.540022707919387e-05, "loss": 0.042462009191513064, "step": 51460 }, { "epoch": 14.609707635537895, "grad_norm": 1.2157502174377441, "learning_rate": 8.539738858927051e-05, "loss": 0.022740651667118073, "step": 51470 }, { "epoch": 14.612546125461254, "grad_norm": 7.07900857925415, "learning_rate": 8.539455009934715e-05, "loss": 0.045801737904548646, "step": 51480 }, { "epoch": 14.615384615384615, "grad_norm": 9.022019386291504, "learning_rate": 8.539171160942378e-05, "loss": 0.020640844106674196, "step": 51490 }, { "epoch": 14.618223105307976, "grad_norm": 2.2961812019348145, "learning_rate": 8.538887311950044e-05, "loss": 0.012333531677722932, "step": 51500 }, { "epoch": 14.618223105307976, "eval_accuracy": 0.9654733897119603, "eval_loss": 0.11064077168703079, "eval_runtime": 31.5427, "eval_samples_per_second": 498.594, "eval_steps_per_second": 7.799, "step": 51500 }, { "epoch": 14.621061595231337, "grad_norm": 2.7893013954162598, "learning_rate": 8.538603462957708e-05, "loss": 0.022663894295692443, "step": 51510 }, { "epoch": 14.623900085154698, "grad_norm": 9.522071838378906, "learning_rate": 8.53831961396537e-05, "loss": 0.05162531733512878, "step": 51520 }, { "epoch": 14.626738575078058, "grad_norm": 1.3576302528381348, "learning_rate": 8.538035764973035e-05, "loss": 0.0386125922203064, "step": 51530 }, { "epoch": 14.629577065001419, "grad_norm": 15.340320587158203, "learning_rate": 8.537751915980699e-05, "loss": 0.0510531485080719, "step": 51540 }, { "epoch": 14.63241555492478, "grad_norm": 2.0894927978515625, "learning_rate": 8.537468066988362e-05, "loss": 0.028602179884910584, "step": 51550 }, { "epoch": 14.635254044848141, "grad_norm": 9.491701126098633, "learning_rate": 8.537184217996026e-05, "loss": 0.027068281173706056, "step": 51560 }, { "epoch": 14.638092534771502, "grad_norm": 2.4531548023223877, "learning_rate": 8.536900369003691e-05, "loss": 0.020249664783477783, "step": 51570 }, { "epoch": 14.640931024694861, "grad_norm": 9.986639976501465, "learning_rate": 8.536616520011354e-05, "loss": 0.025724044442176817, "step": 51580 }, { "epoch": 14.643769514618223, "grad_norm": 2.639873743057251, "learning_rate": 8.536332671019018e-05, "loss": 0.021254521608352662, "step": 51590 }, { "epoch": 14.646608004541584, "grad_norm": 6.323326587677002, "learning_rate": 8.536048822026682e-05, "loss": 0.03456567525863648, "step": 51600 }, { "epoch": 14.649446494464945, "grad_norm": 1.1075998544692993, "learning_rate": 8.535764973034346e-05, "loss": 0.039362508058547976, "step": 51610 }, { "epoch": 14.652284984388306, "grad_norm": 11.35267162322998, "learning_rate": 8.535481124042009e-05, "loss": 0.03138631880283356, "step": 51620 }, { "epoch": 14.655123474311667, "grad_norm": 0.9547427892684937, "learning_rate": 8.535197275049675e-05, "loss": 0.01574273854494095, "step": 51630 }, { "epoch": 14.657961964235026, "grad_norm": 5.166176795959473, "learning_rate": 8.534913426057339e-05, "loss": 0.018043191730976106, "step": 51640 }, { "epoch": 14.660800454158387, "grad_norm": 3.543492317199707, "learning_rate": 8.534629577065002e-05, "loss": 0.02168959379196167, "step": 51650 }, { "epoch": 14.663638944081749, "grad_norm": 3.625410318374634, "learning_rate": 8.534345728072666e-05, "loss": 0.026066002249717713, "step": 51660 }, { "epoch": 14.66647743400511, "grad_norm": 14.819214820861816, "learning_rate": 8.53406187908033e-05, "loss": 0.04708296358585358, "step": 51670 }, { "epoch": 14.66931592392847, "grad_norm": 1.5611768960952759, "learning_rate": 8.533778030087993e-05, "loss": 0.021326422691345215, "step": 51680 }, { "epoch": 14.67215441385183, "grad_norm": 6.022402286529541, "learning_rate": 8.533494181095657e-05, "loss": 0.008427245914936066, "step": 51690 }, { "epoch": 14.674992903775191, "grad_norm": 9.686131477355957, "learning_rate": 8.533210332103322e-05, "loss": 0.03838449716567993, "step": 51700 }, { "epoch": 14.677831393698552, "grad_norm": 2.243096113204956, "learning_rate": 8.532926483110985e-05, "loss": 0.03304242491722107, "step": 51710 }, { "epoch": 14.680669883621913, "grad_norm": 4.648524284362793, "learning_rate": 8.532642634118649e-05, "loss": 0.025969645380973815, "step": 51720 }, { "epoch": 14.683508373545274, "grad_norm": 4.643424034118652, "learning_rate": 8.532358785126313e-05, "loss": 0.026057317852973938, "step": 51730 }, { "epoch": 14.686346863468636, "grad_norm": 3.315474510192871, "learning_rate": 8.532074936133977e-05, "loss": 0.03135319650173187, "step": 51740 }, { "epoch": 14.689185353391995, "grad_norm": 11.527090072631836, "learning_rate": 8.53179108714164e-05, "loss": 0.042044630646705626, "step": 51750 }, { "epoch": 14.692023843315356, "grad_norm": 7.199404239654541, "learning_rate": 8.531507238149306e-05, "loss": 0.029575473070144652, "step": 51760 }, { "epoch": 14.694862333238717, "grad_norm": 1.290221095085144, "learning_rate": 8.53122338915697e-05, "loss": 0.01553715169429779, "step": 51770 }, { "epoch": 14.697700823162078, "grad_norm": 2.0115106105804443, "learning_rate": 8.530939540164633e-05, "loss": 0.023492184281349183, "step": 51780 }, { "epoch": 14.70053931308544, "grad_norm": 6.614366054534912, "learning_rate": 8.530655691172297e-05, "loss": 0.03776212334632874, "step": 51790 }, { "epoch": 14.703377803008799, "grad_norm": 0.5551867485046387, "learning_rate": 8.530371842179961e-05, "loss": 0.02782229483127594, "step": 51800 }, { "epoch": 14.70621629293216, "grad_norm": 1.761235237121582, "learning_rate": 8.530087993187624e-05, "loss": 0.02025478333234787, "step": 51810 }, { "epoch": 14.709054782855521, "grad_norm": 4.280798435211182, "learning_rate": 8.529804144195288e-05, "loss": 0.033472859859466554, "step": 51820 }, { "epoch": 14.711893272778882, "grad_norm": 7.388118267059326, "learning_rate": 8.529520295202953e-05, "loss": 0.028340888023376466, "step": 51830 }, { "epoch": 14.714731762702243, "grad_norm": 8.572880744934082, "learning_rate": 8.529236446210616e-05, "loss": 0.03093830347061157, "step": 51840 }, { "epoch": 14.717570252625602, "grad_norm": 12.007716178894043, "learning_rate": 8.52895259721828e-05, "loss": 0.04730663299560547, "step": 51850 }, { "epoch": 14.720408742548964, "grad_norm": 1.922437310218811, "learning_rate": 8.528668748225944e-05, "loss": 0.03615111708641052, "step": 51860 }, { "epoch": 14.723247232472325, "grad_norm": 10.655746459960938, "learning_rate": 8.528384899233609e-05, "loss": 0.05345605611801148, "step": 51870 }, { "epoch": 14.726085722395686, "grad_norm": 1.8940304517745972, "learning_rate": 8.528101050241271e-05, "loss": 0.027235192060470582, "step": 51880 }, { "epoch": 14.728924212319047, "grad_norm": 12.067229270935059, "learning_rate": 8.527817201248935e-05, "loss": 0.03236901462078094, "step": 51890 }, { "epoch": 14.731762702242406, "grad_norm": 8.673604011535645, "learning_rate": 8.527533352256601e-05, "loss": 0.02623067796230316, "step": 51900 }, { "epoch": 14.734601192165767, "grad_norm": 10.754587173461914, "learning_rate": 8.527249503264264e-05, "loss": 0.05044602751731873, "step": 51910 }, { "epoch": 14.737439682089128, "grad_norm": 8.14383602142334, "learning_rate": 8.526965654271928e-05, "loss": 0.03298507928848267, "step": 51920 }, { "epoch": 14.74027817201249, "grad_norm": 9.571702003479004, "learning_rate": 8.526681805279592e-05, "loss": 0.020658814907073976, "step": 51930 }, { "epoch": 14.74311666193585, "grad_norm": 1.7532601356506348, "learning_rate": 8.526397956287255e-05, "loss": 0.02656387984752655, "step": 51940 }, { "epoch": 14.74595515185921, "grad_norm": 4.6879987716674805, "learning_rate": 8.526114107294919e-05, "loss": 0.04317150413990021, "step": 51950 }, { "epoch": 14.748793641782571, "grad_norm": 16.127050399780273, "learning_rate": 8.525830258302584e-05, "loss": 0.05169779658317566, "step": 51960 }, { "epoch": 14.751632131705932, "grad_norm": 9.529219627380371, "learning_rate": 8.525546409310247e-05, "loss": 0.022693523764610292, "step": 51970 }, { "epoch": 14.754470621629293, "grad_norm": 1.4467220306396484, "learning_rate": 8.525262560317911e-05, "loss": 0.05085247755050659, "step": 51980 }, { "epoch": 14.757309111552654, "grad_norm": 3.411275863647461, "learning_rate": 8.524978711325575e-05, "loss": 0.017076879739761353, "step": 51990 }, { "epoch": 14.760147601476016, "grad_norm": 5.0562744140625, "learning_rate": 8.52469486233324e-05, "loss": 0.01956384479999542, "step": 52000 }, { "epoch": 14.760147601476016, "eval_accuracy": 0.9601958415463852, "eval_loss": 0.12304113060235977, "eval_runtime": 31.6736, "eval_samples_per_second": 496.533, "eval_steps_per_second": 7.767, "step": 52000 }, { "epoch": 14.762986091399375, "grad_norm": 2.034628391265869, "learning_rate": 8.524411013340902e-05, "loss": 0.038484308123588565, "step": 52010 }, { "epoch": 14.765824581322736, "grad_norm": 15.556130409240723, "learning_rate": 8.524127164348567e-05, "loss": 0.03666593432426453, "step": 52020 }, { "epoch": 14.768663071246097, "grad_norm": 1.5747390985488892, "learning_rate": 8.523843315356232e-05, "loss": 0.014248223602771759, "step": 52030 }, { "epoch": 14.771501561169458, "grad_norm": 11.546631813049316, "learning_rate": 8.523559466363895e-05, "loss": 0.03246625065803528, "step": 52040 }, { "epoch": 14.77434005109282, "grad_norm": 1.5567431449890137, "learning_rate": 8.523275617371559e-05, "loss": 0.03479932546615601, "step": 52050 }, { "epoch": 14.777178541016179, "grad_norm": 8.006046295166016, "learning_rate": 8.522991768379223e-05, "loss": 0.036957567930221556, "step": 52060 }, { "epoch": 14.78001703093954, "grad_norm": 10.67141342163086, "learning_rate": 8.522707919386886e-05, "loss": 0.03419145047664642, "step": 52070 }, { "epoch": 14.782855520862901, "grad_norm": 2.0818207263946533, "learning_rate": 8.52242407039455e-05, "loss": 0.029305773973464965, "step": 52080 }, { "epoch": 14.785694010786262, "grad_norm": 6.951576232910156, "learning_rate": 8.522140221402214e-05, "loss": 0.05066871643066406, "step": 52090 }, { "epoch": 14.788532500709623, "grad_norm": 12.597771644592285, "learning_rate": 8.521856372409878e-05, "loss": 0.02722790539264679, "step": 52100 }, { "epoch": 14.791370990632982, "grad_norm": 3.6698906421661377, "learning_rate": 8.521572523417542e-05, "loss": 0.03936859667301178, "step": 52110 }, { "epoch": 14.794209480556344, "grad_norm": 0.5758851766586304, "learning_rate": 8.521288674425207e-05, "loss": 0.031905907392501834, "step": 52120 }, { "epoch": 14.797047970479705, "grad_norm": 4.625484466552734, "learning_rate": 8.521004825432871e-05, "loss": 0.032159218192100526, "step": 52130 }, { "epoch": 14.799886460403066, "grad_norm": 0.46332302689552307, "learning_rate": 8.520720976440533e-05, "loss": 0.023548538982868194, "step": 52140 }, { "epoch": 14.802724950326427, "grad_norm": 0.3681604266166687, "learning_rate": 8.520437127448198e-05, "loss": 0.018060213327407836, "step": 52150 }, { "epoch": 14.805563440249788, "grad_norm": 5.4887847900390625, "learning_rate": 8.520153278455862e-05, "loss": 0.02133317440748215, "step": 52160 }, { "epoch": 14.808401930173147, "grad_norm": 9.38009262084961, "learning_rate": 8.519869429463526e-05, "loss": 0.026021358370780946, "step": 52170 }, { "epoch": 14.811240420096508, "grad_norm": 11.541468620300293, "learning_rate": 8.51958558047119e-05, "loss": 0.03665421307086945, "step": 52180 }, { "epoch": 14.81407891001987, "grad_norm": 4.257431507110596, "learning_rate": 8.519301731478854e-05, "loss": 0.032706519961357115, "step": 52190 }, { "epoch": 14.81691739994323, "grad_norm": 14.1207914352417, "learning_rate": 8.519017882486517e-05, "loss": 0.029098379611968993, "step": 52200 }, { "epoch": 14.819755889866592, "grad_norm": 6.371778964996338, "learning_rate": 8.518734033494181e-05, "loss": 0.03482284843921661, "step": 52210 }, { "epoch": 14.822594379789951, "grad_norm": 6.763216018676758, "learning_rate": 8.518450184501845e-05, "loss": 0.03519497513771057, "step": 52220 }, { "epoch": 14.825432869713312, "grad_norm": 2.672987699508667, "learning_rate": 8.51816633550951e-05, "loss": 0.034593862295150754, "step": 52230 }, { "epoch": 14.828271359636673, "grad_norm": 11.970619201660156, "learning_rate": 8.517882486517174e-05, "loss": 0.0287896990776062, "step": 52240 }, { "epoch": 14.831109849560034, "grad_norm": 5.577654838562012, "learning_rate": 8.517598637524838e-05, "loss": 0.030020585656166075, "step": 52250 }, { "epoch": 14.833948339483396, "grad_norm": 1.1137555837631226, "learning_rate": 8.5173147885325e-05, "loss": 0.023542656004428862, "step": 52260 }, { "epoch": 14.836786829406755, "grad_norm": 6.946108818054199, "learning_rate": 8.517030939540165e-05, "loss": 0.025078311562538147, "step": 52270 }, { "epoch": 14.839625319330116, "grad_norm": 1.5836098194122314, "learning_rate": 8.516747090547829e-05, "loss": 0.01837528347969055, "step": 52280 }, { "epoch": 14.842463809253477, "grad_norm": 6.4721221923828125, "learning_rate": 8.516463241555493e-05, "loss": 0.027233871817588805, "step": 52290 }, { "epoch": 14.845302299176838, "grad_norm": 2.855774402618408, "learning_rate": 8.516179392563157e-05, "loss": 0.019256198406219484, "step": 52300 }, { "epoch": 14.8481407891002, "grad_norm": 4.033708095550537, "learning_rate": 8.515895543570821e-05, "loss": 0.024883374571800232, "step": 52310 }, { "epoch": 14.850979279023559, "grad_norm": 14.830466270446777, "learning_rate": 8.515611694578485e-05, "loss": 0.03544933795928955, "step": 52320 }, { "epoch": 14.85381776894692, "grad_norm": 3.2022197246551514, "learning_rate": 8.515327845586148e-05, "loss": 0.03954409956932068, "step": 52330 }, { "epoch": 14.85665625887028, "grad_norm": 5.517317771911621, "learning_rate": 8.515043996593812e-05, "loss": 0.03756464123725891, "step": 52340 }, { "epoch": 14.859494748793642, "grad_norm": 5.024097919464111, "learning_rate": 8.514760147601476e-05, "loss": 0.027983418107032774, "step": 52350 }, { "epoch": 14.862333238717003, "grad_norm": 4.13739538192749, "learning_rate": 8.51447629860914e-05, "loss": 0.027565941214561462, "step": 52360 }, { "epoch": 14.865171728640362, "grad_norm": 7.301817893981934, "learning_rate": 8.514192449616805e-05, "loss": 0.04110962748527527, "step": 52370 }, { "epoch": 14.868010218563724, "grad_norm": 4.641166687011719, "learning_rate": 8.513908600624469e-05, "loss": 0.015780039131641388, "step": 52380 }, { "epoch": 14.870848708487085, "grad_norm": 1.6691021919250488, "learning_rate": 8.513624751632131e-05, "loss": 0.02632930874824524, "step": 52390 }, { "epoch": 14.873687198410446, "grad_norm": 6.865642070770264, "learning_rate": 8.513340902639796e-05, "loss": 0.06053704023361206, "step": 52400 }, { "epoch": 14.876525688333807, "grad_norm": 11.731450080871582, "learning_rate": 8.51305705364746e-05, "loss": 0.05539149045944214, "step": 52410 }, { "epoch": 14.879364178257168, "grad_norm": 8.948708534240723, "learning_rate": 8.512773204655124e-05, "loss": 0.05718296766281128, "step": 52420 }, { "epoch": 14.882202668180527, "grad_norm": 1.5404996871948242, "learning_rate": 8.512489355662788e-05, "loss": 0.04857009053230286, "step": 52430 }, { "epoch": 14.885041158103888, "grad_norm": 6.185203552246094, "learning_rate": 8.512205506670452e-05, "loss": 0.022160325944423676, "step": 52440 }, { "epoch": 14.88787964802725, "grad_norm": 0.450327605009079, "learning_rate": 8.511921657678116e-05, "loss": 0.023737695813179017, "step": 52450 }, { "epoch": 14.89071813795061, "grad_norm": 8.882545471191406, "learning_rate": 8.511637808685779e-05, "loss": 0.05116015076637268, "step": 52460 }, { "epoch": 14.893556627873972, "grad_norm": 3.0069682598114014, "learning_rate": 8.511353959693443e-05, "loss": 0.028014230728149413, "step": 52470 }, { "epoch": 14.896395117797331, "grad_norm": 1.16648530960083, "learning_rate": 8.511070110701107e-05, "loss": 0.02635745108127594, "step": 52480 }, { "epoch": 14.899233607720692, "grad_norm": 11.484435081481934, "learning_rate": 8.51078626170877e-05, "loss": 0.03438654243946075, "step": 52490 }, { "epoch": 14.902072097644053, "grad_norm": 7.604098320007324, "learning_rate": 8.510502412716436e-05, "loss": 0.026794984936714172, "step": 52500 }, { "epoch": 14.902072097644053, "eval_accuracy": 0.9615947097348508, "eval_loss": 0.11910422146320343, "eval_runtime": 31.0208, "eval_samples_per_second": 506.982, "eval_steps_per_second": 7.93, "step": 52500 }, { "epoch": 14.904910587567414, "grad_norm": 0.1136249452829361, "learning_rate": 8.5102185637241e-05, "loss": 0.02782406210899353, "step": 52510 }, { "epoch": 14.907749077490775, "grad_norm": 0.9823296070098877, "learning_rate": 8.509934714731763e-05, "loss": 0.03094485104084015, "step": 52520 }, { "epoch": 14.910587567414137, "grad_norm": 6.141584873199463, "learning_rate": 8.509650865739427e-05, "loss": 0.03172458112239838, "step": 52530 }, { "epoch": 14.913426057337496, "grad_norm": 11.04259204864502, "learning_rate": 8.509367016747091e-05, "loss": 0.04151504337787628, "step": 52540 }, { "epoch": 14.916264547260857, "grad_norm": 9.582223892211914, "learning_rate": 8.509083167754755e-05, "loss": 0.029722175002098082, "step": 52550 }, { "epoch": 14.919103037184218, "grad_norm": 7.273066997528076, "learning_rate": 8.508799318762419e-05, "loss": 0.023678822815418242, "step": 52560 }, { "epoch": 14.92194152710758, "grad_norm": 1.9548935890197754, "learning_rate": 8.508515469770083e-05, "loss": 0.033037933707237246, "step": 52570 }, { "epoch": 14.92478001703094, "grad_norm": 1.294035792350769, "learning_rate": 8.508231620777747e-05, "loss": 0.040040749311447146, "step": 52580 }, { "epoch": 14.9276185069543, "grad_norm": 13.968194961547852, "learning_rate": 8.50794777178541e-05, "loss": 0.050274431705474854, "step": 52590 }, { "epoch": 14.93045699687766, "grad_norm": 14.652900695800781, "learning_rate": 8.507663922793074e-05, "loss": 0.02755894958972931, "step": 52600 }, { "epoch": 14.933295486801022, "grad_norm": 11.293218612670898, "learning_rate": 8.507380073800738e-05, "loss": 0.06112414598464966, "step": 52610 }, { "epoch": 14.936133976724383, "grad_norm": 4.71428108215332, "learning_rate": 8.507096224808401e-05, "loss": 0.05159469246864319, "step": 52620 }, { "epoch": 14.938972466647744, "grad_norm": 9.22663688659668, "learning_rate": 8.506812375816067e-05, "loss": 0.04534577429294586, "step": 52630 }, { "epoch": 14.941810956571103, "grad_norm": 3.193680763244629, "learning_rate": 8.506528526823731e-05, "loss": 0.05434118509292603, "step": 52640 }, { "epoch": 14.944649446494465, "grad_norm": 3.10563063621521, "learning_rate": 8.506244677831394e-05, "loss": 0.04572090208530426, "step": 52650 }, { "epoch": 14.947487936417826, "grad_norm": 10.260764122009277, "learning_rate": 8.505960828839058e-05, "loss": 0.04893912971019745, "step": 52660 }, { "epoch": 14.950326426341187, "grad_norm": 4.654698848724365, "learning_rate": 8.505676979846722e-05, "loss": 0.05549478530883789, "step": 52670 }, { "epoch": 14.953164916264548, "grad_norm": 9.661906242370605, "learning_rate": 8.505393130854386e-05, "loss": 0.03315366804599762, "step": 52680 }, { "epoch": 14.956003406187907, "grad_norm": 3.403984546661377, "learning_rate": 8.505109281862049e-05, "loss": 0.031098783016204834, "step": 52690 }, { "epoch": 14.958841896111268, "grad_norm": 3.113482713699341, "learning_rate": 8.504825432869714e-05, "loss": 0.04761813879013062, "step": 52700 }, { "epoch": 14.96168038603463, "grad_norm": 5.0839009284973145, "learning_rate": 8.504541583877378e-05, "loss": 0.02483803331851959, "step": 52710 }, { "epoch": 14.96451887595799, "grad_norm": 10.606212615966797, "learning_rate": 8.504257734885041e-05, "loss": 0.03670084178447723, "step": 52720 }, { "epoch": 14.967357365881352, "grad_norm": 7.447025775909424, "learning_rate": 8.503973885892705e-05, "loss": 0.01570524275302887, "step": 52730 }, { "epoch": 14.970195855804711, "grad_norm": 2.658383369445801, "learning_rate": 8.50369003690037e-05, "loss": 0.06445603370666504, "step": 52740 }, { "epoch": 14.973034345728072, "grad_norm": 4.66203498840332, "learning_rate": 8.503406187908032e-05, "loss": 0.014038428664207458, "step": 52750 }, { "epoch": 14.975872835651433, "grad_norm": 1.796512246131897, "learning_rate": 8.503122338915698e-05, "loss": 0.043055981397628784, "step": 52760 }, { "epoch": 14.978711325574794, "grad_norm": 3.048551559448242, "learning_rate": 8.502838489923362e-05, "loss": 0.035670405626296996, "step": 52770 }, { "epoch": 14.981549815498155, "grad_norm": 2.936340093612671, "learning_rate": 8.502554640931025e-05, "loss": 0.02280242145061493, "step": 52780 }, { "epoch": 14.984388305421517, "grad_norm": 1.5961838960647583, "learning_rate": 8.502270791938689e-05, "loss": 0.05337930917739868, "step": 52790 }, { "epoch": 14.987226795344876, "grad_norm": 4.143683910369873, "learning_rate": 8.501986942946353e-05, "loss": 0.032545506954193115, "step": 52800 }, { "epoch": 14.990065285268237, "grad_norm": 3.4018266201019287, "learning_rate": 8.501703093954017e-05, "loss": 0.02733646333217621, "step": 52810 }, { "epoch": 14.992903775191598, "grad_norm": 1.1462609767913818, "learning_rate": 8.50141924496168e-05, "loss": 0.012983845174312591, "step": 52820 }, { "epoch": 14.99574226511496, "grad_norm": 9.14090633392334, "learning_rate": 8.501135395969345e-05, "loss": 0.0259353905916214, "step": 52830 }, { "epoch": 14.99858075503832, "grad_norm": 5.760972023010254, "learning_rate": 8.50085154697701e-05, "loss": 0.026335883140563964, "step": 52840 }, { "epoch": 15.00141924496168, "grad_norm": 11.581393241882324, "learning_rate": 8.500567697984672e-05, "loss": 0.028559917211532594, "step": 52850 }, { "epoch": 15.00425773488504, "grad_norm": 6.410826206207275, "learning_rate": 8.500283848992336e-05, "loss": 0.0230854332447052, "step": 52860 }, { "epoch": 15.007096224808402, "grad_norm": 6.874464988708496, "learning_rate": 8.5e-05, "loss": 0.016904139518737794, "step": 52870 }, { "epoch": 15.009934714731763, "grad_norm": 10.186416625976562, "learning_rate": 8.499716151007663e-05, "loss": 0.02468867152929306, "step": 52880 }, { "epoch": 15.012773204655124, "grad_norm": 8.226420402526855, "learning_rate": 8.499432302015329e-05, "loss": 0.026024246215820314, "step": 52890 }, { "epoch": 15.015611694578483, "grad_norm": 3.9801876544952393, "learning_rate": 8.499148453022993e-05, "loss": 0.023551414906978606, "step": 52900 }, { "epoch": 15.018450184501845, "grad_norm": 4.600092887878418, "learning_rate": 8.498864604030656e-05, "loss": 0.025222939252853394, "step": 52910 }, { "epoch": 15.021288674425206, "grad_norm": 0.8289909958839417, "learning_rate": 8.49858075503832e-05, "loss": 0.025393322110176086, "step": 52920 }, { "epoch": 15.024127164348567, "grad_norm": 5.408454418182373, "learning_rate": 8.498296906045984e-05, "loss": 0.03954998254776001, "step": 52930 }, { "epoch": 15.026965654271928, "grad_norm": 0.8199604749679565, "learning_rate": 8.498013057053648e-05, "loss": 0.023568087816238405, "step": 52940 }, { "epoch": 15.029804144195289, "grad_norm": 15.933345794677734, "learning_rate": 8.497729208061311e-05, "loss": 0.0334645688533783, "step": 52950 }, { "epoch": 15.032642634118648, "grad_norm": 3.764054536819458, "learning_rate": 8.497445359068976e-05, "loss": 0.014909225702285766, "step": 52960 }, { "epoch": 15.03548112404201, "grad_norm": 11.115058898925781, "learning_rate": 8.49716151007664e-05, "loss": 0.02324901521205902, "step": 52970 }, { "epoch": 15.03831961396537, "grad_norm": 0.42527395486831665, "learning_rate": 8.496877661084303e-05, "loss": 0.01743047833442688, "step": 52980 }, { "epoch": 15.041158103888732, "grad_norm": 0.5302337408065796, "learning_rate": 8.496593812091968e-05, "loss": 0.008684401214122773, "step": 52990 }, { "epoch": 15.043996593812093, "grad_norm": 3.21989107131958, "learning_rate": 8.496309963099632e-05, "loss": 0.014872929453849793, "step": 53000 }, { "epoch": 15.043996593812093, "eval_accuracy": 0.9634386723469193, "eval_loss": 0.11136753112077713, "eval_runtime": 31.3226, "eval_samples_per_second": 502.098, "eval_steps_per_second": 7.854, "step": 53000 }, { "epoch": 15.046835083735452, "grad_norm": 2.858579397201538, "learning_rate": 8.496026114107294e-05, "loss": 0.02118138372898102, "step": 53010 }, { "epoch": 15.049673573658813, "grad_norm": 0.9128031134605408, "learning_rate": 8.495742265114959e-05, "loss": 0.013003897666931153, "step": 53020 }, { "epoch": 15.052512063582174, "grad_norm": 9.113799095153809, "learning_rate": 8.495458416122624e-05, "loss": 0.041965633630752563, "step": 53030 }, { "epoch": 15.055350553505535, "grad_norm": 2.1945927143096924, "learning_rate": 8.495174567130287e-05, "loss": 0.0187469482421875, "step": 53040 }, { "epoch": 15.058189043428897, "grad_norm": 2.8984506130218506, "learning_rate": 8.494890718137951e-05, "loss": 0.03352283835411072, "step": 53050 }, { "epoch": 15.061027533352256, "grad_norm": 2.4544174671173096, "learning_rate": 8.494606869145615e-05, "loss": 0.012337620556354522, "step": 53060 }, { "epoch": 15.063866023275617, "grad_norm": 6.353875160217285, "learning_rate": 8.494323020153279e-05, "loss": 0.03873030245304108, "step": 53070 }, { "epoch": 15.066704513198978, "grad_norm": 1.4572876691818237, "learning_rate": 8.494039171160942e-05, "loss": 0.026953238248825073, "step": 53080 }, { "epoch": 15.06954300312234, "grad_norm": 13.734784126281738, "learning_rate": 8.493755322168608e-05, "loss": 0.024659039080142976, "step": 53090 }, { "epoch": 15.0723814930457, "grad_norm": 3.3762965202331543, "learning_rate": 8.49347147317627e-05, "loss": 0.01574217975139618, "step": 53100 }, { "epoch": 15.07521998296906, "grad_norm": 4.7035956382751465, "learning_rate": 8.493187624183934e-05, "loss": 0.024960589408874512, "step": 53110 }, { "epoch": 15.07805847289242, "grad_norm": 11.74179744720459, "learning_rate": 8.492903775191599e-05, "loss": 0.02742968499660492, "step": 53120 }, { "epoch": 15.080896962815782, "grad_norm": 3.068833351135254, "learning_rate": 8.492619926199263e-05, "loss": 0.02853260636329651, "step": 53130 }, { "epoch": 15.083735452739143, "grad_norm": 4.965625762939453, "learning_rate": 8.492336077206926e-05, "loss": 0.018844975531101225, "step": 53140 }, { "epoch": 15.086573942662504, "grad_norm": 17.976947784423828, "learning_rate": 8.49205222821459e-05, "loss": 0.027571189403533935, "step": 53150 }, { "epoch": 15.089412432585865, "grad_norm": 4.006868839263916, "learning_rate": 8.491768379222255e-05, "loss": 0.018645693361759186, "step": 53160 }, { "epoch": 15.092250922509225, "grad_norm": 2.344867467880249, "learning_rate": 8.491484530229918e-05, "loss": 0.01034977361559868, "step": 53170 }, { "epoch": 15.095089412432586, "grad_norm": 4.115565776824951, "learning_rate": 8.491200681237582e-05, "loss": 0.0225346639752388, "step": 53180 }, { "epoch": 15.097927902355947, "grad_norm": 3.8482604026794434, "learning_rate": 8.490916832245246e-05, "loss": 0.017920659482479097, "step": 53190 }, { "epoch": 15.100766392279308, "grad_norm": 2.0540499687194824, "learning_rate": 8.490632983252909e-05, "loss": 0.016692161560058594, "step": 53200 }, { "epoch": 15.103604882202669, "grad_norm": 5.664206504821777, "learning_rate": 8.490349134260573e-05, "loss": 0.020225676894187927, "step": 53210 }, { "epoch": 15.106443372126028, "grad_norm": 1.425145149230957, "learning_rate": 8.490065285268237e-05, "loss": 0.01829567402601242, "step": 53220 }, { "epoch": 15.10928186204939, "grad_norm": 4.028144836425781, "learning_rate": 8.489781436275901e-05, "loss": 0.029241713881492614, "step": 53230 }, { "epoch": 15.11212035197275, "grad_norm": 8.185257911682129, "learning_rate": 8.489497587283566e-05, "loss": 0.01918734759092331, "step": 53240 }, { "epoch": 15.114958841896112, "grad_norm": 6.857207775115967, "learning_rate": 8.48921373829123e-05, "loss": 0.030198761820793153, "step": 53250 }, { "epoch": 15.117797331819473, "grad_norm": 2.811000347137451, "learning_rate": 8.488929889298894e-05, "loss": 0.024341876804828643, "step": 53260 }, { "epoch": 15.120635821742832, "grad_norm": 7.477023124694824, "learning_rate": 8.488646040306557e-05, "loss": 0.029148364067077638, "step": 53270 }, { "epoch": 15.123474311666193, "grad_norm": 1.9254635572433472, "learning_rate": 8.488362191314221e-05, "loss": 0.032135629653930665, "step": 53280 }, { "epoch": 15.126312801589554, "grad_norm": 12.80935287475586, "learning_rate": 8.488078342321886e-05, "loss": 0.01927918493747711, "step": 53290 }, { "epoch": 15.129151291512915, "grad_norm": 14.936166763305664, "learning_rate": 8.487794493329549e-05, "loss": 0.02948797345161438, "step": 53300 }, { "epoch": 15.131989781436276, "grad_norm": 2.925687789916992, "learning_rate": 8.487510644337213e-05, "loss": 0.028370019793510438, "step": 53310 }, { "epoch": 15.134828271359636, "grad_norm": 4.047255039215088, "learning_rate": 8.487226795344877e-05, "loss": 0.026260924339294434, "step": 53320 }, { "epoch": 15.137666761282997, "grad_norm": 6.451000213623047, "learning_rate": 8.48694294635254e-05, "loss": 0.017392192780971528, "step": 53330 }, { "epoch": 15.140505251206358, "grad_norm": 6.477872848510742, "learning_rate": 8.486659097360204e-05, "loss": 0.022298693656921387, "step": 53340 }, { "epoch": 15.14334374112972, "grad_norm": 2.901130437850952, "learning_rate": 8.486375248367868e-05, "loss": 0.037961924076080324, "step": 53350 }, { "epoch": 15.14618223105308, "grad_norm": 5.307724952697754, "learning_rate": 8.486091399375533e-05, "loss": 0.024045148491859437, "step": 53360 }, { "epoch": 15.149020720976441, "grad_norm": 1.505552053451538, "learning_rate": 8.485807550383197e-05, "loss": 0.027203640341758727, "step": 53370 }, { "epoch": 15.1518592108998, "grad_norm": 3.445411443710327, "learning_rate": 8.485523701390861e-05, "loss": 0.017229658365249634, "step": 53380 }, { "epoch": 15.154697700823162, "grad_norm": 3.822455883026123, "learning_rate": 8.485239852398525e-05, "loss": 0.031219345331192017, "step": 53390 }, { "epoch": 15.157536190746523, "grad_norm": 2.3856401443481445, "learning_rate": 8.484956003406188e-05, "loss": 0.0303028404712677, "step": 53400 }, { "epoch": 15.160374680669884, "grad_norm": 14.556412696838379, "learning_rate": 8.484672154413852e-05, "loss": 0.050366705656051634, "step": 53410 }, { "epoch": 15.163213170593245, "grad_norm": 7.2573652267456055, "learning_rate": 8.484388305421516e-05, "loss": 0.014065055549144745, "step": 53420 }, { "epoch": 15.166051660516604, "grad_norm": 2.4567673206329346, "learning_rate": 8.48410445642918e-05, "loss": 0.014977070689201354, "step": 53430 }, { "epoch": 15.168890150439966, "grad_norm": 4.0306878089904785, "learning_rate": 8.483820607436844e-05, "loss": 0.02399582862854004, "step": 53440 }, { "epoch": 15.171728640363327, "grad_norm": 2.532724380493164, "learning_rate": 8.483536758444508e-05, "loss": 0.011108486354351044, "step": 53450 }, { "epoch": 15.174567130286688, "grad_norm": 4.917425632476807, "learning_rate": 8.483252909452171e-05, "loss": 0.017559367418289184, "step": 53460 }, { "epoch": 15.177405620210049, "grad_norm": 2.377610445022583, "learning_rate": 8.48299744535907e-05, "loss": 0.02271555960178375, "step": 53470 }, { "epoch": 15.180244110133408, "grad_norm": 7.6386823654174805, "learning_rate": 8.482713596366733e-05, "loss": 0.019404828548431396, "step": 53480 }, { "epoch": 15.18308260005677, "grad_norm": 7.318428039550781, "learning_rate": 8.482429747374397e-05, "loss": 0.01831010729074478, "step": 53490 }, { "epoch": 15.18592108998013, "grad_norm": 13.758485794067383, "learning_rate": 8.482145898382062e-05, "loss": 0.045812129974365234, "step": 53500 }, { "epoch": 15.18592108998013, "eval_accuracy": 0.9558720671456731, "eval_loss": 0.13972848653793335, "eval_runtime": 31.1871, "eval_samples_per_second": 504.279, "eval_steps_per_second": 7.888, "step": 53500 }, { "epoch": 15.188759579903492, "grad_norm": 3.0384340286254883, "learning_rate": 8.481862049389724e-05, "loss": 0.03476028740406036, "step": 53510 }, { "epoch": 15.191598069826853, "grad_norm": 8.93148422241211, "learning_rate": 8.481578200397388e-05, "loss": 0.037573397159576416, "step": 53520 }, { "epoch": 15.194436559750212, "grad_norm": 6.068247318267822, "learning_rate": 8.481294351405053e-05, "loss": 0.0215740829706192, "step": 53530 }, { "epoch": 15.197275049673573, "grad_norm": 4.60642147064209, "learning_rate": 8.481010502412717e-05, "loss": 0.03872728049755096, "step": 53540 }, { "epoch": 15.200113539596934, "grad_norm": 3.9938864707946777, "learning_rate": 8.480726653420381e-05, "loss": 0.029471418261528014, "step": 53550 }, { "epoch": 15.202952029520295, "grad_norm": 6.822102069854736, "learning_rate": 8.480442804428045e-05, "loss": 0.023526379466056825, "step": 53560 }, { "epoch": 15.205790519443656, "grad_norm": 2.222734212875366, "learning_rate": 8.480158955435709e-05, "loss": 0.01734481304883957, "step": 53570 }, { "epoch": 15.208629009367018, "grad_norm": 1.5611189603805542, "learning_rate": 8.479875106443372e-05, "loss": 0.045385193824768064, "step": 53580 }, { "epoch": 15.211467499290377, "grad_norm": 4.646595001220703, "learning_rate": 8.479591257451036e-05, "loss": 0.03704947233200073, "step": 53590 }, { "epoch": 15.214305989213738, "grad_norm": 4.166716575622559, "learning_rate": 8.4793074084587e-05, "loss": 0.02110726684331894, "step": 53600 }, { "epoch": 15.217144479137099, "grad_norm": 4.020148754119873, "learning_rate": 8.479023559466364e-05, "loss": 0.024294754862785338, "step": 53610 }, { "epoch": 15.21998296906046, "grad_norm": 0.2567417621612549, "learning_rate": 8.478739710474029e-05, "loss": 0.018196302652359008, "step": 53620 }, { "epoch": 15.222821458983821, "grad_norm": 6.354002952575684, "learning_rate": 8.478455861481693e-05, "loss": 0.031344667077064514, "step": 53630 }, { "epoch": 15.22565994890718, "grad_norm": 1.950029969215393, "learning_rate": 8.478172012489355e-05, "loss": 0.01933310478925705, "step": 53640 }, { "epoch": 15.228498438830542, "grad_norm": 12.206835746765137, "learning_rate": 8.47788816349702e-05, "loss": 0.03785330653190613, "step": 53650 }, { "epoch": 15.231336928753903, "grad_norm": 15.296836853027344, "learning_rate": 8.477604314504684e-05, "loss": 0.03131443858146667, "step": 53660 }, { "epoch": 15.234175418677264, "grad_norm": 6.091537952423096, "learning_rate": 8.477320465512348e-05, "loss": 0.02741886079311371, "step": 53670 }, { "epoch": 15.237013908600625, "grad_norm": 3.2014384269714355, "learning_rate": 8.477036616520012e-05, "loss": 0.012884004414081574, "step": 53680 }, { "epoch": 15.239852398523984, "grad_norm": 8.4446382522583, "learning_rate": 8.476752767527676e-05, "loss": 0.029363781213760376, "step": 53690 }, { "epoch": 15.242690888447346, "grad_norm": 13.960970878601074, "learning_rate": 8.47646891853534e-05, "loss": 0.026152205467224122, "step": 53700 }, { "epoch": 15.245529378370707, "grad_norm": 1.270023226737976, "learning_rate": 8.476185069543003e-05, "loss": 0.018164780735969544, "step": 53710 }, { "epoch": 15.248367868294068, "grad_norm": 0.8989373445510864, "learning_rate": 8.475901220550667e-05, "loss": 0.02691385746002197, "step": 53720 }, { "epoch": 15.251206358217429, "grad_norm": 10.848620414733887, "learning_rate": 8.475617371558331e-05, "loss": 0.01985987424850464, "step": 53730 }, { "epoch": 15.25404484814079, "grad_norm": 7.492671012878418, "learning_rate": 8.475333522565995e-05, "loss": 0.03982653319835663, "step": 53740 }, { "epoch": 15.25688333806415, "grad_norm": 6.352535247802734, "learning_rate": 8.47504967357366e-05, "loss": 0.022130705416202545, "step": 53750 }, { "epoch": 15.25972182798751, "grad_norm": 4.262563705444336, "learning_rate": 8.474765824581324e-05, "loss": 0.02525729238986969, "step": 53760 }, { "epoch": 15.262560317910872, "grad_norm": 0.6703700423240662, "learning_rate": 8.474481975588987e-05, "loss": 0.016026677191257478, "step": 53770 }, { "epoch": 15.265398807834233, "grad_norm": 13.317543029785156, "learning_rate": 8.47419812659665e-05, "loss": 0.021447065472602844, "step": 53780 }, { "epoch": 15.268237297757594, "grad_norm": 5.439362049102783, "learning_rate": 8.473914277604315e-05, "loss": 0.022591273486614227, "step": 53790 }, { "epoch": 15.271075787680953, "grad_norm": 0.9924070835113525, "learning_rate": 8.473630428611979e-05, "loss": 0.013536173105239867, "step": 53800 }, { "epoch": 15.273914277604314, "grad_norm": 1.9649178981781006, "learning_rate": 8.473346579619643e-05, "loss": 0.019028496742248536, "step": 53810 }, { "epoch": 15.276752767527675, "grad_norm": 5.477230072021484, "learning_rate": 8.473062730627307e-05, "loss": 0.04178065359592438, "step": 53820 }, { "epoch": 15.279591257451036, "grad_norm": 7.263782501220703, "learning_rate": 8.472778881634971e-05, "loss": 0.02689291536808014, "step": 53830 }, { "epoch": 15.282429747374398, "grad_norm": 0.9777436852455139, "learning_rate": 8.472495032642634e-05, "loss": 0.019415967166423798, "step": 53840 }, { "epoch": 15.285268237297757, "grad_norm": 6.003324508666992, "learning_rate": 8.472211183650298e-05, "loss": 0.022689004242420197, "step": 53850 }, { "epoch": 15.288106727221118, "grad_norm": 9.073564529418945, "learning_rate": 8.471927334657962e-05, "loss": 0.02712928056716919, "step": 53860 }, { "epoch": 15.290945217144479, "grad_norm": 1.9147794246673584, "learning_rate": 8.471643485665627e-05, "loss": 0.03310129344463349, "step": 53870 }, { "epoch": 15.29378370706784, "grad_norm": 1.485855221748352, "learning_rate": 8.47135963667329e-05, "loss": 0.03053884506225586, "step": 53880 }, { "epoch": 15.296622196991201, "grad_norm": 4.232726097106934, "learning_rate": 8.471075787680955e-05, "loss": 0.023817449808120728, "step": 53890 }, { "epoch": 15.29946068691456, "grad_norm": 12.246906280517578, "learning_rate": 8.470791938688618e-05, "loss": 0.033807724714279175, "step": 53900 }, { "epoch": 15.302299176837922, "grad_norm": 5.464107990264893, "learning_rate": 8.470508089696282e-05, "loss": 0.024816635251045226, "step": 53910 }, { "epoch": 15.305137666761283, "grad_norm": 5.937697887420654, "learning_rate": 8.470224240703946e-05, "loss": 0.04006586968898773, "step": 53920 }, { "epoch": 15.307976156684644, "grad_norm": 1.50701904296875, "learning_rate": 8.46994039171161e-05, "loss": 0.017366278171539306, "step": 53930 }, { "epoch": 15.310814646608005, "grad_norm": 4.248182773590088, "learning_rate": 8.469656542719274e-05, "loss": 0.024303600192070007, "step": 53940 }, { "epoch": 15.313653136531366, "grad_norm": 2.161425828933716, "learning_rate": 8.469372693726938e-05, "loss": 0.01707477420568466, "step": 53950 }, { "epoch": 15.316491626454726, "grad_norm": 7.709280490875244, "learning_rate": 8.469088844734601e-05, "loss": 0.018223875761032106, "step": 53960 }, { "epoch": 15.319330116378087, "grad_norm": 8.556921005249023, "learning_rate": 8.468804995742265e-05, "loss": 0.03257838785648346, "step": 53970 }, { "epoch": 15.322168606301448, "grad_norm": 7.517209529876709, "learning_rate": 8.46852114674993e-05, "loss": 0.027686682343482972, "step": 53980 }, { "epoch": 15.325007096224809, "grad_norm": 1.236788034439087, "learning_rate": 8.468237297757593e-05, "loss": 0.01426074355840683, "step": 53990 }, { "epoch": 15.32784558614817, "grad_norm": 3.0803632736206055, "learning_rate": 8.467953448765256e-05, "loss": 0.02618541717529297, "step": 54000 }, { "epoch": 15.32784558614817, "eval_accuracy": 0.9631843326762892, "eval_loss": 0.11433736234903336, "eval_runtime": 31.7487, "eval_samples_per_second": 495.359, "eval_steps_per_second": 7.748, "step": 54000 }, { "epoch": 15.33068407607153, "grad_norm": 3.7513182163238525, "learning_rate": 8.467669599772922e-05, "loss": 0.017883220314979555, "step": 54010 }, { "epoch": 15.33352256599489, "grad_norm": 0.6390168070793152, "learning_rate": 8.467385750780586e-05, "loss": 0.04821707010269165, "step": 54020 }, { "epoch": 15.336361055918251, "grad_norm": 10.022395133972168, "learning_rate": 8.467101901788249e-05, "loss": 0.018045893311500548, "step": 54030 }, { "epoch": 15.339199545841613, "grad_norm": 5.511784076690674, "learning_rate": 8.466818052795913e-05, "loss": 0.025122317671775817, "step": 54040 }, { "epoch": 15.342038035764974, "grad_norm": 7.796726226806641, "learning_rate": 8.466534203803577e-05, "loss": 0.04648490250110626, "step": 54050 }, { "epoch": 15.344876525688333, "grad_norm": 3.3285627365112305, "learning_rate": 8.466250354811241e-05, "loss": 0.024082866311073304, "step": 54060 }, { "epoch": 15.347715015611694, "grad_norm": 2.4231514930725098, "learning_rate": 8.465966505818905e-05, "loss": 0.029388442635536194, "step": 54070 }, { "epoch": 15.350553505535055, "grad_norm": 11.6298828125, "learning_rate": 8.46568265682657e-05, "loss": 0.03692953884601593, "step": 54080 }, { "epoch": 15.353391995458416, "grad_norm": 3.9724791049957275, "learning_rate": 8.465398807834232e-05, "loss": 0.022999082505702973, "step": 54090 }, { "epoch": 15.356230485381777, "grad_norm": 3.0545289516448975, "learning_rate": 8.465114958841896e-05, "loss": 0.028710386157035826, "step": 54100 }, { "epoch": 15.359068975305137, "grad_norm": 3.168630361557007, "learning_rate": 8.46483110984956e-05, "loss": 0.02780725955963135, "step": 54110 }, { "epoch": 15.361907465228498, "grad_norm": 0.7495129704475403, "learning_rate": 8.464547260857225e-05, "loss": 0.008597607910633086, "step": 54120 }, { "epoch": 15.364745955151859, "grad_norm": 0.25669223070144653, "learning_rate": 8.464263411864887e-05, "loss": 0.019466498494148256, "step": 54130 }, { "epoch": 15.36758444507522, "grad_norm": 5.771591663360596, "learning_rate": 8.463979562872553e-05, "loss": 0.029212260246276857, "step": 54140 }, { "epoch": 15.370422934998581, "grad_norm": 2.0156846046447754, "learning_rate": 8.463695713880217e-05, "loss": 0.03037104308605194, "step": 54150 }, { "epoch": 15.373261424921942, "grad_norm": 2.4536187648773193, "learning_rate": 8.46341186488788e-05, "loss": 0.023899687826633452, "step": 54160 }, { "epoch": 15.376099914845302, "grad_norm": 1.5051302909851074, "learning_rate": 8.463128015895544e-05, "loss": 0.016792848706245422, "step": 54170 }, { "epoch": 15.378938404768663, "grad_norm": 12.736126899719238, "learning_rate": 8.462844166903208e-05, "loss": 0.035494789481163025, "step": 54180 }, { "epoch": 15.381776894692024, "grad_norm": 4.557518482208252, "learning_rate": 8.462560317910871e-05, "loss": 0.025106295943260193, "step": 54190 }, { "epoch": 15.384615384615385, "grad_norm": 11.785857200622559, "learning_rate": 8.462276468918535e-05, "loss": 0.044714349508285525, "step": 54200 }, { "epoch": 15.387453874538746, "grad_norm": 3.202505588531494, "learning_rate": 8.4619926199262e-05, "loss": 0.02049727886915207, "step": 54210 }, { "epoch": 15.390292364462105, "grad_norm": 2.1362318992614746, "learning_rate": 8.461708770933863e-05, "loss": 0.013331833481788635, "step": 54220 }, { "epoch": 15.393130854385467, "grad_norm": 3.6306371688842773, "learning_rate": 8.461424921941527e-05, "loss": 0.012118640542030334, "step": 54230 }, { "epoch": 15.395969344308828, "grad_norm": 3.8929033279418945, "learning_rate": 8.461141072949191e-05, "loss": 0.02974376380443573, "step": 54240 }, { "epoch": 15.398807834232189, "grad_norm": 9.148716926574707, "learning_rate": 8.460857223956856e-05, "loss": 0.018864558637142183, "step": 54250 }, { "epoch": 15.40164632415555, "grad_norm": 14.939241409301758, "learning_rate": 8.460573374964518e-05, "loss": 0.049407050013542175, "step": 54260 }, { "epoch": 15.40448481407891, "grad_norm": 5.407922744750977, "learning_rate": 8.460289525972184e-05, "loss": 0.014245152473449707, "step": 54270 }, { "epoch": 15.40732330400227, "grad_norm": 11.050570487976074, "learning_rate": 8.460005676979848e-05, "loss": 0.03409420251846314, "step": 54280 }, { "epoch": 15.410161793925631, "grad_norm": 8.218360900878906, "learning_rate": 8.459721827987511e-05, "loss": 0.020531482994556427, "step": 54290 }, { "epoch": 15.413000283848993, "grad_norm": 0.9123543500900269, "learning_rate": 8.459437978995175e-05, "loss": 0.03399288058280945, "step": 54300 }, { "epoch": 15.415838773772354, "grad_norm": 1.9880450963974, "learning_rate": 8.459154130002839e-05, "loss": 0.02451392710208893, "step": 54310 }, { "epoch": 15.418677263695713, "grad_norm": 2.1012115478515625, "learning_rate": 8.458870281010502e-05, "loss": 0.026399418711662292, "step": 54320 }, { "epoch": 15.421515753619074, "grad_norm": 3.144155740737915, "learning_rate": 8.458586432018166e-05, "loss": 0.027680817246437072, "step": 54330 }, { "epoch": 15.424354243542435, "grad_norm": 1.895432710647583, "learning_rate": 8.458302583025832e-05, "loss": 0.021748249232769013, "step": 54340 }, { "epoch": 15.427192733465796, "grad_norm": 7.695284366607666, "learning_rate": 8.458018734033494e-05, "loss": 0.02895972728729248, "step": 54350 }, { "epoch": 15.430031223389157, "grad_norm": 1.6202532052993774, "learning_rate": 8.457734885041158e-05, "loss": 0.01592489331960678, "step": 54360 }, { "epoch": 15.432869713312519, "grad_norm": 3.5763144493103027, "learning_rate": 8.457451036048823e-05, "loss": 0.04506300091743469, "step": 54370 }, { "epoch": 15.435708203235878, "grad_norm": 0.7253667712211609, "learning_rate": 8.457167187056487e-05, "loss": 0.02437674254179001, "step": 54380 }, { "epoch": 15.438546693159239, "grad_norm": 10.76182746887207, "learning_rate": 8.45688333806415e-05, "loss": 0.04124364852905273, "step": 54390 }, { "epoch": 15.4413851830826, "grad_norm": 0.7431447505950928, "learning_rate": 8.456599489071814e-05, "loss": 0.029517734050750734, "step": 54400 }, { "epoch": 15.444223673005961, "grad_norm": 15.151039123535156, "learning_rate": 8.456315640079479e-05, "loss": 0.04049486517906189, "step": 54410 }, { "epoch": 15.447062162929322, "grad_norm": 3.646165370941162, "learning_rate": 8.456031791087142e-05, "loss": 0.02199735939502716, "step": 54420 }, { "epoch": 15.449900652852682, "grad_norm": 7.934549331665039, "learning_rate": 8.455747942094806e-05, "loss": 0.03410484790802002, "step": 54430 }, { "epoch": 15.452739142776043, "grad_norm": 14.28009033203125, "learning_rate": 8.45546409310247e-05, "loss": 0.045516836643219, "step": 54440 }, { "epoch": 15.455577632699404, "grad_norm": 9.804656028747559, "learning_rate": 8.455180244110133e-05, "loss": 0.0263959139585495, "step": 54450 }, { "epoch": 15.458416122622765, "grad_norm": 3.664119005203247, "learning_rate": 8.454896395117797e-05, "loss": 0.02227897047996521, "step": 54460 }, { "epoch": 15.461254612546126, "grad_norm": 8.203856468200684, "learning_rate": 8.454612546125463e-05, "loss": 0.020636704564094544, "step": 54470 }, { "epoch": 15.464093102469485, "grad_norm": 3.1648714542388916, "learning_rate": 8.454328697133125e-05, "loss": 0.03570373356342316, "step": 54480 }, { "epoch": 15.466931592392847, "grad_norm": 2.6219470500946045, "learning_rate": 8.45404484814079e-05, "loss": 0.03272389769554138, "step": 54490 }, { "epoch": 15.469770082316208, "grad_norm": 10.881244659423828, "learning_rate": 8.453760999148454e-05, "loss": 0.05371968746185303, "step": 54500 }, { "epoch": 15.469770082316208, "eval_accuracy": 0.9592420677815222, "eval_loss": 0.13407358527183533, "eval_runtime": 31.4472, "eval_samples_per_second": 500.108, "eval_steps_per_second": 7.823, "step": 54500 }, { "epoch": 15.472608572239569, "grad_norm": 10.07542610168457, "learning_rate": 8.453477150156118e-05, "loss": 0.04315173923969269, "step": 54510 }, { "epoch": 15.47544706216293, "grad_norm": 7.209736347198486, "learning_rate": 8.45319330116378e-05, "loss": 0.03973647058010101, "step": 54520 }, { "epoch": 15.478285552086291, "grad_norm": 3.3896737098693848, "learning_rate": 8.452909452171445e-05, "loss": 0.02482490837574005, "step": 54530 }, { "epoch": 15.48112404200965, "grad_norm": 0.6716392040252686, "learning_rate": 8.45262560317911e-05, "loss": 0.04154448807239532, "step": 54540 }, { "epoch": 15.483962531933011, "grad_norm": 8.893574714660645, "learning_rate": 8.452341754186773e-05, "loss": 0.03340011239051819, "step": 54550 }, { "epoch": 15.486801021856373, "grad_norm": 6.536942481994629, "learning_rate": 8.452057905194437e-05, "loss": 0.014489671587944031, "step": 54560 }, { "epoch": 15.489639511779734, "grad_norm": 2.938420534133911, "learning_rate": 8.451774056202101e-05, "loss": 0.02795588970184326, "step": 54570 }, { "epoch": 15.492478001703095, "grad_norm": 8.389741897583008, "learning_rate": 8.451490207209764e-05, "loss": 0.029088133573532106, "step": 54580 }, { "epoch": 15.495316491626454, "grad_norm": 1.0364203453063965, "learning_rate": 8.451206358217428e-05, "loss": 0.016360311210155486, "step": 54590 }, { "epoch": 15.498154981549815, "grad_norm": 2.46856689453125, "learning_rate": 8.450922509225094e-05, "loss": 0.01980867236852646, "step": 54600 }, { "epoch": 15.500993471473176, "grad_norm": 8.08748722076416, "learning_rate": 8.450638660232756e-05, "loss": 0.033151456713676454, "step": 54610 }, { "epoch": 15.503831961396537, "grad_norm": 12.501373291015625, "learning_rate": 8.45035481124042e-05, "loss": 0.018897634744644166, "step": 54620 }, { "epoch": 15.506670451319899, "grad_norm": 7.797924518585205, "learning_rate": 8.450070962248085e-05, "loss": 0.024894019961357115, "step": 54630 }, { "epoch": 15.509508941243258, "grad_norm": 11.26822280883789, "learning_rate": 8.449787113255749e-05, "loss": 0.02593979239463806, "step": 54640 }, { "epoch": 15.512347431166619, "grad_norm": 5.739212989807129, "learning_rate": 8.449503264263412e-05, "loss": 0.02486390620470047, "step": 54650 }, { "epoch": 15.51518592108998, "grad_norm": 5.1653642654418945, "learning_rate": 8.449219415271076e-05, "loss": 0.031486839056015015, "step": 54660 }, { "epoch": 15.518024411013341, "grad_norm": 5.867390155792236, "learning_rate": 8.448935566278741e-05, "loss": 0.04039141535758972, "step": 54670 }, { "epoch": 15.520862900936702, "grad_norm": 7.855792045593262, "learning_rate": 8.448651717286404e-05, "loss": 0.043772509694099425, "step": 54680 }, { "epoch": 15.523701390860062, "grad_norm": 6.4003496170043945, "learning_rate": 8.448367868294068e-05, "loss": 0.033394047617912294, "step": 54690 }, { "epoch": 15.526539880783423, "grad_norm": 3.44984769821167, "learning_rate": 8.448084019301732e-05, "loss": 0.02004280537366867, "step": 54700 }, { "epoch": 15.529378370706784, "grad_norm": 0.9175532460212708, "learning_rate": 8.447800170309395e-05, "loss": 0.014948099851608276, "step": 54710 }, { "epoch": 15.532216860630145, "grad_norm": 5.156901836395264, "learning_rate": 8.447516321317059e-05, "loss": 0.022969256341457366, "step": 54720 }, { "epoch": 15.535055350553506, "grad_norm": 10.004914283752441, "learning_rate": 8.447232472324723e-05, "loss": 0.021813562512397765, "step": 54730 }, { "epoch": 15.537893840476865, "grad_norm": 1.4644076824188232, "learning_rate": 8.446948623332388e-05, "loss": 0.02282424122095108, "step": 54740 }, { "epoch": 15.540732330400227, "grad_norm": 9.742193222045898, "learning_rate": 8.446664774340052e-05, "loss": 0.02879665493965149, "step": 54750 }, { "epoch": 15.543570820323588, "grad_norm": 4.354197978973389, "learning_rate": 8.446380925347716e-05, "loss": 0.03276260197162628, "step": 54760 }, { "epoch": 15.546409310246949, "grad_norm": 3.7555503845214844, "learning_rate": 8.44609707635538e-05, "loss": 0.015233895182609558, "step": 54770 }, { "epoch": 15.54924780017031, "grad_norm": 6.296515941619873, "learning_rate": 8.445813227363043e-05, "loss": 0.017288655042648315, "step": 54780 }, { "epoch": 15.552086290093671, "grad_norm": 1.4583958387374878, "learning_rate": 8.445529378370707e-05, "loss": 0.040284544229507446, "step": 54790 }, { "epoch": 15.55492478001703, "grad_norm": 3.4453704357147217, "learning_rate": 8.445245529378371e-05, "loss": 0.024253936111927034, "step": 54800 }, { "epoch": 15.557763269940391, "grad_norm": 4.137217998504639, "learning_rate": 8.444961680386035e-05, "loss": 0.031335747241973876, "step": 54810 }, { "epoch": 15.560601759863752, "grad_norm": 3.6918506622314453, "learning_rate": 8.444677831393699e-05, "loss": 0.012523356080055236, "step": 54820 }, { "epoch": 15.563440249787114, "grad_norm": 11.648348808288574, "learning_rate": 8.444393982401363e-05, "loss": 0.02171445041894913, "step": 54830 }, { "epoch": 15.566278739710475, "grad_norm": 2.579878807067871, "learning_rate": 8.444110133409026e-05, "loss": 0.03140862584114075, "step": 54840 }, { "epoch": 15.569117229633834, "grad_norm": 8.60331916809082, "learning_rate": 8.44382628441669e-05, "loss": 0.034834560751914975, "step": 54850 }, { "epoch": 15.571955719557195, "grad_norm": 2.809657096862793, "learning_rate": 8.443542435424354e-05, "loss": 0.055110251903533934, "step": 54860 }, { "epoch": 15.574794209480556, "grad_norm": 4.75227689743042, "learning_rate": 8.443258586432019e-05, "loss": 0.04067589640617371, "step": 54870 }, { "epoch": 15.577632699403917, "grad_norm": 5.746793270111084, "learning_rate": 8.442974737439683e-05, "loss": 0.019145742058753967, "step": 54880 }, { "epoch": 15.580471189327278, "grad_norm": 1.67842435836792, "learning_rate": 8.442690888447347e-05, "loss": 0.02290988713502884, "step": 54890 }, { "epoch": 15.58330967925064, "grad_norm": 9.37649154663086, "learning_rate": 8.44240703945501e-05, "loss": 0.01786271333694458, "step": 54900 }, { "epoch": 15.586148169173999, "grad_norm": 8.962061882019043, "learning_rate": 8.442123190462674e-05, "loss": 0.04273948967456818, "step": 54910 }, { "epoch": 15.58898665909736, "grad_norm": 8.013415336608887, "learning_rate": 8.441839341470338e-05, "loss": 0.03322238624095917, "step": 54920 }, { "epoch": 15.591825149020721, "grad_norm": 5.692356109619141, "learning_rate": 8.441555492478002e-05, "loss": 0.02370072901248932, "step": 54930 }, { "epoch": 15.594663638944082, "grad_norm": 8.148560523986816, "learning_rate": 8.441271643485666e-05, "loss": 0.0259392648935318, "step": 54940 }, { "epoch": 15.597502128867443, "grad_norm": 5.666191577911377, "learning_rate": 8.44098779449333e-05, "loss": 0.022132568061351776, "step": 54950 }, { "epoch": 15.600340618790803, "grad_norm": 10.347456932067871, "learning_rate": 8.440703945500994e-05, "loss": 0.029416689276695253, "step": 54960 }, { "epoch": 15.603179108714164, "grad_norm": 4.348835468292236, "learning_rate": 8.440420096508657e-05, "loss": 0.014265596866607666, "step": 54970 }, { "epoch": 15.606017598637525, "grad_norm": 17.19972038269043, "learning_rate": 8.440136247516321e-05, "loss": 0.022851523756980897, "step": 54980 }, { "epoch": 15.608856088560886, "grad_norm": 7.24250602722168, "learning_rate": 8.439852398523986e-05, "loss": 0.031148865818977356, "step": 54990 }, { "epoch": 15.611694578484247, "grad_norm": 2.0472657680511475, "learning_rate": 8.43956854953165e-05, "loss": 0.045480182766914366, "step": 55000 }, { "epoch": 15.611694578484247, "eval_accuracy": 0.9544096140395498, "eval_loss": 0.13611218333244324, "eval_runtime": 31.4101, "eval_samples_per_second": 500.699, "eval_steps_per_second": 7.832, "step": 55000 }, { "epoch": 15.614533068407606, "grad_norm": 0.725804328918457, "learning_rate": 8.439284700539314e-05, "loss": 0.023694561421871187, "step": 55010 }, { "epoch": 15.617371558330968, "grad_norm": 3.9917492866516113, "learning_rate": 8.439000851546978e-05, "loss": 0.027720621228218077, "step": 55020 }, { "epoch": 15.620210048254329, "grad_norm": 3.7484641075134277, "learning_rate": 8.438717002554641e-05, "loss": 0.011597867310047149, "step": 55030 }, { "epoch": 15.62304853817769, "grad_norm": 6.442530155181885, "learning_rate": 8.438433153562305e-05, "loss": 0.02480282336473465, "step": 55040 }, { "epoch": 15.625887028101051, "grad_norm": 0.9566351175308228, "learning_rate": 8.438149304569969e-05, "loss": 0.03094039261341095, "step": 55050 }, { "epoch": 15.62872551802441, "grad_norm": 9.80335521697998, "learning_rate": 8.437865455577633e-05, "loss": 0.04263542592525482, "step": 55060 }, { "epoch": 15.631564007947771, "grad_norm": 0.9957788586616516, "learning_rate": 8.437581606585297e-05, "loss": 0.01051989123225212, "step": 55070 }, { "epoch": 15.634402497871132, "grad_norm": 12.291481971740723, "learning_rate": 8.437297757592961e-05, "loss": 0.016485369205474852, "step": 55080 }, { "epoch": 15.637240987794494, "grad_norm": 1.9296355247497559, "learning_rate": 8.437013908600626e-05, "loss": 0.025524410605430602, "step": 55090 }, { "epoch": 15.640079477717855, "grad_norm": 2.2079684734344482, "learning_rate": 8.436730059608288e-05, "loss": 0.02186250388622284, "step": 55100 }, { "epoch": 15.642917967641214, "grad_norm": 14.009074211120605, "learning_rate": 8.436446210615952e-05, "loss": 0.043946081399917604, "step": 55110 }, { "epoch": 15.645756457564575, "grad_norm": 6.56678581237793, "learning_rate": 8.436162361623617e-05, "loss": 0.014149743318557739, "step": 55120 }, { "epoch": 15.648594947487936, "grad_norm": 0.9099812507629395, "learning_rate": 8.43587851263128e-05, "loss": 0.033816903829574585, "step": 55130 }, { "epoch": 15.651433437411297, "grad_norm": 1.2540795803070068, "learning_rate": 8.435594663638945e-05, "loss": 0.012104620784521103, "step": 55140 }, { "epoch": 15.654271927334658, "grad_norm": 8.036087989807129, "learning_rate": 8.435310814646609e-05, "loss": 0.031072506308555604, "step": 55150 }, { "epoch": 15.65711041725802, "grad_norm": 5.32780647277832, "learning_rate": 8.435026965654272e-05, "loss": 0.03889361619949341, "step": 55160 }, { "epoch": 15.659948907181379, "grad_norm": 5.735215187072754, "learning_rate": 8.434743116661936e-05, "loss": 0.0414595752954483, "step": 55170 }, { "epoch": 15.66278739710474, "grad_norm": 6.035196781158447, "learning_rate": 8.4344592676696e-05, "loss": 0.020461562275886535, "step": 55180 }, { "epoch": 15.665625887028101, "grad_norm": 0.9485716223716736, "learning_rate": 8.434175418677264e-05, "loss": 0.02586342990398407, "step": 55190 }, { "epoch": 15.668464376951462, "grad_norm": 9.054306030273438, "learning_rate": 8.433891569684928e-05, "loss": 0.02514852285385132, "step": 55200 }, { "epoch": 15.671302866874823, "grad_norm": 0.6084137558937073, "learning_rate": 8.433607720692592e-05, "loss": 0.02879243791103363, "step": 55210 }, { "epoch": 15.674141356798183, "grad_norm": 1.6793603897094727, "learning_rate": 8.433323871700257e-05, "loss": 0.010233908891677856, "step": 55220 }, { "epoch": 15.676979846721544, "grad_norm": 1.3449037075042725, "learning_rate": 8.43304002270792e-05, "loss": 0.021717290580272674, "step": 55230 }, { "epoch": 15.679818336644905, "grad_norm": 6.453930854797363, "learning_rate": 8.432756173715584e-05, "loss": 0.024816076457500457, "step": 55240 }, { "epoch": 15.682656826568266, "grad_norm": 6.76559591293335, "learning_rate": 8.432472324723248e-05, "loss": 0.012948113679885864, "step": 55250 }, { "epoch": 15.685495316491627, "grad_norm": 3.3474678993225098, "learning_rate": 8.43218847573091e-05, "loss": 0.029218363761901855, "step": 55260 }, { "epoch": 15.688333806414986, "grad_norm": 8.351326942443848, "learning_rate": 8.431904626738576e-05, "loss": 0.026746863126754762, "step": 55270 }, { "epoch": 15.691172296338348, "grad_norm": 10.07962417602539, "learning_rate": 8.43162077774624e-05, "loss": 0.0370498925447464, "step": 55280 }, { "epoch": 15.694010786261709, "grad_norm": 1.937565803527832, "learning_rate": 8.431336928753903e-05, "loss": 0.02836006283760071, "step": 55290 }, { "epoch": 15.69684927618507, "grad_norm": 2.2049403190612793, "learning_rate": 8.431053079761567e-05, "loss": 0.024386924505233765, "step": 55300 }, { "epoch": 15.69968776610843, "grad_norm": 11.942442893981934, "learning_rate": 8.430769230769231e-05, "loss": 0.025810697674751283, "step": 55310 }, { "epoch": 15.702526256031792, "grad_norm": 15.803805351257324, "learning_rate": 8.430485381776895e-05, "loss": 0.0414841890335083, "step": 55320 }, { "epoch": 15.705364745955151, "grad_norm": 2.537261962890625, "learning_rate": 8.430201532784558e-05, "loss": 0.019336915016174315, "step": 55330 }, { "epoch": 15.708203235878512, "grad_norm": 2.325127363204956, "learning_rate": 8.429917683792224e-05, "loss": 0.013833549618721009, "step": 55340 }, { "epoch": 15.711041725801874, "grad_norm": 3.4864227771759033, "learning_rate": 8.429633834799888e-05, "loss": 0.035663875937461856, "step": 55350 }, { "epoch": 15.713880215725235, "grad_norm": 2.592761516571045, "learning_rate": 8.42934998580755e-05, "loss": 0.028659674525260925, "step": 55360 }, { "epoch": 15.716718705648596, "grad_norm": 10.83828353881836, "learning_rate": 8.429066136815215e-05, "loss": 0.03129350543022156, "step": 55370 }, { "epoch": 15.719557195571955, "grad_norm": 3.1333580017089844, "learning_rate": 8.428782287822879e-05, "loss": 0.019594284892082214, "step": 55380 }, { "epoch": 15.722395685495316, "grad_norm": 14.388250350952148, "learning_rate": 8.428498438830542e-05, "loss": 0.01917848885059357, "step": 55390 }, { "epoch": 15.725234175418677, "grad_norm": 12.465145111083984, "learning_rate": 8.428214589838207e-05, "loss": 0.034659892320632935, "step": 55400 }, { "epoch": 15.728072665342038, "grad_norm": 2.8771822452545166, "learning_rate": 8.427930740845871e-05, "loss": 0.025818976759910583, "step": 55410 }, { "epoch": 15.7309111552654, "grad_norm": 10.621774673461914, "learning_rate": 8.427646891853534e-05, "loss": 0.0350786954164505, "step": 55420 }, { "epoch": 15.733749645188759, "grad_norm": 4.423511505126953, "learning_rate": 8.427363042861198e-05, "loss": 0.03854709267616272, "step": 55430 }, { "epoch": 15.73658813511212, "grad_norm": 1.963225245475769, "learning_rate": 8.427079193868862e-05, "loss": 0.03947713673114776, "step": 55440 }, { "epoch": 15.739426625035481, "grad_norm": 3.383223056793213, "learning_rate": 8.426795344876526e-05, "loss": 0.021287114918231965, "step": 55450 }, { "epoch": 15.742265114958842, "grad_norm": 2.511078119277954, "learning_rate": 8.426511495884189e-05, "loss": 0.016826075315475465, "step": 55460 }, { "epoch": 15.745103604882203, "grad_norm": 8.995229721069336, "learning_rate": 8.426227646891855e-05, "loss": 0.03690308928489685, "step": 55470 }, { "epoch": 15.747942094805563, "grad_norm": 11.649239540100098, "learning_rate": 8.425943797899519e-05, "loss": 0.02400638908147812, "step": 55480 }, { "epoch": 15.750780584728924, "grad_norm": 12.552679061889648, "learning_rate": 8.425659948907182e-05, "loss": 0.05099513530731201, "step": 55490 }, { "epoch": 15.753619074652285, "grad_norm": 2.8938004970550537, "learning_rate": 8.425376099914846e-05, "loss": 0.020146849751472472, "step": 55500 }, { "epoch": 15.753619074652285, "eval_accuracy": 0.9609588605582756, "eval_loss": 0.12461204826831818, "eval_runtime": 31.6124, "eval_samples_per_second": 497.494, "eval_steps_per_second": 7.782, "step": 55500 }, { "epoch": 15.756457564575646, "grad_norm": 0.6011388301849365, "learning_rate": 8.42509225092251e-05, "loss": 0.014214943349361419, "step": 55510 }, { "epoch": 15.759296054499007, "grad_norm": 6.048890590667725, "learning_rate": 8.424808401930173e-05, "loss": 0.035259172320365906, "step": 55520 }, { "epoch": 15.762134544422366, "grad_norm": 2.1683619022369385, "learning_rate": 8.424524552937837e-05, "loss": 0.026011309027671813, "step": 55530 }, { "epoch": 15.764973034345727, "grad_norm": 4.43717098236084, "learning_rate": 8.424240703945502e-05, "loss": 0.020434191823005675, "step": 55540 }, { "epoch": 15.767811524269089, "grad_norm": 4.325998306274414, "learning_rate": 8.423956854953165e-05, "loss": 0.03220485746860504, "step": 55550 }, { "epoch": 15.77065001419245, "grad_norm": 2.4194178581237793, "learning_rate": 8.423673005960829e-05, "loss": 0.02003835141658783, "step": 55560 }, { "epoch": 15.77348850411581, "grad_norm": 4.828169345855713, "learning_rate": 8.423389156968493e-05, "loss": 0.016806913912296294, "step": 55570 }, { "epoch": 15.776326994039172, "grad_norm": 3.605966091156006, "learning_rate": 8.423105307976157e-05, "loss": 0.02392922043800354, "step": 55580 }, { "epoch": 15.779165483962531, "grad_norm": 3.2854690551757812, "learning_rate": 8.42282145898382e-05, "loss": 0.01690668612718582, "step": 55590 }, { "epoch": 15.782003973885892, "grad_norm": 8.527112007141113, "learning_rate": 8.422537609991486e-05, "loss": 0.02466445565223694, "step": 55600 }, { "epoch": 15.784842463809253, "grad_norm": 15.029330253601074, "learning_rate": 8.42225376099915e-05, "loss": 0.026534241437911988, "step": 55610 }, { "epoch": 15.787680953732615, "grad_norm": 18.220033645629883, "learning_rate": 8.421969912006813e-05, "loss": 0.04281447231769562, "step": 55620 }, { "epoch": 15.790519443655976, "grad_norm": 17.181987762451172, "learning_rate": 8.421686063014477e-05, "loss": 0.018392178416252136, "step": 55630 }, { "epoch": 15.793357933579335, "grad_norm": 9.275490760803223, "learning_rate": 8.421402214022141e-05, "loss": 0.020517879724502565, "step": 55640 }, { "epoch": 15.796196423502696, "grad_norm": 0.9653279185295105, "learning_rate": 8.421118365029804e-05, "loss": 0.03457046747207641, "step": 55650 }, { "epoch": 15.799034913426057, "grad_norm": 4.480401515960693, "learning_rate": 8.420834516037468e-05, "loss": 0.02772296667098999, "step": 55660 }, { "epoch": 15.801873403349418, "grad_norm": 7.011767387390137, "learning_rate": 8.420550667045133e-05, "loss": 0.04459327757358551, "step": 55670 }, { "epoch": 15.80471189327278, "grad_norm": 0.9134606122970581, "learning_rate": 8.420266818052796e-05, "loss": 0.014770305156707764, "step": 55680 }, { "epoch": 15.80755038319614, "grad_norm": 3.7655885219573975, "learning_rate": 8.41998296906046e-05, "loss": 0.0340694934129715, "step": 55690 }, { "epoch": 15.8103888731195, "grad_norm": 2.3053646087646484, "learning_rate": 8.419699120068124e-05, "loss": 0.03109254240989685, "step": 55700 }, { "epoch": 15.813227363042861, "grad_norm": 1.0620684623718262, "learning_rate": 8.419415271075789e-05, "loss": 0.01757873147726059, "step": 55710 }, { "epoch": 15.816065852966222, "grad_norm": 5.15792179107666, "learning_rate": 8.419131422083451e-05, "loss": 0.027486610412597656, "step": 55720 }, { "epoch": 15.818904342889583, "grad_norm": 6.560937404632568, "learning_rate": 8.418847573091115e-05, "loss": 0.016028845310211183, "step": 55730 }, { "epoch": 15.821742832812944, "grad_norm": 16.208751678466797, "learning_rate": 8.41856372409878e-05, "loss": 0.026366108655929567, "step": 55740 }, { "epoch": 15.824581322736304, "grad_norm": 1.8737319707870483, "learning_rate": 8.418279875106444e-05, "loss": 0.04411182701587677, "step": 55750 }, { "epoch": 15.827419812659665, "grad_norm": 12.58658504486084, "learning_rate": 8.417996026114108e-05, "loss": 0.033492755889892575, "step": 55760 }, { "epoch": 15.830258302583026, "grad_norm": 7.875770568847656, "learning_rate": 8.417712177121772e-05, "loss": 0.025324517488479616, "step": 55770 }, { "epoch": 15.833096792506387, "grad_norm": 1.199244499206543, "learning_rate": 8.417428328129435e-05, "loss": 0.022986453771591187, "step": 55780 }, { "epoch": 15.835935282429748, "grad_norm": 3.8129796981811523, "learning_rate": 8.417144479137099e-05, "loss": 0.02322043925523758, "step": 55790 }, { "epoch": 15.838773772353107, "grad_norm": 0.5839950442314148, "learning_rate": 8.416860630144764e-05, "loss": 0.02498307228088379, "step": 55800 }, { "epoch": 15.841612262276469, "grad_norm": 12.658780097961426, "learning_rate": 8.416576781152427e-05, "loss": 0.03638279438018799, "step": 55810 }, { "epoch": 15.84445075219983, "grad_norm": 0.9448693990707397, "learning_rate": 8.416292932160091e-05, "loss": 0.0205583393573761, "step": 55820 }, { "epoch": 15.84728924212319, "grad_norm": 4.23764705657959, "learning_rate": 8.416009083167755e-05, "loss": 0.023648782074451445, "step": 55830 }, { "epoch": 15.850127732046552, "grad_norm": 6.288824558258057, "learning_rate": 8.415725234175418e-05, "loss": 0.02515784800052643, "step": 55840 }, { "epoch": 15.852966221969911, "grad_norm": 0.7288370728492737, "learning_rate": 8.415441385183082e-05, "loss": 0.012754830718040466, "step": 55850 }, { "epoch": 15.855804711893272, "grad_norm": 0.5737016797065735, "learning_rate": 8.415157536190747e-05, "loss": 0.04049540162086487, "step": 55860 }, { "epoch": 15.858643201816633, "grad_norm": 8.069538116455078, "learning_rate": 8.41487368719841e-05, "loss": 0.04047139883041382, "step": 55870 }, { "epoch": 15.861481691739995, "grad_norm": 1.805380940437317, "learning_rate": 8.414589838206075e-05, "loss": 0.01842280328273773, "step": 55880 }, { "epoch": 15.864320181663356, "grad_norm": 10.589919090270996, "learning_rate": 8.414305989213739e-05, "loss": 0.02534562349319458, "step": 55890 }, { "epoch": 15.867158671586715, "grad_norm": 3.550118923187256, "learning_rate": 8.414022140221403e-05, "loss": 0.0255268394947052, "step": 55900 }, { "epoch": 15.869997161510076, "grad_norm": 14.242570877075195, "learning_rate": 8.413738291229066e-05, "loss": 0.01810208261013031, "step": 55910 }, { "epoch": 15.872835651433437, "grad_norm": 10.449312210083008, "learning_rate": 8.41345444223673e-05, "loss": 0.026680964231491088, "step": 55920 }, { "epoch": 15.875674141356798, "grad_norm": 8.877686500549316, "learning_rate": 8.413170593244395e-05, "loss": 0.022716453671455382, "step": 55930 }, { "epoch": 15.87851263128016, "grad_norm": 2.263636589050293, "learning_rate": 8.412886744252058e-05, "loss": 0.023564670979976655, "step": 55940 }, { "epoch": 15.88135112120352, "grad_norm": 5.455941677093506, "learning_rate": 8.412602895259722e-05, "loss": 0.026516324281692503, "step": 55950 }, { "epoch": 15.88418961112688, "grad_norm": 6.262031078338623, "learning_rate": 8.412319046267387e-05, "loss": 0.0171290248632431, "step": 55960 }, { "epoch": 15.887028101050241, "grad_norm": 10.699884414672852, "learning_rate": 8.412035197275049e-05, "loss": 0.02896207869052887, "step": 55970 }, { "epoch": 15.889866590973602, "grad_norm": 4.022065162658691, "learning_rate": 8.411751348282713e-05, "loss": 0.031614893674850465, "step": 55980 }, { "epoch": 15.892705080896963, "grad_norm": 11.237079620361328, "learning_rate": 8.411467499290378e-05, "loss": 0.025946211814880372, "step": 55990 }, { "epoch": 15.895543570820324, "grad_norm": Infinity, "learning_rate": 8.411183650298042e-05, "loss": 0.030976390838623045, "step": 56000 }, { "epoch": 15.895543570820324, "eval_accuracy": 0.9621033890761111, "eval_loss": 0.11983194202184677, "eval_runtime": 31.3136, "eval_samples_per_second": 502.241, "eval_steps_per_second": 7.856, "step": 56000 }, { "epoch": 15.898382060743684, "grad_norm": 4.911223411560059, "learning_rate": 8.41092818620494e-05, "loss": 0.03257195055484772, "step": 56010 }, { "epoch": 15.901220550667045, "grad_norm": 3.5249905586242676, "learning_rate": 8.410644337212603e-05, "loss": 0.020603126287460326, "step": 56020 }, { "epoch": 15.904059040590406, "grad_norm": 4.776357650756836, "learning_rate": 8.410360488220267e-05, "loss": 0.01293005645275116, "step": 56030 }, { "epoch": 15.906897530513767, "grad_norm": 16.306373596191406, "learning_rate": 8.410076639227931e-05, "loss": 0.028395670652389526, "step": 56040 }, { "epoch": 15.909736020437128, "grad_norm": 0.15443992614746094, "learning_rate": 8.409792790235595e-05, "loss": 0.049327808618545535, "step": 56050 }, { "epoch": 15.912574510360487, "grad_norm": 7.249589443206787, "learning_rate": 8.409508941243259e-05, "loss": 0.04538487792015076, "step": 56060 }, { "epoch": 15.915413000283849, "grad_norm": 14.804144859313965, "learning_rate": 8.409225092250923e-05, "loss": 0.04028477370738983, "step": 56070 }, { "epoch": 15.91825149020721, "grad_norm": 12.310279846191406, "learning_rate": 8.408941243258587e-05, "loss": 0.025865823030471802, "step": 56080 }, { "epoch": 15.92108998013057, "grad_norm": 0.782041609287262, "learning_rate": 8.40865739426625e-05, "loss": 0.017394444346427916, "step": 56090 }, { "epoch": 15.923928470053932, "grad_norm": 1.530235767364502, "learning_rate": 8.408373545273914e-05, "loss": 0.014441056549549103, "step": 56100 }, { "epoch": 15.926766959977293, "grad_norm": 9.699479103088379, "learning_rate": 8.408089696281578e-05, "loss": 0.01742193102836609, "step": 56110 }, { "epoch": 15.929605449900652, "grad_norm": 0.7000200152397156, "learning_rate": 8.407805847289243e-05, "loss": 0.014637525379657745, "step": 56120 }, { "epoch": 15.932443939824013, "grad_norm": 1.259110450744629, "learning_rate": 8.407521998296907e-05, "loss": 0.019371147453784942, "step": 56130 }, { "epoch": 15.935282429747375, "grad_norm": 2.1443381309509277, "learning_rate": 8.407238149304571e-05, "loss": 0.0268601655960083, "step": 56140 }, { "epoch": 15.938120919670736, "grad_norm": 3.9176294803619385, "learning_rate": 8.406954300312234e-05, "loss": 0.02974343001842499, "step": 56150 }, { "epoch": 15.940959409594097, "grad_norm": 1.5984960794448853, "learning_rate": 8.406670451319898e-05, "loss": 0.00898154228925705, "step": 56160 }, { "epoch": 15.943797899517456, "grad_norm": 2.9393582344055176, "learning_rate": 8.406386602327562e-05, "loss": 0.01995365172624588, "step": 56170 }, { "epoch": 15.946636389440817, "grad_norm": 2.334904670715332, "learning_rate": 8.406102753335226e-05, "loss": 0.022166568040847778, "step": 56180 }, { "epoch": 15.949474879364178, "grad_norm": 4.428810119628906, "learning_rate": 8.40581890434289e-05, "loss": 0.02068374752998352, "step": 56190 }, { "epoch": 15.95231336928754, "grad_norm": 0.9818922877311707, "learning_rate": 8.405535055350554e-05, "loss": 0.0217548206448555, "step": 56200 }, { "epoch": 15.9551518592109, "grad_norm": 9.420214653015137, "learning_rate": 8.405251206358218e-05, "loss": 0.025222313404083253, "step": 56210 }, { "epoch": 15.95799034913426, "grad_norm": 5.281280517578125, "learning_rate": 8.404967357365881e-05, "loss": 0.025636616349220275, "step": 56220 }, { "epoch": 15.960828839057621, "grad_norm": 6.161774635314941, "learning_rate": 8.404683508373545e-05, "loss": 0.029733723402023314, "step": 56230 }, { "epoch": 15.963667328980982, "grad_norm": 5.249983787536621, "learning_rate": 8.40439965938121e-05, "loss": 0.020673768222332002, "step": 56240 }, { "epoch": 15.966505818904343, "grad_norm": 0.741504967212677, "learning_rate": 8.404115810388874e-05, "loss": 0.02231096476316452, "step": 56250 }, { "epoch": 15.969344308827704, "grad_norm": 2.4594666957855225, "learning_rate": 8.403831961396538e-05, "loss": 0.03507919013500214, "step": 56260 }, { "epoch": 15.972182798751064, "grad_norm": 8.734623908996582, "learning_rate": 8.403548112404202e-05, "loss": 0.014858177304267884, "step": 56270 }, { "epoch": 15.975021288674425, "grad_norm": 3.020758867263794, "learning_rate": 8.403264263411865e-05, "loss": 0.027657362818717956, "step": 56280 }, { "epoch": 15.977859778597786, "grad_norm": 0.1763543337583542, "learning_rate": 8.402980414419529e-05, "loss": 0.01789838671684265, "step": 56290 }, { "epoch": 15.980698268521147, "grad_norm": 5.489733695983887, "learning_rate": 8.402696565427193e-05, "loss": 0.022160105407238007, "step": 56300 }, { "epoch": 15.983536758444508, "grad_norm": 4.364232063293457, "learning_rate": 8.402412716434857e-05, "loss": 0.02629002630710602, "step": 56310 }, { "epoch": 15.986375248367867, "grad_norm": 14.296595573425293, "learning_rate": 8.402128867442521e-05, "loss": 0.03456106185913086, "step": 56320 }, { "epoch": 15.989213738291228, "grad_norm": 9.274201393127441, "learning_rate": 8.401845018450185e-05, "loss": 0.03412656188011169, "step": 56330 }, { "epoch": 15.99205222821459, "grad_norm": 7.401856899261475, "learning_rate": 8.40156116945785e-05, "loss": 0.031022083759307862, "step": 56340 }, { "epoch": 15.99489071813795, "grad_norm": 7.272182464599609, "learning_rate": 8.401277320465512e-05, "loss": 0.046731477975845336, "step": 56350 }, { "epoch": 15.997729208061312, "grad_norm": 7.696033000946045, "learning_rate": 8.400993471473176e-05, "loss": 0.018336690962314606, "step": 56360 }, { "epoch": 16.00056769798467, "grad_norm": 3.114326238632202, "learning_rate": 8.40070962248084e-05, "loss": 0.022405683994293213, "step": 56370 }, { "epoch": 16.003406187908034, "grad_norm": 0.5919771790504456, "learning_rate": 8.400425773488505e-05, "loss": 0.010076545178890228, "step": 56380 }, { "epoch": 16.006244677831393, "grad_norm": 4.794750690460205, "learning_rate": 8.400141924496169e-05, "loss": 0.029783767461776734, "step": 56390 }, { "epoch": 16.009083167754753, "grad_norm": 1.4368441104888916, "learning_rate": 8.399858075503833e-05, "loss": 0.01736864298582077, "step": 56400 }, { "epoch": 16.011921657678116, "grad_norm": 1.3042086362838745, "learning_rate": 8.399574226511496e-05, "loss": 0.008252985030412673, "step": 56410 }, { "epoch": 16.014760147601475, "grad_norm": 12.989331245422363, "learning_rate": 8.39929037751916e-05, "loss": 0.04838740825653076, "step": 56420 }, { "epoch": 16.017598637524838, "grad_norm": 1.1730929613113403, "learning_rate": 8.399006528526824e-05, "loss": 0.01593453735113144, "step": 56430 }, { "epoch": 16.020437127448197, "grad_norm": 14.540125846862793, "learning_rate": 8.398722679534488e-05, "loss": 0.015429207682609558, "step": 56440 }, { "epoch": 16.02327561737156, "grad_norm": 1.9196276664733887, "learning_rate": 8.398438830542152e-05, "loss": 0.03693382143974304, "step": 56450 }, { "epoch": 16.02611410729492, "grad_norm": 3.64133358001709, "learning_rate": 8.398154981549816e-05, "loss": 0.01034165620803833, "step": 56460 }, { "epoch": 16.02895259721828, "grad_norm": 0.6410951018333435, "learning_rate": 8.39787113255748e-05, "loss": 0.020341362059116363, "step": 56470 }, { "epoch": 16.03179108714164, "grad_norm": 0.3791448175907135, "learning_rate": 8.397587283565143e-05, "loss": 0.020900492370128632, "step": 56480 }, { "epoch": 16.034629577065, "grad_norm": 2.473736047744751, "learning_rate": 8.397303434572807e-05, "loss": 0.019682198762893677, "step": 56490 }, { "epoch": 16.037468066988364, "grad_norm": 0.5253610014915466, "learning_rate": 8.397019585580472e-05, "loss": 0.02010924816131592, "step": 56500 }, { "epoch": 16.037468066988364, "eval_accuracy": 0.9591148979462072, "eval_loss": 0.12914350628852844, "eval_runtime": 31.935, "eval_samples_per_second": 492.469, "eval_steps_per_second": 7.703, "step": 56500 }, { "epoch": 16.040306556911723, "grad_norm": 2.5677573680877686, "learning_rate": 8.396735736588136e-05, "loss": 0.017707046866416932, "step": 56510 }, { "epoch": 16.043145046835082, "grad_norm": 12.400981903076172, "learning_rate": 8.3964518875958e-05, "loss": 0.02374941110610962, "step": 56520 }, { "epoch": 16.045983536758445, "grad_norm": 5.282382488250732, "learning_rate": 8.396168038603464e-05, "loss": 0.01573721468448639, "step": 56530 }, { "epoch": 16.048822026681805, "grad_norm": 7.666718006134033, "learning_rate": 8.395884189611127e-05, "loss": 0.010613903403282166, "step": 56540 }, { "epoch": 16.051660516605168, "grad_norm": 3.9401490688323975, "learning_rate": 8.395600340618791e-05, "loss": 0.016375108063220976, "step": 56550 }, { "epoch": 16.054499006528527, "grad_norm": 3.0923590660095215, "learning_rate": 8.395316491626455e-05, "loss": 0.024221356213092803, "step": 56560 }, { "epoch": 16.057337496451886, "grad_norm": 8.81050968170166, "learning_rate": 8.395032642634119e-05, "loss": 0.012352322041988373, "step": 56570 }, { "epoch": 16.06017598637525, "grad_norm": 3.850731611251831, "learning_rate": 8.394748793641783e-05, "loss": 0.010475626587867737, "step": 56580 }, { "epoch": 16.06301447629861, "grad_norm": 5.724727630615234, "learning_rate": 8.394464944649448e-05, "loss": 0.011408202350139618, "step": 56590 }, { "epoch": 16.06585296622197, "grad_norm": 2.9450466632843018, "learning_rate": 8.394181095657112e-05, "loss": 0.009185663610696792, "step": 56600 }, { "epoch": 16.06869145614533, "grad_norm": 0.4280485510826111, "learning_rate": 8.393897246664774e-05, "loss": 0.013979765772819518, "step": 56610 }, { "epoch": 16.07152994606869, "grad_norm": 0.3558211922645569, "learning_rate": 8.393613397672439e-05, "loss": 0.019109795987606048, "step": 56620 }, { "epoch": 16.074368435992053, "grad_norm": 5.710763931274414, "learning_rate": 8.393329548680103e-05, "loss": 0.01879764497280121, "step": 56630 }, { "epoch": 16.077206925915412, "grad_norm": 2.8225626945495605, "learning_rate": 8.393045699687765e-05, "loss": 0.014234524965286256, "step": 56640 }, { "epoch": 16.080045415838775, "grad_norm": 1.5015414953231812, "learning_rate": 8.392761850695431e-05, "loss": 0.006666857749223709, "step": 56650 }, { "epoch": 16.082883905762134, "grad_norm": 1.0233074426651, "learning_rate": 8.392478001703095e-05, "loss": 0.022152113914489745, "step": 56660 }, { "epoch": 16.085722395685494, "grad_norm": 1.9204943180084229, "learning_rate": 8.392194152710758e-05, "loss": 0.019176481664180754, "step": 56670 }, { "epoch": 16.088560885608857, "grad_norm": 2.049398183822632, "learning_rate": 8.391910303718422e-05, "loss": 0.013513170182704926, "step": 56680 }, { "epoch": 16.091399375532216, "grad_norm": 5.297300338745117, "learning_rate": 8.391626454726086e-05, "loss": 0.02534930408000946, "step": 56690 }, { "epoch": 16.09423786545558, "grad_norm": 3.6562960147857666, "learning_rate": 8.39134260573375e-05, "loss": 0.020561394095420838, "step": 56700 }, { "epoch": 16.097076355378938, "grad_norm": 9.539783477783203, "learning_rate": 8.391058756741414e-05, "loss": 0.02547207772731781, "step": 56710 }, { "epoch": 16.099914845302298, "grad_norm": 9.702581405639648, "learning_rate": 8.390774907749079e-05, "loss": 0.03833670914173126, "step": 56720 }, { "epoch": 16.10275333522566, "grad_norm": 6.7406840324401855, "learning_rate": 8.390491058756741e-05, "loss": 0.01657630205154419, "step": 56730 }, { "epoch": 16.10559182514902, "grad_norm": 1.8221476078033447, "learning_rate": 8.390207209764406e-05, "loss": 0.016314813494682313, "step": 56740 }, { "epoch": 16.108430315072383, "grad_norm": 1.935363531112671, "learning_rate": 8.38992336077207e-05, "loss": 0.009916795045137405, "step": 56750 }, { "epoch": 16.111268804995742, "grad_norm": 1.9656075239181519, "learning_rate": 8.389639511779734e-05, "loss": 0.02160351872444153, "step": 56760 }, { "epoch": 16.1141072949191, "grad_norm": 10.291141510009766, "learning_rate": 8.389355662787397e-05, "loss": 0.036981242895126346, "step": 56770 }, { "epoch": 16.116945784842464, "grad_norm": 8.028336524963379, "learning_rate": 8.389071813795062e-05, "loss": 0.02217787057161331, "step": 56780 }, { "epoch": 16.119784274765824, "grad_norm": 10.198317527770996, "learning_rate": 8.388787964802726e-05, "loss": 0.020279482007026672, "step": 56790 }, { "epoch": 16.122622764689186, "grad_norm": 8.702027320861816, "learning_rate": 8.388504115810389e-05, "loss": 0.023785114288330078, "step": 56800 }, { "epoch": 16.125461254612546, "grad_norm": 9.861729621887207, "learning_rate": 8.388220266818053e-05, "loss": 0.02164105325937271, "step": 56810 }, { "epoch": 16.12829974453591, "grad_norm": 2.009347915649414, "learning_rate": 8.387936417825717e-05, "loss": 0.015425843000411988, "step": 56820 }, { "epoch": 16.131138234459268, "grad_norm": 4.039822578430176, "learning_rate": 8.38765256883338e-05, "loss": 0.02053661197423935, "step": 56830 }, { "epoch": 16.133976724382627, "grad_norm": 0.49414023756980896, "learning_rate": 8.387368719841044e-05, "loss": 0.01741681694984436, "step": 56840 }, { "epoch": 16.13681521430599, "grad_norm": 12.763054847717285, "learning_rate": 8.38708487084871e-05, "loss": 0.0228745698928833, "step": 56850 }, { "epoch": 16.13965370422935, "grad_norm": 7.280646324157715, "learning_rate": 8.386801021856372e-05, "loss": 0.014351931214332581, "step": 56860 }, { "epoch": 16.142492194152712, "grad_norm": 5.627215385437012, "learning_rate": 8.386517172864037e-05, "loss": 0.01622956097126007, "step": 56870 }, { "epoch": 16.14533068407607, "grad_norm": 4.212461948394775, "learning_rate": 8.386233323871701e-05, "loss": 0.01635368764400482, "step": 56880 }, { "epoch": 16.14816917399943, "grad_norm": 7.619419097900391, "learning_rate": 8.385949474879365e-05, "loss": 0.025797906517982482, "step": 56890 }, { "epoch": 16.151007663922794, "grad_norm": 0.8816103935241699, "learning_rate": 8.385665625887028e-05, "loss": 0.015209449827671051, "step": 56900 }, { "epoch": 16.153846153846153, "grad_norm": 1.5230587720870972, "learning_rate": 8.385381776894693e-05, "loss": 0.020472793281078337, "step": 56910 }, { "epoch": 16.156684643769516, "grad_norm": 14.331717491149902, "learning_rate": 8.385097927902357e-05, "loss": 0.028105777502059937, "step": 56920 }, { "epoch": 16.159523133692876, "grad_norm": 1.133758306503296, "learning_rate": 8.38481407891002e-05, "loss": 0.01614253968000412, "step": 56930 }, { "epoch": 16.162361623616235, "grad_norm": 10.500707626342773, "learning_rate": 8.384530229917684e-05, "loss": 0.027049621939659117, "step": 56940 }, { "epoch": 16.165200113539598, "grad_norm": 9.087316513061523, "learning_rate": 8.384246380925348e-05, "loss": 0.030893945693969728, "step": 56950 }, { "epoch": 16.168038603462957, "grad_norm": 10.016810417175293, "learning_rate": 8.383962531933011e-05, "loss": 0.019363540410995483, "step": 56960 }, { "epoch": 16.17087709338632, "grad_norm": 1.9782463312149048, "learning_rate": 8.383678682940675e-05, "loss": 0.020191127061843873, "step": 56970 }, { "epoch": 16.17371558330968, "grad_norm": 10.08120059967041, "learning_rate": 8.383394833948341e-05, "loss": 0.033452361822128296, "step": 56980 }, { "epoch": 16.17655407323304, "grad_norm": 6.860118865966797, "learning_rate": 8.383110984956004e-05, "loss": 0.02506357729434967, "step": 56990 }, { "epoch": 16.1793925631564, "grad_norm": 14.535609245300293, "learning_rate": 8.382827135963668e-05, "loss": 0.035907596349716187, "step": 57000 }, { "epoch": 16.1793925631564, "eval_accuracy": 0.9640745215234946, "eval_loss": 0.11561466753482819, "eval_runtime": 31.6189, "eval_samples_per_second": 497.392, "eval_steps_per_second": 7.78, "step": 57000 }, { "epoch": 16.18223105307976, "grad_norm": 3.973383903503418, "learning_rate": 8.382543286971332e-05, "loss": 0.025158500671386717, "step": 57010 }, { "epoch": 16.185069543003124, "grad_norm": 4.1740312576293945, "learning_rate": 8.382259437978996e-05, "loss": 0.011600501090288162, "step": 57020 }, { "epoch": 16.187908032926483, "grad_norm": 2.513554096221924, "learning_rate": 8.381975588986659e-05, "loss": 0.021694007515907287, "step": 57030 }, { "epoch": 16.190746522849842, "grad_norm": 1.7866590023040771, "learning_rate": 8.381691739994323e-05, "loss": 0.014931215345859528, "step": 57040 }, { "epoch": 16.193585012773205, "grad_norm": 2.149397611618042, "learning_rate": 8.381407891001988e-05, "loss": 0.02089792340993881, "step": 57050 }, { "epoch": 16.196423502696565, "grad_norm": 9.424501419067383, "learning_rate": 8.381124042009651e-05, "loss": 0.02378803789615631, "step": 57060 }, { "epoch": 16.199261992619927, "grad_norm": 1.4272844791412354, "learning_rate": 8.380840193017315e-05, "loss": 0.032920295000076295, "step": 57070 }, { "epoch": 16.202100482543287, "grad_norm": 4.679747104644775, "learning_rate": 8.38055634402498e-05, "loss": 0.031467515230178836, "step": 57080 }, { "epoch": 16.204938972466646, "grad_norm": 0.3514966368675232, "learning_rate": 8.380272495032642e-05, "loss": 0.016771838068962097, "step": 57090 }, { "epoch": 16.20777746239001, "grad_norm": 8.899028778076172, "learning_rate": 8.379988646040306e-05, "loss": 0.03600646853446961, "step": 57100 }, { "epoch": 16.21061595231337, "grad_norm": 10.548735618591309, "learning_rate": 8.379704797047972e-05, "loss": 0.022897185385227205, "step": 57110 }, { "epoch": 16.21345444223673, "grad_norm": 6.314229965209961, "learning_rate": 8.379420948055635e-05, "loss": 0.03035309910774231, "step": 57120 }, { "epoch": 16.21629293216009, "grad_norm": 3.9411020278930664, "learning_rate": 8.379137099063299e-05, "loss": 0.01534806489944458, "step": 57130 }, { "epoch": 16.21913142208345, "grad_norm": 1.3754514455795288, "learning_rate": 8.378853250070963e-05, "loss": 0.017643196880817412, "step": 57140 }, { "epoch": 16.221969912006813, "grad_norm": 5.0410614013671875, "learning_rate": 8.378569401078627e-05, "loss": 0.010111761093139649, "step": 57150 }, { "epoch": 16.224808401930172, "grad_norm": 4.362121105194092, "learning_rate": 8.37828555208629e-05, "loss": 0.03830148577690125, "step": 57160 }, { "epoch": 16.227646891853535, "grad_norm": 11.422591209411621, "learning_rate": 8.378001703093954e-05, "loss": 0.02887851595878601, "step": 57170 }, { "epoch": 16.230485381776894, "grad_norm": 1.5751036405563354, "learning_rate": 8.37771785410162e-05, "loss": 0.04523343145847321, "step": 57180 }, { "epoch": 16.233323871700254, "grad_norm": 0.8377181887626648, "learning_rate": 8.377434005109282e-05, "loss": 0.026569089293479918, "step": 57190 }, { "epoch": 16.236162361623617, "grad_norm": 4.400909423828125, "learning_rate": 8.377150156116946e-05, "loss": 0.016935981810092926, "step": 57200 }, { "epoch": 16.239000851546976, "grad_norm": 11.719658851623535, "learning_rate": 8.37686630712461e-05, "loss": 0.014204981923103332, "step": 57210 }, { "epoch": 16.24183934147034, "grad_norm": 4.207752704620361, "learning_rate": 8.376582458132273e-05, "loss": 0.01809924840927124, "step": 57220 }, { "epoch": 16.244677831393698, "grad_norm": 7.823772430419922, "learning_rate": 8.376298609139937e-05, "loss": 0.018288904428482057, "step": 57230 }, { "epoch": 16.24751632131706, "grad_norm": 3.182079553604126, "learning_rate": 8.376014760147602e-05, "loss": 0.04123339354991913, "step": 57240 }, { "epoch": 16.25035481124042, "grad_norm": 4.608572959899902, "learning_rate": 8.375730911155266e-05, "loss": 0.025735455751419067, "step": 57250 }, { "epoch": 16.25319330116378, "grad_norm": 11.234136581420898, "learning_rate": 8.37544706216293e-05, "loss": 0.029076090455055235, "step": 57260 }, { "epoch": 16.256031791087143, "grad_norm": 18.861520767211914, "learning_rate": 8.375163213170594e-05, "loss": 0.02638309299945831, "step": 57270 }, { "epoch": 16.258870281010502, "grad_norm": 6.439869403839111, "learning_rate": 8.374879364178258e-05, "loss": 0.03639518618583679, "step": 57280 }, { "epoch": 16.261708770933865, "grad_norm": 0.38243794441223145, "learning_rate": 8.374595515185921e-05, "loss": 0.03366603255271912, "step": 57290 }, { "epoch": 16.264547260857224, "grad_norm": 3.2586543560028076, "learning_rate": 8.374311666193585e-05, "loss": 0.018071483075618743, "step": 57300 }, { "epoch": 16.267385750780583, "grad_norm": 17.52294921875, "learning_rate": 8.37402781720125e-05, "loss": 0.022135978937149046, "step": 57310 }, { "epoch": 16.270224240703946, "grad_norm": 3.7334818840026855, "learning_rate": 8.373743968208913e-05, "loss": 0.019068683683872222, "step": 57320 }, { "epoch": 16.273062730627306, "grad_norm": 1.8096365928649902, "learning_rate": 8.373460119216577e-05, "loss": 0.03038056194782257, "step": 57330 }, { "epoch": 16.27590122055067, "grad_norm": 1.243525505065918, "learning_rate": 8.373176270224242e-05, "loss": 0.020485574007034303, "step": 57340 }, { "epoch": 16.278739710474028, "grad_norm": 8.22391128540039, "learning_rate": 8.372892421231904e-05, "loss": 0.03525983989238739, "step": 57350 }, { "epoch": 16.281578200397387, "grad_norm": 9.366171836853027, "learning_rate": 8.372608572239568e-05, "loss": 0.037952476739883424, "step": 57360 }, { "epoch": 16.28441669032075, "grad_norm": 3.075228691101074, "learning_rate": 8.372324723247233e-05, "loss": 0.025078901648521425, "step": 57370 }, { "epoch": 16.28725518024411, "grad_norm": 5.465268611907959, "learning_rate": 8.372040874254897e-05, "loss": 0.036406916379928586, "step": 57380 }, { "epoch": 16.290093670167472, "grad_norm": 2.449941873550415, "learning_rate": 8.371757025262561e-05, "loss": 0.012875911593437196, "step": 57390 }, { "epoch": 16.29293216009083, "grad_norm": 7.457828521728516, "learning_rate": 8.371473176270225e-05, "loss": 0.019717252254486083, "step": 57400 }, { "epoch": 16.29577065001419, "grad_norm": 8.481057167053223, "learning_rate": 8.371189327277889e-05, "loss": 0.015264761447906495, "step": 57410 }, { "epoch": 16.298609139937554, "grad_norm": 3.624656915664673, "learning_rate": 8.370905478285552e-05, "loss": 0.018616043031215668, "step": 57420 }, { "epoch": 16.301447629860913, "grad_norm": 5.0557403564453125, "learning_rate": 8.370621629293216e-05, "loss": 0.020565032958984375, "step": 57430 }, { "epoch": 16.304286119784276, "grad_norm": 2.042957067489624, "learning_rate": 8.37033778030088e-05, "loss": 0.024635624885559083, "step": 57440 }, { "epoch": 16.307124609707635, "grad_norm": 7.351503372192383, "learning_rate": 8.370053931308544e-05, "loss": 0.037080830335617064, "step": 57450 }, { "epoch": 16.309963099630995, "grad_norm": 10.515963554382324, "learning_rate": 8.369770082316208e-05, "loss": 0.025804191827774048, "step": 57460 }, { "epoch": 16.312801589554358, "grad_norm": 3.777642250061035, "learning_rate": 8.369486233323873e-05, "loss": 0.011177933216094971, "step": 57470 }, { "epoch": 16.315640079477717, "grad_norm": 6.366386413574219, "learning_rate": 8.369202384331535e-05, "loss": 0.031482675671577455, "step": 57480 }, { "epoch": 16.31847856940108, "grad_norm": 1.4926985502243042, "learning_rate": 8.3689185353392e-05, "loss": 0.018231646716594697, "step": 57490 }, { "epoch": 16.32131705932444, "grad_norm": 5.067385196685791, "learning_rate": 8.368634686346864e-05, "loss": 0.016429007053375244, "step": 57500 }, { "epoch": 16.32131705932444, "eval_accuracy": 0.9640745215234946, "eval_loss": 0.11266815662384033, "eval_runtime": 31.5059, "eval_samples_per_second": 499.176, "eval_steps_per_second": 7.808, "step": 57500 }, { "epoch": 16.3241555492478, "grad_norm": 3.4043490886688232, "learning_rate": 8.368350837354528e-05, "loss": 0.03504315614700317, "step": 57510 }, { "epoch": 16.32699403917116, "grad_norm": 5.386589527130127, "learning_rate": 8.368066988362192e-05, "loss": 0.013249245285987855, "step": 57520 }, { "epoch": 16.32983252909452, "grad_norm": 2.719717025756836, "learning_rate": 8.367783139369856e-05, "loss": 0.016923144459724426, "step": 57530 }, { "epoch": 16.332671019017884, "grad_norm": 1.1243789196014404, "learning_rate": 8.36749929037752e-05, "loss": 0.021266916394233705, "step": 57540 }, { "epoch": 16.335509508941243, "grad_norm": 3.2733981609344482, "learning_rate": 8.367215441385183e-05, "loss": 0.029911166429519652, "step": 57550 }, { "epoch": 16.338347998864602, "grad_norm": 3.686540126800537, "learning_rate": 8.366931592392847e-05, "loss": 0.01913428455591202, "step": 57560 }, { "epoch": 16.341186488787965, "grad_norm": 8.905865669250488, "learning_rate": 8.366647743400511e-05, "loss": 0.019753411412239075, "step": 57570 }, { "epoch": 16.344024978711325, "grad_norm": 2.0028085708618164, "learning_rate": 8.366363894408175e-05, "loss": 0.014767123758792878, "step": 57580 }, { "epoch": 16.346863468634687, "grad_norm": 8.34296989440918, "learning_rate": 8.36608004541584e-05, "loss": 0.01722279340028763, "step": 57590 }, { "epoch": 16.349701958558047, "grad_norm": 1.5266138315200806, "learning_rate": 8.365796196423504e-05, "loss": 0.02117514908313751, "step": 57600 }, { "epoch": 16.352540448481406, "grad_norm": 6.234114170074463, "learning_rate": 8.365512347431166e-05, "loss": 0.02072383016347885, "step": 57610 }, { "epoch": 16.35537893840477, "grad_norm": 7.617072582244873, "learning_rate": 8.36522849843883e-05, "loss": 0.02148745656013489, "step": 57620 }, { "epoch": 16.35821742832813, "grad_norm": 9.410679817199707, "learning_rate": 8.364944649446495e-05, "loss": 0.019561731815338136, "step": 57630 }, { "epoch": 16.36105591825149, "grad_norm": 3.7705769538879395, "learning_rate": 8.364660800454159e-05, "loss": 0.01389913707971573, "step": 57640 }, { "epoch": 16.36389440817485, "grad_norm": 4.646562576293945, "learning_rate": 8.364376951461823e-05, "loss": 0.02539682686328888, "step": 57650 }, { "epoch": 16.366732898098213, "grad_norm": 4.98703670501709, "learning_rate": 8.364093102469487e-05, "loss": 0.01383742094039917, "step": 57660 }, { "epoch": 16.369571388021573, "grad_norm": 7.859958648681641, "learning_rate": 8.36380925347715e-05, "loss": 0.04223350584506989, "step": 57670 }, { "epoch": 16.372409877944932, "grad_norm": 0.5775028467178345, "learning_rate": 8.363525404484814e-05, "loss": 0.03109874129295349, "step": 57680 }, { "epoch": 16.375248367868295, "grad_norm": 8.082518577575684, "learning_rate": 8.363241555492478e-05, "loss": 0.024701231718063356, "step": 57690 }, { "epoch": 16.378086857791654, "grad_norm": 3.0618207454681396, "learning_rate": 8.362957706500142e-05, "loss": 0.029514485597610475, "step": 57700 }, { "epoch": 16.380925347715017, "grad_norm": 3.8841583728790283, "learning_rate": 8.362673857507807e-05, "loss": 0.03482574224472046, "step": 57710 }, { "epoch": 16.383763837638377, "grad_norm": 7.379438877105713, "learning_rate": 8.36239000851547e-05, "loss": 0.021716126799583436, "step": 57720 }, { "epoch": 16.386602327561736, "grad_norm": 7.598274230957031, "learning_rate": 8.362106159523135e-05, "loss": 0.015579144656658172, "step": 57730 }, { "epoch": 16.3894408174851, "grad_norm": 1.0270344018936157, "learning_rate": 8.361822310530798e-05, "loss": 0.015401656925678252, "step": 57740 }, { "epoch": 16.392279307408458, "grad_norm": 3.3055825233459473, "learning_rate": 8.361538461538462e-05, "loss": 0.014878253638744354, "step": 57750 }, { "epoch": 16.39511779733182, "grad_norm": 0.4192140996456146, "learning_rate": 8.361254612546126e-05, "loss": 0.028828465938568117, "step": 57760 }, { "epoch": 16.39795628725518, "grad_norm": 1.9138195514678955, "learning_rate": 8.360970763553789e-05, "loss": 0.0379542350769043, "step": 57770 }, { "epoch": 16.40079477717854, "grad_norm": 8.491437911987305, "learning_rate": 8.360686914561454e-05, "loss": 0.03638689815998077, "step": 57780 }, { "epoch": 16.403633267101903, "grad_norm": 5.084245681762695, "learning_rate": 8.360403065569118e-05, "loss": 0.01824689209461212, "step": 57790 }, { "epoch": 16.406471757025262, "grad_norm": 12.565242767333984, "learning_rate": 8.360119216576781e-05, "loss": 0.017689278721809386, "step": 57800 }, { "epoch": 16.409310246948625, "grad_norm": 2.6246073246002197, "learning_rate": 8.359835367584445e-05, "loss": 0.013861703872680663, "step": 57810 }, { "epoch": 16.412148736871984, "grad_norm": 2.0474650859832764, "learning_rate": 8.359551518592109e-05, "loss": 0.024439796805381775, "step": 57820 }, { "epoch": 16.414987226795343, "grad_norm": 0.5992058515548706, "learning_rate": 8.359267669599773e-05, "loss": 0.009769780933856964, "step": 57830 }, { "epoch": 16.417825716718706, "grad_norm": 0.8119294047355652, "learning_rate": 8.358983820607438e-05, "loss": 0.016229116916656496, "step": 57840 }, { "epoch": 16.420664206642066, "grad_norm": 2.0341272354125977, "learning_rate": 8.358699971615102e-05, "loss": 0.021486197412014008, "step": 57850 }, { "epoch": 16.42350269656543, "grad_norm": 9.836889266967773, "learning_rate": 8.358416122622766e-05, "loss": 0.02454804927110672, "step": 57860 }, { "epoch": 16.426341186488788, "grad_norm": 0.10809176415205002, "learning_rate": 8.358132273630429e-05, "loss": 0.029806384444236757, "step": 57870 }, { "epoch": 16.429179676412147, "grad_norm": 2.020871639251709, "learning_rate": 8.357848424638093e-05, "loss": 0.03331070840358734, "step": 57880 }, { "epoch": 16.43201816633551, "grad_norm": 3.713773250579834, "learning_rate": 8.357564575645757e-05, "loss": 0.03311811685562134, "step": 57890 }, { "epoch": 16.43485665625887, "grad_norm": 7.971744537353516, "learning_rate": 8.35728072665342e-05, "loss": 0.023117053508758544, "step": 57900 }, { "epoch": 16.437695146182232, "grad_norm": 12.512771606445312, "learning_rate": 8.356996877661085e-05, "loss": 0.015941639244556428, "step": 57910 }, { "epoch": 16.44053363610559, "grad_norm": 3.032975435256958, "learning_rate": 8.35671302866875e-05, "loss": 0.026855844259262084, "step": 57920 }, { "epoch": 16.44337212602895, "grad_norm": 4.288434028625488, "learning_rate": 8.356429179676412e-05, "loss": 0.009644306451082229, "step": 57930 }, { "epoch": 16.446210615952314, "grad_norm": 7.594199180603027, "learning_rate": 8.356145330684076e-05, "loss": 0.021636776626110077, "step": 57940 }, { "epoch": 16.449049105875673, "grad_norm": 0.735028088092804, "learning_rate": 8.35586148169174e-05, "loss": 0.04598793387413025, "step": 57950 }, { "epoch": 16.451887595799036, "grad_norm": 5.356010913848877, "learning_rate": 8.355577632699405e-05, "loss": 0.026205438375473022, "step": 57960 }, { "epoch": 16.454726085722395, "grad_norm": 0.9242346286773682, "learning_rate": 8.355293783707067e-05, "loss": 0.023983070254325868, "step": 57970 }, { "epoch": 16.457564575645755, "grad_norm": 10.547995567321777, "learning_rate": 8.355009934714733e-05, "loss": 0.02581058442592621, "step": 57980 }, { "epoch": 16.460403065569118, "grad_norm": 17.382551193237305, "learning_rate": 8.354726085722397e-05, "loss": 0.02995608150959015, "step": 57990 }, { "epoch": 16.463241555492477, "grad_norm": 10.663006782531738, "learning_rate": 8.35444223673006e-05, "loss": 0.039972490072250365, "step": 58000 }, { "epoch": 16.463241555492477, "eval_accuracy": 0.956698671075221, "eval_loss": 0.1387191116809845, "eval_runtime": 32.4613, "eval_samples_per_second": 484.484, "eval_steps_per_second": 7.578, "step": 58000 }, { "epoch": 16.46608004541584, "grad_norm": 2.595625877380371, "learning_rate": 8.354158387737724e-05, "loss": 0.026548388600349426, "step": 58010 }, { "epoch": 16.4689185353392, "grad_norm": 3.061826705932617, "learning_rate": 8.353874538745388e-05, "loss": 0.028435802459716795, "step": 58020 }, { "epoch": 16.471757025262562, "grad_norm": 3.808105945587158, "learning_rate": 8.353590689753051e-05, "loss": 0.021903225779533388, "step": 58030 }, { "epoch": 16.47459551518592, "grad_norm": 14.7695951461792, "learning_rate": 8.353306840760716e-05, "loss": 0.03147459626197815, "step": 58040 }, { "epoch": 16.47743400510928, "grad_norm": 0.28551092743873596, "learning_rate": 8.35302299176838e-05, "loss": 0.028281348943710326, "step": 58050 }, { "epoch": 16.480272495032644, "grad_norm": 10.403939247131348, "learning_rate": 8.352739142776043e-05, "loss": 0.037682768702507016, "step": 58060 }, { "epoch": 16.483110984956003, "grad_norm": 8.957832336425781, "learning_rate": 8.352455293783707e-05, "loss": 0.024968945980072023, "step": 58070 }, { "epoch": 16.485949474879366, "grad_norm": 11.300315856933594, "learning_rate": 8.352171444791371e-05, "loss": 0.018435643613338472, "step": 58080 }, { "epoch": 16.488787964802725, "grad_norm": 9.459074020385742, "learning_rate": 8.351887595799036e-05, "loss": 0.030626952648162842, "step": 58090 }, { "epoch": 16.491626454726084, "grad_norm": 1.922043800354004, "learning_rate": 8.351603746806698e-05, "loss": 0.007774235308170318, "step": 58100 }, { "epoch": 16.494464944649447, "grad_norm": 4.1167731285095215, "learning_rate": 8.351319897814364e-05, "loss": 0.018003752827644347, "step": 58110 }, { "epoch": 16.497303434572807, "grad_norm": 7.29007625579834, "learning_rate": 8.351036048822028e-05, "loss": 0.02688470184803009, "step": 58120 }, { "epoch": 16.50014192449617, "grad_norm": 4.435036659240723, "learning_rate": 8.350752199829691e-05, "loss": 0.013380326330661774, "step": 58130 }, { "epoch": 16.50298041441953, "grad_norm": 0.9292892217636108, "learning_rate": 8.350468350837355e-05, "loss": 0.022972561419010162, "step": 58140 }, { "epoch": 16.50581890434289, "grad_norm": 6.01042366027832, "learning_rate": 8.350184501845019e-05, "loss": 0.0166106715798378, "step": 58150 }, { "epoch": 16.50865739426625, "grad_norm": 13.110054016113281, "learning_rate": 8.349900652852682e-05, "loss": 0.03879846334457397, "step": 58160 }, { "epoch": 16.51149588418961, "grad_norm": 0.42446956038475037, "learning_rate": 8.349616803860346e-05, "loss": 0.021194837987422943, "step": 58170 }, { "epoch": 16.514334374112973, "grad_norm": 6.163670063018799, "learning_rate": 8.349332954868011e-05, "loss": 0.03990270495414734, "step": 58180 }, { "epoch": 16.517172864036333, "grad_norm": 1.6155263185501099, "learning_rate": 8.349049105875674e-05, "loss": 0.023068951070308687, "step": 58190 }, { "epoch": 16.520011353959692, "grad_norm": 2.1065757274627686, "learning_rate": 8.348765256883338e-05, "loss": 0.03105611801147461, "step": 58200 }, { "epoch": 16.522849843883055, "grad_norm": 6.374539375305176, "learning_rate": 8.348481407891003e-05, "loss": 0.023350685834884644, "step": 58210 }, { "epoch": 16.525688333806414, "grad_norm": 8.94660472869873, "learning_rate": 8.348197558898667e-05, "loss": 0.026402127742767335, "step": 58220 }, { "epoch": 16.528526823729777, "grad_norm": 0.7009008526802063, "learning_rate": 8.34791370990633e-05, "loss": 0.02437014877796173, "step": 58230 }, { "epoch": 16.531365313653136, "grad_norm": 2.0632848739624023, "learning_rate": 8.347629860913995e-05, "loss": 0.015159733593463898, "step": 58240 }, { "epoch": 16.534203803576496, "grad_norm": 6.802942752838135, "learning_rate": 8.347346011921659e-05, "loss": 0.025207504630088806, "step": 58250 }, { "epoch": 16.53704229349986, "grad_norm": 2.2284159660339355, "learning_rate": 8.347062162929322e-05, "loss": 0.028232869505882264, "step": 58260 }, { "epoch": 16.539880783423218, "grad_norm": 6.927571773529053, "learning_rate": 8.346778313936986e-05, "loss": 0.011921193450689316, "step": 58270 }, { "epoch": 16.54271927334658, "grad_norm": 10.382787704467773, "learning_rate": 8.34649446494465e-05, "loss": 0.021317094564437866, "step": 58280 }, { "epoch": 16.54555776326994, "grad_norm": 13.224753379821777, "learning_rate": 8.346210615952313e-05, "loss": 0.041150495409965515, "step": 58290 }, { "epoch": 16.5483962531933, "grad_norm": 8.97059154510498, "learning_rate": 8.345926766959977e-05, "loss": 0.021298563480377196, "step": 58300 }, { "epoch": 16.551234743116662, "grad_norm": 3.087956666946411, "learning_rate": 8.345642917967643e-05, "loss": 0.017362378537654877, "step": 58310 }, { "epoch": 16.55407323304002, "grad_norm": 0.6908025145530701, "learning_rate": 8.345359068975305e-05, "loss": 0.010384636372327805, "step": 58320 }, { "epoch": 16.556911722963385, "grad_norm": 11.758752822875977, "learning_rate": 8.34507521998297e-05, "loss": 0.014615438878536224, "step": 58330 }, { "epoch": 16.559750212886744, "grad_norm": 7.739632606506348, "learning_rate": 8.344791370990634e-05, "loss": 0.02874714732170105, "step": 58340 }, { "epoch": 16.562588702810103, "grad_norm": 8.88198471069336, "learning_rate": 8.344507521998298e-05, "loss": 0.021249204874038696, "step": 58350 }, { "epoch": 16.565427192733466, "grad_norm": 1.5235759019851685, "learning_rate": 8.34422367300596e-05, "loss": 0.029982563853263856, "step": 58360 }, { "epoch": 16.568265682656826, "grad_norm": 0.500368058681488, "learning_rate": 8.343939824013625e-05, "loss": 0.020798631012439728, "step": 58370 }, { "epoch": 16.57110417258019, "grad_norm": 0.6884090304374695, "learning_rate": 8.343655975021289e-05, "loss": 0.013786274194717407, "step": 58380 }, { "epoch": 16.573942662503548, "grad_norm": 7.296133995056152, "learning_rate": 8.343372126028953e-05, "loss": 0.025480160117149354, "step": 58390 }, { "epoch": 16.57678115242691, "grad_norm": 5.508730888366699, "learning_rate": 8.343088277036617e-05, "loss": 0.018541747331619264, "step": 58400 }, { "epoch": 16.57961964235027, "grad_norm": 10.622841835021973, "learning_rate": 8.342804428044281e-05, "loss": 0.013938701152801514, "step": 58410 }, { "epoch": 16.58245813227363, "grad_norm": 7.3429999351501465, "learning_rate": 8.342520579051944e-05, "loss": 0.033525124192237854, "step": 58420 }, { "epoch": 16.585296622196992, "grad_norm": 1.1720824241638184, "learning_rate": 8.342236730059608e-05, "loss": 0.026029011607170104, "step": 58430 }, { "epoch": 16.58813511212035, "grad_norm": 1.4224919080734253, "learning_rate": 8.341952881067274e-05, "loss": 0.03980385959148407, "step": 58440 }, { "epoch": 16.590973602043714, "grad_norm": 7.893923282623291, "learning_rate": 8.341669032074936e-05, "loss": 0.047191083431243896, "step": 58450 }, { "epoch": 16.593812091967074, "grad_norm": 1.7177177667617798, "learning_rate": 8.3413851830826e-05, "loss": 0.01860113739967346, "step": 58460 }, { "epoch": 16.596650581890433, "grad_norm": 1.3261648416519165, "learning_rate": 8.341101334090265e-05, "loss": 0.027608656883239747, "step": 58470 }, { "epoch": 16.599489071813796, "grad_norm": 11.521811485290527, "learning_rate": 8.340845869997161e-05, "loss": 0.03972205221652984, "step": 58480 }, { "epoch": 16.602327561737155, "grad_norm": 8.218220710754395, "learning_rate": 8.340562021004827e-05, "loss": 0.02715291976928711, "step": 58490 }, { "epoch": 16.605166051660518, "grad_norm": 6.369820594787598, "learning_rate": 8.34027817201249e-05, "loss": 0.030585858225822448, "step": 58500 }, { "epoch": 16.605166051660518, "eval_accuracy": 0.9649647103707001, "eval_loss": 0.11148511618375778, "eval_runtime": 31.7598, "eval_samples_per_second": 495.186, "eval_steps_per_second": 7.746, "step": 58500 }, { "epoch": 16.608004541583878, "grad_norm": 5.023155212402344, "learning_rate": 8.339994323020154e-05, "loss": 0.009741977602243424, "step": 58510 }, { "epoch": 16.610843031507237, "grad_norm": 11.585160255432129, "learning_rate": 8.339710474027818e-05, "loss": 0.02634880244731903, "step": 58520 }, { "epoch": 16.6136815214306, "grad_norm": 6.228874683380127, "learning_rate": 8.33942662503548e-05, "loss": 0.01261507123708725, "step": 58530 }, { "epoch": 16.61652001135396, "grad_norm": 0.7988203167915344, "learning_rate": 8.339142776043145e-05, "loss": 0.0111713707447052, "step": 58540 }, { "epoch": 16.619358501277322, "grad_norm": 11.321148872375488, "learning_rate": 8.338858927050809e-05, "loss": 0.0320526659488678, "step": 58550 }, { "epoch": 16.62219699120068, "grad_norm": 9.522676467895508, "learning_rate": 8.338575078058473e-05, "loss": 0.015048983693122863, "step": 58560 }, { "epoch": 16.62503548112404, "grad_norm": 17.335908889770508, "learning_rate": 8.338291229066137e-05, "loss": 0.028058427572250366, "step": 58570 }, { "epoch": 16.627873971047404, "grad_norm": 2.0620009899139404, "learning_rate": 8.338007380073801e-05, "loss": 0.032004249095916745, "step": 58580 }, { "epoch": 16.630712460970763, "grad_norm": 4.046170711517334, "learning_rate": 8.337723531081465e-05, "loss": 0.0275095134973526, "step": 58590 }, { "epoch": 16.633550950894126, "grad_norm": 6.5045061111450195, "learning_rate": 8.337439682089128e-05, "loss": 0.03998938202857971, "step": 58600 }, { "epoch": 16.636389440817485, "grad_norm": 1.0320584774017334, "learning_rate": 8.337155833096792e-05, "loss": 0.029005658626556397, "step": 58610 }, { "epoch": 16.639227930740844, "grad_norm": 8.144086837768555, "learning_rate": 8.336871984104458e-05, "loss": 0.03329362571239471, "step": 58620 }, { "epoch": 16.642066420664207, "grad_norm": 1.1369993686676025, "learning_rate": 8.336588135112121e-05, "loss": 0.007962089776992799, "step": 58630 }, { "epoch": 16.644904910587567, "grad_norm": 9.268597602844238, "learning_rate": 8.336304286119785e-05, "loss": 0.028556400537490846, "step": 58640 }, { "epoch": 16.64774340051093, "grad_norm": 8.676448822021484, "learning_rate": 8.336020437127449e-05, "loss": 0.01715867519378662, "step": 58650 }, { "epoch": 16.65058189043429, "grad_norm": 7.841867446899414, "learning_rate": 8.335736588135112e-05, "loss": 0.021545688807964324, "step": 58660 }, { "epoch": 16.653420380357648, "grad_norm": 0.9473310112953186, "learning_rate": 8.335452739142776e-05, "loss": 0.011715412139892578, "step": 58670 }, { "epoch": 16.65625887028101, "grad_norm": 1.8944321870803833, "learning_rate": 8.33516889015044e-05, "loss": 0.032149726152420045, "step": 58680 }, { "epoch": 16.65909736020437, "grad_norm": 1.2174142599105835, "learning_rate": 8.334885041158104e-05, "loss": 0.02364833503961563, "step": 58690 }, { "epoch": 16.661935850127733, "grad_norm": 0.9931562542915344, "learning_rate": 8.334601192165768e-05, "loss": 0.020965445041656493, "step": 58700 }, { "epoch": 16.664774340051093, "grad_norm": 2.5448696613311768, "learning_rate": 8.334317343173432e-05, "loss": 0.019027411937713623, "step": 58710 }, { "epoch": 16.667612829974452, "grad_norm": 1.6934963464736938, "learning_rate": 8.334033494181097e-05, "loss": 0.02984304428100586, "step": 58720 }, { "epoch": 16.670451319897815, "grad_norm": 7.586284637451172, "learning_rate": 8.33374964518876e-05, "loss": 0.021623696386814117, "step": 58730 }, { "epoch": 16.673289809821174, "grad_norm": 4.527450084686279, "learning_rate": 8.333465796196423e-05, "loss": 0.026447987556457518, "step": 58740 }, { "epoch": 16.676128299744537, "grad_norm": 7.088357925415039, "learning_rate": 8.333181947204088e-05, "loss": 0.027219468355178834, "step": 58750 }, { "epoch": 16.678966789667896, "grad_norm": 7.283918380737305, "learning_rate": 8.332898098211752e-05, "loss": 0.017597493529319764, "step": 58760 }, { "epoch": 16.68180527959126, "grad_norm": 0.988620400428772, "learning_rate": 8.332614249219416e-05, "loss": 0.04683432281017304, "step": 58770 }, { "epoch": 16.68464376951462, "grad_norm": 4.270298957824707, "learning_rate": 8.33233040022708e-05, "loss": 0.01735730916261673, "step": 58780 }, { "epoch": 16.687482259437978, "grad_norm": 4.433078289031982, "learning_rate": 8.332046551234743e-05, "loss": 0.018961824476718903, "step": 58790 }, { "epoch": 16.69032074936134, "grad_norm": 7.815520763397217, "learning_rate": 8.331762702242407e-05, "loss": 0.020434993505477905, "step": 58800 }, { "epoch": 16.6931592392847, "grad_norm": 9.323116302490234, "learning_rate": 8.331478853250071e-05, "loss": 0.03925870954990387, "step": 58810 }, { "epoch": 16.695997729208063, "grad_norm": 6.909570693969727, "learning_rate": 8.331195004257735e-05, "loss": 0.027740225195884705, "step": 58820 }, { "epoch": 16.698836219131422, "grad_norm": 7.167376518249512, "learning_rate": 8.3309111552654e-05, "loss": 0.028341460227966308, "step": 58830 }, { "epoch": 16.70167470905478, "grad_norm": 6.4705705642700195, "learning_rate": 8.330627306273064e-05, "loss": 0.018236668407917024, "step": 58840 }, { "epoch": 16.704513198978145, "grad_norm": 4.500127792358398, "learning_rate": 8.330343457280728e-05, "loss": 0.020041263103485106, "step": 58850 }, { "epoch": 16.707351688901504, "grad_norm": 11.18964672088623, "learning_rate": 8.33005960828839e-05, "loss": 0.022896376252174378, "step": 58860 }, { "epoch": 16.710190178824867, "grad_norm": 5.560362339019775, "learning_rate": 8.329775759296055e-05, "loss": 0.012195299565792083, "step": 58870 }, { "epoch": 16.713028668748226, "grad_norm": 10.360980033874512, "learning_rate": 8.329491910303719e-05, "loss": 0.04283861517906189, "step": 58880 }, { "epoch": 16.715867158671585, "grad_norm": 3.417398691177368, "learning_rate": 8.329208061311383e-05, "loss": 0.028645554184913637, "step": 58890 }, { "epoch": 16.71870564859495, "grad_norm": 9.850347518920898, "learning_rate": 8.328924212319047e-05, "loss": 0.01909451186656952, "step": 58900 }, { "epoch": 16.721544138518308, "grad_norm": 1.0660990476608276, "learning_rate": 8.328640363326711e-05, "loss": 0.016973789036273956, "step": 58910 }, { "epoch": 16.72438262844167, "grad_norm": 2.129889965057373, "learning_rate": 8.328356514334374e-05, "loss": 0.014286068081855775, "step": 58920 }, { "epoch": 16.72722111836503, "grad_norm": 16.273263931274414, "learning_rate": 8.328072665342038e-05, "loss": 0.0229129821062088, "step": 58930 }, { "epoch": 16.73005960828839, "grad_norm": 2.727794885635376, "learning_rate": 8.327788816349702e-05, "loss": 0.02416025400161743, "step": 58940 }, { "epoch": 16.732898098211752, "grad_norm": 1.6632486581802368, "learning_rate": 8.327504967357366e-05, "loss": 0.03493282496929169, "step": 58950 }, { "epoch": 16.73573658813511, "grad_norm": 0.533372700214386, "learning_rate": 8.32722111836503e-05, "loss": 0.021981509029865266, "step": 58960 }, { "epoch": 16.738575078058474, "grad_norm": 3.5226762294769287, "learning_rate": 8.326937269372695e-05, "loss": 0.046072971820831296, "step": 58970 }, { "epoch": 16.741413567981834, "grad_norm": 5.532339572906494, "learning_rate": 8.326653420380359e-05, "loss": 0.02512103021144867, "step": 58980 }, { "epoch": 16.744252057905193, "grad_norm": 2.8009884357452393, "learning_rate": 8.326369571388022e-05, "loss": 0.01772029250860214, "step": 58990 }, { "epoch": 16.747090547828556, "grad_norm": 2.3903415203094482, "learning_rate": 8.326085722395686e-05, "loss": 0.012354165315628052, "step": 59000 }, { "epoch": 16.747090547828556, "eval_accuracy": 0.9645832008647549, "eval_loss": 0.10953931510448456, "eval_runtime": 31.5988, "eval_samples_per_second": 497.709, "eval_steps_per_second": 7.785, "step": 59000 }, { "epoch": 16.749929037751915, "grad_norm": 11.503759384155273, "learning_rate": 8.32580187340335e-05, "loss": 0.04351683557033539, "step": 59010 }, { "epoch": 16.752767527675278, "grad_norm": 3.0906810760498047, "learning_rate": 8.325518024411014e-05, "loss": 0.023315902054309844, "step": 59020 }, { "epoch": 16.755606017598637, "grad_norm": 6.008513927459717, "learning_rate": 8.325234175418678e-05, "loss": 0.01908901333808899, "step": 59030 }, { "epoch": 16.758444507521997, "grad_norm": 3.565109968185425, "learning_rate": 8.324950326426342e-05, "loss": 0.02006048709154129, "step": 59040 }, { "epoch": 16.76128299744536, "grad_norm": 2.8562307357788086, "learning_rate": 8.324666477434005e-05, "loss": 0.015707996487617493, "step": 59050 }, { "epoch": 16.76412148736872, "grad_norm": 2.4045372009277344, "learning_rate": 8.324382628441669e-05, "loss": 0.02519112527370453, "step": 59060 }, { "epoch": 16.766959977292082, "grad_norm": 14.199237823486328, "learning_rate": 8.324098779449333e-05, "loss": 0.0317169189453125, "step": 59070 }, { "epoch": 16.76979846721544, "grad_norm": 2.1875901222229004, "learning_rate": 8.323814930456997e-05, "loss": 0.04812244176864624, "step": 59080 }, { "epoch": 16.7726369571388, "grad_norm": 18.521930694580078, "learning_rate": 8.323531081464662e-05, "loss": 0.0313480406999588, "step": 59090 }, { "epoch": 16.775475447062163, "grad_norm": 8.342741966247559, "learning_rate": 8.323247232472326e-05, "loss": 0.02497076541185379, "step": 59100 }, { "epoch": 16.778313936985523, "grad_norm": 4.784858703613281, "learning_rate": 8.32296338347999e-05, "loss": 0.04681958556175232, "step": 59110 }, { "epoch": 16.781152426908886, "grad_norm": 0.9078540802001953, "learning_rate": 8.322679534487653e-05, "loss": 0.01849210411310196, "step": 59120 }, { "epoch": 16.783990916832245, "grad_norm": 5.355259895324707, "learning_rate": 8.322395685495317e-05, "loss": 0.025255858898162842, "step": 59130 }, { "epoch": 16.786829406755604, "grad_norm": 1.1002379655838013, "learning_rate": 8.322111836502981e-05, "loss": 0.023817498981952668, "step": 59140 }, { "epoch": 16.789667896678967, "grad_norm": 3.684565544128418, "learning_rate": 8.321827987510644e-05, "loss": 0.02081771641969681, "step": 59150 }, { "epoch": 16.792506386602327, "grad_norm": 3.4217519760131836, "learning_rate": 8.321544138518309e-05, "loss": 0.0380407840013504, "step": 59160 }, { "epoch": 16.79534487652569, "grad_norm": 4.720285415649414, "learning_rate": 8.321260289525973e-05, "loss": 0.028555679321289062, "step": 59170 }, { "epoch": 16.79818336644905, "grad_norm": 0.4961705207824707, "learning_rate": 8.320976440533636e-05, "loss": 0.030113762617111205, "step": 59180 }, { "epoch": 16.801021856372408, "grad_norm": 0.9919694662094116, "learning_rate": 8.3206925915413e-05, "loss": 0.030449867248535156, "step": 59190 }, { "epoch": 16.80386034629577, "grad_norm": 1.8493183851242065, "learning_rate": 8.320408742548964e-05, "loss": 0.015259429812431335, "step": 59200 }, { "epoch": 16.80669883621913, "grad_norm": 1.274405598640442, "learning_rate": 8.320124893556628e-05, "loss": 0.026192039251327515, "step": 59210 }, { "epoch": 16.809537326142493, "grad_norm": 0.2808459997177124, "learning_rate": 8.319841044564293e-05, "loss": 0.012450910359621047, "step": 59220 }, { "epoch": 16.812375816065853, "grad_norm": 1.5280746221542358, "learning_rate": 8.319557195571957e-05, "loss": 0.025003212690353393, "step": 59230 }, { "epoch": 16.815214305989215, "grad_norm": 3.0192575454711914, "learning_rate": 8.319273346579621e-05, "loss": 0.029531455039978026, "step": 59240 }, { "epoch": 16.818052795912575, "grad_norm": 5.311906814575195, "learning_rate": 8.318989497587284e-05, "loss": 0.02772926092147827, "step": 59250 }, { "epoch": 16.820891285835934, "grad_norm": 20.13922882080078, "learning_rate": 8.318705648594948e-05, "loss": 0.057452791929244997, "step": 59260 }, { "epoch": 16.823729775759297, "grad_norm": 9.292319297790527, "learning_rate": 8.318421799602612e-05, "loss": 0.02266717404127121, "step": 59270 }, { "epoch": 16.826568265682656, "grad_norm": 1.6498178243637085, "learning_rate": 8.318137950610275e-05, "loss": 0.03798863887786865, "step": 59280 }, { "epoch": 16.82940675560602, "grad_norm": 4.63967752456665, "learning_rate": 8.31785410161794e-05, "loss": 0.01955742686986923, "step": 59290 }, { "epoch": 16.83224524552938, "grad_norm": 0.9928451776504517, "learning_rate": 8.317570252625604e-05, "loss": 0.016254188120365144, "step": 59300 }, { "epoch": 16.835083735452738, "grad_norm": 7.91848611831665, "learning_rate": 8.317286403633267e-05, "loss": 0.024498085677623748, "step": 59310 }, { "epoch": 16.8379222253761, "grad_norm": 2.515704870223999, "learning_rate": 8.317002554640931e-05, "loss": 0.020629404485225676, "step": 59320 }, { "epoch": 16.84076071529946, "grad_norm": 1.1646966934204102, "learning_rate": 8.316718705648595e-05, "loss": 0.021324965357780456, "step": 59330 }, { "epoch": 16.843599205222823, "grad_norm": 5.102069854736328, "learning_rate": 8.31643485665626e-05, "loss": 0.02417541891336441, "step": 59340 }, { "epoch": 16.846437695146182, "grad_norm": 3.9034411907196045, "learning_rate": 8.316151007663924e-05, "loss": 0.04414777159690857, "step": 59350 }, { "epoch": 16.84927618506954, "grad_norm": 7.37410306930542, "learning_rate": 8.315867158671588e-05, "loss": 0.04508165717124939, "step": 59360 }, { "epoch": 16.852114674992904, "grad_norm": 4.48972749710083, "learning_rate": 8.31558330967925e-05, "loss": 0.026401233673095704, "step": 59370 }, { "epoch": 16.854953164916264, "grad_norm": 6.299874782562256, "learning_rate": 8.315299460686915e-05, "loss": 0.02049678862094879, "step": 59380 }, { "epoch": 16.857791654839627, "grad_norm": 3.2093350887298584, "learning_rate": 8.315015611694579e-05, "loss": 0.018880945444107056, "step": 59390 }, { "epoch": 16.860630144762986, "grad_norm": 6.709545135498047, "learning_rate": 8.314731762702243e-05, "loss": 0.015604633092880248, "step": 59400 }, { "epoch": 16.863468634686345, "grad_norm": 6.909293174743652, "learning_rate": 8.314447913709906e-05, "loss": 0.017441083490848542, "step": 59410 }, { "epoch": 16.86630712460971, "grad_norm": 2.27356219291687, "learning_rate": 8.314164064717571e-05, "loss": 0.01717013567686081, "step": 59420 }, { "epoch": 16.869145614533068, "grad_norm": 2.114701271057129, "learning_rate": 8.313880215725235e-05, "loss": 0.03882797956466675, "step": 59430 }, { "epoch": 16.87198410445643, "grad_norm": 1.572272777557373, "learning_rate": 8.313596366732898e-05, "loss": 0.022096604108810425, "step": 59440 }, { "epoch": 16.87482259437979, "grad_norm": 3.547701358795166, "learning_rate": 8.313312517740562e-05, "loss": 0.025479802489280702, "step": 59450 }, { "epoch": 16.87766108430315, "grad_norm": 8.331896781921387, "learning_rate": 8.313028668748226e-05, "loss": 0.027176687121391298, "step": 59460 }, { "epoch": 16.880499574226512, "grad_norm": 11.463362693786621, "learning_rate": 8.312744819755889e-05, "loss": 0.03849641978740692, "step": 59470 }, { "epoch": 16.88333806414987, "grad_norm": 6.4528489112854, "learning_rate": 8.312460970763553e-05, "loss": 0.029990372061729432, "step": 59480 }, { "epoch": 16.886176554073234, "grad_norm": 7.1564483642578125, "learning_rate": 8.312177121771219e-05, "loss": 0.023881636559963226, "step": 59490 }, { "epoch": 16.889015043996594, "grad_norm": 9.189780235290527, "learning_rate": 8.311893272778882e-05, "loss": 0.030628982186317443, "step": 59500 }, { "epoch": 16.889015043996594, "eval_accuracy": 0.965791314300248, "eval_loss": 0.11050871759653091, "eval_runtime": 31.5946, "eval_samples_per_second": 497.775, "eval_steps_per_second": 7.786, "step": 59500 }, { "epoch": 16.891853533919953, "grad_norm": 2.0772604942321777, "learning_rate": 8.311609423786546e-05, "loss": 0.030345064401626588, "step": 59510 }, { "epoch": 16.894692023843316, "grad_norm": 14.330068588256836, "learning_rate": 8.31132557479421e-05, "loss": 0.02902843654155731, "step": 59520 }, { "epoch": 16.897530513766675, "grad_norm": 4.2019243240356445, "learning_rate": 8.311041725801874e-05, "loss": 0.012346556037664413, "step": 59530 }, { "epoch": 16.900369003690038, "grad_norm": 10.613985061645508, "learning_rate": 8.310757876809537e-05, "loss": 0.04536116123199463, "step": 59540 }, { "epoch": 16.903207493613397, "grad_norm": 8.715611457824707, "learning_rate": 8.310474027817202e-05, "loss": 0.02634459137916565, "step": 59550 }, { "epoch": 16.906045983536757, "grad_norm": 9.356816291809082, "learning_rate": 8.310190178824866e-05, "loss": 0.033313590288162234, "step": 59560 }, { "epoch": 16.90888447346012, "grad_norm": 9.63582992553711, "learning_rate": 8.309906329832529e-05, "loss": 0.03300425112247467, "step": 59570 }, { "epoch": 16.91172296338348, "grad_norm": 4.4836320877075195, "learning_rate": 8.309622480840193e-05, "loss": 0.02680702805519104, "step": 59580 }, { "epoch": 16.914561453306842, "grad_norm": 8.611139297485352, "learning_rate": 8.309338631847858e-05, "loss": 0.020978954434394837, "step": 59590 }, { "epoch": 16.9173999432302, "grad_norm": 15.433649063110352, "learning_rate": 8.30905478285552e-05, "loss": 0.027812206745147706, "step": 59600 }, { "epoch": 16.920238433153564, "grad_norm": 5.864755153656006, "learning_rate": 8.308770933863184e-05, "loss": 0.029373759031295778, "step": 59610 }, { "epoch": 16.923076923076923, "grad_norm": 7.978265762329102, "learning_rate": 8.30848708487085e-05, "loss": 0.024450643360614775, "step": 59620 }, { "epoch": 16.925915413000283, "grad_norm": 3.213543176651001, "learning_rate": 8.308203235878513e-05, "loss": 0.018979364633560182, "step": 59630 }, { "epoch": 16.928753902923646, "grad_norm": 3.1002628803253174, "learning_rate": 8.307919386886177e-05, "loss": 0.02413661330938339, "step": 59640 }, { "epoch": 16.931592392847005, "grad_norm": 1.833674430847168, "learning_rate": 8.307635537893841e-05, "loss": 0.025863069295883178, "step": 59650 }, { "epoch": 16.934430882770368, "grad_norm": 0.3098338544368744, "learning_rate": 8.307351688901505e-05, "loss": 0.02430594861507416, "step": 59660 }, { "epoch": 16.937269372693727, "grad_norm": 2.6017730236053467, "learning_rate": 8.307067839909168e-05, "loss": 0.02899605631828308, "step": 59670 }, { "epoch": 16.940107862617086, "grad_norm": 11.811752319335938, "learning_rate": 8.306783990916832e-05, "loss": 0.051928967237472534, "step": 59680 }, { "epoch": 16.94294635254045, "grad_norm": 3.9172732830047607, "learning_rate": 8.306500141924498e-05, "loss": 0.012934400141239167, "step": 59690 }, { "epoch": 16.94578484246381, "grad_norm": 3.5357184410095215, "learning_rate": 8.30621629293216e-05, "loss": 0.022454433143138885, "step": 59700 }, { "epoch": 16.94862333238717, "grad_norm": 15.8103666305542, "learning_rate": 8.305932443939824e-05, "loss": 0.03347194194793701, "step": 59710 }, { "epoch": 16.95146182231053, "grad_norm": 5.368538856506348, "learning_rate": 8.305648594947489e-05, "loss": 0.018606145679950715, "step": 59720 }, { "epoch": 16.95430031223389, "grad_norm": 10.236196517944336, "learning_rate": 8.305364745955151e-05, "loss": 0.020950442552566527, "step": 59730 }, { "epoch": 16.957138802157253, "grad_norm": 6.344552516937256, "learning_rate": 8.305080896962816e-05, "loss": 0.02093493938446045, "step": 59740 }, { "epoch": 16.959977292080612, "grad_norm": 1.1742557287216187, "learning_rate": 8.304797047970481e-05, "loss": 0.012659642100334167, "step": 59750 }, { "epoch": 16.962815782003975, "grad_norm": 3.148052215576172, "learning_rate": 8.304513198978144e-05, "loss": 0.012905122339725494, "step": 59760 }, { "epoch": 16.965654271927335, "grad_norm": 2.044912576675415, "learning_rate": 8.304229349985808e-05, "loss": 0.012452203035354614, "step": 59770 }, { "epoch": 16.968492761850694, "grad_norm": 8.290250778198242, "learning_rate": 8.303945500993472e-05, "loss": 0.02918228507041931, "step": 59780 }, { "epoch": 16.971331251774057, "grad_norm": 0.7254201769828796, "learning_rate": 8.303661652001136e-05, "loss": 0.01370764970779419, "step": 59790 }, { "epoch": 16.974169741697416, "grad_norm": 1.0992618799209595, "learning_rate": 8.303377803008799e-05, "loss": 0.016495805978775025, "step": 59800 }, { "epoch": 16.97700823162078, "grad_norm": 8.768839836120605, "learning_rate": 8.303093954016463e-05, "loss": 0.020415471494197847, "step": 59810 }, { "epoch": 16.97984672154414, "grad_norm": 8.609142303466797, "learning_rate": 8.302810105024129e-05, "loss": 0.021872419118881225, "step": 59820 }, { "epoch": 16.982685211467498, "grad_norm": 9.52709674835205, "learning_rate": 8.302526256031791e-05, "loss": 0.036979538202285764, "step": 59830 }, { "epoch": 16.98552370139086, "grad_norm": 5.274742126464844, "learning_rate": 8.302242407039456e-05, "loss": 0.040659642219543456, "step": 59840 }, { "epoch": 16.98836219131422, "grad_norm": 1.3396081924438477, "learning_rate": 8.30195855804712e-05, "loss": 0.016694553196430206, "step": 59850 }, { "epoch": 16.991200681237583, "grad_norm": 12.425445556640625, "learning_rate": 8.301674709054782e-05, "loss": 0.023501750826835633, "step": 59860 }, { "epoch": 16.994039171160942, "grad_norm": 10.03705883026123, "learning_rate": 8.301390860062447e-05, "loss": 0.021814729273319244, "step": 59870 }, { "epoch": 16.9968776610843, "grad_norm": 8.44933032989502, "learning_rate": 8.301107011070111e-05, "loss": 0.049596580862998965, "step": 59880 }, { "epoch": 16.999716151007664, "grad_norm": 0.7388323545455933, "learning_rate": 8.300823162077775e-05, "loss": 0.027832764387130737, "step": 59890 }, { "epoch": 17.002554640931024, "grad_norm": 0.48154428601264954, "learning_rate": 8.300539313085439e-05, "loss": 0.02041080445051193, "step": 59900 }, { "epoch": 17.005393130854387, "grad_norm": 2.1173973083496094, "learning_rate": 8.300255464093103e-05, "loss": 0.007772276550531388, "step": 59910 }, { "epoch": 17.008231620777746, "grad_norm": 10.271995544433594, "learning_rate": 8.299971615100767e-05, "loss": 0.018062648177146912, "step": 59920 }, { "epoch": 17.011070110701105, "grad_norm": 5.4751787185668945, "learning_rate": 8.29968776610843e-05, "loss": 0.0222108393907547, "step": 59930 }, { "epoch": 17.013908600624468, "grad_norm": 10.109040260314941, "learning_rate": 8.299403917116094e-05, "loss": 0.011389323323965073, "step": 59940 }, { "epoch": 17.016747090547828, "grad_norm": 0.5950800180435181, "learning_rate": 8.29912006812376e-05, "loss": 0.022434996068477632, "step": 59950 }, { "epoch": 17.01958558047119, "grad_norm": 1.5476775169372559, "learning_rate": 8.298836219131423e-05, "loss": 0.011746164411306381, "step": 59960 }, { "epoch": 17.02242407039455, "grad_norm": 0.9059267640113831, "learning_rate": 8.298552370139087e-05, "loss": 0.01673087626695633, "step": 59970 }, { "epoch": 17.025262560317913, "grad_norm": 2.857586622238159, "learning_rate": 8.298268521146751e-05, "loss": 0.016063447296619415, "step": 59980 }, { "epoch": 17.028101050241272, "grad_norm": 7.521078586578369, "learning_rate": 8.297984672154414e-05, "loss": 0.010403391718864442, "step": 59990 }, { "epoch": 17.03093954016463, "grad_norm": 13.28499984741211, "learning_rate": 8.297700823162078e-05, "loss": 0.048198947310447694, "step": 60000 }, { "epoch": 17.03093954016463, "eval_accuracy": 0.9584154638519743, "eval_loss": 0.1367514282464981, "eval_runtime": 31.5703, "eval_samples_per_second": 498.158, "eval_steps_per_second": 7.792, "step": 60000 }, { "epoch": 17.033778030087994, "grad_norm": 1.726984977722168, "learning_rate": 8.297416974169742e-05, "loss": 0.02558598220348358, "step": 60010 }, { "epoch": 17.036616520011354, "grad_norm": 8.808225631713867, "learning_rate": 8.297133125177406e-05, "loss": 0.010977664589881897, "step": 60020 }, { "epoch": 17.039455009934716, "grad_norm": 0.8192585110664368, "learning_rate": 8.29684927618507e-05, "loss": 0.01847737729549408, "step": 60030 }, { "epoch": 17.042293499858076, "grad_norm": 12.757583618164062, "learning_rate": 8.296565427192734e-05, "loss": 0.036665529012680054, "step": 60040 }, { "epoch": 17.045131989781435, "grad_norm": 6.71480131149292, "learning_rate": 8.296281578200398e-05, "loss": 0.03365083038806915, "step": 60050 }, { "epoch": 17.047970479704798, "grad_norm": 0.3680950403213501, "learning_rate": 8.295997729208061e-05, "loss": 0.011141233146190643, "step": 60060 }, { "epoch": 17.050808969628157, "grad_norm": 7.116525650024414, "learning_rate": 8.295713880215725e-05, "loss": 0.018178504705429078, "step": 60070 }, { "epoch": 17.05364745955152, "grad_norm": 5.741809368133545, "learning_rate": 8.29543003122339e-05, "loss": 0.017730489373207092, "step": 60080 }, { "epoch": 17.05648594947488, "grad_norm": 10.484217643737793, "learning_rate": 8.295146182231054e-05, "loss": 0.03090069890022278, "step": 60090 }, { "epoch": 17.05932443939824, "grad_norm": 1.1893121004104614, "learning_rate": 8.294862333238718e-05, "loss": 0.02439778298139572, "step": 60100 }, { "epoch": 17.0621629293216, "grad_norm": 4.7010931968688965, "learning_rate": 8.294578484246382e-05, "loss": 0.017384180426597597, "step": 60110 }, { "epoch": 17.06500141924496, "grad_norm": 5.869739055633545, "learning_rate": 8.294294635254045e-05, "loss": 0.02081870138645172, "step": 60120 }, { "epoch": 17.067839909168324, "grad_norm": 3.771984100341797, "learning_rate": 8.294010786261709e-05, "loss": 0.01817036122083664, "step": 60130 }, { "epoch": 17.070678399091683, "grad_norm": 7.8905558586120605, "learning_rate": 8.293726937269373e-05, "loss": 0.014063933491706848, "step": 60140 }, { "epoch": 17.073516889015043, "grad_norm": 7.925786972045898, "learning_rate": 8.293443088277037e-05, "loss": 0.027467098832130433, "step": 60150 }, { "epoch": 17.076355378938405, "grad_norm": 2.6028711795806885, "learning_rate": 8.293159239284701e-05, "loss": 0.009261946380138397, "step": 60160 }, { "epoch": 17.079193868861765, "grad_norm": 2.34194016456604, "learning_rate": 8.292875390292365e-05, "loss": 0.02392832189798355, "step": 60170 }, { "epoch": 17.082032358785128, "grad_norm": 6.850749969482422, "learning_rate": 8.29259154130003e-05, "loss": 0.016964365541934968, "step": 60180 }, { "epoch": 17.084870848708487, "grad_norm": 0.5283069610595703, "learning_rate": 8.292307692307692e-05, "loss": 0.01907171607017517, "step": 60190 }, { "epoch": 17.087709338631846, "grad_norm": 0.7016083002090454, "learning_rate": 8.292023843315356e-05, "loss": 0.022131794691085817, "step": 60200 }, { "epoch": 17.09054782855521, "grad_norm": 11.678545951843262, "learning_rate": 8.29173999432302e-05, "loss": 0.0329552173614502, "step": 60210 }, { "epoch": 17.09338631847857, "grad_norm": 8.187173843383789, "learning_rate": 8.291456145330685e-05, "loss": 0.014009331166744233, "step": 60220 }, { "epoch": 17.09622480840193, "grad_norm": 1.4075108766555786, "learning_rate": 8.291172296338349e-05, "loss": 0.023814243078231812, "step": 60230 }, { "epoch": 17.09906329832529, "grad_norm": 13.14627742767334, "learning_rate": 8.290888447346013e-05, "loss": 0.023088133335113524, "step": 60240 }, { "epoch": 17.10190178824865, "grad_norm": 15.659075736999512, "learning_rate": 8.290604598353676e-05, "loss": 0.02848309576511383, "step": 60250 }, { "epoch": 17.104740278172013, "grad_norm": 19.55657958984375, "learning_rate": 8.29032074936134e-05, "loss": 0.0240742027759552, "step": 60260 }, { "epoch": 17.107578768095372, "grad_norm": 5.172979831695557, "learning_rate": 8.290036900369004e-05, "loss": 0.022654667496681213, "step": 60270 }, { "epoch": 17.110417258018735, "grad_norm": 4.127310276031494, "learning_rate": 8.289753051376668e-05, "loss": 0.013410001993179321, "step": 60280 }, { "epoch": 17.113255747942095, "grad_norm": 3.83300518989563, "learning_rate": 8.289469202384332e-05, "loss": 0.03008730411529541, "step": 60290 }, { "epoch": 17.116094237865454, "grad_norm": 3.6131479740142822, "learning_rate": 8.289185353391996e-05, "loss": 0.015287838876247406, "step": 60300 }, { "epoch": 17.118932727788817, "grad_norm": 0.22699904441833496, "learning_rate": 8.288901504399659e-05, "loss": 0.01297130435705185, "step": 60310 }, { "epoch": 17.121771217712176, "grad_norm": 2.5407752990722656, "learning_rate": 8.288617655407323e-05, "loss": 0.02008923441171646, "step": 60320 }, { "epoch": 17.12460970763554, "grad_norm": 7.36152458190918, "learning_rate": 8.288333806414987e-05, "loss": 0.01682742089033127, "step": 60330 }, { "epoch": 17.1274481975589, "grad_norm": 12.616373062133789, "learning_rate": 8.288049957422652e-05, "loss": 0.032228341698646544, "step": 60340 }, { "epoch": 17.130286687482258, "grad_norm": 1.2286903858184814, "learning_rate": 8.287766108430316e-05, "loss": 0.008339907974004745, "step": 60350 }, { "epoch": 17.13312517740562, "grad_norm": 0.852236270904541, "learning_rate": 8.28748225943798e-05, "loss": 0.02148095816373825, "step": 60360 }, { "epoch": 17.13596366732898, "grad_norm": 0.7790113091468811, "learning_rate": 8.287198410445644e-05, "loss": 0.020625939965248107, "step": 60370 }, { "epoch": 17.138802157252343, "grad_norm": 7.01544189453125, "learning_rate": 8.286914561453307e-05, "loss": 0.02250695526599884, "step": 60380 }, { "epoch": 17.141640647175702, "grad_norm": 0.8989993929862976, "learning_rate": 8.286630712460971e-05, "loss": 0.01921136528253555, "step": 60390 }, { "epoch": 17.144479137099065, "grad_norm": 10.793028831481934, "learning_rate": 8.286346863468635e-05, "loss": 0.02690858244895935, "step": 60400 }, { "epoch": 17.147317627022424, "grad_norm": 13.074443817138672, "learning_rate": 8.286063014476298e-05, "loss": 0.019533056020736694, "step": 60410 }, { "epoch": 17.150156116945784, "grad_norm": 14.613844871520996, "learning_rate": 8.285779165483963e-05, "loss": 0.03272394835948944, "step": 60420 }, { "epoch": 17.152994606869147, "grad_norm": 8.377421379089355, "learning_rate": 8.285495316491627e-05, "loss": 0.027012503147125243, "step": 60430 }, { "epoch": 17.155833096792506, "grad_norm": 2.1425633430480957, "learning_rate": 8.28521146749929e-05, "loss": 0.0074956871569156645, "step": 60440 }, { "epoch": 17.15867158671587, "grad_norm": 3.3529205322265625, "learning_rate": 8.284927618506954e-05, "loss": 0.017431148886680604, "step": 60450 }, { "epoch": 17.161510076639228, "grad_norm": 1.3230326175689697, "learning_rate": 8.284643769514619e-05, "loss": 0.011084027588367462, "step": 60460 }, { "epoch": 17.164348566562587, "grad_norm": 12.417771339416504, "learning_rate": 8.284359920522283e-05, "loss": 0.028391575813293456, "step": 60470 }, { "epoch": 17.16718705648595, "grad_norm": 3.665861129760742, "learning_rate": 8.284076071529945e-05, "loss": 0.01258855015039444, "step": 60480 }, { "epoch": 17.17002554640931, "grad_norm": 6.475068092346191, "learning_rate": 8.283792222537611e-05, "loss": 0.017808590829372407, "step": 60490 }, { "epoch": 17.172864036332673, "grad_norm": 2.4653563499450684, "learning_rate": 8.283508373545275e-05, "loss": 0.017405638098716737, "step": 60500 }, { "epoch": 17.172864036332673, "eval_accuracy": 0.9638201818528646, "eval_loss": 0.11213576048612595, "eval_runtime": 31.6708, "eval_samples_per_second": 496.577, "eval_steps_per_second": 7.767, "step": 60500 }, { "epoch": 17.175702526256032, "grad_norm": 6.46360445022583, "learning_rate": 8.283224524552938e-05, "loss": 0.008883007615804673, "step": 60510 }, { "epoch": 17.17854101617939, "grad_norm": 3.651374340057373, "learning_rate": 8.282940675560602e-05, "loss": 0.013101382553577423, "step": 60520 }, { "epoch": 17.181379506102754, "grad_norm": 8.114255905151367, "learning_rate": 8.282656826568266e-05, "loss": 0.014031532406806945, "step": 60530 }, { "epoch": 17.184217996026113, "grad_norm": 6.263001441955566, "learning_rate": 8.282372977575929e-05, "loss": 0.010283049196004868, "step": 60540 }, { "epoch": 17.187056485949476, "grad_norm": 2.2577359676361084, "learning_rate": 8.282089128583594e-05, "loss": 0.00542941614985466, "step": 60550 }, { "epoch": 17.189894975872836, "grad_norm": 3.672323703765869, "learning_rate": 8.281805279591259e-05, "loss": 0.008926591277122498, "step": 60560 }, { "epoch": 17.192733465796195, "grad_norm": 1.2772222757339478, "learning_rate": 8.281521430598921e-05, "loss": 0.018028415739536285, "step": 60570 }, { "epoch": 17.195571955719558, "grad_norm": 8.611686706542969, "learning_rate": 8.281237581606585e-05, "loss": 0.011967145651578904, "step": 60580 }, { "epoch": 17.198410445642917, "grad_norm": 5.633968830108643, "learning_rate": 8.28095373261425e-05, "loss": 0.01403355747461319, "step": 60590 }, { "epoch": 17.20124893556628, "grad_norm": 7.445531368255615, "learning_rate": 8.280669883621914e-05, "loss": 0.01686886101961136, "step": 60600 }, { "epoch": 17.20408742548964, "grad_norm": 9.879779815673828, "learning_rate": 8.280386034629577e-05, "loss": 0.020007339119911195, "step": 60610 }, { "epoch": 17.206925915413, "grad_norm": 11.010483741760254, "learning_rate": 8.280102185637242e-05, "loss": 0.02129928916692734, "step": 60620 }, { "epoch": 17.20976440533636, "grad_norm": 6.090151786804199, "learning_rate": 8.279818336644906e-05, "loss": 0.015851506590843202, "step": 60630 }, { "epoch": 17.21260289525972, "grad_norm": 2.9903645515441895, "learning_rate": 8.279534487652569e-05, "loss": 0.023469410836696625, "step": 60640 }, { "epoch": 17.215441385183084, "grad_norm": 0.5268586874008179, "learning_rate": 8.279250638660233e-05, "loss": 0.016330859065055846, "step": 60650 }, { "epoch": 17.218279875106443, "grad_norm": 1.6142635345458984, "learning_rate": 8.278966789667897e-05, "loss": 0.009772557765245438, "step": 60660 }, { "epoch": 17.221118365029803, "grad_norm": 0.33875951170921326, "learning_rate": 8.27868294067556e-05, "loss": 0.01213638037443161, "step": 60670 }, { "epoch": 17.223956854953165, "grad_norm": 0.6342658400535583, "learning_rate": 8.278399091683225e-05, "loss": 0.015492451190948487, "step": 60680 }, { "epoch": 17.226795344876525, "grad_norm": 1.8617217540740967, "learning_rate": 8.27811524269089e-05, "loss": 0.021076194941997528, "step": 60690 }, { "epoch": 17.229633834799888, "grad_norm": 6.909392356872559, "learning_rate": 8.277831393698552e-05, "loss": 0.02256089895963669, "step": 60700 }, { "epoch": 17.232472324723247, "grad_norm": 0.8374917507171631, "learning_rate": 8.277547544706217e-05, "loss": 0.018785883486270905, "step": 60710 }, { "epoch": 17.235310814646606, "grad_norm": 7.933974266052246, "learning_rate": 8.277263695713881e-05, "loss": 0.013684138655662537, "step": 60720 }, { "epoch": 17.23814930456997, "grad_norm": 3.9869275093078613, "learning_rate": 8.276979846721545e-05, "loss": 0.018250496685504915, "step": 60730 }, { "epoch": 17.24098779449333, "grad_norm": 4.982598781585693, "learning_rate": 8.276695997729208e-05, "loss": 0.02122266888618469, "step": 60740 }, { "epoch": 17.24382628441669, "grad_norm": 0.5410777926445007, "learning_rate": 8.276412148736873e-05, "loss": 0.02262820452451706, "step": 60750 }, { "epoch": 17.24666477434005, "grad_norm": 4.1117377281188965, "learning_rate": 8.276128299744537e-05, "loss": 0.021995536983013153, "step": 60760 }, { "epoch": 17.249503264263414, "grad_norm": 13.128637313842773, "learning_rate": 8.2758444507522e-05, "loss": 0.024110031127929688, "step": 60770 }, { "epoch": 17.252341754186773, "grad_norm": 6.246779441833496, "learning_rate": 8.275560601759864e-05, "loss": 0.02954556345939636, "step": 60780 }, { "epoch": 17.255180244110132, "grad_norm": 10.835840225219727, "learning_rate": 8.275276752767528e-05, "loss": 0.014489531517028809, "step": 60790 }, { "epoch": 17.258018734033495, "grad_norm": 1.1595449447631836, "learning_rate": 8.274992903775191e-05, "loss": 0.028317677974700927, "step": 60800 }, { "epoch": 17.260857223956855, "grad_norm": 6.88871955871582, "learning_rate": 8.274709054782855e-05, "loss": 0.03034435510635376, "step": 60810 }, { "epoch": 17.263695713880217, "grad_norm": 9.165369033813477, "learning_rate": 8.274425205790521e-05, "loss": 0.026477175951004028, "step": 60820 }, { "epoch": 17.266534203803577, "grad_norm": 4.424760341644287, "learning_rate": 8.274141356798183e-05, "loss": 0.016534657776355745, "step": 60830 }, { "epoch": 17.269372693726936, "grad_norm": 2.463466167449951, "learning_rate": 8.273857507805848e-05, "loss": 0.016133719682693483, "step": 60840 }, { "epoch": 17.2722111836503, "grad_norm": 1.105623722076416, "learning_rate": 8.273573658813512e-05, "loss": 0.009032898396253587, "step": 60850 }, { "epoch": 17.27504967357366, "grad_norm": 8.224363327026367, "learning_rate": 8.273289809821176e-05, "loss": 0.014842145144939423, "step": 60860 }, { "epoch": 17.27788816349702, "grad_norm": 2.3137259483337402, "learning_rate": 8.273005960828839e-05, "loss": 0.02080821692943573, "step": 60870 }, { "epoch": 17.28072665342038, "grad_norm": 4.120140552520752, "learning_rate": 8.272722111836504e-05, "loss": 0.022870764136314392, "step": 60880 }, { "epoch": 17.28356514334374, "grad_norm": 8.517062187194824, "learning_rate": 8.272438262844168e-05, "loss": 0.02370109558105469, "step": 60890 }, { "epoch": 17.286403633267103, "grad_norm": 0.7718416452407837, "learning_rate": 8.272154413851831e-05, "loss": 0.028670358657836913, "step": 60900 }, { "epoch": 17.289242123190462, "grad_norm": 0.37946709990501404, "learning_rate": 8.271870564859495e-05, "loss": 0.007799313962459564, "step": 60910 }, { "epoch": 17.292080613113825, "grad_norm": 9.967166900634766, "learning_rate": 8.27158671586716e-05, "loss": 0.023309630155563355, "step": 60920 }, { "epoch": 17.294919103037184, "grad_norm": 9.929471015930176, "learning_rate": 8.271302866874822e-05, "loss": 0.01647820174694061, "step": 60930 }, { "epoch": 17.297757592960544, "grad_norm": 7.87197208404541, "learning_rate": 8.271019017882486e-05, "loss": 0.02176760733127594, "step": 60940 }, { "epoch": 17.300596082883906, "grad_norm": 4.767561912536621, "learning_rate": 8.270735168890152e-05, "loss": 0.014724665880203247, "step": 60950 }, { "epoch": 17.303434572807266, "grad_norm": 6.339470863342285, "learning_rate": 8.270451319897815e-05, "loss": 0.028527584671974183, "step": 60960 }, { "epoch": 17.30627306273063, "grad_norm": 1.07589590549469, "learning_rate": 8.270167470905479e-05, "loss": 0.01153789609670639, "step": 60970 }, { "epoch": 17.309111552653988, "grad_norm": 14.57190990447998, "learning_rate": 8.269883621913143e-05, "loss": 0.0345747172832489, "step": 60980 }, { "epoch": 17.311950042577347, "grad_norm": 10.745390892028809, "learning_rate": 8.269599772920807e-05, "loss": 0.021194887161254884, "step": 60990 }, { "epoch": 17.31478853250071, "grad_norm": 5.401939392089844, "learning_rate": 8.26931592392847e-05, "loss": 0.026307469606399535, "step": 61000 }, { "epoch": 17.31478853250071, "eval_accuracy": 0.961849049405481, "eval_loss": 0.12669186294078827, "eval_runtime": 31.7504, "eval_samples_per_second": 495.333, "eval_steps_per_second": 7.748, "step": 61000 }, { "epoch": 17.31762702242407, "grad_norm": 1.6662150621414185, "learning_rate": 8.269032074936134e-05, "loss": 0.019942404329776765, "step": 61010 }, { "epoch": 17.320465512347432, "grad_norm": 2.3525197505950928, "learning_rate": 8.2687482259438e-05, "loss": 0.02618812918663025, "step": 61020 }, { "epoch": 17.323304002270792, "grad_norm": 5.391884803771973, "learning_rate": 8.268464376951462e-05, "loss": 0.023497243225574494, "step": 61030 }, { "epoch": 17.32614249219415, "grad_norm": 3.5315847396850586, "learning_rate": 8.268180527959126e-05, "loss": 0.02437012642621994, "step": 61040 }, { "epoch": 17.328980982117514, "grad_norm": 8.045187950134277, "learning_rate": 8.26789667896679e-05, "loss": 0.024712073802947997, "step": 61050 }, { "epoch": 17.331819472040873, "grad_norm": 1.0701186656951904, "learning_rate": 8.267612829974453e-05, "loss": 0.011471447348594666, "step": 61060 }, { "epoch": 17.334657961964236, "grad_norm": 2.743116617202759, "learning_rate": 8.267328980982117e-05, "loss": 0.01637566536664963, "step": 61070 }, { "epoch": 17.337496451887596, "grad_norm": 17.422468185424805, "learning_rate": 8.267045131989783e-05, "loss": 0.020722249150276185, "step": 61080 }, { "epoch": 17.340334941810955, "grad_norm": 7.825224876403809, "learning_rate": 8.266761282997446e-05, "loss": 0.02752777934074402, "step": 61090 }, { "epoch": 17.343173431734318, "grad_norm": 1.663003921508789, "learning_rate": 8.26647743400511e-05, "loss": 0.014432349801063537, "step": 61100 }, { "epoch": 17.346011921657677, "grad_norm": 1.9922808408737183, "learning_rate": 8.266193585012774e-05, "loss": 0.017636868357658386, "step": 61110 }, { "epoch": 17.34885041158104, "grad_norm": 2.2063710689544678, "learning_rate": 8.265909736020438e-05, "loss": 0.026861193776130676, "step": 61120 }, { "epoch": 17.3516889015044, "grad_norm": 14.567687034606934, "learning_rate": 8.265625887028101e-05, "loss": 0.02321614623069763, "step": 61130 }, { "epoch": 17.35452739142776, "grad_norm": 2.5034542083740234, "learning_rate": 8.265342038035765e-05, "loss": 0.03582393527030945, "step": 61140 }, { "epoch": 17.35736588135112, "grad_norm": 12.30094051361084, "learning_rate": 8.265058189043429e-05, "loss": 0.03541815876960754, "step": 61150 }, { "epoch": 17.36020437127448, "grad_norm": 4.182841777801514, "learning_rate": 8.264774340051093e-05, "loss": 0.023144432902336122, "step": 61160 }, { "epoch": 17.363042861197844, "grad_norm": Infinity, "learning_rate": 8.264490491058757e-05, "loss": 0.033010005950927734, "step": 61170 }, { "epoch": 17.365881351121203, "grad_norm": 5.141995429992676, "learning_rate": 8.264235026965654e-05, "loss": 0.014861783385276795, "step": 61180 }, { "epoch": 17.368719841044566, "grad_norm": 2.766414165496826, "learning_rate": 8.263951177973318e-05, "loss": 0.026583638787269593, "step": 61190 }, { "epoch": 17.371558330967925, "grad_norm": 4.854646682739258, "learning_rate": 8.263667328980982e-05, "loss": 0.020185372233390807, "step": 61200 }, { "epoch": 17.374396820891285, "grad_norm": 4.532007217407227, "learning_rate": 8.263383479988646e-05, "loss": 0.044744473695755, "step": 61210 }, { "epoch": 17.377235310814648, "grad_norm": 8.546868324279785, "learning_rate": 8.26309963099631e-05, "loss": 0.016836707293987275, "step": 61220 }, { "epoch": 17.380073800738007, "grad_norm": 8.586074829101562, "learning_rate": 8.262815782003975e-05, "loss": 0.012118186801671982, "step": 61230 }, { "epoch": 17.38291229066137, "grad_norm": 3.301542043685913, "learning_rate": 8.262531933011638e-05, "loss": 0.033820819854736325, "step": 61240 }, { "epoch": 17.38575078058473, "grad_norm": 5.653260231018066, "learning_rate": 8.262248084019302e-05, "loss": 0.026761633157730103, "step": 61250 }, { "epoch": 17.38858927050809, "grad_norm": 1.653430461883545, "learning_rate": 8.261964235026967e-05, "loss": 0.007496782392263412, "step": 61260 }, { "epoch": 17.39142776043145, "grad_norm": 5.211973667144775, "learning_rate": 8.26168038603463e-05, "loss": 0.018194952607154848, "step": 61270 }, { "epoch": 17.39426625035481, "grad_norm": 2.7129406929016113, "learning_rate": 8.261396537042294e-05, "loss": 0.013098260760307312, "step": 61280 }, { "epoch": 17.397104740278174, "grad_norm": 8.766242027282715, "learning_rate": 8.261112688049958e-05, "loss": 0.029319095611572265, "step": 61290 }, { "epoch": 17.399943230201533, "grad_norm": 6.793268203735352, "learning_rate": 8.260828839057621e-05, "loss": 0.021523159742355347, "step": 61300 }, { "epoch": 17.402781720124892, "grad_norm": 7.518362998962402, "learning_rate": 8.260544990065285e-05, "loss": 0.021864983439445495, "step": 61310 }, { "epoch": 17.405620210048255, "grad_norm": 0.4228460192680359, "learning_rate": 8.260261141072949e-05, "loss": 0.013299587368965148, "step": 61320 }, { "epoch": 17.408458699971614, "grad_norm": 4.653689861297607, "learning_rate": 8.259977292080613e-05, "loss": 0.009516909718513489, "step": 61330 }, { "epoch": 17.411297189894977, "grad_norm": 0.4042937159538269, "learning_rate": 8.259693443088278e-05, "loss": 0.02425500899553299, "step": 61340 }, { "epoch": 17.414135679818337, "grad_norm": 9.169136047363281, "learning_rate": 8.259409594095942e-05, "loss": 0.010941586643457412, "step": 61350 }, { "epoch": 17.416974169741696, "grad_norm": 2.9483869075775146, "learning_rate": 8.259125745103606e-05, "loss": 0.03379940986633301, "step": 61360 }, { "epoch": 17.41981265966506, "grad_norm": 9.949241638183594, "learning_rate": 8.258841896111269e-05, "loss": 0.02597987651824951, "step": 61370 }, { "epoch": 17.422651149588418, "grad_norm": 4.888527870178223, "learning_rate": 8.258558047118933e-05, "loss": 0.023670709133148192, "step": 61380 }, { "epoch": 17.42548963951178, "grad_norm": 8.181744575500488, "learning_rate": 8.258274198126597e-05, "loss": 0.017273417115211485, "step": 61390 }, { "epoch": 17.42832812943514, "grad_norm": 3.6558029651641846, "learning_rate": 8.257990349134261e-05, "loss": 0.01796172559261322, "step": 61400 }, { "epoch": 17.4311666193585, "grad_norm": 2.3941311836242676, "learning_rate": 8.257706500141925e-05, "loss": 0.02153703719377518, "step": 61410 }, { "epoch": 17.434005109281863, "grad_norm": 9.692488670349121, "learning_rate": 8.257422651149589e-05, "loss": 0.020052766799926756, "step": 61420 }, { "epoch": 17.436843599205222, "grad_norm": 8.038256645202637, "learning_rate": 8.257138802157252e-05, "loss": 0.018956753611564636, "step": 61430 }, { "epoch": 17.439682089128585, "grad_norm": 9.291525840759277, "learning_rate": 8.256854953164916e-05, "loss": 0.029993498325347902, "step": 61440 }, { "epoch": 17.442520579051944, "grad_norm": 8.019403457641602, "learning_rate": 8.25657110417258e-05, "loss": 0.03004841208457947, "step": 61450 }, { "epoch": 17.445359068975304, "grad_norm": 3.3659279346466064, "learning_rate": 8.256287255180244e-05, "loss": 0.029397329688072203, "step": 61460 }, { "epoch": 17.448197558898666, "grad_norm": 5.011568069458008, "learning_rate": 8.256003406187909e-05, "loss": 0.022481609880924226, "step": 61470 }, { "epoch": 17.451036048822026, "grad_norm": 7.6243391036987305, "learning_rate": 8.255719557195573e-05, "loss": 0.017621365189552308, "step": 61480 }, { "epoch": 17.45387453874539, "grad_norm": 2.629657030105591, "learning_rate": 8.255435708203237e-05, "loss": 0.011807309836149216, "step": 61490 }, { "epoch": 17.456713028668748, "grad_norm": 1.587230920791626, "learning_rate": 8.2551518592109e-05, "loss": 0.019284310936927795, "step": 61500 }, { "epoch": 17.456713028668748, "eval_accuracy": 0.9609588605582756, "eval_loss": 0.1191679984331131, "eval_runtime": 31.766, "eval_samples_per_second": 495.09, "eval_steps_per_second": 7.744, "step": 61500 }, { "epoch": 17.459551518592107, "grad_norm": 2.1209075450897217, "learning_rate": 8.254868010218564e-05, "loss": 0.012888598442077636, "step": 61510 }, { "epoch": 17.46239000851547, "grad_norm": 7.053223609924316, "learning_rate": 8.254584161226228e-05, "loss": 0.023036378622055053, "step": 61520 }, { "epoch": 17.46522849843883, "grad_norm": 6.634057998657227, "learning_rate": 8.254300312233892e-05, "loss": 0.03232641220092773, "step": 61530 }, { "epoch": 17.468066988362192, "grad_norm": 1.1502784490585327, "learning_rate": 8.254016463241556e-05, "loss": 0.017886632680892946, "step": 61540 }, { "epoch": 17.47090547828555, "grad_norm": 0.5109356045722961, "learning_rate": 8.25373261424922e-05, "loss": 0.016124421358108522, "step": 61550 }, { "epoch": 17.473743968208915, "grad_norm": 8.057290077209473, "learning_rate": 8.253448765256883e-05, "loss": 0.026202934980392455, "step": 61560 }, { "epoch": 17.476582458132274, "grad_norm": 8.621732711791992, "learning_rate": 8.253164916264547e-05, "loss": 0.010917245596647262, "step": 61570 }, { "epoch": 17.479420948055633, "grad_norm": 5.579722881317139, "learning_rate": 8.252881067272211e-05, "loss": 0.01770494282245636, "step": 61580 }, { "epoch": 17.482259437978996, "grad_norm": 1.4923181533813477, "learning_rate": 8.252597218279876e-05, "loss": 0.02207198888063431, "step": 61590 }, { "epoch": 17.485097927902356, "grad_norm": 0.5258405804634094, "learning_rate": 8.25231336928754e-05, "loss": 0.015277965366840363, "step": 61600 }, { "epoch": 17.48793641782572, "grad_norm": 0.5790354609489441, "learning_rate": 8.252029520295204e-05, "loss": 0.01770160347223282, "step": 61610 }, { "epoch": 17.490774907749078, "grad_norm": 2.4939751625061035, "learning_rate": 8.251745671302868e-05, "loss": 0.016967292129993438, "step": 61620 }, { "epoch": 17.493613397672437, "grad_norm": 7.325229167938232, "learning_rate": 8.251461822310531e-05, "loss": 0.02095828950405121, "step": 61630 }, { "epoch": 17.4964518875958, "grad_norm": 7.570591926574707, "learning_rate": 8.251177973318195e-05, "loss": 0.027062812447547914, "step": 61640 }, { "epoch": 17.49929037751916, "grad_norm": 2.952631950378418, "learning_rate": 8.250894124325859e-05, "loss": 0.015240280330181122, "step": 61650 }, { "epoch": 17.502128867442522, "grad_norm": 0.49329328536987305, "learning_rate": 8.250610275333523e-05, "loss": 0.028195285797119142, "step": 61660 }, { "epoch": 17.50496735736588, "grad_norm": 4.536864757537842, "learning_rate": 8.250326426341187e-05, "loss": 0.025210756063461303, "step": 61670 }, { "epoch": 17.50780584728924, "grad_norm": 3.4256057739257812, "learning_rate": 8.250042577348851e-05, "loss": 0.03867365717887879, "step": 61680 }, { "epoch": 17.510644337212604, "grad_norm": 13.087890625, "learning_rate": 8.249758728356514e-05, "loss": 0.04873683452606201, "step": 61690 }, { "epoch": 17.513482827135963, "grad_norm": 5.287320613861084, "learning_rate": 8.249474879364178e-05, "loss": 0.020358946919441224, "step": 61700 }, { "epoch": 17.516321317059326, "grad_norm": 6.921557903289795, "learning_rate": 8.249191030371842e-05, "loss": 0.01727771610021591, "step": 61710 }, { "epoch": 17.519159806982685, "grad_norm": 8.656535148620605, "learning_rate": 8.248907181379507e-05, "loss": 0.031967931985855104, "step": 61720 }, { "epoch": 17.521998296906045, "grad_norm": 9.841416358947754, "learning_rate": 8.248623332387171e-05, "loss": 0.010060474276542664, "step": 61730 }, { "epoch": 17.524836786829407, "grad_norm": 0.8050335645675659, "learning_rate": 8.248339483394835e-05, "loss": 0.025714984536170958, "step": 61740 }, { "epoch": 17.527675276752767, "grad_norm": 2.155386447906494, "learning_rate": 8.248055634402499e-05, "loss": 0.020784711837768553, "step": 61750 }, { "epoch": 17.53051376667613, "grad_norm": 6.113373279571533, "learning_rate": 8.247771785410162e-05, "loss": 0.02968502938747406, "step": 61760 }, { "epoch": 17.53335225659949, "grad_norm": 3.792987585067749, "learning_rate": 8.247487936417826e-05, "loss": 0.014387741684913635, "step": 61770 }, { "epoch": 17.53619074652285, "grad_norm": 2.1913392543792725, "learning_rate": 8.24720408742549e-05, "loss": 0.030770033597946167, "step": 61780 }, { "epoch": 17.53902923644621, "grad_norm": 6.313526630401611, "learning_rate": 8.246920238433153e-05, "loss": 0.029643353819847108, "step": 61790 }, { "epoch": 17.54186772636957, "grad_norm": 9.234797477722168, "learning_rate": 8.246636389440818e-05, "loss": 0.0283705472946167, "step": 61800 }, { "epoch": 17.544706216292933, "grad_norm": 6.143775463104248, "learning_rate": 8.246352540448482e-05, "loss": 0.0158234179019928, "step": 61810 }, { "epoch": 17.547544706216293, "grad_norm": 1.9553626775741577, "learning_rate": 8.246068691456145e-05, "loss": 0.01241091787815094, "step": 61820 }, { "epoch": 17.550383196139652, "grad_norm": 1.0569223165512085, "learning_rate": 8.24578484246381e-05, "loss": 0.01063622534275055, "step": 61830 }, { "epoch": 17.553221686063015, "grad_norm": 2.694061756134033, "learning_rate": 8.245500993471474e-05, "loss": 0.01957698315382004, "step": 61840 }, { "epoch": 17.556060175986374, "grad_norm": 3.3244571685791016, "learning_rate": 8.245217144479138e-05, "loss": 0.027434754371643066, "step": 61850 }, { "epoch": 17.558898665909737, "grad_norm": 5.469682693481445, "learning_rate": 8.244933295486802e-05, "loss": 0.010187521576881409, "step": 61860 }, { "epoch": 17.561737155833097, "grad_norm": 0.5937946438789368, "learning_rate": 8.244649446494466e-05, "loss": 0.01589364856481552, "step": 61870 }, { "epoch": 17.564575645756456, "grad_norm": 13.531807899475098, "learning_rate": 8.24436559750213e-05, "loss": 0.039757069945335385, "step": 61880 }, { "epoch": 17.56741413567982, "grad_norm": 1.991186261177063, "learning_rate": 8.244081748509793e-05, "loss": 0.022796808183193205, "step": 61890 }, { "epoch": 17.570252625603178, "grad_norm": 2.8076136112213135, "learning_rate": 8.243797899517457e-05, "loss": 0.01887902766466141, "step": 61900 }, { "epoch": 17.57309111552654, "grad_norm": 7.56357479095459, "learning_rate": 8.243514050525121e-05, "loss": 0.014006641507148743, "step": 61910 }, { "epoch": 17.5759296054499, "grad_norm": 10.527331352233887, "learning_rate": 8.243230201532784e-05, "loss": 0.033383351564407346, "step": 61920 }, { "epoch": 17.578768095373263, "grad_norm": 3.0198590755462646, "learning_rate": 8.24294635254045e-05, "loss": 0.016942545771598816, "step": 61930 }, { "epoch": 17.581606585296623, "grad_norm": 2.1270992755889893, "learning_rate": 8.242662503548114e-05, "loss": 0.010510009527206422, "step": 61940 }, { "epoch": 17.584445075219982, "grad_norm": 0.9897298812866211, "learning_rate": 8.242378654555776e-05, "loss": 0.01847278028726578, "step": 61950 }, { "epoch": 17.587283565143345, "grad_norm": 8.407822608947754, "learning_rate": 8.24209480556344e-05, "loss": 0.025774875283241273, "step": 61960 }, { "epoch": 17.590122055066704, "grad_norm": 0.4995296597480774, "learning_rate": 8.241810956571105e-05, "loss": 0.01796213984489441, "step": 61970 }, { "epoch": 17.592960544990063, "grad_norm": 9.558378219604492, "learning_rate": 8.241527107578769e-05, "loss": 0.012067146599292755, "step": 61980 }, { "epoch": 17.595799034913426, "grad_norm": 8.898117065429688, "learning_rate": 8.241243258586432e-05, "loss": 0.01925307810306549, "step": 61990 }, { "epoch": 17.598637524836786, "grad_norm": 1.8054234981536865, "learning_rate": 8.240959409594097e-05, "loss": 0.019535760581493377, "step": 62000 }, { "epoch": 17.598637524836786, "eval_accuracy": 0.9594328225344948, "eval_loss": 0.12758444249629974, "eval_runtime": 32.1311, "eval_samples_per_second": 489.463, "eval_steps_per_second": 7.656, "step": 62000 }, { "epoch": 17.60147601476015, "grad_norm": 7.3236775398254395, "learning_rate": 8.24067556060176e-05, "loss": 0.022417618334293364, "step": 62010 }, { "epoch": 17.604314504683508, "grad_norm": 2.7041015625, "learning_rate": 8.240391711609424e-05, "loss": 0.027331614494323732, "step": 62020 }, { "epoch": 17.60715299460687, "grad_norm": 7.156198501586914, "learning_rate": 8.240107862617088e-05, "loss": 0.029489129781723022, "step": 62030 }, { "epoch": 17.60999148453023, "grad_norm": 6.282800674438477, "learning_rate": 8.239824013624752e-05, "loss": 0.018814331293106078, "step": 62040 }, { "epoch": 17.61282997445359, "grad_norm": 11.916584968566895, "learning_rate": 8.239540164632415e-05, "loss": 0.038588258624076846, "step": 62050 }, { "epoch": 17.615668464376952, "grad_norm": 11.690528869628906, "learning_rate": 8.23925631564008e-05, "loss": 0.0323014110326767, "step": 62060 }, { "epoch": 17.61850695430031, "grad_norm": 1.9180601835250854, "learning_rate": 8.238972466647745e-05, "loss": 0.02899269163608551, "step": 62070 }, { "epoch": 17.621345444223675, "grad_norm": 1.3286105394363403, "learning_rate": 8.238688617655407e-05, "loss": 0.020975494384765626, "step": 62080 }, { "epoch": 17.624183934147034, "grad_norm": 3.077463388442993, "learning_rate": 8.238404768663072e-05, "loss": 0.019228251278400423, "step": 62090 }, { "epoch": 17.627022424070393, "grad_norm": 5.05003547668457, "learning_rate": 8.238120919670736e-05, "loss": 0.034769049286842345, "step": 62100 }, { "epoch": 17.629860913993756, "grad_norm": 1.4105678796768188, "learning_rate": 8.237837070678398e-05, "loss": 0.022635513544082643, "step": 62110 }, { "epoch": 17.632699403917115, "grad_norm": 3.652216911315918, "learning_rate": 8.237553221686063e-05, "loss": 0.04560632407665253, "step": 62120 }, { "epoch": 17.63553789384048, "grad_norm": 5.339707851409912, "learning_rate": 8.237269372693728e-05, "loss": 0.011022308468818664, "step": 62130 }, { "epoch": 17.638376383763838, "grad_norm": 2.7681965827941895, "learning_rate": 8.236985523701391e-05, "loss": 0.03116375207901001, "step": 62140 }, { "epoch": 17.641214873687197, "grad_norm": 0.3593093752861023, "learning_rate": 8.236701674709055e-05, "loss": 0.01997450739145279, "step": 62150 }, { "epoch": 17.64405336361056, "grad_norm": 5.6336588859558105, "learning_rate": 8.236417825716719e-05, "loss": 0.04349226951599121, "step": 62160 }, { "epoch": 17.64689185353392, "grad_norm": 8.167986869812012, "learning_rate": 8.236133976724383e-05, "loss": 0.021750986576080322, "step": 62170 }, { "epoch": 17.649730343457282, "grad_norm": 4.768847942352295, "learning_rate": 8.235850127732046e-05, "loss": 0.04157846570014954, "step": 62180 }, { "epoch": 17.65256883338064, "grad_norm": 10.957707405090332, "learning_rate": 8.23556627873971e-05, "loss": 0.03963684439659119, "step": 62190 }, { "epoch": 17.655407323304, "grad_norm": 4.948668956756592, "learning_rate": 8.235282429747376e-05, "loss": 0.03166250288486481, "step": 62200 }, { "epoch": 17.658245813227364, "grad_norm": 2.6129262447357178, "learning_rate": 8.234998580755039e-05, "loss": 0.011470156908035278, "step": 62210 }, { "epoch": 17.661084303150723, "grad_norm": 0.8243390917778015, "learning_rate": 8.234714731762703e-05, "loss": 0.014020107686519623, "step": 62220 }, { "epoch": 17.663922793074086, "grad_norm": 1.3467164039611816, "learning_rate": 8.234430882770367e-05, "loss": 0.01807920038700104, "step": 62230 }, { "epoch": 17.666761282997445, "grad_norm": 1.9685744047164917, "learning_rate": 8.23414703377803e-05, "loss": 0.018755415081977846, "step": 62240 }, { "epoch": 17.669599772920805, "grad_norm": 0.6170058846473694, "learning_rate": 8.233863184785694e-05, "loss": 0.018535336852073668, "step": 62250 }, { "epoch": 17.672438262844167, "grad_norm": 6.998648166656494, "learning_rate": 8.233579335793359e-05, "loss": 0.014197832345962525, "step": 62260 }, { "epoch": 17.675276752767527, "grad_norm": 11.751276016235352, "learning_rate": 8.233295486801022e-05, "loss": 0.024789305031299592, "step": 62270 }, { "epoch": 17.67811524269089, "grad_norm": 7.251413822174072, "learning_rate": 8.233011637808686e-05, "loss": 0.014999964833259582, "step": 62280 }, { "epoch": 17.68095373261425, "grad_norm": 6.438571929931641, "learning_rate": 8.23272778881635e-05, "loss": 0.01924816071987152, "step": 62290 }, { "epoch": 17.68379222253761, "grad_norm": 5.512231826782227, "learning_rate": 8.232443939824014e-05, "loss": 0.016895389556884764, "step": 62300 }, { "epoch": 17.68663071246097, "grad_norm": 2.3840599060058594, "learning_rate": 8.232160090831677e-05, "loss": 0.013796734809875488, "step": 62310 }, { "epoch": 17.68946920238433, "grad_norm": 5.094207763671875, "learning_rate": 8.231876241839341e-05, "loss": 0.02423170506954193, "step": 62320 }, { "epoch": 17.692307692307693, "grad_norm": 9.66804313659668, "learning_rate": 8.231592392847007e-05, "loss": 0.029379761219024657, "step": 62330 }, { "epoch": 17.695146182231053, "grad_norm": 3.0670926570892334, "learning_rate": 8.23130854385467e-05, "loss": 0.017469339072704315, "step": 62340 }, { "epoch": 17.697984672154412, "grad_norm": 7.919584274291992, "learning_rate": 8.231024694862334e-05, "loss": 0.040009519457817076, "step": 62350 }, { "epoch": 17.700823162077775, "grad_norm": 0.6892955303192139, "learning_rate": 8.230740845869998e-05, "loss": 0.02099369019269943, "step": 62360 }, { "epoch": 17.703661652001134, "grad_norm": 3.5605952739715576, "learning_rate": 8.23045699687766e-05, "loss": 0.02239793837070465, "step": 62370 }, { "epoch": 17.706500141924497, "grad_norm": 4.261983394622803, "learning_rate": 8.230173147885325e-05, "loss": 0.012834864854812621, "step": 62380 }, { "epoch": 17.709338631847857, "grad_norm": 3.5467593669891357, "learning_rate": 8.22988929889299e-05, "loss": 0.020592191815376283, "step": 62390 }, { "epoch": 17.71217712177122, "grad_norm": 4.0290021896362305, "learning_rate": 8.229605449900653e-05, "loss": 0.01588050276041031, "step": 62400 }, { "epoch": 17.71501561169458, "grad_norm": 5.264271259307861, "learning_rate": 8.229321600908317e-05, "loss": 0.026083704829216004, "step": 62410 }, { "epoch": 17.717854101617938, "grad_norm": 0.9289742708206177, "learning_rate": 8.229037751915981e-05, "loss": 0.020295150578022003, "step": 62420 }, { "epoch": 17.7206925915413, "grad_norm": 0.3363112211227417, "learning_rate": 8.228753902923645e-05, "loss": 0.021733446419239043, "step": 62430 }, { "epoch": 17.72353108146466, "grad_norm": 1.5515199899673462, "learning_rate": 8.228470053931308e-05, "loss": 0.03489026129245758, "step": 62440 }, { "epoch": 17.726369571388023, "grad_norm": 5.499934673309326, "learning_rate": 8.228186204938972e-05, "loss": 0.01474495530128479, "step": 62450 }, { "epoch": 17.729208061311382, "grad_norm": 9.99197006225586, "learning_rate": 8.227902355946638e-05, "loss": 0.031265202164649966, "step": 62460 }, { "epoch": 17.732046551234742, "grad_norm": 6.757200241088867, "learning_rate": 8.2276185069543e-05, "loss": 0.021450319886207582, "step": 62470 }, { "epoch": 17.734885041158105, "grad_norm": 6.411030292510986, "learning_rate": 8.227334657961965e-05, "loss": 0.009881447255611419, "step": 62480 }, { "epoch": 17.737723531081464, "grad_norm": 1.2655224800109863, "learning_rate": 8.227050808969629e-05, "loss": 0.023757727444171907, "step": 62490 }, { "epoch": 17.740562021004827, "grad_norm": 5.832747936248779, "learning_rate": 8.226766959977292e-05, "loss": 0.006980141997337342, "step": 62500 }, { "epoch": 17.740562021004827, "eval_accuracy": 0.9597507471227825, "eval_loss": 0.12602688372135162, "eval_runtime": 32.1713, "eval_samples_per_second": 488.852, "eval_steps_per_second": 7.647, "step": 62500 }, { "epoch": 17.743400510928186, "grad_norm": 0.4959080219268799, "learning_rate": 8.226483110984956e-05, "loss": 0.018655620515346527, "step": 62510 }, { "epoch": 17.746239000851546, "grad_norm": 5.3150763511657715, "learning_rate": 8.22619926199262e-05, "loss": 0.018986122310161592, "step": 62520 }, { "epoch": 17.74907749077491, "grad_norm": 10.762018203735352, "learning_rate": 8.225915413000284e-05, "loss": 0.01721494346857071, "step": 62530 }, { "epoch": 17.751915980698268, "grad_norm": 12.646697044372559, "learning_rate": 8.225631564007948e-05, "loss": 0.03775920569896698, "step": 62540 }, { "epoch": 17.75475447062163, "grad_norm": 5.9773850440979, "learning_rate": 8.225347715015612e-05, "loss": 0.02860093116760254, "step": 62550 }, { "epoch": 17.75759296054499, "grad_norm": 0.8533363938331604, "learning_rate": 8.225063866023277e-05, "loss": 0.028066220879554748, "step": 62560 }, { "epoch": 17.76043145046835, "grad_norm": 5.873940944671631, "learning_rate": 8.22478001703094e-05, "loss": 0.03162427544593811, "step": 62570 }, { "epoch": 17.763269940391712, "grad_norm": 0.8610804677009583, "learning_rate": 8.224496168038603e-05, "loss": 0.018642157316207886, "step": 62580 }, { "epoch": 17.76610843031507, "grad_norm": 5.9034953117370605, "learning_rate": 8.224212319046269e-05, "loss": 0.03293103873729706, "step": 62590 }, { "epoch": 17.768946920238434, "grad_norm": 12.166813850402832, "learning_rate": 8.223928470053932e-05, "loss": 0.026655450463294983, "step": 62600 }, { "epoch": 17.771785410161794, "grad_norm": 0.6069245338439941, "learning_rate": 8.223644621061596e-05, "loss": 0.024833826720714568, "step": 62610 }, { "epoch": 17.774623900085153, "grad_norm": 0.3689430058002472, "learning_rate": 8.22336077206926e-05, "loss": 0.025038906931877138, "step": 62620 }, { "epoch": 17.777462390008516, "grad_norm": 0.8548313975334167, "learning_rate": 8.223076923076923e-05, "loss": 0.018029791116714478, "step": 62630 }, { "epoch": 17.780300879931875, "grad_norm": 1.6226696968078613, "learning_rate": 8.222793074084587e-05, "loss": 0.028565794229507446, "step": 62640 }, { "epoch": 17.78313936985524, "grad_norm": 13.490941047668457, "learning_rate": 8.222509225092251e-05, "loss": 0.02153516560792923, "step": 62650 }, { "epoch": 17.785977859778598, "grad_norm": 9.21764850616455, "learning_rate": 8.222225376099915e-05, "loss": 0.017442944645881652, "step": 62660 }, { "epoch": 17.788816349701957, "grad_norm": 2.681166887283325, "learning_rate": 8.22194152710758e-05, "loss": 0.013702800869941712, "step": 62670 }, { "epoch": 17.79165483962532, "grad_norm": 1.2050832509994507, "learning_rate": 8.221657678115243e-05, "loss": 0.01701520085334778, "step": 62680 }, { "epoch": 17.79449332954868, "grad_norm": 2.4795150756835938, "learning_rate": 8.221373829122908e-05, "loss": 0.013615964353084565, "step": 62690 }, { "epoch": 17.797331819472042, "grad_norm": 3.0875420570373535, "learning_rate": 8.22108998013057e-05, "loss": 0.012938465178012847, "step": 62700 }, { "epoch": 17.8001703093954, "grad_norm": 11.013349533081055, "learning_rate": 8.220806131138235e-05, "loss": 0.031111732125282288, "step": 62710 }, { "epoch": 17.80300879931876, "grad_norm": 10.807975769042969, "learning_rate": 8.220522282145899e-05, "loss": 0.035066303610801694, "step": 62720 }, { "epoch": 17.805847289242124, "grad_norm": 10.930746078491211, "learning_rate": 8.220238433153563e-05, "loss": 0.013575820624828339, "step": 62730 }, { "epoch": 17.808685779165483, "grad_norm": 3.0772478580474854, "learning_rate": 8.219954584161227e-05, "loss": 0.018264994025230408, "step": 62740 }, { "epoch": 17.811524269088846, "grad_norm": 1.3442137241363525, "learning_rate": 8.219670735168891e-05, "loss": 0.02007365822792053, "step": 62750 }, { "epoch": 17.814362759012205, "grad_norm": 7.879894733428955, "learning_rate": 8.219386886176554e-05, "loss": 0.01938418745994568, "step": 62760 }, { "epoch": 17.817201248935568, "grad_norm": 1.2253329753875732, "learning_rate": 8.219103037184218e-05, "loss": 0.039791280031204225, "step": 62770 }, { "epoch": 17.820039738858927, "grad_norm": 5.614813804626465, "learning_rate": 8.218819188191882e-05, "loss": 0.015275192260742188, "step": 62780 }, { "epoch": 17.822878228782287, "grad_norm": 6.799874305725098, "learning_rate": 8.218535339199546e-05, "loss": 0.01871945261955261, "step": 62790 }, { "epoch": 17.82571671870565, "grad_norm": 7.841037750244141, "learning_rate": 8.21825149020721e-05, "loss": 0.028096556663513184, "step": 62800 }, { "epoch": 17.82855520862901, "grad_norm": 1.605942964553833, "learning_rate": 8.217967641214875e-05, "loss": 0.02815968096256256, "step": 62810 }, { "epoch": 17.83139369855237, "grad_norm": 10.052791595458984, "learning_rate": 8.217683792222539e-05, "loss": 0.06668474674224853, "step": 62820 }, { "epoch": 17.83423218847573, "grad_norm": 23.35610580444336, "learning_rate": 8.217399943230201e-05, "loss": 0.08080804347991943, "step": 62830 }, { "epoch": 17.83707067839909, "grad_norm": 10.331356048583984, "learning_rate": 8.217116094237866e-05, "loss": 0.03516271412372589, "step": 62840 }, { "epoch": 17.839909168322453, "grad_norm": 12.676920890808105, "learning_rate": 8.21683224524553e-05, "loss": 0.03169257640838623, "step": 62850 }, { "epoch": 17.842747658245813, "grad_norm": 12.567449569702148, "learning_rate": 8.216548396253194e-05, "loss": 0.018966612219810487, "step": 62860 }, { "epoch": 17.845586148169176, "grad_norm": 1.1577986478805542, "learning_rate": 8.216264547260858e-05, "loss": 0.03630762100219727, "step": 62870 }, { "epoch": 17.848424638092535, "grad_norm": 1.5280566215515137, "learning_rate": 8.215980698268522e-05, "loss": 0.02344632297754288, "step": 62880 }, { "epoch": 17.851263128015894, "grad_norm": 13.967700004577637, "learning_rate": 8.215696849276185e-05, "loss": 0.026290476322174072, "step": 62890 }, { "epoch": 17.854101617939257, "grad_norm": 4.650578022003174, "learning_rate": 8.215413000283849e-05, "loss": 0.04005018174648285, "step": 62900 }, { "epoch": 17.856940107862616, "grad_norm": 0.17366883158683777, "learning_rate": 8.215129151291513e-05, "loss": 0.008091751486063004, "step": 62910 }, { "epoch": 17.85977859778598, "grad_norm": 2.0971131324768066, "learning_rate": 8.214845302299177e-05, "loss": 0.04074928164482117, "step": 62920 }, { "epoch": 17.86261708770934, "grad_norm": 4.856375694274902, "learning_rate": 8.214561453306841e-05, "loss": 0.02438105344772339, "step": 62930 }, { "epoch": 17.865455577632698, "grad_norm": 2.2290196418762207, "learning_rate": 8.214277604314506e-05, "loss": 0.040464848279953, "step": 62940 }, { "epoch": 17.86829406755606, "grad_norm": 0.29619914293289185, "learning_rate": 8.213993755322168e-05, "loss": 0.023798368871212006, "step": 62950 }, { "epoch": 17.87113255747942, "grad_norm": 8.350001335144043, "learning_rate": 8.213709906329833e-05, "loss": 0.032443073391914365, "step": 62960 }, { "epoch": 17.873971047402783, "grad_norm": 2.495300769805908, "learning_rate": 8.213426057337497e-05, "loss": 0.025326359272003173, "step": 62970 }, { "epoch": 17.876809537326142, "grad_norm": 6.034106731414795, "learning_rate": 8.213142208345161e-05, "loss": 0.02383596748113632, "step": 62980 }, { "epoch": 17.8796480272495, "grad_norm": 10.183959007263184, "learning_rate": 8.212858359352825e-05, "loss": 0.045653107762336734, "step": 62990 }, { "epoch": 17.882486517172865, "grad_norm": 9.86632251739502, "learning_rate": 8.212574510360489e-05, "loss": 0.020216721296310424, "step": 63000 }, { "epoch": 17.882486517172865, "eval_accuracy": 0.9662364087238507, "eval_loss": 0.10448950529098511, "eval_runtime": 31.5176, "eval_samples_per_second": 498.991, "eval_steps_per_second": 7.805, "step": 63000 }, { "epoch": 17.885325007096224, "grad_norm": 5.937552452087402, "learning_rate": 8.212290661368153e-05, "loss": 0.020594467222690583, "step": 63010 }, { "epoch": 17.888163497019587, "grad_norm": 10.451207160949707, "learning_rate": 8.212006812375816e-05, "loss": 0.024574863910675048, "step": 63020 }, { "epoch": 17.891001986942946, "grad_norm": 1.551293969154358, "learning_rate": 8.21172296338348e-05, "loss": 0.024712464213371275, "step": 63030 }, { "epoch": 17.893840476866306, "grad_norm": 3.0373482704162598, "learning_rate": 8.211439114391144e-05, "loss": 0.03469589948654175, "step": 63040 }, { "epoch": 17.89667896678967, "grad_norm": 0.7828530073165894, "learning_rate": 8.211155265398807e-05, "loss": 0.017641761898994447, "step": 63050 }, { "epoch": 17.899517456713028, "grad_norm": 3.3779284954071045, "learning_rate": 8.210871416406473e-05, "loss": 0.01700296550989151, "step": 63060 }, { "epoch": 17.90235594663639, "grad_norm": 2.051417112350464, "learning_rate": 8.210587567414137e-05, "loss": 0.015218839049339294, "step": 63070 }, { "epoch": 17.90519443655975, "grad_norm": 9.505125999450684, "learning_rate": 8.2103037184218e-05, "loss": 0.04159405827522278, "step": 63080 }, { "epoch": 17.90803292648311, "grad_norm": 3.0860536098480225, "learning_rate": 8.210019869429464e-05, "loss": 0.020009826123714446, "step": 63090 }, { "epoch": 17.910871416406472, "grad_norm": 1.8598318099975586, "learning_rate": 8.209736020437128e-05, "loss": 0.0435604989528656, "step": 63100 }, { "epoch": 17.91370990632983, "grad_norm": 1.0046916007995605, "learning_rate": 8.209452171444792e-05, "loss": 0.032159548997879026, "step": 63110 }, { "epoch": 17.916548396253194, "grad_norm": 6.161089897155762, "learning_rate": 8.209168322452455e-05, "loss": 0.01534636914730072, "step": 63120 }, { "epoch": 17.919386886176554, "grad_norm": 0.5256252288818359, "learning_rate": 8.20888447346012e-05, "loss": 0.02654860317707062, "step": 63130 }, { "epoch": 17.922225376099917, "grad_norm": 5.088385105133057, "learning_rate": 8.208600624467784e-05, "loss": 0.018932031095027925, "step": 63140 }, { "epoch": 17.925063866023276, "grad_norm": 8.788341522216797, "learning_rate": 8.208316775475447e-05, "loss": 0.0427392840385437, "step": 63150 }, { "epoch": 17.927902355946635, "grad_norm": 11.259965896606445, "learning_rate": 8.208032926483111e-05, "loss": 0.019377776980400087, "step": 63160 }, { "epoch": 17.930740845869998, "grad_norm": 7.890923500061035, "learning_rate": 8.207749077490775e-05, "loss": 0.029963275790214537, "step": 63170 }, { "epoch": 17.933579335793358, "grad_norm": 1.4626001119613647, "learning_rate": 8.207465228498438e-05, "loss": 0.01323719322681427, "step": 63180 }, { "epoch": 17.93641782571672, "grad_norm": 6.506709098815918, "learning_rate": 8.207181379506104e-05, "loss": 0.017372867465019225, "step": 63190 }, { "epoch": 17.93925631564008, "grad_norm": 1.2769954204559326, "learning_rate": 8.206897530513768e-05, "loss": 0.012759195268154144, "step": 63200 }, { "epoch": 17.94209480556344, "grad_norm": 7.2475481033325195, "learning_rate": 8.20661368152143e-05, "loss": 0.015059612691402435, "step": 63210 }, { "epoch": 17.944933295486802, "grad_norm": 2.2666776180267334, "learning_rate": 8.206329832529095e-05, "loss": 0.04562419652938843, "step": 63220 }, { "epoch": 17.94777178541016, "grad_norm": 3.9278175830841064, "learning_rate": 8.206045983536759e-05, "loss": 0.041058939695358274, "step": 63230 }, { "epoch": 17.950610275333524, "grad_norm": 1.6852155923843384, "learning_rate": 8.205762134544423e-05, "loss": 0.021528658270835877, "step": 63240 }, { "epoch": 17.953448765256883, "grad_norm": 10.25830364227295, "learning_rate": 8.205478285552086e-05, "loss": 0.019893506169319154, "step": 63250 }, { "epoch": 17.956287255180243, "grad_norm": 4.412455081939697, "learning_rate": 8.205194436559751e-05, "loss": 0.015544518828392029, "step": 63260 }, { "epoch": 17.959125745103606, "grad_norm": 9.273332595825195, "learning_rate": 8.204910587567415e-05, "loss": 0.014378215372562408, "step": 63270 }, { "epoch": 17.961964235026965, "grad_norm": 0.8240272402763367, "learning_rate": 8.204626738575078e-05, "loss": 0.02420433610677719, "step": 63280 }, { "epoch": 17.964802724950328, "grad_norm": 4.694282531738281, "learning_rate": 8.204342889582742e-05, "loss": 0.017586009204387666, "step": 63290 }, { "epoch": 17.967641214873687, "grad_norm": 3.016824960708618, "learning_rate": 8.204059040590406e-05, "loss": 0.029995223879814147, "step": 63300 }, { "epoch": 17.970479704797047, "grad_norm": 11.36194133758545, "learning_rate": 8.203775191598069e-05, "loss": 0.04072842001914978, "step": 63310 }, { "epoch": 17.97331819472041, "grad_norm": 4.692788600921631, "learning_rate": 8.203491342605733e-05, "loss": 0.02683298885822296, "step": 63320 }, { "epoch": 17.97615668464377, "grad_norm": 1.0070034265518188, "learning_rate": 8.203207493613399e-05, "loss": 0.031165337562561034, "step": 63330 }, { "epoch": 17.97899517456713, "grad_norm": 0.6627662181854248, "learning_rate": 8.202923644621062e-05, "loss": 0.024088594317436218, "step": 63340 }, { "epoch": 17.98183366449049, "grad_norm": 9.662945747375488, "learning_rate": 8.202639795628726e-05, "loss": 0.03549680709838867, "step": 63350 }, { "epoch": 17.98467215441385, "grad_norm": 3.9854066371917725, "learning_rate": 8.20235594663639e-05, "loss": 0.014979708194732665, "step": 63360 }, { "epoch": 17.987510644337213, "grad_norm": 3.163055181503296, "learning_rate": 8.202072097644054e-05, "loss": 0.023752792179584502, "step": 63370 }, { "epoch": 17.990349134260573, "grad_norm": 5.344048976898193, "learning_rate": 8.201788248651717e-05, "loss": 0.012189032137393951, "step": 63380 }, { "epoch": 17.993187624183935, "grad_norm": 3.682119131088257, "learning_rate": 8.201504399659382e-05, "loss": 0.01623266488313675, "step": 63390 }, { "epoch": 17.996026114107295, "grad_norm": 8.711835861206055, "learning_rate": 8.201220550667046e-05, "loss": 0.0282275527715683, "step": 63400 }, { "epoch": 17.998864604030654, "grad_norm": 1.8471035957336426, "learning_rate": 8.200936701674709e-05, "loss": 0.01651301383972168, "step": 63410 }, { "epoch": 18.001703093954017, "grad_norm": 12.883648872375488, "learning_rate": 8.200652852682373e-05, "loss": 0.028872612118721008, "step": 63420 }, { "epoch": 18.004541583877376, "grad_norm": 11.164959907531738, "learning_rate": 8.200369003690038e-05, "loss": 0.015454404056072235, "step": 63430 }, { "epoch": 18.00738007380074, "grad_norm": 9.231427192687988, "learning_rate": 8.2000851546977e-05, "loss": 0.030927425622940062, "step": 63440 }, { "epoch": 18.0102185637241, "grad_norm": 7.0846076011657715, "learning_rate": 8.199801305705364e-05, "loss": 0.026499417424201966, "step": 63450 }, { "epoch": 18.013057053647458, "grad_norm": 2.6806135177612305, "learning_rate": 8.19951745671303e-05, "loss": 0.017173588275909424, "step": 63460 }, { "epoch": 18.01589554357082, "grad_norm": 9.149405479431152, "learning_rate": 8.199233607720693e-05, "loss": 0.0320206880569458, "step": 63470 }, { "epoch": 18.01873403349418, "grad_norm": 4.1694769859313965, "learning_rate": 8.198949758728357e-05, "loss": 0.01917231231927872, "step": 63480 }, { "epoch": 18.021572523417543, "grad_norm": 1.1832677125930786, "learning_rate": 8.198665909736021e-05, "loss": 0.024472203850746155, "step": 63490 }, { "epoch": 18.024411013340902, "grad_norm": 13.837799072265625, "learning_rate": 8.198382060743685e-05, "loss": 0.02485620677471161, "step": 63500 }, { "epoch": 18.024411013340902, "eval_accuracy": 0.9628664080880015, "eval_loss": 0.11966855823993683, "eval_runtime": 31.2298, "eval_samples_per_second": 503.59, "eval_steps_per_second": 7.877, "step": 63500 }, { "epoch": 18.02724950326426, "grad_norm": 0.33790045976638794, "learning_rate": 8.198098211751348e-05, "loss": 0.011388207972049712, "step": 63510 }, { "epoch": 18.030087993187625, "grad_norm": 6.030017375946045, "learning_rate": 8.197842747658246e-05, "loss": 0.03063642680644989, "step": 63520 }, { "epoch": 18.032926483110984, "grad_norm": 5.377939224243164, "learning_rate": 8.19755889866591e-05, "loss": 0.0260456919670105, "step": 63530 }, { "epoch": 18.035764973034347, "grad_norm": 0.9391288161277771, "learning_rate": 8.197275049673574e-05, "loss": 0.018294480443000794, "step": 63540 }, { "epoch": 18.038603462957706, "grad_norm": 10.59805965423584, "learning_rate": 8.196991200681238e-05, "loss": 0.026727724075317382, "step": 63550 }, { "epoch": 18.04144195288107, "grad_norm": 9.906193733215332, "learning_rate": 8.196707351688901e-05, "loss": 0.017150397598743438, "step": 63560 }, { "epoch": 18.04428044280443, "grad_norm": 1.0210798978805542, "learning_rate": 8.196423502696567e-05, "loss": 0.013381093740463257, "step": 63570 }, { "epoch": 18.047118932727788, "grad_norm": 8.017005920410156, "learning_rate": 8.196139653704231e-05, "loss": 0.019440135359764098, "step": 63580 }, { "epoch": 18.04995742265115, "grad_norm": 1.6258970499038696, "learning_rate": 8.195855804711894e-05, "loss": 0.01793234944343567, "step": 63590 }, { "epoch": 18.05279591257451, "grad_norm": 2.851628065109253, "learning_rate": 8.195571955719558e-05, "loss": 0.005626097694039345, "step": 63600 }, { "epoch": 18.055634402497873, "grad_norm": 8.741817474365234, "learning_rate": 8.195288106727222e-05, "loss": 0.018557682633399963, "step": 63610 }, { "epoch": 18.058472892421232, "grad_norm": 2.799314022064209, "learning_rate": 8.195004257734885e-05, "loss": 0.029077717661857606, "step": 63620 }, { "epoch": 18.06131138234459, "grad_norm": 0.7993476986885071, "learning_rate": 8.194720408742549e-05, "loss": 0.016263784468173982, "step": 63630 }, { "epoch": 18.064149872267954, "grad_norm": 7.793503284454346, "learning_rate": 8.194436559750214e-05, "loss": 0.0186955064535141, "step": 63640 }, { "epoch": 18.066988362191314, "grad_norm": 1.565314769744873, "learning_rate": 8.194152710757877e-05, "loss": 0.02875252068042755, "step": 63650 }, { "epoch": 18.069826852114677, "grad_norm": 0.37805306911468506, "learning_rate": 8.193868861765541e-05, "loss": 0.010606154799461365, "step": 63660 }, { "epoch": 18.072665342038036, "grad_norm": 3.1161158084869385, "learning_rate": 8.193585012773205e-05, "loss": 0.007773499935865402, "step": 63670 }, { "epoch": 18.075503831961395, "grad_norm": 2.411614179611206, "learning_rate": 8.19330116378087e-05, "loss": 0.02073639929294586, "step": 63680 }, { "epoch": 18.078342321884758, "grad_norm": 3.6368751525878906, "learning_rate": 8.193017314788532e-05, "loss": 0.006334617733955383, "step": 63690 }, { "epoch": 18.081180811808117, "grad_norm": 10.789783477783203, "learning_rate": 8.192733465796196e-05, "loss": 0.025414007902145385, "step": 63700 }, { "epoch": 18.08401930173148, "grad_norm": 2.523940324783325, "learning_rate": 8.192449616803862e-05, "loss": 0.01137855276465416, "step": 63710 }, { "epoch": 18.08685779165484, "grad_norm": 0.5612166523933411, "learning_rate": 8.192165767811525e-05, "loss": 0.012892305850982666, "step": 63720 }, { "epoch": 18.0896962815782, "grad_norm": 9.74189281463623, "learning_rate": 8.191881918819189e-05, "loss": 0.019851094484329222, "step": 63730 }, { "epoch": 18.092534771501562, "grad_norm": 1.339308500289917, "learning_rate": 8.191598069826853e-05, "loss": 0.012660935521125793, "step": 63740 }, { "epoch": 18.09537326142492, "grad_norm": 2.1699862480163574, "learning_rate": 8.191314220834516e-05, "loss": 0.014389137923717498, "step": 63750 }, { "epoch": 18.098211751348284, "grad_norm": 3.613295078277588, "learning_rate": 8.19103037184218e-05, "loss": 0.01403622180223465, "step": 63760 }, { "epoch": 18.101050241271643, "grad_norm": 0.926080584526062, "learning_rate": 8.190746522849845e-05, "loss": 0.011268796026706695, "step": 63770 }, { "epoch": 18.103888731195003, "grad_norm": 7.519463062286377, "learning_rate": 8.190462673857508e-05, "loss": 0.024682824313640595, "step": 63780 }, { "epoch": 18.106727221118366, "grad_norm": 6.180432319641113, "learning_rate": 8.190178824865172e-05, "loss": 0.022163230180740356, "step": 63790 }, { "epoch": 18.109565711041725, "grad_norm": 2.1202762126922607, "learning_rate": 8.189894975872836e-05, "loss": 0.013387003540992736, "step": 63800 }, { "epoch": 18.112404200965088, "grad_norm": 3.738833427429199, "learning_rate": 8.1896111268805e-05, "loss": 0.02549264132976532, "step": 63810 }, { "epoch": 18.115242690888447, "grad_norm": 5.993077278137207, "learning_rate": 8.189327277888163e-05, "loss": 0.024049943685531615, "step": 63820 }, { "epoch": 18.118081180811807, "grad_norm": 2.1975038051605225, "learning_rate": 8.189043428895827e-05, "loss": 0.014769497513771056, "step": 63830 }, { "epoch": 18.12091967073517, "grad_norm": 2.6579856872558594, "learning_rate": 8.188759579903492e-05, "loss": 0.014962542057037353, "step": 63840 }, { "epoch": 18.12375816065853, "grad_norm": 0.9042843580245972, "learning_rate": 8.188475730911156e-05, "loss": 0.02386534810066223, "step": 63850 }, { "epoch": 18.12659665058189, "grad_norm": 1.5423873662948608, "learning_rate": 8.18819188191882e-05, "loss": 0.0172099232673645, "step": 63860 }, { "epoch": 18.12943514050525, "grad_norm": 5.565915584564209, "learning_rate": 8.187908032926484e-05, "loss": 0.022585429251194, "step": 63870 }, { "epoch": 18.13227363042861, "grad_norm": 5.982133865356445, "learning_rate": 8.187624183934147e-05, "loss": 0.020662890374660493, "step": 63880 }, { "epoch": 18.135112120351973, "grad_norm": 2.7468416690826416, "learning_rate": 8.187340334941811e-05, "loss": 0.02313216179609299, "step": 63890 }, { "epoch": 18.137950610275333, "grad_norm": 4.347878932952881, "learning_rate": 8.187056485949475e-05, "loss": 0.03242829442024231, "step": 63900 }, { "epoch": 18.140789100198695, "grad_norm": 6.778963088989258, "learning_rate": 8.186772636957139e-05, "loss": 0.03919451236724854, "step": 63910 }, { "epoch": 18.143627590122055, "grad_norm": 13.00778865814209, "learning_rate": 8.186488787964803e-05, "loss": 0.0203878253698349, "step": 63920 }, { "epoch": 18.146466080045414, "grad_norm": 13.537951469421387, "learning_rate": 8.186204938972467e-05, "loss": 0.023000206053256988, "step": 63930 }, { "epoch": 18.149304569968777, "grad_norm": 6.750326156616211, "learning_rate": 8.18592108998013e-05, "loss": 0.018214805424213408, "step": 63940 }, { "epoch": 18.152143059892136, "grad_norm": 1.235145092010498, "learning_rate": 8.185637240987794e-05, "loss": 0.025742003321647645, "step": 63950 }, { "epoch": 18.1549815498155, "grad_norm": 1.247154712677002, "learning_rate": 8.185353391995458e-05, "loss": 0.010801458358764648, "step": 63960 }, { "epoch": 18.15782003973886, "grad_norm": 6.5609660148620605, "learning_rate": 8.185069543003123e-05, "loss": 0.033099958300590517, "step": 63970 }, { "epoch": 18.16065852966222, "grad_norm": 7.6868205070495605, "learning_rate": 8.184785694010787e-05, "loss": 0.015451616048812867, "step": 63980 }, { "epoch": 18.16349701958558, "grad_norm": 0.7117218971252441, "learning_rate": 8.184501845018451e-05, "loss": 0.015498486161231995, "step": 63990 }, { "epoch": 18.16633550950894, "grad_norm": 0.5023856163024902, "learning_rate": 8.184217996026115e-05, "loss": 0.019790877401828767, "step": 64000 }, { "epoch": 18.16633550950894, "eval_accuracy": 0.9602594264640427, "eval_loss": 0.12741507589817047, "eval_runtime": 31.7025, "eval_samples_per_second": 496.08, "eval_steps_per_second": 7.76, "step": 64000 }, { "epoch": 18.169173999432303, "grad_norm": 2.1337358951568604, "learning_rate": 8.183934147033778e-05, "loss": 0.01412832885980606, "step": 64010 }, { "epoch": 18.172012489355662, "grad_norm": 5.427219390869141, "learning_rate": 8.183650298041442e-05, "loss": 0.021152403950691224, "step": 64020 }, { "epoch": 18.174850979279025, "grad_norm": 1.6672537326812744, "learning_rate": 8.183366449049106e-05, "loss": 0.009095242619514466, "step": 64030 }, { "epoch": 18.177689469202384, "grad_norm": 0.5499343276023865, "learning_rate": 8.18308260005677e-05, "loss": 0.00586022213101387, "step": 64040 }, { "epoch": 18.180527959125744, "grad_norm": 1.3497560024261475, "learning_rate": 8.182798751064434e-05, "loss": 0.026206356287002564, "step": 64050 }, { "epoch": 18.183366449049107, "grad_norm": 8.766846656799316, "learning_rate": 8.182514902072098e-05, "loss": 0.022936439514160155, "step": 64060 }, { "epoch": 18.186204938972466, "grad_norm": 2.5361690521240234, "learning_rate": 8.182231053079761e-05, "loss": 0.012121029198169708, "step": 64070 }, { "epoch": 18.18904342889583, "grad_norm": 5.867077827453613, "learning_rate": 8.181947204087425e-05, "loss": 0.012035675346851349, "step": 64080 }, { "epoch": 18.19188191881919, "grad_norm": 5.768041610717773, "learning_rate": 8.18166335509509e-05, "loss": 0.015632781386375427, "step": 64090 }, { "epoch": 18.194720408742548, "grad_norm": 4.355159282684326, "learning_rate": 8.181379506102754e-05, "loss": 0.012428981065750123, "step": 64100 }, { "epoch": 18.19755889866591, "grad_norm": 1.3644630908966064, "learning_rate": 8.181095657110418e-05, "loss": 0.011507098376750947, "step": 64110 }, { "epoch": 18.20039738858927, "grad_norm": 1.4660484790802002, "learning_rate": 8.180811808118082e-05, "loss": 0.012022342532873154, "step": 64120 }, { "epoch": 18.203235878512633, "grad_norm": 5.857143402099609, "learning_rate": 8.180527959125746e-05, "loss": 0.028668275475502013, "step": 64130 }, { "epoch": 18.206074368435992, "grad_norm": 7.809715747833252, "learning_rate": 8.180244110133409e-05, "loss": 0.016119451820850374, "step": 64140 }, { "epoch": 18.20891285835935, "grad_norm": 11.552522659301758, "learning_rate": 8.179960261141073e-05, "loss": 0.022437827289104463, "step": 64150 }, { "epoch": 18.211751348282714, "grad_norm": 7.198179244995117, "learning_rate": 8.179676412148737e-05, "loss": 0.023779657483100892, "step": 64160 }, { "epoch": 18.214589838206074, "grad_norm": 0.9052576422691345, "learning_rate": 8.179392563156401e-05, "loss": 0.014769582450389862, "step": 64170 }, { "epoch": 18.217428328129436, "grad_norm": 5.4091339111328125, "learning_rate": 8.179108714164065e-05, "loss": 0.014927726984024049, "step": 64180 }, { "epoch": 18.220266818052796, "grad_norm": 13.912561416625977, "learning_rate": 8.17882486517173e-05, "loss": 0.017953263223171236, "step": 64190 }, { "epoch": 18.223105307976155, "grad_norm": 3.58343768119812, "learning_rate": 8.178541016179392e-05, "loss": 0.02186986953020096, "step": 64200 }, { "epoch": 18.225943797899518, "grad_norm": 6.5197038650512695, "learning_rate": 8.178257167187056e-05, "loss": 0.021269217133522034, "step": 64210 }, { "epoch": 18.228782287822877, "grad_norm": 0.6840488910675049, "learning_rate": 8.17797331819472e-05, "loss": 0.0069411441683769224, "step": 64220 }, { "epoch": 18.23162077774624, "grad_norm": 2.928089141845703, "learning_rate": 8.177689469202385e-05, "loss": 0.025230550765991212, "step": 64230 }, { "epoch": 18.2344592676696, "grad_norm": 10.981921195983887, "learning_rate": 8.177405620210049e-05, "loss": 0.024006845057010652, "step": 64240 }, { "epoch": 18.23729775759296, "grad_norm": 4.477245807647705, "learning_rate": 8.177121771217713e-05, "loss": 0.010527391731739045, "step": 64250 }, { "epoch": 18.240136247516322, "grad_norm": 1.1175553798675537, "learning_rate": 8.176837922225377e-05, "loss": 0.03738897442817688, "step": 64260 }, { "epoch": 18.24297473743968, "grad_norm": 2.9653480052948, "learning_rate": 8.17655407323304e-05, "loss": 0.014497557282447815, "step": 64270 }, { "epoch": 18.245813227363044, "grad_norm": 11.807784080505371, "learning_rate": 8.176270224240704e-05, "loss": 0.026821306347846983, "step": 64280 }, { "epoch": 18.248651717286403, "grad_norm": 8.044933319091797, "learning_rate": 8.175986375248368e-05, "loss": 0.016881439089775085, "step": 64290 }, { "epoch": 18.251490207209763, "grad_norm": 3.5477685928344727, "learning_rate": 8.175702526256032e-05, "loss": 0.008084752410650254, "step": 64300 }, { "epoch": 18.254328697133126, "grad_norm": 11.40770149230957, "learning_rate": 8.175418677263697e-05, "loss": 0.024506354331970216, "step": 64310 }, { "epoch": 18.257167187056485, "grad_norm": 6.7736897468566895, "learning_rate": 8.17513482827136e-05, "loss": 0.026788806915283202, "step": 64320 }, { "epoch": 18.260005676979848, "grad_norm": 5.127542495727539, "learning_rate": 8.174850979279023e-05, "loss": 0.013246673345565795, "step": 64330 }, { "epoch": 18.262844166903207, "grad_norm": 3.1091461181640625, "learning_rate": 8.174567130286688e-05, "loss": 0.027608779072761536, "step": 64340 }, { "epoch": 18.26568265682657, "grad_norm": 1.223820686340332, "learning_rate": 8.174283281294352e-05, "loss": 0.029618015885353087, "step": 64350 }, { "epoch": 18.26852114674993, "grad_norm": 1.600553035736084, "learning_rate": 8.173999432302016e-05, "loss": 0.02370784431695938, "step": 64360 }, { "epoch": 18.27135963667329, "grad_norm": 5.467772483825684, "learning_rate": 8.17371558330968e-05, "loss": 0.009500958770513535, "step": 64370 }, { "epoch": 18.27419812659665, "grad_norm": 11.989056587219238, "learning_rate": 8.173431734317344e-05, "loss": 0.02884567677974701, "step": 64380 }, { "epoch": 18.27703661652001, "grad_norm": 0.37605565786361694, "learning_rate": 8.173147885325008e-05, "loss": 0.016412146389484406, "step": 64390 }, { "epoch": 18.279875106443374, "grad_norm": 9.794072151184082, "learning_rate": 8.172864036332671e-05, "loss": 0.01564871668815613, "step": 64400 }, { "epoch": 18.282713596366733, "grad_norm": 7.608578681945801, "learning_rate": 8.172580187340335e-05, "loss": 0.05414324402809143, "step": 64410 }, { "epoch": 18.285552086290092, "grad_norm": 21.11903190612793, "learning_rate": 8.172296338347999e-05, "loss": 0.035235723853111266, "step": 64420 }, { "epoch": 18.288390576213455, "grad_norm": 9.885835647583008, "learning_rate": 8.172012489355662e-05, "loss": 0.03422803580760956, "step": 64430 }, { "epoch": 18.291229066136815, "grad_norm": 5.371190071105957, "learning_rate": 8.171728640363328e-05, "loss": 0.021089106798171997, "step": 64440 }, { "epoch": 18.294067556060178, "grad_norm": 0.27432265877723694, "learning_rate": 8.171444791370992e-05, "loss": 0.013120771944522857, "step": 64450 }, { "epoch": 18.296906045983537, "grad_norm": 2.571268081665039, "learning_rate": 8.171160942378655e-05, "loss": 0.02113320529460907, "step": 64460 }, { "epoch": 18.299744535906896, "grad_norm": 11.56346321105957, "learning_rate": 8.170877093386319e-05, "loss": 0.01973898708820343, "step": 64470 }, { "epoch": 18.30258302583026, "grad_norm": 3.8079206943511963, "learning_rate": 8.170593244393983e-05, "loss": 0.0114520862698555, "step": 64480 }, { "epoch": 18.30542151575362, "grad_norm": 10.881686210632324, "learning_rate": 8.170309395401647e-05, "loss": 0.028375059366226196, "step": 64490 }, { "epoch": 18.30826000567698, "grad_norm": 3.4686203002929688, "learning_rate": 8.170025546409311e-05, "loss": 0.03203393816947937, "step": 64500 }, { "epoch": 18.30826000567698, "eval_accuracy": 0.9640109366058371, "eval_loss": 0.11452274024486542, "eval_runtime": 31.9861, "eval_samples_per_second": 491.682, "eval_steps_per_second": 7.691, "step": 64500 }, { "epoch": 18.31109849560034, "grad_norm": 7.38455867767334, "learning_rate": 8.169741697416975e-05, "loss": 0.03204857707023621, "step": 64510 }, { "epoch": 18.3139369855237, "grad_norm": 3.299268960952759, "learning_rate": 8.16945784842464e-05, "loss": 0.020198746025562285, "step": 64520 }, { "epoch": 18.316775475447063, "grad_norm": 1.4064335823059082, "learning_rate": 8.169173999432302e-05, "loss": 0.023576533794403075, "step": 64530 }, { "epoch": 18.319613965370422, "grad_norm": 0.4955042600631714, "learning_rate": 8.168890150439966e-05, "loss": 0.0181998610496521, "step": 64540 }, { "epoch": 18.322452455293785, "grad_norm": 9.189884185791016, "learning_rate": 8.16860630144763e-05, "loss": 0.02198520749807358, "step": 64550 }, { "epoch": 18.325290945217144, "grad_norm": 2.911696672439575, "learning_rate": 8.168322452455293e-05, "loss": 0.008375386148691178, "step": 64560 }, { "epoch": 18.328129435140504, "grad_norm": 1.7371827363967896, "learning_rate": 8.168038603462959e-05, "loss": 0.020772762596607208, "step": 64570 }, { "epoch": 18.330967925063867, "grad_norm": 7.886782169342041, "learning_rate": 8.167754754470623e-05, "loss": 0.027090001106262206, "step": 64580 }, { "epoch": 18.333806414987226, "grad_norm": 2.4615044593811035, "learning_rate": 8.167470905478286e-05, "loss": 0.029046925902366637, "step": 64590 }, { "epoch": 18.33664490491059, "grad_norm": 4.161962985992432, "learning_rate": 8.16718705648595e-05, "loss": 0.016161419451236725, "step": 64600 }, { "epoch": 18.339483394833948, "grad_norm": 7.789259433746338, "learning_rate": 8.166903207493614e-05, "loss": 0.015282918512821198, "step": 64610 }, { "epoch": 18.342321884757308, "grad_norm": 3.1386585235595703, "learning_rate": 8.166619358501278e-05, "loss": 0.015604965388774872, "step": 64620 }, { "epoch": 18.34516037468067, "grad_norm": 0.9156437516212463, "learning_rate": 8.166335509508941e-05, "loss": 0.007730305194854736, "step": 64630 }, { "epoch": 18.34799886460403, "grad_norm": 0.6245124936103821, "learning_rate": 8.166051660516606e-05, "loss": 0.01292480230331421, "step": 64640 }, { "epoch": 18.350837354527393, "grad_norm": 7.491996765136719, "learning_rate": 8.165767811524269e-05, "loss": 0.018374304473400115, "step": 64650 }, { "epoch": 18.353675844450752, "grad_norm": 6.406768798828125, "learning_rate": 8.165483962531933e-05, "loss": 0.0387495219707489, "step": 64660 }, { "epoch": 18.35651433437411, "grad_norm": 0.1988161951303482, "learning_rate": 8.165200113539597e-05, "loss": 0.02027973085641861, "step": 64670 }, { "epoch": 18.359352824297474, "grad_norm": 4.635152339935303, "learning_rate": 8.164916264547261e-05, "loss": 0.013459804654121398, "step": 64680 }, { "epoch": 18.362191314220834, "grad_norm": 15.537267684936523, "learning_rate": 8.164632415554924e-05, "loss": 0.03918320536613464, "step": 64690 }, { "epoch": 18.365029804144196, "grad_norm": 2.991279125213623, "learning_rate": 8.16434856656259e-05, "loss": 0.02377077043056488, "step": 64700 }, { "epoch": 18.367868294067556, "grad_norm": 3.7001233100891113, "learning_rate": 8.164064717570254e-05, "loss": 0.01345890760421753, "step": 64710 }, { "epoch": 18.37070678399092, "grad_norm": 2.105233907699585, "learning_rate": 8.163780868577917e-05, "loss": 0.011597125977277755, "step": 64720 }, { "epoch": 18.373545273914278, "grad_norm": 2.74676513671875, "learning_rate": 8.163497019585581e-05, "loss": 0.024632632732391357, "step": 64730 }, { "epoch": 18.376383763837637, "grad_norm": 1.438855528831482, "learning_rate": 8.163213170593245e-05, "loss": 0.021962599456310274, "step": 64740 }, { "epoch": 18.379222253761, "grad_norm": 8.500609397888184, "learning_rate": 8.162929321600909e-05, "loss": 0.02144559323787689, "step": 64750 }, { "epoch": 18.38206074368436, "grad_norm": 2.9104247093200684, "learning_rate": 8.162645472608572e-05, "loss": 0.019889961183071136, "step": 64760 }, { "epoch": 18.384899233607722, "grad_norm": 0.9933539032936096, "learning_rate": 8.162361623616237e-05, "loss": 0.025526612997055054, "step": 64770 }, { "epoch": 18.38773772353108, "grad_norm": 5.836310386657715, "learning_rate": 8.1620777746239e-05, "loss": 0.012993815541267394, "step": 64780 }, { "epoch": 18.39057621345444, "grad_norm": 1.755178451538086, "learning_rate": 8.161793925631564e-05, "loss": 0.02501714825630188, "step": 64790 }, { "epoch": 18.393414703377804, "grad_norm": 14.083462715148926, "learning_rate": 8.161510076639228e-05, "loss": 0.01563563197851181, "step": 64800 }, { "epoch": 18.396253193301163, "grad_norm": 7.616891384124756, "learning_rate": 8.161226227646893e-05, "loss": 0.02151015102863312, "step": 64810 }, { "epoch": 18.399091683224526, "grad_norm": 3.3544535636901855, "learning_rate": 8.160942378654555e-05, "loss": 0.016945125162601472, "step": 64820 }, { "epoch": 18.401930173147885, "grad_norm": 6.8202595710754395, "learning_rate": 8.16065852966222e-05, "loss": 0.014925295114517212, "step": 64830 }, { "epoch": 18.404768663071245, "grad_norm": 2.5886168479919434, "learning_rate": 8.160374680669885e-05, "loss": 0.01073727160692215, "step": 64840 }, { "epoch": 18.407607152994608, "grad_norm": 8.829425811767578, "learning_rate": 8.160090831677548e-05, "loss": 0.02141517847776413, "step": 64850 }, { "epoch": 18.410445642917967, "grad_norm": 1.5394178628921509, "learning_rate": 8.159806982685212e-05, "loss": 0.028698107600212096, "step": 64860 }, { "epoch": 18.41328413284133, "grad_norm": 2.6815719604492188, "learning_rate": 8.159523133692876e-05, "loss": 0.017949622869491578, "step": 64870 }, { "epoch": 18.41612262276469, "grad_norm": 2.280712842941284, "learning_rate": 8.159239284700539e-05, "loss": 0.026585522294044494, "step": 64880 }, { "epoch": 18.41896111268805, "grad_norm": 6.153099060058594, "learning_rate": 8.158955435708203e-05, "loss": 0.03667968809604645, "step": 64890 }, { "epoch": 18.42179960261141, "grad_norm": 3.3398218154907227, "learning_rate": 8.158671586715868e-05, "loss": 0.008441707491874695, "step": 64900 }, { "epoch": 18.42463809253477, "grad_norm": 1.0757747888565063, "learning_rate": 8.158387737723531e-05, "loss": 0.02430875599384308, "step": 64910 }, { "epoch": 18.427476582458134, "grad_norm": 11.187719345092773, "learning_rate": 8.158103888731195e-05, "loss": 0.013773807883262634, "step": 64920 }, { "epoch": 18.430315072381493, "grad_norm": 6.259658336639404, "learning_rate": 8.15782003973886e-05, "loss": 0.02490702420473099, "step": 64930 }, { "epoch": 18.433153562304852, "grad_norm": 17.728290557861328, "learning_rate": 8.157536190746524e-05, "loss": 0.022846725583076478, "step": 64940 }, { "epoch": 18.435992052228215, "grad_norm": 2.0085222721099854, "learning_rate": 8.157252341754186e-05, "loss": 0.011582992970943451, "step": 64950 }, { "epoch": 18.438830542151575, "grad_norm": 1.6925238370895386, "learning_rate": 8.15696849276185e-05, "loss": 0.015410149097442627, "step": 64960 }, { "epoch": 18.441669032074937, "grad_norm": 4.890226364135742, "learning_rate": 8.156684643769516e-05, "loss": 0.03174746334552765, "step": 64970 }, { "epoch": 18.444507521998297, "grad_norm": 7.692101955413818, "learning_rate": 8.156400794777179e-05, "loss": 0.028371542692184448, "step": 64980 }, { "epoch": 18.447346011921656, "grad_norm": 2.4370031356811523, "learning_rate": 8.156116945784843e-05, "loss": 0.02862280011177063, "step": 64990 }, { "epoch": 18.45018450184502, "grad_norm": 4.500633716583252, "learning_rate": 8.155833096792507e-05, "loss": 0.03583382368087769, "step": 65000 }, { "epoch": 18.45018450184502, "eval_accuracy": 0.9598779169580975, "eval_loss": 0.12365498393774033, "eval_runtime": 32.0445, "eval_samples_per_second": 490.786, "eval_steps_per_second": 7.677, "step": 65000 }, { "epoch": 18.45302299176838, "grad_norm": 0.15921132266521454, "learning_rate": 8.15554924780017e-05, "loss": 0.01450244039297104, "step": 65010 }, { "epoch": 18.45586148169174, "grad_norm": 2.5077812671661377, "learning_rate": 8.155265398807834e-05, "loss": 0.025313353538513182, "step": 65020 }, { "epoch": 18.4586999716151, "grad_norm": 3.5951077938079834, "learning_rate": 8.154981549815498e-05, "loss": 0.019084098935127258, "step": 65030 }, { "epoch": 18.46153846153846, "grad_norm": 2.805438280105591, "learning_rate": 8.154697700823162e-05, "loss": 0.028533899784088136, "step": 65040 }, { "epoch": 18.464376951461823, "grad_norm": 8.419462203979492, "learning_rate": 8.154413851830826e-05, "loss": 0.018583706021308898, "step": 65050 }, { "epoch": 18.467215441385182, "grad_norm": 4.189601421356201, "learning_rate": 8.15413000283849e-05, "loss": 0.016246131062507628, "step": 65060 }, { "epoch": 18.470053931308545, "grad_norm": 0.6846535801887512, "learning_rate": 8.153846153846155e-05, "loss": 0.014149273931980132, "step": 65070 }, { "epoch": 18.472892421231904, "grad_norm": 1.6015697717666626, "learning_rate": 8.153562304853817e-05, "loss": 0.013975532352924347, "step": 65080 }, { "epoch": 18.475730911155264, "grad_norm": 1.1435742378234863, "learning_rate": 8.153278455861482e-05, "loss": 0.01093505248427391, "step": 65090 }, { "epoch": 18.478569401078627, "grad_norm": 1.3598769903182983, "learning_rate": 8.152994606869147e-05, "loss": 0.015113480389118195, "step": 65100 }, { "epoch": 18.481407891001986, "grad_norm": 5.738377571105957, "learning_rate": 8.15271075787681e-05, "loss": 0.024793604016304018, "step": 65110 }, { "epoch": 18.48424638092535, "grad_norm": 2.031440019607544, "learning_rate": 8.152426908884474e-05, "loss": 0.018024921417236328, "step": 65120 }, { "epoch": 18.487084870848708, "grad_norm": 4.7869462966918945, "learning_rate": 8.152143059892138e-05, "loss": 0.01986464262008667, "step": 65130 }, { "epoch": 18.48992336077207, "grad_norm": 3.7340545654296875, "learning_rate": 8.151859210899801e-05, "loss": 0.015004071593284606, "step": 65140 }, { "epoch": 18.49276185069543, "grad_norm": 2.174328565597534, "learning_rate": 8.151575361907465e-05, "loss": 0.0166971817612648, "step": 65150 }, { "epoch": 18.49560034061879, "grad_norm": 11.978513717651367, "learning_rate": 8.151291512915129e-05, "loss": 0.030484890937805174, "step": 65160 }, { "epoch": 18.498438830542153, "grad_norm": 6.7397990226745605, "learning_rate": 8.151007663922793e-05, "loss": 0.0289802223443985, "step": 65170 }, { "epoch": 18.501277320465512, "grad_norm": 2.4745960235595703, "learning_rate": 8.150723814930457e-05, "loss": 0.014822545647621154, "step": 65180 }, { "epoch": 18.504115810388875, "grad_norm": 10.138374328613281, "learning_rate": 8.150439965938122e-05, "loss": 0.023067659139633177, "step": 65190 }, { "epoch": 18.506954300312234, "grad_norm": 9.499871253967285, "learning_rate": 8.150156116945786e-05, "loss": 0.015490874648094177, "step": 65200 }, { "epoch": 18.509792790235593, "grad_norm": 16.6196231842041, "learning_rate": 8.149872267953449e-05, "loss": 0.03123834729194641, "step": 65210 }, { "epoch": 18.512631280158956, "grad_norm": 6.706479072570801, "learning_rate": 8.149588418961113e-05, "loss": 0.009177811443805695, "step": 65220 }, { "epoch": 18.515469770082316, "grad_norm": 11.436570167541504, "learning_rate": 8.149304569968777e-05, "loss": 0.025717338919639586, "step": 65230 }, { "epoch": 18.51830826000568, "grad_norm": 1.1007322072982788, "learning_rate": 8.149020720976441e-05, "loss": 0.008171102404594422, "step": 65240 }, { "epoch": 18.521146749929038, "grad_norm": 0.3315299451351166, "learning_rate": 8.148736871984105e-05, "loss": 0.006200124323368072, "step": 65250 }, { "epoch": 18.523985239852397, "grad_norm": 11.3115873336792, "learning_rate": 8.148453022991769e-05, "loss": 0.01734861433506012, "step": 65260 }, { "epoch": 18.52682372977576, "grad_norm": 19.757917404174805, "learning_rate": 8.148169173999432e-05, "loss": 0.03324038684368134, "step": 65270 }, { "epoch": 18.52966221969912, "grad_norm": 2.587345838546753, "learning_rate": 8.147885325007096e-05, "loss": 0.019647155702114106, "step": 65280 }, { "epoch": 18.532500709622482, "grad_norm": 4.350657939910889, "learning_rate": 8.14760147601476e-05, "loss": 0.018183624744415282, "step": 65290 }, { "epoch": 18.53533919954584, "grad_norm": 1.657505989074707, "learning_rate": 8.147317627022424e-05, "loss": 0.01055358499288559, "step": 65300 }, { "epoch": 18.5381776894692, "grad_norm": 1.2367732524871826, "learning_rate": 8.147033778030089e-05, "loss": 0.013584132492542266, "step": 65310 }, { "epoch": 18.541016179392564, "grad_norm": 0.5722014904022217, "learning_rate": 8.146749929037753e-05, "loss": 0.013419798016548157, "step": 65320 }, { "epoch": 18.543854669315923, "grad_norm": 3.7260754108428955, "learning_rate": 8.146466080045417e-05, "loss": 0.013349471986293793, "step": 65330 }, { "epoch": 18.546693159239286, "grad_norm": 0.7379252910614014, "learning_rate": 8.14618223105308e-05, "loss": 0.013547423481941222, "step": 65340 }, { "epoch": 18.549531649162645, "grad_norm": 7.842015743255615, "learning_rate": 8.145898382060744e-05, "loss": 0.026204794645309448, "step": 65350 }, { "epoch": 18.552370139086005, "grad_norm": 1.1814987659454346, "learning_rate": 8.145614533068408e-05, "loss": 0.019396062195301055, "step": 65360 }, { "epoch": 18.555208629009368, "grad_norm": 2.6612191200256348, "learning_rate": 8.145330684076072e-05, "loss": 0.011805374920368195, "step": 65370 }, { "epoch": 18.558047118932727, "grad_norm": 8.61876392364502, "learning_rate": 8.145046835083736e-05, "loss": 0.017999276518821716, "step": 65380 }, { "epoch": 18.56088560885609, "grad_norm": 13.031587600708008, "learning_rate": 8.1447629860914e-05, "loss": 0.028682655096054076, "step": 65390 }, { "epoch": 18.56372409877945, "grad_norm": 3.239630699157715, "learning_rate": 8.144479137099063e-05, "loss": 0.02604726850986481, "step": 65400 }, { "epoch": 18.56656258870281, "grad_norm": 5.915991306304932, "learning_rate": 8.144195288106727e-05, "loss": 0.018662264943122862, "step": 65410 }, { "epoch": 18.56940107862617, "grad_norm": 3.464056968688965, "learning_rate": 8.143911439114391e-05, "loss": 0.013923452794551849, "step": 65420 }, { "epoch": 18.57223956854953, "grad_norm": 1.9145278930664062, "learning_rate": 8.143627590122056e-05, "loss": 0.01487433910369873, "step": 65430 }, { "epoch": 18.575078058472894, "grad_norm": 13.171759605407715, "learning_rate": 8.14334374112972e-05, "loss": 0.011693964898586272, "step": 65440 }, { "epoch": 18.577916548396253, "grad_norm": 10.689671516418457, "learning_rate": 8.143059892137384e-05, "loss": 0.020308999717235564, "step": 65450 }, { "epoch": 18.580755038319612, "grad_norm": 5.099935531616211, "learning_rate": 8.142776043145048e-05, "loss": 0.018070951104164124, "step": 65460 }, { "epoch": 18.583593528242975, "grad_norm": 1.1860604286193848, "learning_rate": 8.142492194152711e-05, "loss": 0.007710698246955872, "step": 65470 }, { "epoch": 18.586432018166335, "grad_norm": 5.3310723304748535, "learning_rate": 8.142208345160375e-05, "loss": 0.01653881072998047, "step": 65480 }, { "epoch": 18.589270508089697, "grad_norm": 3.0680739879608154, "learning_rate": 8.141924496168039e-05, "loss": 0.020436204969882965, "step": 65490 }, { "epoch": 18.592108998013057, "grad_norm": 0.19042538106441498, "learning_rate": 8.141640647175703e-05, "loss": 0.020362281799316408, "step": 65500 }, { "epoch": 18.592108998013057, "eval_accuracy": 0.9671265975710561, "eval_loss": 0.10823927074670792, "eval_runtime": 32.0506, "eval_samples_per_second": 490.692, "eval_steps_per_second": 7.675, "step": 65500 }, { "epoch": 18.594947487936416, "grad_norm": 0.9644738435745239, "learning_rate": 8.141356798183367e-05, "loss": 0.019485700130462646, "step": 65510 }, { "epoch": 18.59778597785978, "grad_norm": 0.5033319592475891, "learning_rate": 8.141072949191031e-05, "loss": 0.013396617770195008, "step": 65520 }, { "epoch": 18.60062446778314, "grad_norm": 3.123328924179077, "learning_rate": 8.140789100198694e-05, "loss": 0.011749409139156342, "step": 65530 }, { "epoch": 18.6034629577065, "grad_norm": 15.192595481872559, "learning_rate": 8.140505251206358e-05, "loss": 0.031108838319778443, "step": 65540 }, { "epoch": 18.60630144762986, "grad_norm": 0.7971093654632568, "learning_rate": 8.140221402214022e-05, "loss": 0.008194833993911743, "step": 65550 }, { "epoch": 18.609139937553223, "grad_norm": 2.494683027267456, "learning_rate": 8.139937553221687e-05, "loss": 0.017994403839111328, "step": 65560 }, { "epoch": 18.611978427476583, "grad_norm": 3.4145376682281494, "learning_rate": 8.139653704229351e-05, "loss": 0.012980231642723083, "step": 65570 }, { "epoch": 18.614816917399942, "grad_norm": 13.31275749206543, "learning_rate": 8.139369855237015e-05, "loss": 0.011567264050245284, "step": 65580 }, { "epoch": 18.617655407323305, "grad_norm": 3.671393632888794, "learning_rate": 8.139086006244678e-05, "loss": 0.03883555829524994, "step": 65590 }, { "epoch": 18.620493897246664, "grad_norm": 21.00157928466797, "learning_rate": 8.138802157252342e-05, "loss": 0.034800782799720764, "step": 65600 }, { "epoch": 18.623332387170027, "grad_norm": 13.980853080749512, "learning_rate": 8.138518308260006e-05, "loss": 0.02740713059902191, "step": 65610 }, { "epoch": 18.626170877093386, "grad_norm": 2.1741952896118164, "learning_rate": 8.13823445926767e-05, "loss": 0.027539867162704467, "step": 65620 }, { "epoch": 18.629009367016746, "grad_norm": 1.7575544118881226, "learning_rate": 8.137950610275334e-05, "loss": 0.019827230274677275, "step": 65630 }, { "epoch": 18.63184785694011, "grad_norm": 12.440897941589355, "learning_rate": 8.137666761282998e-05, "loss": 0.033045870065689084, "step": 65640 }, { "epoch": 18.634686346863468, "grad_norm": 8.510391235351562, "learning_rate": 8.137382912290662e-05, "loss": 0.021538083255290986, "step": 65650 }, { "epoch": 18.63752483678683, "grad_norm": 2.480590343475342, "learning_rate": 8.137099063298325e-05, "loss": 0.012605838477611542, "step": 65660 }, { "epoch": 18.64036332671019, "grad_norm": 2.146552801132202, "learning_rate": 8.13681521430599e-05, "loss": 0.010300079733133316, "step": 65670 }, { "epoch": 18.64320181663355, "grad_norm": 12.616165161132812, "learning_rate": 8.136531365313654e-05, "loss": 0.017979632318019866, "step": 65680 }, { "epoch": 18.646040306556912, "grad_norm": 1.010509729385376, "learning_rate": 8.136247516321316e-05, "loss": 0.025213581323623658, "step": 65690 }, { "epoch": 18.648878796480272, "grad_norm": 0.7157605290412903, "learning_rate": 8.135963667328982e-05, "loss": 0.015295769274234771, "step": 65700 }, { "epoch": 18.651717286403635, "grad_norm": 3.0977420806884766, "learning_rate": 8.135679818336646e-05, "loss": 0.009039697796106338, "step": 65710 }, { "epoch": 18.654555776326994, "grad_norm": 1.2158349752426147, "learning_rate": 8.135395969344309e-05, "loss": 0.011754780262708663, "step": 65720 }, { "epoch": 18.657394266250353, "grad_norm": 4.4673638343811035, "learning_rate": 8.135112120351973e-05, "loss": 0.0157989576458931, "step": 65730 }, { "epoch": 18.660232756173716, "grad_norm": 0.3597680628299713, "learning_rate": 8.134828271359637e-05, "loss": 0.020639072358608245, "step": 65740 }, { "epoch": 18.663071246097076, "grad_norm": 3.230433702468872, "learning_rate": 8.134544422367301e-05, "loss": 0.009913558512926102, "step": 65750 }, { "epoch": 18.66590973602044, "grad_norm": 2.2301106452941895, "learning_rate": 8.134260573374964e-05, "loss": 0.019906213879585265, "step": 65760 }, { "epoch": 18.668748225943798, "grad_norm": 11.6491117477417, "learning_rate": 8.13397672438263e-05, "loss": 0.028836414217948914, "step": 65770 }, { "epoch": 18.671586715867157, "grad_norm": 3.1171281337738037, "learning_rate": 8.133692875390294e-05, "loss": 0.009900050610303879, "step": 65780 }, { "epoch": 18.67442520579052, "grad_norm": 0.781021773815155, "learning_rate": 8.133409026397956e-05, "loss": 0.03698730170726776, "step": 65790 }, { "epoch": 18.67726369571388, "grad_norm": 10.685922622680664, "learning_rate": 8.13312517740562e-05, "loss": 0.030243587493896485, "step": 65800 }, { "epoch": 18.680102185637242, "grad_norm": 5.416703224182129, "learning_rate": 8.132841328413285e-05, "loss": 0.01442263126373291, "step": 65810 }, { "epoch": 18.6829406755606, "grad_norm": 7.635152339935303, "learning_rate": 8.132557479420947e-05, "loss": 0.025737428665161134, "step": 65820 }, { "epoch": 18.68577916548396, "grad_norm": 0.8116246461868286, "learning_rate": 8.132273630428613e-05, "loss": 0.01686970591545105, "step": 65830 }, { "epoch": 18.688617655407324, "grad_norm": 0.4055250287055969, "learning_rate": 8.131989781436277e-05, "loss": 0.014805729687213897, "step": 65840 }, { "epoch": 18.691456145330683, "grad_norm": 4.816773891448975, "learning_rate": 8.13170593244394e-05, "loss": 0.011856859922409058, "step": 65850 }, { "epoch": 18.694294635254046, "grad_norm": 3.883345603942871, "learning_rate": 8.131422083451604e-05, "loss": 0.015941402316093443, "step": 65860 }, { "epoch": 18.697133125177405, "grad_norm": 1.9226481914520264, "learning_rate": 8.131138234459268e-05, "loss": 0.011912903189659119, "step": 65870 }, { "epoch": 18.699971615100765, "grad_norm": 2.6523337364196777, "learning_rate": 8.130854385466932e-05, "loss": 0.009482324868440629, "step": 65880 }, { "epoch": 18.702810105024128, "grad_norm": 3.491548776626587, "learning_rate": 8.130570536474595e-05, "loss": 0.02799041271209717, "step": 65890 }, { "epoch": 18.705648594947487, "grad_norm": 1.8625236749649048, "learning_rate": 8.13028668748226e-05, "loss": 0.007141588628292084, "step": 65900 }, { "epoch": 18.70848708487085, "grad_norm": 10.894309997558594, "learning_rate": 8.130002838489925e-05, "loss": 0.020209994912147523, "step": 65910 }, { "epoch": 18.71132557479421, "grad_norm": 5.925015926361084, "learning_rate": 8.129718989497587e-05, "loss": 0.010579054057598115, "step": 65920 }, { "epoch": 18.714164064717572, "grad_norm": 6.101390361785889, "learning_rate": 8.129435140505252e-05, "loss": 0.010245073586702347, "step": 65930 }, { "epoch": 18.71700255464093, "grad_norm": 4.651159763336182, "learning_rate": 8.129151291512916e-05, "loss": 0.017720875144004822, "step": 65940 }, { "epoch": 18.71984104456429, "grad_norm": 4.947421073913574, "learning_rate": 8.128867442520578e-05, "loss": 0.018265710771083833, "step": 65950 }, { "epoch": 18.722679534487654, "grad_norm": 7.7095746994018555, "learning_rate": 8.128583593528243e-05, "loss": 0.01981608271598816, "step": 65960 }, { "epoch": 18.725518024411013, "grad_norm": 1.5604537725448608, "learning_rate": 8.128299744535908e-05, "loss": 0.015255461633205413, "step": 65970 }, { "epoch": 18.728356514334376, "grad_norm": 4.790753364562988, "learning_rate": 8.128015895543571e-05, "loss": 0.024638724327087403, "step": 65980 }, { "epoch": 18.731195004257735, "grad_norm": 1.3473572731018066, "learning_rate": 8.127732046551235e-05, "loss": 0.014015762507915497, "step": 65990 }, { "epoch": 18.734033494181094, "grad_norm": 1.5037555694580078, "learning_rate": 8.127476582458132e-05, "loss": 0.05017166137695313, "step": 66000 }, { "epoch": 18.734033494181094, "eval_accuracy": 0.964837540535385, "eval_loss": 0.11509203910827637, "eval_runtime": 31.6327, "eval_samples_per_second": 497.176, "eval_steps_per_second": 7.777, "step": 66000 }, { "epoch": 18.736871984104457, "grad_norm": 17.559885025024414, "learning_rate": 8.127192733465797e-05, "loss": 0.036193746328353885, "step": 66010 }, { "epoch": 18.739710474027817, "grad_norm": 18.118698120117188, "learning_rate": 8.126908884473461e-05, "loss": 0.023644256591796874, "step": 66020 }, { "epoch": 18.74254896395118, "grad_norm": 2.231119155883789, "learning_rate": 8.126625035481124e-05, "loss": 0.045569732785224915, "step": 66030 }, { "epoch": 18.74538745387454, "grad_norm": 1.703681468963623, "learning_rate": 8.126341186488788e-05, "loss": 0.012719261646270751, "step": 66040 }, { "epoch": 18.748225943797898, "grad_norm": 5.024110317230225, "learning_rate": 8.126057337496452e-05, "loss": 0.012549507617950439, "step": 66050 }, { "epoch": 18.75106443372126, "grad_norm": 3.1849050521850586, "learning_rate": 8.125773488504116e-05, "loss": 0.01445792019367218, "step": 66060 }, { "epoch": 18.75390292364462, "grad_norm": 9.782297134399414, "learning_rate": 8.125489639511779e-05, "loss": 0.01607317179441452, "step": 66070 }, { "epoch": 18.756741413567983, "grad_norm": 2.452918291091919, "learning_rate": 8.125205790519445e-05, "loss": 0.03378154039382934, "step": 66080 }, { "epoch": 18.759579903491343, "grad_norm": 2.312612295150757, "learning_rate": 8.124921941527109e-05, "loss": 0.012735034525394439, "step": 66090 }, { "epoch": 18.762418393414702, "grad_norm": 7.722110271453857, "learning_rate": 8.124638092534772e-05, "loss": 0.021268382668495178, "step": 66100 }, { "epoch": 18.765256883338065, "grad_norm": 11.429648399353027, "learning_rate": 8.124354243542436e-05, "loss": 0.020001563429832458, "step": 66110 }, { "epoch": 18.768095373261424, "grad_norm": 2.895681858062744, "learning_rate": 8.1240703945501e-05, "loss": 0.02193840593099594, "step": 66120 }, { "epoch": 18.770933863184787, "grad_norm": 7.935652256011963, "learning_rate": 8.123786545557763e-05, "loss": 0.04819098711013794, "step": 66130 }, { "epoch": 18.773772353108146, "grad_norm": 11.845431327819824, "learning_rate": 8.123502696565427e-05, "loss": 0.021462681889533996, "step": 66140 }, { "epoch": 18.776610843031506, "grad_norm": 2.0651376247406006, "learning_rate": 8.123218847573092e-05, "loss": 0.018990828096866606, "step": 66150 }, { "epoch": 18.77944933295487, "grad_norm": 2.8424670696258545, "learning_rate": 8.122934998580755e-05, "loss": 0.02519553303718567, "step": 66160 }, { "epoch": 18.782287822878228, "grad_norm": 0.7492388486862183, "learning_rate": 8.122651149588419e-05, "loss": 0.006743628531694412, "step": 66170 }, { "epoch": 18.78512631280159, "grad_norm": 8.703424453735352, "learning_rate": 8.122367300596083e-05, "loss": 0.014591945707798004, "step": 66180 }, { "epoch": 18.78796480272495, "grad_norm": 1.5100408792495728, "learning_rate": 8.122083451603748e-05, "loss": 0.010236608982086181, "step": 66190 }, { "epoch": 18.79080329264831, "grad_norm": 7.783727645874023, "learning_rate": 8.12179960261141e-05, "loss": 0.019305528700351716, "step": 66200 }, { "epoch": 18.793641782571672, "grad_norm": 6.598199367523193, "learning_rate": 8.121515753619076e-05, "loss": 0.01837824434041977, "step": 66210 }, { "epoch": 18.79648027249503, "grad_norm": 5.368304252624512, "learning_rate": 8.12123190462674e-05, "loss": 0.027355998754501343, "step": 66220 }, { "epoch": 18.799318762418395, "grad_norm": 3.307619333267212, "learning_rate": 8.120948055634403e-05, "loss": 0.0271921843290329, "step": 66230 }, { "epoch": 18.802157252341754, "grad_norm": 2.775135040283203, "learning_rate": 8.120664206642067e-05, "loss": 0.025950881838798522, "step": 66240 }, { "epoch": 18.804995742265113, "grad_norm": 5.855447769165039, "learning_rate": 8.120380357649731e-05, "loss": 0.036981263756752016, "step": 66250 }, { "epoch": 18.807834232188476, "grad_norm": 7.363571643829346, "learning_rate": 8.120096508657394e-05, "loss": 0.04288973212242127, "step": 66260 }, { "epoch": 18.810672722111835, "grad_norm": 4.994757175445557, "learning_rate": 8.119812659665058e-05, "loss": 0.01854916363954544, "step": 66270 }, { "epoch": 18.8135112120352, "grad_norm": 6.26474142074585, "learning_rate": 8.119528810672723e-05, "loss": 0.012716639041900634, "step": 66280 }, { "epoch": 18.816349701958558, "grad_norm": 2.603522539138794, "learning_rate": 8.119244961680386e-05, "loss": 0.02817658185958862, "step": 66290 }, { "epoch": 18.81918819188192, "grad_norm": 7.4962639808654785, "learning_rate": 8.11896111268805e-05, "loss": 0.03160146474838257, "step": 66300 }, { "epoch": 18.82202668180528, "grad_norm": 4.705212116241455, "learning_rate": 8.118677263695714e-05, "loss": 0.023783554136753083, "step": 66310 }, { "epoch": 18.82486517172864, "grad_norm": 3.021087169647217, "learning_rate": 8.118393414703379e-05, "loss": 0.013346585631370544, "step": 66320 }, { "epoch": 18.827703661652002, "grad_norm": 3.620481252670288, "learning_rate": 8.118109565711041e-05, "loss": 0.030485108494758606, "step": 66330 }, { "epoch": 18.83054215157536, "grad_norm": 8.69963264465332, "learning_rate": 8.117825716718706e-05, "loss": 0.02241632789373398, "step": 66340 }, { "epoch": 18.833380641498724, "grad_norm": 2.1544408798217773, "learning_rate": 8.117541867726371e-05, "loss": 0.037103688716888426, "step": 66350 }, { "epoch": 18.836219131422084, "grad_norm": 0.8527352809906006, "learning_rate": 8.117258018734034e-05, "loss": 0.02223041355609894, "step": 66360 }, { "epoch": 18.839057621345443, "grad_norm": 0.90654456615448, "learning_rate": 8.116974169741698e-05, "loss": 0.014562907814979553, "step": 66370 }, { "epoch": 18.841896111268806, "grad_norm": 3.5208656787872314, "learning_rate": 8.116690320749362e-05, "loss": 0.00752825140953064, "step": 66380 }, { "epoch": 18.844734601192165, "grad_norm": 1.0482949018478394, "learning_rate": 8.116406471757025e-05, "loss": 0.010385850816965103, "step": 66390 }, { "epoch": 18.847573091115528, "grad_norm": 4.282619476318359, "learning_rate": 8.116122622764689e-05, "loss": 0.019163095951080324, "step": 66400 }, { "epoch": 18.850411581038887, "grad_norm": 11.027481079101562, "learning_rate": 8.115838773772355e-05, "loss": 0.03325257301330566, "step": 66410 }, { "epoch": 18.853250070962247, "grad_norm": 1.6965198516845703, "learning_rate": 8.115554924780017e-05, "loss": 0.016721759736537934, "step": 66420 }, { "epoch": 18.85608856088561, "grad_norm": 1.1206939220428467, "learning_rate": 8.115271075787681e-05, "loss": 0.0070926487445831295, "step": 66430 }, { "epoch": 18.85892705080897, "grad_norm": 0.26779115200042725, "learning_rate": 8.114987226795346e-05, "loss": 0.025253993272781373, "step": 66440 }, { "epoch": 18.861765540732332, "grad_norm": 0.7530791163444519, "learning_rate": 8.11470337780301e-05, "loss": 0.020818400382995605, "step": 66450 }, { "epoch": 18.86460403065569, "grad_norm": 3.3261451721191406, "learning_rate": 8.114419528810672e-05, "loss": 0.022585004568099976, "step": 66460 }, { "epoch": 18.86744252057905, "grad_norm": 6.81146240234375, "learning_rate": 8.114135679818337e-05, "loss": 0.016673743724822998, "step": 66470 }, { "epoch": 18.870281010502413, "grad_norm": 1.1730128526687622, "learning_rate": 8.113851830826001e-05, "loss": 0.015111240744590759, "step": 66480 }, { "epoch": 18.873119500425773, "grad_norm": 6.115340232849121, "learning_rate": 8.113567981833665e-05, "loss": 0.021538375318050383, "step": 66490 }, { "epoch": 18.875957990349136, "grad_norm": 2.6992621421813965, "learning_rate": 8.113284132841329e-05, "loss": 0.020711855590343477, "step": 66500 }, { "epoch": 18.875957990349136, "eval_accuracy": 0.9640109366058371, "eval_loss": 0.12057431787252426, "eval_runtime": 31.6811, "eval_samples_per_second": 496.415, "eval_steps_per_second": 7.765, "step": 66500 }, { "epoch": 18.878796480272495, "grad_norm": 9.589396476745605, "learning_rate": 8.113000283848993e-05, "loss": 0.047894620895385744, "step": 66510 }, { "epoch": 18.881634970195854, "grad_norm": 1.3040424585342407, "learning_rate": 8.112716434856656e-05, "loss": 0.026215952634811402, "step": 66520 }, { "epoch": 18.884473460119217, "grad_norm": 4.642782688140869, "learning_rate": 8.11243258586432e-05, "loss": 0.0335545539855957, "step": 66530 }, { "epoch": 18.887311950042577, "grad_norm": 15.508852005004883, "learning_rate": 8.112148736871984e-05, "loss": 0.0157991424202919, "step": 66540 }, { "epoch": 18.89015043996594, "grad_norm": 5.99678373336792, "learning_rate": 8.111864887879648e-05, "loss": 0.03736512362957001, "step": 66550 }, { "epoch": 18.8929889298893, "grad_norm": 4.121882438659668, "learning_rate": 8.111581038887313e-05, "loss": 0.02517453134059906, "step": 66560 }, { "epoch": 18.895827419812658, "grad_norm": 3.6094770431518555, "learning_rate": 8.111297189894977e-05, "loss": 0.027205502986907958, "step": 66570 }, { "epoch": 18.89866590973602, "grad_norm": 12.144388198852539, "learning_rate": 8.11101334090264e-05, "loss": 0.020541377365589142, "step": 66580 }, { "epoch": 18.90150439965938, "grad_norm": 1.298830509185791, "learning_rate": 8.110729491910304e-05, "loss": 0.03413808345794678, "step": 66590 }, { "epoch": 18.904342889582743, "grad_norm": 7.557719707489014, "learning_rate": 8.110445642917968e-05, "loss": 0.01710568219423294, "step": 66600 }, { "epoch": 18.907181379506103, "grad_norm": 3.173307418823242, "learning_rate": 8.110161793925632e-05, "loss": 0.02641441524028778, "step": 66610 }, { "epoch": 18.910019869429462, "grad_norm": 0.7265740633010864, "learning_rate": 8.109877944933296e-05, "loss": 0.012198587507009506, "step": 66620 }, { "epoch": 18.912858359352825, "grad_norm": 17.35437774658203, "learning_rate": 8.10959409594096e-05, "loss": 0.030594530701637267, "step": 66630 }, { "epoch": 18.915696849276184, "grad_norm": 3.9852724075317383, "learning_rate": 8.109310246948624e-05, "loss": 0.02220355123281479, "step": 66640 }, { "epoch": 18.918535339199547, "grad_norm": 0.9912310242652893, "learning_rate": 8.109026397956287e-05, "loss": 0.02435201108455658, "step": 66650 }, { "epoch": 18.921373829122906, "grad_norm": 7.441756725311279, "learning_rate": 8.108742548963951e-05, "loss": 0.039067605137825014, "step": 66660 }, { "epoch": 18.92421231904627, "grad_norm": 1.6327534914016724, "learning_rate": 8.108458699971615e-05, "loss": 0.023820698261260986, "step": 66670 }, { "epoch": 18.92705080896963, "grad_norm": 5.430531024932861, "learning_rate": 8.10817485097928e-05, "loss": 0.024405109882354736, "step": 66680 }, { "epoch": 18.929889298892988, "grad_norm": 13.121145248413086, "learning_rate": 8.107891001986944e-05, "loss": 0.02522510290145874, "step": 66690 }, { "epoch": 18.93272778881635, "grad_norm": 0.9832379221916199, "learning_rate": 8.107607152994608e-05, "loss": 0.01745489239692688, "step": 66700 }, { "epoch": 18.93556627873971, "grad_norm": 17.05642318725586, "learning_rate": 8.10732330400227e-05, "loss": 0.02806151509284973, "step": 66710 }, { "epoch": 18.93840476866307, "grad_norm": 0.6314168572425842, "learning_rate": 8.107039455009935e-05, "loss": 0.03050752282142639, "step": 66720 }, { "epoch": 18.941243258586432, "grad_norm": 2.886826515197754, "learning_rate": 8.106755606017599e-05, "loss": 0.01392044723033905, "step": 66730 }, { "epoch": 18.94408174850979, "grad_norm": 1.569759488105774, "learning_rate": 8.106471757025263e-05, "loss": 0.016734452545642854, "step": 66740 }, { "epoch": 18.946920238433155, "grad_norm": 9.503257751464844, "learning_rate": 8.106187908032927e-05, "loss": 0.01802990436553955, "step": 66750 }, { "epoch": 18.949758728356514, "grad_norm": 2.374508857727051, "learning_rate": 8.105904059040591e-05, "loss": 0.030473613739013673, "step": 66760 }, { "epoch": 18.952597218279877, "grad_norm": 2.867720127105713, "learning_rate": 8.105620210048255e-05, "loss": 0.02514689564704895, "step": 66770 }, { "epoch": 18.955435708203236, "grad_norm": 10.971470832824707, "learning_rate": 8.105336361055918e-05, "loss": 0.021542105078697204, "step": 66780 }, { "epoch": 18.958274198126595, "grad_norm": 3.179478406906128, "learning_rate": 8.105052512063582e-05, "loss": 0.021673890948295593, "step": 66790 }, { "epoch": 18.96111268804996, "grad_norm": 4.309968948364258, "learning_rate": 8.104768663071246e-05, "loss": 0.014268122613430023, "step": 66800 }, { "epoch": 18.963951177973318, "grad_norm": 7.718191146850586, "learning_rate": 8.10448481407891e-05, "loss": 0.02030482143163681, "step": 66810 }, { "epoch": 18.96678966789668, "grad_norm": 2.393521308898926, "learning_rate": 8.104200965086575e-05, "loss": 0.01478860229253769, "step": 66820 }, { "epoch": 18.96962815782004, "grad_norm": 7.92020845413208, "learning_rate": 8.103917116094239e-05, "loss": 0.011255685240030289, "step": 66830 }, { "epoch": 18.9724666477434, "grad_norm": 14.875160217285156, "learning_rate": 8.103633267101902e-05, "loss": 0.026578035950660706, "step": 66840 }, { "epoch": 18.975305137666762, "grad_norm": 0.6984143853187561, "learning_rate": 8.103349418109566e-05, "loss": 0.029433563351631165, "step": 66850 }, { "epoch": 18.97814362759012, "grad_norm": 2.8797895908355713, "learning_rate": 8.10306556911723e-05, "loss": 0.01952017694711685, "step": 66860 }, { "epoch": 18.980982117513484, "grad_norm": 5.100076198577881, "learning_rate": 8.102781720124894e-05, "loss": 0.022707422077655793, "step": 66870 }, { "epoch": 18.983820607436844, "grad_norm": 0.7674856185913086, "learning_rate": 8.102497871132558e-05, "loss": 0.014295876026153564, "step": 66880 }, { "epoch": 18.986659097360203, "grad_norm": 12.749394416809082, "learning_rate": 8.102214022140222e-05, "loss": 0.021755361557006837, "step": 66890 }, { "epoch": 18.989497587283566, "grad_norm": 0.5015447735786438, "learning_rate": 8.101930173147886e-05, "loss": 0.014195278286933899, "step": 66900 }, { "epoch": 18.992336077206925, "grad_norm": 5.684432029724121, "learning_rate": 8.101646324155549e-05, "loss": 0.026375368237495422, "step": 66910 }, { "epoch": 18.995174567130288, "grad_norm": 8.181107521057129, "learning_rate": 8.101362475163213e-05, "loss": 0.02488187253475189, "step": 66920 }, { "epoch": 18.998013057053647, "grad_norm": 8.39255428314209, "learning_rate": 8.101078626170877e-05, "loss": 0.029263943433761597, "step": 66930 }, { "epoch": 19.000851546977007, "grad_norm": 9.619133949279785, "learning_rate": 8.10079477717854e-05, "loss": 0.025025132298469543, "step": 66940 }, { "epoch": 19.00369003690037, "grad_norm": 2.117828607559204, "learning_rate": 8.100510928186206e-05, "loss": 0.033998480439186095, "step": 66950 }, { "epoch": 19.00652852682373, "grad_norm": 3.756300449371338, "learning_rate": 8.10022707919387e-05, "loss": 0.015250203013420106, "step": 66960 }, { "epoch": 19.009367016747092, "grad_norm": 2.767303705215454, "learning_rate": 8.099943230201533e-05, "loss": 0.005189386755228042, "step": 66970 }, { "epoch": 19.01220550667045, "grad_norm": 16.027650833129883, "learning_rate": 8.099659381209197e-05, "loss": 0.03313213288784027, "step": 66980 }, { "epoch": 19.01504399659381, "grad_norm": 12.516337394714355, "learning_rate": 8.099375532216861e-05, "loss": 0.022537827491760254, "step": 66990 }, { "epoch": 19.017882486517173, "grad_norm": 3.3282933235168457, "learning_rate": 8.099091683224525e-05, "loss": 0.018492187559604644, "step": 67000 }, { "epoch": 19.017882486517173, "eval_accuracy": 0.9661728238061932, "eval_loss": 0.10742823779582977, "eval_runtime": 31.8494, "eval_samples_per_second": 493.792, "eval_steps_per_second": 7.724, "step": 67000 }, { "epoch": 19.020720976440533, "grad_norm": 0.7128183841705322, "learning_rate": 8.098807834232189e-05, "loss": 0.02031441926956177, "step": 67010 }, { "epoch": 19.023559466363896, "grad_norm": 5.169584274291992, "learning_rate": 8.098523985239853e-05, "loss": 0.018671661615371704, "step": 67020 }, { "epoch": 19.026397956287255, "grad_norm": 2.880197763442993, "learning_rate": 8.098240136247517e-05, "loss": 0.02809334099292755, "step": 67030 }, { "epoch": 19.029236446210614, "grad_norm": 3.767319679260254, "learning_rate": 8.09795628725518e-05, "loss": 0.009692888706922531, "step": 67040 }, { "epoch": 19.032074936133977, "grad_norm": 7.517703056335449, "learning_rate": 8.097672438262844e-05, "loss": 0.024506066739559174, "step": 67050 }, { "epoch": 19.034913426057336, "grad_norm": 0.9031392335891724, "learning_rate": 8.097388589270509e-05, "loss": 0.0055957984179258345, "step": 67060 }, { "epoch": 19.0377519159807, "grad_norm": 5.282407283782959, "learning_rate": 8.097104740278171e-05, "loss": 0.012705722451210022, "step": 67070 }, { "epoch": 19.04059040590406, "grad_norm": 6.722054481506348, "learning_rate": 8.096820891285837e-05, "loss": 0.016569796204566955, "step": 67080 }, { "epoch": 19.043428895827418, "grad_norm": 2.3723578453063965, "learning_rate": 8.096537042293501e-05, "loss": 0.009938886761665345, "step": 67090 }, { "epoch": 19.04626738575078, "grad_norm": 2.4302635192871094, "learning_rate": 8.096253193301164e-05, "loss": 0.010542555898427963, "step": 67100 }, { "epoch": 19.04910587567414, "grad_norm": 15.297137260437012, "learning_rate": 8.095969344308828e-05, "loss": 0.013072320818901062, "step": 67110 }, { "epoch": 19.051944365597503, "grad_norm": 2.111640691757202, "learning_rate": 8.095685495316492e-05, "loss": 0.013304086029529571, "step": 67120 }, { "epoch": 19.054782855520862, "grad_norm": 0.5536283254623413, "learning_rate": 8.095401646324156e-05, "loss": 0.0153475821018219, "step": 67130 }, { "epoch": 19.057621345444225, "grad_norm": 3.7595982551574707, "learning_rate": 8.09511779733182e-05, "loss": 0.008560802042484283, "step": 67140 }, { "epoch": 19.060459835367585, "grad_norm": 0.7707223892211914, "learning_rate": 8.094833948339484e-05, "loss": 0.01635643094778061, "step": 67150 }, { "epoch": 19.063298325290944, "grad_norm": 3.6618599891662598, "learning_rate": 8.094550099347149e-05, "loss": 0.03128852248191834, "step": 67160 }, { "epoch": 19.066136815214307, "grad_norm": 3.980419635772705, "learning_rate": 8.094266250354811e-05, "loss": 0.023784548044204712, "step": 67170 }, { "epoch": 19.068975305137666, "grad_norm": 5.53279447555542, "learning_rate": 8.093982401362475e-05, "loss": 0.017350564897060394, "step": 67180 }, { "epoch": 19.07181379506103, "grad_norm": 1.5272711515426636, "learning_rate": 8.09369855237014e-05, "loss": 0.009153299778699876, "step": 67190 }, { "epoch": 19.07465228498439, "grad_norm": 9.337668418884277, "learning_rate": 8.093414703377802e-05, "loss": 0.01281513124704361, "step": 67200 }, { "epoch": 19.077490774907748, "grad_norm": 3.136249303817749, "learning_rate": 8.093130854385468e-05, "loss": 0.006052959710359573, "step": 67210 }, { "epoch": 19.08032926483111, "grad_norm": 0.6747031211853027, "learning_rate": 8.092847005393132e-05, "loss": 0.013777542114257812, "step": 67220 }, { "epoch": 19.08316775475447, "grad_norm": 7.410195827484131, "learning_rate": 8.092563156400795e-05, "loss": 0.016757294535636902, "step": 67230 }, { "epoch": 19.086006244677833, "grad_norm": 1.0990087985992432, "learning_rate": 8.092279307408459e-05, "loss": 0.009844745695590972, "step": 67240 }, { "epoch": 19.088844734601192, "grad_norm": 2.769007682800293, "learning_rate": 8.091995458416123e-05, "loss": 0.007458788901567459, "step": 67250 }, { "epoch": 19.09168322452455, "grad_norm": 1.658687710762024, "learning_rate": 8.091711609423787e-05, "loss": 0.011466432362794876, "step": 67260 }, { "epoch": 19.094521714447914, "grad_norm": 1.5208580493927002, "learning_rate": 8.09142776043145e-05, "loss": 0.008203256875276566, "step": 67270 }, { "epoch": 19.097360204371274, "grad_norm": 0.12263114750385284, "learning_rate": 8.091143911439116e-05, "loss": 0.015834109485149385, "step": 67280 }, { "epoch": 19.100198694294637, "grad_norm": 0.8305667042732239, "learning_rate": 8.09086006244678e-05, "loss": 0.007405736297369003, "step": 67290 }, { "epoch": 19.103037184217996, "grad_norm": 0.3608134686946869, "learning_rate": 8.090576213454442e-05, "loss": 0.014245216548442841, "step": 67300 }, { "epoch": 19.105875674141355, "grad_norm": 1.3385645151138306, "learning_rate": 8.090292364462107e-05, "loss": 0.011370453983545303, "step": 67310 }, { "epoch": 19.10871416406472, "grad_norm": 10.860981941223145, "learning_rate": 8.090008515469771e-05, "loss": 0.031423071026802064, "step": 67320 }, { "epoch": 19.111552653988078, "grad_norm": 2.063197135925293, "learning_rate": 8.089724666477433e-05, "loss": 0.01963375210762024, "step": 67330 }, { "epoch": 19.11439114391144, "grad_norm": 8.667553901672363, "learning_rate": 8.089440817485099e-05, "loss": 0.020614391565322875, "step": 67340 }, { "epoch": 19.1172296338348, "grad_norm": 2.4544875621795654, "learning_rate": 8.089156968492763e-05, "loss": 0.010786810517311096, "step": 67350 }, { "epoch": 19.12006812375816, "grad_norm": 5.245517730712891, "learning_rate": 8.088873119500426e-05, "loss": 0.014381052553653717, "step": 67360 }, { "epoch": 19.122906613681522, "grad_norm": 6.7719950675964355, "learning_rate": 8.08858927050809e-05, "loss": 0.014396995306015015, "step": 67370 }, { "epoch": 19.12574510360488, "grad_norm": 10.405839920043945, "learning_rate": 8.088305421515754e-05, "loss": 0.018640251457691194, "step": 67380 }, { "epoch": 19.128583593528244, "grad_norm": 0.6709728837013245, "learning_rate": 8.088021572523418e-05, "loss": 0.0157562181353569, "step": 67390 }, { "epoch": 19.131422083451604, "grad_norm": 2.6977698802948, "learning_rate": 8.087737723531081e-05, "loss": 0.010232111811637879, "step": 67400 }, { "epoch": 19.134260573374963, "grad_norm": 11.183005332946777, "learning_rate": 8.087453874538747e-05, "loss": 0.011555835604667664, "step": 67410 }, { "epoch": 19.137099063298326, "grad_norm": 11.41781997680664, "learning_rate": 8.08717002554641e-05, "loss": 0.029315918684005737, "step": 67420 }, { "epoch": 19.139937553221685, "grad_norm": 1.9492456912994385, "learning_rate": 8.086886176554073e-05, "loss": 0.018679100275039672, "step": 67430 }, { "epoch": 19.142776043145048, "grad_norm": 1.1353530883789062, "learning_rate": 8.086602327561738e-05, "loss": 0.01314205229282379, "step": 67440 }, { "epoch": 19.145614533068407, "grad_norm": 1.0492925643920898, "learning_rate": 8.086318478569402e-05, "loss": 0.024735867977142334, "step": 67450 }, { "epoch": 19.148453022991767, "grad_norm": 3.6296498775482178, "learning_rate": 8.086034629577065e-05, "loss": 0.021997874975204466, "step": 67460 }, { "epoch": 19.15129151291513, "grad_norm": 0.559613049030304, "learning_rate": 8.085750780584729e-05, "loss": 0.02434665262699127, "step": 67470 }, { "epoch": 19.15413000283849, "grad_norm": 4.4978556632995605, "learning_rate": 8.085466931592394e-05, "loss": 0.012002518028020858, "step": 67480 }, { "epoch": 19.15696849276185, "grad_norm": 1.7960566282272339, "learning_rate": 8.085183082600057e-05, "loss": 0.01233261376619339, "step": 67490 }, { "epoch": 19.15980698268521, "grad_norm": 3.5794947147369385, "learning_rate": 8.084899233607721e-05, "loss": 0.008021872490644455, "step": 67500 }, { "epoch": 19.15980698268521, "eval_accuracy": 0.9592420677815222, "eval_loss": 0.12999430298805237, "eval_runtime": 31.6083, "eval_samples_per_second": 497.56, "eval_steps_per_second": 7.783, "step": 67500 }, { "epoch": 19.162645472608574, "grad_norm": 1.2775534391403198, "learning_rate": 8.084615384615385e-05, "loss": 0.014560607075691224, "step": 67510 }, { "epoch": 19.165483962531933, "grad_norm": 1.8521090745925903, "learning_rate": 8.084331535623048e-05, "loss": 0.02767760157585144, "step": 67520 }, { "epoch": 19.168322452455293, "grad_norm": 0.9166882038116455, "learning_rate": 8.084047686630712e-05, "loss": 0.010507382452487946, "step": 67530 }, { "epoch": 19.171160942378656, "grad_norm": 5.8645830154418945, "learning_rate": 8.083763837638378e-05, "loss": 0.025987327098846436, "step": 67540 }, { "epoch": 19.173999432302015, "grad_norm": 2.1302120685577393, "learning_rate": 8.08347998864604e-05, "loss": 0.012335435301065446, "step": 67550 }, { "epoch": 19.176837922225378, "grad_norm": 10.883798599243164, "learning_rate": 8.083196139653705e-05, "loss": 0.007305572181940079, "step": 67560 }, { "epoch": 19.179676412148737, "grad_norm": 0.6601458787918091, "learning_rate": 8.082912290661369e-05, "loss": 0.01694016009569168, "step": 67570 }, { "epoch": 19.182514902072096, "grad_norm": 9.057695388793945, "learning_rate": 8.082628441669033e-05, "loss": 0.013481160998344422, "step": 67580 }, { "epoch": 19.18535339199546, "grad_norm": 1.173474669456482, "learning_rate": 8.082344592676696e-05, "loss": 0.020177052915096284, "step": 67590 }, { "epoch": 19.18819188191882, "grad_norm": 6.526787281036377, "learning_rate": 8.08206074368436e-05, "loss": 0.01318686306476593, "step": 67600 }, { "epoch": 19.19103037184218, "grad_norm": 2.1249332427978516, "learning_rate": 8.081776894692025e-05, "loss": 0.006008858978748322, "step": 67610 }, { "epoch": 19.19386886176554, "grad_norm": 0.16272367537021637, "learning_rate": 8.081493045699688e-05, "loss": 0.010288356989622115, "step": 67620 }, { "epoch": 19.1967073516889, "grad_norm": 0.6398535370826721, "learning_rate": 8.081209196707352e-05, "loss": 0.018589666485786437, "step": 67630 }, { "epoch": 19.199545841612263, "grad_norm": 0.6388561725616455, "learning_rate": 8.080925347715016e-05, "loss": 0.01729421615600586, "step": 67640 }, { "epoch": 19.202384331535622, "grad_norm": 1.0805052518844604, "learning_rate": 8.080641498722679e-05, "loss": 0.012442007660865784, "step": 67650 }, { "epoch": 19.205222821458985, "grad_norm": 6.400195598602295, "learning_rate": 8.080357649730343e-05, "loss": 0.019930429756641388, "step": 67660 }, { "epoch": 19.208061311382345, "grad_norm": 9.064736366271973, "learning_rate": 8.080073800738007e-05, "loss": 0.03814596235752106, "step": 67670 }, { "epoch": 19.210899801305704, "grad_norm": 0.1559750884771347, "learning_rate": 8.079789951745672e-05, "loss": 0.014321824908256531, "step": 67680 }, { "epoch": 19.213738291229067, "grad_norm": 1.3881678581237793, "learning_rate": 8.079506102753336e-05, "loss": 0.009494108706712722, "step": 67690 }, { "epoch": 19.216576781152426, "grad_norm": 2.005899429321289, "learning_rate": 8.079222253761e-05, "loss": 0.02582116723060608, "step": 67700 }, { "epoch": 19.21941527107579, "grad_norm": 0.5148826241493225, "learning_rate": 8.078938404768664e-05, "loss": 0.008900617063045502, "step": 67710 }, { "epoch": 19.22225376099915, "grad_norm": 5.008853912353516, "learning_rate": 8.078654555776327e-05, "loss": 0.021077825129032134, "step": 67720 }, { "epoch": 19.225092250922508, "grad_norm": 0.9692829847335815, "learning_rate": 8.078370706783991e-05, "loss": 0.023941811919212342, "step": 67730 }, { "epoch": 19.22793074084587, "grad_norm": 17.336599349975586, "learning_rate": 8.078086857791656e-05, "loss": 0.014363613724708558, "step": 67740 }, { "epoch": 19.23076923076923, "grad_norm": 3.8277463912963867, "learning_rate": 8.077803008799319e-05, "loss": 0.018578089773654938, "step": 67750 }, { "epoch": 19.233607720692593, "grad_norm": 7.376985549926758, "learning_rate": 8.077519159806983e-05, "loss": 0.01765679270029068, "step": 67760 }, { "epoch": 19.236446210615952, "grad_norm": 3.771419048309326, "learning_rate": 8.077235310814647e-05, "loss": 0.010608582198619843, "step": 67770 }, { "epoch": 19.23928470053931, "grad_norm": 10.57840347290039, "learning_rate": 8.07695146182231e-05, "loss": 0.03190801739692688, "step": 67780 }, { "epoch": 19.242123190462674, "grad_norm": 0.2734811305999756, "learning_rate": 8.076667612829974e-05, "loss": 0.02803638279438019, "step": 67790 }, { "epoch": 19.244961680386034, "grad_norm": 5.873191833496094, "learning_rate": 8.076383763837638e-05, "loss": 0.021638505160808563, "step": 67800 }, { "epoch": 19.247800170309397, "grad_norm": 2.8172056674957275, "learning_rate": 8.076099914845303e-05, "loss": 0.00960276871919632, "step": 67810 }, { "epoch": 19.250638660232756, "grad_norm": 1.21724534034729, "learning_rate": 8.075816065852967e-05, "loss": 0.020796990394592284, "step": 67820 }, { "epoch": 19.253477150156115, "grad_norm": 0.6179468631744385, "learning_rate": 8.075532216860631e-05, "loss": 0.009797509759664536, "step": 67830 }, { "epoch": 19.256315640079478, "grad_norm": 5.0413818359375, "learning_rate": 8.075248367868295e-05, "loss": 0.02460653930902481, "step": 67840 }, { "epoch": 19.259154130002837, "grad_norm": 8.126354217529297, "learning_rate": 8.074964518875958e-05, "loss": 0.016722001135349274, "step": 67850 }, { "epoch": 19.2619926199262, "grad_norm": 0.4409915804862976, "learning_rate": 8.074680669883622e-05, "loss": 0.008022047579288483, "step": 67860 }, { "epoch": 19.26483110984956, "grad_norm": 8.339662551879883, "learning_rate": 8.074396820891286e-05, "loss": 0.014708764851093292, "step": 67870 }, { "epoch": 19.267669599772923, "grad_norm": 14.161458969116211, "learning_rate": 8.07411297189895e-05, "loss": 0.023420609533786774, "step": 67880 }, { "epoch": 19.270508089696282, "grad_norm": 5.470942974090576, "learning_rate": 8.073829122906614e-05, "loss": 0.017591893672943115, "step": 67890 }, { "epoch": 19.27334657961964, "grad_norm": 2.383192777633667, "learning_rate": 8.073545273914278e-05, "loss": 0.016281697154045104, "step": 67900 }, { "epoch": 19.276185069543004, "grad_norm": 4.8740973472595215, "learning_rate": 8.073261424921941e-05, "loss": 0.030304166674613952, "step": 67910 }, { "epoch": 19.279023559466363, "grad_norm": 7.9033684730529785, "learning_rate": 8.072977575929605e-05, "loss": 0.017916691303253175, "step": 67920 }, { "epoch": 19.281862049389726, "grad_norm": 16.310129165649414, "learning_rate": 8.07269372693727e-05, "loss": 0.017711320519447328, "step": 67930 }, { "epoch": 19.284700539313086, "grad_norm": 2.117884397506714, "learning_rate": 8.072409877944934e-05, "loss": 0.01555851399898529, "step": 67940 }, { "epoch": 19.287539029236445, "grad_norm": 3.372568130493164, "learning_rate": 8.072126028952598e-05, "loss": 0.007022402435541153, "step": 67950 }, { "epoch": 19.290377519159808, "grad_norm": 0.4804041087627411, "learning_rate": 8.071842179960262e-05, "loss": 0.009142833948135375, "step": 67960 }, { "epoch": 19.293216009083167, "grad_norm": 8.029982566833496, "learning_rate": 8.071558330967926e-05, "loss": 0.018873147666454315, "step": 67970 }, { "epoch": 19.29605449900653, "grad_norm": 0.36158791184425354, "learning_rate": 8.071274481975589e-05, "loss": 0.009248950332403184, "step": 67980 }, { "epoch": 19.29889298892989, "grad_norm": 10.57125186920166, "learning_rate": 8.070990632983253e-05, "loss": 0.017669256031513213, "step": 67990 }, { "epoch": 19.30173147885325, "grad_norm": 2.9398984909057617, "learning_rate": 8.070706783990917e-05, "loss": 0.010069814324378968, "step": 68000 }, { "epoch": 19.30173147885325, "eval_accuracy": 0.9672537674063713, "eval_loss": 0.10566136986017227, "eval_runtime": 32.0337, "eval_samples_per_second": 490.951, "eval_steps_per_second": 7.679, "step": 68000 }, { "epoch": 19.30456996877661, "grad_norm": 0.36136746406555176, "learning_rate": 8.070422934998581e-05, "loss": 0.020288358628749847, "step": 68010 }, { "epoch": 19.30740845869997, "grad_norm": 6.821678161621094, "learning_rate": 8.070139086006245e-05, "loss": 0.02235458791255951, "step": 68020 }, { "epoch": 19.310246948623334, "grad_norm": 1.2413313388824463, "learning_rate": 8.06985523701391e-05, "loss": 0.005752924084663391, "step": 68030 }, { "epoch": 19.313085438546693, "grad_norm": 1.815874695777893, "learning_rate": 8.069571388021572e-05, "loss": 0.011402224749326706, "step": 68040 }, { "epoch": 19.315923928470053, "grad_norm": 4.294982433319092, "learning_rate": 8.069287539029236e-05, "loss": 0.024358630180358887, "step": 68050 }, { "epoch": 19.318762418393415, "grad_norm": 16.838899612426758, "learning_rate": 8.0690036900369e-05, "loss": 0.014200448989868164, "step": 68060 }, { "epoch": 19.321600908316775, "grad_norm": 5.7158050537109375, "learning_rate": 8.068719841044565e-05, "loss": 0.023680722713470458, "step": 68070 }, { "epoch": 19.324439398240138, "grad_norm": 2.2217957973480225, "learning_rate": 8.068435992052229e-05, "loss": 0.02281351536512375, "step": 68080 }, { "epoch": 19.327277888163497, "grad_norm": 0.7452578544616699, "learning_rate": 8.068152143059893e-05, "loss": 0.013897983729839325, "step": 68090 }, { "epoch": 19.330116378086856, "grad_norm": 2.279170274734497, "learning_rate": 8.067868294067557e-05, "loss": 0.018126335740089417, "step": 68100 }, { "epoch": 19.33295486801022, "grad_norm": 1.5987281799316406, "learning_rate": 8.06758444507522e-05, "loss": 0.017642366886138915, "step": 68110 }, { "epoch": 19.33579335793358, "grad_norm": 0.5822604298591614, "learning_rate": 8.067300596082884e-05, "loss": 0.01753823906183243, "step": 68120 }, { "epoch": 19.33863184785694, "grad_norm": 2.722149133682251, "learning_rate": 8.067016747090548e-05, "loss": 0.010779988765716553, "step": 68130 }, { "epoch": 19.3414703377803, "grad_norm": 0.3484378159046173, "learning_rate": 8.066732898098212e-05, "loss": 0.016800422966480256, "step": 68140 }, { "epoch": 19.34430882770366, "grad_norm": 1.1629972457885742, "learning_rate": 8.066449049105876e-05, "loss": 0.016008684039115907, "step": 68150 }, { "epoch": 19.347147317627023, "grad_norm": 2.117344379425049, "learning_rate": 8.06616520011354e-05, "loss": 0.012802721560001373, "step": 68160 }, { "epoch": 19.349985807550382, "grad_norm": 1.9862496852874756, "learning_rate": 8.065881351121203e-05, "loss": 0.022111015021800996, "step": 68170 }, { "epoch": 19.352824297473745, "grad_norm": 8.672078132629395, "learning_rate": 8.065597502128868e-05, "loss": 0.014741088449954986, "step": 68180 }, { "epoch": 19.355662787397105, "grad_norm": 1.0371514558792114, "learning_rate": 8.065313653136532e-05, "loss": 0.007393895089626313, "step": 68190 }, { "epoch": 19.358501277320464, "grad_norm": 3.1068577766418457, "learning_rate": 8.065029804144196e-05, "loss": 0.010084652155637742, "step": 68200 }, { "epoch": 19.361339767243827, "grad_norm": 1.4325002431869507, "learning_rate": 8.06474595515186e-05, "loss": 0.02285253703594208, "step": 68210 }, { "epoch": 19.364178257167186, "grad_norm": 1.3104113340377808, "learning_rate": 8.064462106159524e-05, "loss": 0.012064166367053986, "step": 68220 }, { "epoch": 19.36701674709055, "grad_norm": 5.637192726135254, "learning_rate": 8.064178257167188e-05, "loss": 0.014824016392230988, "step": 68230 }, { "epoch": 19.36985523701391, "grad_norm": 1.9618721008300781, "learning_rate": 8.063894408174851e-05, "loss": 0.014382511377334595, "step": 68240 }, { "epoch": 19.372693726937268, "grad_norm": 0.652323305606842, "learning_rate": 8.063610559182515e-05, "loss": 0.01997605860233307, "step": 68250 }, { "epoch": 19.37553221686063, "grad_norm": 1.9782042503356934, "learning_rate": 8.063355095089413e-05, "loss": 0.03351330459117889, "step": 68260 }, { "epoch": 19.37837070678399, "grad_norm": 3.842339277267456, "learning_rate": 8.063071246097077e-05, "loss": 0.017319580912590025, "step": 68270 }, { "epoch": 19.381209196707353, "grad_norm": 3.4852116107940674, "learning_rate": 8.06278739710474e-05, "loss": 0.013397429883480073, "step": 68280 }, { "epoch": 19.384047686630712, "grad_norm": 3.6600663661956787, "learning_rate": 8.062503548112404e-05, "loss": 0.018483200669288637, "step": 68290 }, { "epoch": 19.386886176554075, "grad_norm": 6.97262716293335, "learning_rate": 8.062219699120068e-05, "loss": 0.016976676881313324, "step": 68300 }, { "epoch": 19.389724666477434, "grad_norm": 14.758373260498047, "learning_rate": 8.061935850127732e-05, "loss": 0.020726034045219423, "step": 68310 }, { "epoch": 19.392563156400794, "grad_norm": 5.3285651206970215, "learning_rate": 8.061652001135397e-05, "loss": 0.007838141173124313, "step": 68320 }, { "epoch": 19.395401646324157, "grad_norm": 0.40738874673843384, "learning_rate": 8.061368152143061e-05, "loss": 0.007996352016925811, "step": 68330 }, { "epoch": 19.398240136247516, "grad_norm": 8.883698463439941, "learning_rate": 8.061084303150725e-05, "loss": 0.029583722352981567, "step": 68340 }, { "epoch": 19.40107862617088, "grad_norm": 1.2910749912261963, "learning_rate": 8.060800454158388e-05, "loss": 0.015966296195983887, "step": 68350 }, { "epoch": 19.403917116094238, "grad_norm": 4.066921234130859, "learning_rate": 8.060516605166052e-05, "loss": 0.017843152582645416, "step": 68360 }, { "epoch": 19.406755606017597, "grad_norm": 10.746665000915527, "learning_rate": 8.060232756173716e-05, "loss": 0.015083371102809906, "step": 68370 }, { "epoch": 19.40959409594096, "grad_norm": 1.8514008522033691, "learning_rate": 8.059948907181379e-05, "loss": 0.010478422045707703, "step": 68380 }, { "epoch": 19.41243258586432, "grad_norm": 1.0810657739639282, "learning_rate": 8.059665058189044e-05, "loss": 0.007983805984258652, "step": 68390 }, { "epoch": 19.415271075787683, "grad_norm": 1.9075071811676025, "learning_rate": 8.059381209196708e-05, "loss": 0.01799801141023636, "step": 68400 }, { "epoch": 19.418109565711042, "grad_norm": 1.0062581300735474, "learning_rate": 8.059097360204371e-05, "loss": 0.024819447100162505, "step": 68410 }, { "epoch": 19.4209480556344, "grad_norm": 11.236578941345215, "learning_rate": 8.058813511212035e-05, "loss": 0.011575204879045486, "step": 68420 }, { "epoch": 19.423786545557764, "grad_norm": 9.953205108642578, "learning_rate": 8.0585296622197e-05, "loss": 0.019811382889747618, "step": 68430 }, { "epoch": 19.426625035481123, "grad_norm": 2.9195587635040283, "learning_rate": 8.058245813227364e-05, "loss": 0.0164811834692955, "step": 68440 }, { "epoch": 19.429463525404486, "grad_norm": 3.004770040512085, "learning_rate": 8.057961964235026e-05, "loss": 0.014394600689411164, "step": 68450 }, { "epoch": 19.432302015327846, "grad_norm": 4.019406795501709, "learning_rate": 8.057678115242692e-05, "loss": 0.013678045570850372, "step": 68460 }, { "epoch": 19.435140505251205, "grad_norm": 1.4142735004425049, "learning_rate": 8.057394266250356e-05, "loss": 0.01651117354631424, "step": 68470 }, { "epoch": 19.437978995174568, "grad_norm": 6.617769241333008, "learning_rate": 8.057110417258019e-05, "loss": 0.014800643920898438, "step": 68480 }, { "epoch": 19.440817485097927, "grad_norm": 12.624940872192383, "learning_rate": 8.056826568265683e-05, "loss": 0.011644546687602998, "step": 68490 }, { "epoch": 19.44365597502129, "grad_norm": 1.4502366781234741, "learning_rate": 8.056542719273347e-05, "loss": 0.02547231912612915, "step": 68500 }, { "epoch": 19.44365597502129, "eval_accuracy": 0.9623577287467413, "eval_loss": 0.12027938663959503, "eval_runtime": 31.8958, "eval_samples_per_second": 493.074, "eval_steps_per_second": 7.713, "step": 68500 }, { "epoch": 19.44649446494465, "grad_norm": 2.7482173442840576, "learning_rate": 8.05625887028101e-05, "loss": 0.025292092561721803, "step": 68510 }, { "epoch": 19.44933295486801, "grad_norm": 15.106111526489258, "learning_rate": 8.055975021288675e-05, "loss": 0.03591836988925934, "step": 68520 }, { "epoch": 19.45217144479137, "grad_norm": 4.529141902923584, "learning_rate": 8.05569117229634e-05, "loss": 0.015529727935791016, "step": 68530 }, { "epoch": 19.45500993471473, "grad_norm": 13.03493595123291, "learning_rate": 8.055407323304002e-05, "loss": 0.016385802626609804, "step": 68540 }, { "epoch": 19.457848424638094, "grad_norm": 1.7573965787887573, "learning_rate": 8.055123474311666e-05, "loss": 0.023104332387447357, "step": 68550 }, { "epoch": 19.460686914561453, "grad_norm": 1.6557745933532715, "learning_rate": 8.05483962531933e-05, "loss": 0.0061702758073806764, "step": 68560 }, { "epoch": 19.463525404484812, "grad_norm": 2.6736443042755127, "learning_rate": 8.054555776326995e-05, "loss": 0.011512688547372817, "step": 68570 }, { "epoch": 19.466363894408175, "grad_norm": 6.430649280548096, "learning_rate": 8.054271927334657e-05, "loss": 0.021578523516654968, "step": 68580 }, { "epoch": 19.469202384331535, "grad_norm": 10.6021089553833, "learning_rate": 8.053988078342323e-05, "loss": 0.012772506475448609, "step": 68590 }, { "epoch": 19.472040874254898, "grad_norm": 2.297525644302368, "learning_rate": 8.053704229349987e-05, "loss": 0.01806233674287796, "step": 68600 }, { "epoch": 19.474879364178257, "grad_norm": 0.6878420114517212, "learning_rate": 8.05342038035765e-05, "loss": 0.01141258180141449, "step": 68610 }, { "epoch": 19.477717854101616, "grad_norm": 1.08841872215271, "learning_rate": 8.053136531365314e-05, "loss": 0.011026586592197418, "step": 68620 }, { "epoch": 19.48055634402498, "grad_norm": 8.123515129089355, "learning_rate": 8.052852682372978e-05, "loss": 0.022044964134693146, "step": 68630 }, { "epoch": 19.48339483394834, "grad_norm": 1.9518377780914307, "learning_rate": 8.052568833380641e-05, "loss": 0.008070898056030274, "step": 68640 }, { "epoch": 19.4862333238717, "grad_norm": 2.4339699745178223, "learning_rate": 8.052284984388305e-05, "loss": 0.034346505999565125, "step": 68650 }, { "epoch": 19.48907181379506, "grad_norm": 1.3761080503463745, "learning_rate": 8.05200113539597e-05, "loss": 0.015331877768039704, "step": 68660 }, { "epoch": 19.49191030371842, "grad_norm": 5.9419684410095215, "learning_rate": 8.051717286403633e-05, "loss": 0.026616877317428587, "step": 68670 }, { "epoch": 19.494748793641783, "grad_norm": 11.315434455871582, "learning_rate": 8.051433437411297e-05, "loss": 0.014980483055114745, "step": 68680 }, { "epoch": 19.497587283565142, "grad_norm": 1.7705806493759155, "learning_rate": 8.051149588418962e-05, "loss": 0.02223304808139801, "step": 68690 }, { "epoch": 19.500425773488505, "grad_norm": 0.7497650384902954, "learning_rate": 8.050865739426626e-05, "loss": 0.023962581157684328, "step": 68700 }, { "epoch": 19.503264263411864, "grad_norm": 2.380741596221924, "learning_rate": 8.050581890434288e-05, "loss": 0.02096230834722519, "step": 68710 }, { "epoch": 19.506102753335227, "grad_norm": 3.907726287841797, "learning_rate": 8.050298041441954e-05, "loss": 0.03797944188117981, "step": 68720 }, { "epoch": 19.508941243258587, "grad_norm": 0.4190168082714081, "learning_rate": 8.050014192449618e-05, "loss": 0.009597226977348328, "step": 68730 }, { "epoch": 19.511779733181946, "grad_norm": 8.626215934753418, "learning_rate": 8.049730343457281e-05, "loss": 0.035277745127677916, "step": 68740 }, { "epoch": 19.51461822310531, "grad_norm": 1.285332202911377, "learning_rate": 8.049446494464945e-05, "loss": 0.029807767271995543, "step": 68750 }, { "epoch": 19.51745671302867, "grad_norm": 4.7375688552856445, "learning_rate": 8.049162645472609e-05, "loss": 0.03547104001045227, "step": 68760 }, { "epoch": 19.52029520295203, "grad_norm": 3.6163253784179688, "learning_rate": 8.048878796480272e-05, "loss": 0.012176578491926193, "step": 68770 }, { "epoch": 19.52313369287539, "grad_norm": 1.5618643760681152, "learning_rate": 8.048594947487936e-05, "loss": 0.009652698040008545, "step": 68780 }, { "epoch": 19.52597218279875, "grad_norm": 0.4965190291404724, "learning_rate": 8.048311098495602e-05, "loss": 0.020688846707344055, "step": 68790 }, { "epoch": 19.528810672722113, "grad_norm": 16.246986389160156, "learning_rate": 8.048027249503264e-05, "loss": 0.07054977416992188, "step": 68800 }, { "epoch": 19.531649162645472, "grad_norm": 0.8282925486564636, "learning_rate": 8.047743400510929e-05, "loss": 0.01789591759443283, "step": 68810 }, { "epoch": 19.534487652568835, "grad_norm": 6.707474231719971, "learning_rate": 8.047459551518593e-05, "loss": 0.016325725615024565, "step": 68820 }, { "epoch": 19.537326142492194, "grad_norm": 0.343199223279953, "learning_rate": 8.047175702526257e-05, "loss": 0.028074952960014343, "step": 68830 }, { "epoch": 19.540164632415554, "grad_norm": 1.5699849128723145, "learning_rate": 8.04689185353392e-05, "loss": 0.023118598759174346, "step": 68840 }, { "epoch": 19.543003122338916, "grad_norm": 1.3957401514053345, "learning_rate": 8.046608004541585e-05, "loss": 0.017350849509239197, "step": 68850 }, { "epoch": 19.545841612262276, "grad_norm": 5.136393070220947, "learning_rate": 8.046324155549249e-05, "loss": 0.022270841896533965, "step": 68860 }, { "epoch": 19.54868010218564, "grad_norm": 2.1466891765594482, "learning_rate": 8.046040306556912e-05, "loss": 0.012609714269638061, "step": 68870 }, { "epoch": 19.551518592108998, "grad_norm": 5.206986427307129, "learning_rate": 8.045756457564576e-05, "loss": 0.019027863442897797, "step": 68880 }, { "epoch": 19.554357082032357, "grad_norm": 6.954113960266113, "learning_rate": 8.04547260857224e-05, "loss": 0.013817405700683594, "step": 68890 }, { "epoch": 19.55719557195572, "grad_norm": 1.9676868915557861, "learning_rate": 8.045188759579903e-05, "loss": 0.014141739904880523, "step": 68900 }, { "epoch": 19.56003406187908, "grad_norm": 13.528081893920898, "learning_rate": 8.044904910587567e-05, "loss": 0.026004457473754884, "step": 68910 }, { "epoch": 19.562872551802442, "grad_norm": 14.59493350982666, "learning_rate": 8.044621061595233e-05, "loss": 0.03138493895530701, "step": 68920 }, { "epoch": 19.565711041725802, "grad_norm": 1.2393336296081543, "learning_rate": 8.044337212602895e-05, "loss": 0.026313084363937377, "step": 68930 }, { "epoch": 19.56854953164916, "grad_norm": 13.831196784973145, "learning_rate": 8.04405336361056e-05, "loss": 0.02973119616508484, "step": 68940 }, { "epoch": 19.571388021572524, "grad_norm": 9.97261905670166, "learning_rate": 8.043769514618224e-05, "loss": 0.018589992821216584, "step": 68950 }, { "epoch": 19.574226511495883, "grad_norm": 1.2044906616210938, "learning_rate": 8.043485665625888e-05, "loss": 0.02998480498790741, "step": 68960 }, { "epoch": 19.577065001419246, "grad_norm": 20.05248260498047, "learning_rate": 8.04320181663355e-05, "loss": 0.02988308072090149, "step": 68970 }, { "epoch": 19.579903491342606, "grad_norm": 15.3196439743042, "learning_rate": 8.042917967641215e-05, "loss": 0.014380241930484771, "step": 68980 }, { "epoch": 19.582741981265965, "grad_norm": 12.566217422485352, "learning_rate": 8.04263411864888e-05, "loss": 0.016055886447429658, "step": 68990 }, { "epoch": 19.585580471189328, "grad_norm": 0.3085486590862274, "learning_rate": 8.042350269656543e-05, "loss": 0.013671569526195526, "step": 69000 }, { "epoch": 19.585580471189328, "eval_accuracy": 0.9593692376168372, "eval_loss": 0.13230754435062408, "eval_runtime": 31.8925, "eval_samples_per_second": 493.126, "eval_steps_per_second": 7.713, "step": 69000 }, { "epoch": 19.588418961112687, "grad_norm": 0.4700465500354767, "learning_rate": 8.042066420664207e-05, "loss": 0.026226577162742615, "step": 69010 }, { "epoch": 19.59125745103605, "grad_norm": 5.278825759887695, "learning_rate": 8.041782571671871e-05, "loss": 0.00673285573720932, "step": 69020 }, { "epoch": 19.59409594095941, "grad_norm": 2.240048408508301, "learning_rate": 8.041498722679534e-05, "loss": 0.030140620470046998, "step": 69030 }, { "epoch": 19.59693443088277, "grad_norm": 1.9747363328933716, "learning_rate": 8.041214873687198e-05, "loss": 0.01521899402141571, "step": 69040 }, { "epoch": 19.59977292080613, "grad_norm": 13.838067054748535, "learning_rate": 8.040931024694864e-05, "loss": 0.013614904880523682, "step": 69050 }, { "epoch": 19.60261141072949, "grad_norm": 1.7056885957717896, "learning_rate": 8.040647175702527e-05, "loss": 0.016552548110485076, "step": 69060 }, { "epoch": 19.605449900652854, "grad_norm": 2.1976730823516846, "learning_rate": 8.04036332671019e-05, "loss": 0.02237391322851181, "step": 69070 }, { "epoch": 19.608288390576213, "grad_norm": 1.5214805603027344, "learning_rate": 8.040079477717855e-05, "loss": 0.01581161469221115, "step": 69080 }, { "epoch": 19.611126880499576, "grad_norm": 1.4361978769302368, "learning_rate": 8.039795628725519e-05, "loss": 0.014688368141651153, "step": 69090 }, { "epoch": 19.613965370422935, "grad_norm": 5.594478607177734, "learning_rate": 8.039511779733182e-05, "loss": 0.014558854699134826, "step": 69100 }, { "epoch": 19.616803860346295, "grad_norm": 10.732208251953125, "learning_rate": 8.039227930740846e-05, "loss": 0.02499958574771881, "step": 69110 }, { "epoch": 19.619642350269658, "grad_norm": 4.799899101257324, "learning_rate": 8.03894408174851e-05, "loss": 0.02225075364112854, "step": 69120 }, { "epoch": 19.622480840193017, "grad_norm": 4.448549747467041, "learning_rate": 8.038660232756174e-05, "loss": 0.035136446356773376, "step": 69130 }, { "epoch": 19.62531933011638, "grad_norm": 0.06879843026399612, "learning_rate": 8.038376383763838e-05, "loss": 0.022039005160331727, "step": 69140 }, { "epoch": 19.62815782003974, "grad_norm": 14.831464767456055, "learning_rate": 8.038092534771502e-05, "loss": 0.026927202939987183, "step": 69150 }, { "epoch": 19.6309963099631, "grad_norm": 3.4224581718444824, "learning_rate": 8.037808685779165e-05, "loss": 0.03298492729663849, "step": 69160 }, { "epoch": 19.63383479988646, "grad_norm": 10.462454795837402, "learning_rate": 8.03752483678683e-05, "loss": 0.02494146078824997, "step": 69170 }, { "epoch": 19.63667328980982, "grad_norm": 2.098823308944702, "learning_rate": 8.037240987794493e-05, "loss": 0.032711532711982724, "step": 69180 }, { "epoch": 19.639511779733184, "grad_norm": 1.413672685623169, "learning_rate": 8.036957138802158e-05, "loss": 0.020758078992366792, "step": 69190 }, { "epoch": 19.642350269656543, "grad_norm": 0.10148569941520691, "learning_rate": 8.036673289809822e-05, "loss": 0.009158961474895477, "step": 69200 }, { "epoch": 19.645188759579902, "grad_norm": 7.626468658447266, "learning_rate": 8.036389440817486e-05, "loss": 0.023182570934295654, "step": 69210 }, { "epoch": 19.648027249503265, "grad_norm": 2.296173334121704, "learning_rate": 8.036105591825149e-05, "loss": 0.008154605329036713, "step": 69220 }, { "epoch": 19.650865739426624, "grad_norm": 2.589481830596924, "learning_rate": 8.035821742832813e-05, "loss": 0.02610575556755066, "step": 69230 }, { "epoch": 19.653704229349987, "grad_norm": 7.635892868041992, "learning_rate": 8.035537893840477e-05, "loss": 0.012900859117507935, "step": 69240 }, { "epoch": 19.656542719273347, "grad_norm": 2.0708858966827393, "learning_rate": 8.035254044848141e-05, "loss": 0.012569186091423035, "step": 69250 }, { "epoch": 19.659381209196706, "grad_norm": 6.0787787437438965, "learning_rate": 8.034970195855805e-05, "loss": 0.012606383860111236, "step": 69260 }, { "epoch": 19.66221969912007, "grad_norm": 2.274489402770996, "learning_rate": 8.03468634686347e-05, "loss": 0.022255726158618927, "step": 69270 }, { "epoch": 19.665058189043428, "grad_norm": 1.3657938241958618, "learning_rate": 8.034402497871133e-05, "loss": 0.014625145494937897, "step": 69280 }, { "epoch": 19.66789667896679, "grad_norm": 4.934107303619385, "learning_rate": 8.034118648878796e-05, "loss": 0.009336713701486588, "step": 69290 }, { "epoch": 19.67073516889015, "grad_norm": 0.7426437139511108, "learning_rate": 8.03383479988646e-05, "loss": 0.011014585942029953, "step": 69300 }, { "epoch": 19.67357365881351, "grad_norm": 0.8515448570251465, "learning_rate": 8.033550950894125e-05, "loss": 0.013521890342235564, "step": 69310 }, { "epoch": 19.676412148736873, "grad_norm": 0.4950335621833801, "learning_rate": 8.033267101901789e-05, "loss": 0.011190605908632278, "step": 69320 }, { "epoch": 19.679250638660232, "grad_norm": 8.686479568481445, "learning_rate": 8.032983252909453e-05, "loss": 0.0067523129284381865, "step": 69330 }, { "epoch": 19.682089128583595, "grad_norm": 1.1687695980072021, "learning_rate": 8.032699403917117e-05, "loss": 0.01453712284564972, "step": 69340 }, { "epoch": 19.684927618506954, "grad_norm": 6.46031379699707, "learning_rate": 8.03241555492478e-05, "loss": 0.010612066835165024, "step": 69350 }, { "epoch": 19.687766108430313, "grad_norm": 7.104761600494385, "learning_rate": 8.032131705932444e-05, "loss": 0.018320584297180177, "step": 69360 }, { "epoch": 19.690604598353676, "grad_norm": 7.480422496795654, "learning_rate": 8.031847856940108e-05, "loss": 0.024844080209732056, "step": 69370 }, { "epoch": 19.693443088277036, "grad_norm": 1.9221047163009644, "learning_rate": 8.031564007947772e-05, "loss": 0.0582061767578125, "step": 69380 }, { "epoch": 19.6962815782004, "grad_norm": 0.8026178479194641, "learning_rate": 8.031280158955436e-05, "loss": 0.007932065427303315, "step": 69390 }, { "epoch": 19.699120068123758, "grad_norm": 5.988377094268799, "learning_rate": 8.0309963099631e-05, "loss": 0.008467693626880646, "step": 69400 }, { "epoch": 19.701958558047117, "grad_norm": 1.679275631904602, "learning_rate": 8.030712460970765e-05, "loss": 0.03526282012462616, "step": 69410 }, { "epoch": 19.70479704797048, "grad_norm": 11.361992835998535, "learning_rate": 8.030428611978427e-05, "loss": 0.01715822219848633, "step": 69420 }, { "epoch": 19.70763553789384, "grad_norm": 0.5744953751564026, "learning_rate": 8.030144762986091e-05, "loss": 0.02265937626361847, "step": 69430 }, { "epoch": 19.710474027817202, "grad_norm": 1.8528063297271729, "learning_rate": 8.029860913993756e-05, "loss": 0.013503438234329224, "step": 69440 }, { "epoch": 19.71331251774056, "grad_norm": 2.933100938796997, "learning_rate": 8.02957706500142e-05, "loss": 0.017415373027324675, "step": 69450 }, { "epoch": 19.716151007663925, "grad_norm": 5.731636047363281, "learning_rate": 8.029293216009084e-05, "loss": 0.015204900503158569, "step": 69460 }, { "epoch": 19.718989497587284, "grad_norm": 2.7832765579223633, "learning_rate": 8.029009367016748e-05, "loss": 0.030335879325866698, "step": 69470 }, { "epoch": 19.721827987510643, "grad_norm": 6.3767900466918945, "learning_rate": 8.028725518024411e-05, "loss": 0.018050846457481385, "step": 69480 }, { "epoch": 19.724666477434006, "grad_norm": 3.4176506996154785, "learning_rate": 8.028441669032075e-05, "loss": 0.017700466513633727, "step": 69490 }, { "epoch": 19.727504967357365, "grad_norm": 4.745724678039551, "learning_rate": 8.028157820039739e-05, "loss": 0.01572309136390686, "step": 69500 }, { "epoch": 19.727504967357365, "eval_accuracy": 0.9616582946525084, "eval_loss": 0.12550418078899384, "eval_runtime": 31.6521, "eval_samples_per_second": 496.87, "eval_steps_per_second": 7.772, "step": 69500 }, { "epoch": 19.73034345728073, "grad_norm": 3.1548526287078857, "learning_rate": 8.027873971047403e-05, "loss": 0.028902781009674073, "step": 69510 }, { "epoch": 19.733181947204088, "grad_norm": 1.549743890762329, "learning_rate": 8.027590122055067e-05, "loss": 0.024157786369323732, "step": 69520 }, { "epoch": 19.736020437127447, "grad_norm": 0.5650414228439331, "learning_rate": 8.027306273062732e-05, "loss": 0.01640947163105011, "step": 69530 }, { "epoch": 19.73885892705081, "grad_norm": 2.0185089111328125, "learning_rate": 8.027022424070396e-05, "loss": 0.02525729835033417, "step": 69540 }, { "epoch": 19.74169741697417, "grad_norm": 3.5437769889831543, "learning_rate": 8.026738575078058e-05, "loss": 0.01942034512758255, "step": 69550 }, { "epoch": 19.744535906897532, "grad_norm": 7.796629428863525, "learning_rate": 8.026454726085723e-05, "loss": 0.014662231504917144, "step": 69560 }, { "epoch": 19.74737439682089, "grad_norm": 1.618914246559143, "learning_rate": 8.026170877093387e-05, "loss": 0.025937393307685852, "step": 69570 }, { "epoch": 19.75021288674425, "grad_norm": 4.295910835266113, "learning_rate": 8.02588702810105e-05, "loss": 0.014453108608722686, "step": 69580 }, { "epoch": 19.753051376667614, "grad_norm": 2.167728900909424, "learning_rate": 8.025603179108715e-05, "loss": 0.012874837219715118, "step": 69590 }, { "epoch": 19.755889866590973, "grad_norm": 12.813432693481445, "learning_rate": 8.025319330116379e-05, "loss": 0.021997025609016417, "step": 69600 }, { "epoch": 19.758728356514336, "grad_norm": 9.523062705993652, "learning_rate": 8.025035481124042e-05, "loss": 0.01497279703617096, "step": 69610 }, { "epoch": 19.761566846437695, "grad_norm": 5.072718620300293, "learning_rate": 8.024751632131706e-05, "loss": 0.02813688516616821, "step": 69620 }, { "epoch": 19.764405336361055, "grad_norm": 4.230207920074463, "learning_rate": 8.02446778313937e-05, "loss": 0.015277157723903655, "step": 69630 }, { "epoch": 19.767243826284417, "grad_norm": 1.1602848768234253, "learning_rate": 8.024183934147034e-05, "loss": 0.02338716685771942, "step": 69640 }, { "epoch": 19.770082316207777, "grad_norm": 1.6782395839691162, "learning_rate": 8.023900085154698e-05, "loss": 0.012429679185152054, "step": 69650 }, { "epoch": 19.77292080613114, "grad_norm": 10.235223770141602, "learning_rate": 8.023616236162363e-05, "loss": 0.014614701271057129, "step": 69660 }, { "epoch": 19.7757592960545, "grad_norm": 5.014599323272705, "learning_rate": 8.023332387170027e-05, "loss": 0.013847139477729798, "step": 69670 }, { "epoch": 19.77859778597786, "grad_norm": 9.123796463012695, "learning_rate": 8.02304853817769e-05, "loss": 0.01719532459974289, "step": 69680 }, { "epoch": 19.78143627590122, "grad_norm": 1.3546220064163208, "learning_rate": 8.022764689185354e-05, "loss": 0.005158590525388718, "step": 69690 }, { "epoch": 19.78427476582458, "grad_norm": 6.31652307510376, "learning_rate": 8.022480840193018e-05, "loss": 0.015220491588115692, "step": 69700 }, { "epoch": 19.787113255747943, "grad_norm": 9.803191184997559, "learning_rate": 8.02219699120068e-05, "loss": 0.019742248952388762, "step": 69710 }, { "epoch": 19.789951745671303, "grad_norm": 0.5209247469902039, "learning_rate": 8.021913142208346e-05, "loss": 0.00744052529335022, "step": 69720 }, { "epoch": 19.792790235594662, "grad_norm": 2.5495564937591553, "learning_rate": 8.02162929321601e-05, "loss": 0.01600113958120346, "step": 69730 }, { "epoch": 19.795628725518025, "grad_norm": 2.423266887664795, "learning_rate": 8.021345444223673e-05, "loss": 0.009242112934589385, "step": 69740 }, { "epoch": 19.798467215441384, "grad_norm": 11.537529945373535, "learning_rate": 8.021061595231337e-05, "loss": 0.01511542797088623, "step": 69750 }, { "epoch": 19.801305705364747, "grad_norm": 0.7733875513076782, "learning_rate": 8.020777746239001e-05, "loss": 0.019382932782173158, "step": 69760 }, { "epoch": 19.804144195288107, "grad_norm": 7.190354824066162, "learning_rate": 8.020493897246665e-05, "loss": 0.016523183882236482, "step": 69770 }, { "epoch": 19.806982685211466, "grad_norm": 1.3888078927993774, "learning_rate": 8.020210048254328e-05, "loss": 0.016259367763996124, "step": 69780 }, { "epoch": 19.80982117513483, "grad_norm": 4.544417381286621, "learning_rate": 8.019926199261994e-05, "loss": 0.03132299184799194, "step": 69790 }, { "epoch": 19.812659665058188, "grad_norm": 1.9963366985321045, "learning_rate": 8.019642350269658e-05, "loss": 0.007616961002349853, "step": 69800 }, { "epoch": 19.81549815498155, "grad_norm": 4.9850993156433105, "learning_rate": 8.01935850127732e-05, "loss": 0.01671280264854431, "step": 69810 }, { "epoch": 19.81833664490491, "grad_norm": 2.2570724487304688, "learning_rate": 8.019074652284985e-05, "loss": 0.02907361090183258, "step": 69820 }, { "epoch": 19.82117513482827, "grad_norm": 9.15283489227295, "learning_rate": 8.018790803292649e-05, "loss": 0.00781503990292549, "step": 69830 }, { "epoch": 19.824013624751633, "grad_norm": 0.6080517768859863, "learning_rate": 8.018506954300312e-05, "loss": 0.014565247297286987, "step": 69840 }, { "epoch": 19.826852114674992, "grad_norm": 10.303339004516602, "learning_rate": 8.018223105307977e-05, "loss": 0.0220239520072937, "step": 69850 }, { "epoch": 19.829690604598355, "grad_norm": 6.275253772735596, "learning_rate": 8.017939256315641e-05, "loss": 0.013669410347938537, "step": 69860 }, { "epoch": 19.832529094521714, "grad_norm": 0.2501889169216156, "learning_rate": 8.017655407323304e-05, "loss": 0.017661070823669432, "step": 69870 }, { "epoch": 19.835367584445073, "grad_norm": 1.7205308675765991, "learning_rate": 8.017371558330968e-05, "loss": 0.030998238921165468, "step": 69880 }, { "epoch": 19.838206074368436, "grad_norm": 2.869626760482788, "learning_rate": 8.017087709338632e-05, "loss": 0.01047343760728836, "step": 69890 }, { "epoch": 19.841044564291796, "grad_norm": 2.089573621749878, "learning_rate": 8.016803860346296e-05, "loss": 0.01836358904838562, "step": 69900 }, { "epoch": 19.84388305421516, "grad_norm": 0.397853285074234, "learning_rate": 8.016520011353959e-05, "loss": 0.012542331218719482, "step": 69910 }, { "epoch": 19.846721544138518, "grad_norm": 10.944541931152344, "learning_rate": 8.016236162361625e-05, "loss": 0.017021429538726807, "step": 69920 }, { "epoch": 19.84956003406188, "grad_norm": 1.6689707040786743, "learning_rate": 8.015952313369289e-05, "loss": 0.017763373255729676, "step": 69930 }, { "epoch": 19.85239852398524, "grad_norm": 1.750494122505188, "learning_rate": 8.015668464376952e-05, "loss": 0.009762341529130936, "step": 69940 }, { "epoch": 19.8552370139086, "grad_norm": 3.302927017211914, "learning_rate": 8.015384615384616e-05, "loss": 0.03229142427444458, "step": 69950 }, { "epoch": 19.858075503831962, "grad_norm": 3.7164931297302246, "learning_rate": 8.01510076639228e-05, "loss": 0.024825701117515565, "step": 69960 }, { "epoch": 19.86091399375532, "grad_norm": 0.9139042496681213, "learning_rate": 8.014816917399943e-05, "loss": 0.014837440848350526, "step": 69970 }, { "epoch": 19.863752483678685, "grad_norm": 6.463283061981201, "learning_rate": 8.014533068407608e-05, "loss": 0.02670870125293732, "step": 69980 }, { "epoch": 19.866590973602044, "grad_norm": 9.262341499328613, "learning_rate": 8.014249219415272e-05, "loss": 0.03314664363861084, "step": 69990 }, { "epoch": 19.869429463525403, "grad_norm": 0.8242834210395813, "learning_rate": 8.013965370422935e-05, "loss": 0.007536008954048157, "step": 70000 }, { "epoch": 19.869429463525403, "eval_accuracy": 0.962802823170344, "eval_loss": 0.12460049986839294, "eval_runtime": 31.5585, "eval_samples_per_second": 498.344, "eval_steps_per_second": 7.795, "step": 70000 }, { "epoch": 19.872267953448766, "grad_norm": 6.0503315925598145, "learning_rate": 8.013681521430599e-05, "loss": 0.01712670028209686, "step": 70010 }, { "epoch": 19.875106443372125, "grad_norm": 0.711719810962677, "learning_rate": 8.013397672438263e-05, "loss": 0.018879419565200804, "step": 70020 }, { "epoch": 19.87794493329549, "grad_norm": 6.612565517425537, "learning_rate": 8.013113823445928e-05, "loss": 0.015574388206005096, "step": 70030 }, { "epoch": 19.880783423218848, "grad_norm": 1.5285192728042603, "learning_rate": 8.01282997445359e-05, "loss": 0.01017269566655159, "step": 70040 }, { "epoch": 19.883621913142207, "grad_norm": 0.736617922782898, "learning_rate": 8.012546125461256e-05, "loss": 0.018401379883289336, "step": 70050 }, { "epoch": 19.88646040306557, "grad_norm": 8.752188682556152, "learning_rate": 8.012262276468919e-05, "loss": 0.021568451821804047, "step": 70060 }, { "epoch": 19.88929889298893, "grad_norm": 16.844388961791992, "learning_rate": 8.011978427476583e-05, "loss": 0.022290828824043273, "step": 70070 }, { "epoch": 19.892137382912292, "grad_norm": 1.6637965440750122, "learning_rate": 8.011694578484247e-05, "loss": 0.013622608780860902, "step": 70080 }, { "epoch": 19.89497587283565, "grad_norm": 10.465700149536133, "learning_rate": 8.011410729491911e-05, "loss": 0.029503101110458375, "step": 70090 }, { "epoch": 19.89781436275901, "grad_norm": 1.5931602716445923, "learning_rate": 8.011126880499574e-05, "loss": 0.006964816898107529, "step": 70100 }, { "epoch": 19.900652852682374, "grad_norm": 16.223539352416992, "learning_rate": 8.010843031507238e-05, "loss": 0.023791635036468507, "step": 70110 }, { "epoch": 19.903491342605733, "grad_norm": 0.9101476073265076, "learning_rate": 8.010559182514903e-05, "loss": 0.028920650482177734, "step": 70120 }, { "epoch": 19.906329832529096, "grad_norm": 2.0515339374542236, "learning_rate": 8.010275333522566e-05, "loss": 0.01323588639497757, "step": 70130 }, { "epoch": 19.909168322452455, "grad_norm": 5.446404933929443, "learning_rate": 8.00999148453023e-05, "loss": 0.00625767856836319, "step": 70140 }, { "epoch": 19.912006812375814, "grad_norm": 5.3146467208862305, "learning_rate": 8.009707635537894e-05, "loss": 0.0213604599237442, "step": 70150 }, { "epoch": 19.914845302299177, "grad_norm": 1.8785371780395508, "learning_rate": 8.009423786545557e-05, "loss": 0.033167564868927, "step": 70160 }, { "epoch": 19.917683792222537, "grad_norm": 0.9094081521034241, "learning_rate": 8.009139937553221e-05, "loss": 0.0048977967351675035, "step": 70170 }, { "epoch": 19.9205222821459, "grad_norm": 0.49379199743270874, "learning_rate": 8.008856088560887e-05, "loss": 0.026901325583457945, "step": 70180 }, { "epoch": 19.92336077206926, "grad_norm": 10.196534156799316, "learning_rate": 8.00857223956855e-05, "loss": 0.017138615250587463, "step": 70190 }, { "epoch": 19.92619926199262, "grad_norm": 8.169548988342285, "learning_rate": 8.008288390576214e-05, "loss": 0.02445916533470154, "step": 70200 }, { "epoch": 19.92903775191598, "grad_norm": 2.5194480419158936, "learning_rate": 8.008004541583878e-05, "loss": 0.008446452021598817, "step": 70210 }, { "epoch": 19.93187624183934, "grad_norm": 1.267454981803894, "learning_rate": 8.007720692591542e-05, "loss": 0.0110305555164814, "step": 70220 }, { "epoch": 19.934714731762703, "grad_norm": 2.1513097286224365, "learning_rate": 8.007436843599205e-05, "loss": 0.014187799394130706, "step": 70230 }, { "epoch": 19.937553221686063, "grad_norm": 6.587697505950928, "learning_rate": 8.007152994606869e-05, "loss": 0.013854707777500152, "step": 70240 }, { "epoch": 19.940391711609422, "grad_norm": 0.5239924192428589, "learning_rate": 8.006869145614534e-05, "loss": 0.0256253182888031, "step": 70250 }, { "epoch": 19.943230201532785, "grad_norm": 0.8720202445983887, "learning_rate": 8.006585296622197e-05, "loss": 0.006948457658290863, "step": 70260 }, { "epoch": 19.946068691456144, "grad_norm": 2.0155763626098633, "learning_rate": 8.006301447629861e-05, "loss": 0.013918130099773407, "step": 70270 }, { "epoch": 19.948907181379507, "grad_norm": 5.3240861892700195, "learning_rate": 8.006017598637526e-05, "loss": 0.01625025421380997, "step": 70280 }, { "epoch": 19.951745671302866, "grad_norm": 3.647378921508789, "learning_rate": 8.005733749645188e-05, "loss": 0.025415289402008056, "step": 70290 }, { "epoch": 19.95458416122623, "grad_norm": 5.258601665496826, "learning_rate": 8.005449900652852e-05, "loss": 0.020264589786529542, "step": 70300 }, { "epoch": 19.95742265114959, "grad_norm": 0.9246200919151306, "learning_rate": 8.005166051660517e-05, "loss": 0.010485036671161652, "step": 70310 }, { "epoch": 19.960261141072948, "grad_norm": 8.737808227539062, "learning_rate": 8.004882202668181e-05, "loss": 0.020587009191513062, "step": 70320 }, { "epoch": 19.96309963099631, "grad_norm": 4.19546365737915, "learning_rate": 8.004598353675845e-05, "loss": 0.022262267768383026, "step": 70330 }, { "epoch": 19.96593812091967, "grad_norm": 0.8448171019554138, "learning_rate": 8.004314504683509e-05, "loss": 0.005003838986158371, "step": 70340 }, { "epoch": 19.968776610843033, "grad_norm": 1.529058814048767, "learning_rate": 8.004030655691173e-05, "loss": 0.016503478586673736, "step": 70350 }, { "epoch": 19.971615100766392, "grad_norm": 2.432154655456543, "learning_rate": 8.003746806698836e-05, "loss": 0.02536535859107971, "step": 70360 }, { "epoch": 19.974453590689752, "grad_norm": 3.5854225158691406, "learning_rate": 8.0034629577065e-05, "loss": 0.016874317824840546, "step": 70370 }, { "epoch": 19.977292080613115, "grad_norm": 1.7768503427505493, "learning_rate": 8.003179108714166e-05, "loss": 0.011259014159440995, "step": 70380 }, { "epoch": 19.980130570536474, "grad_norm": 2.8667845726013184, "learning_rate": 8.002895259721828e-05, "loss": 0.015071055293083191, "step": 70390 }, { "epoch": 19.982969060459837, "grad_norm": 2.7725303173065186, "learning_rate": 8.002611410729492e-05, "loss": 0.012452834844589233, "step": 70400 }, { "epoch": 19.985807550383196, "grad_norm": 3.431821584701538, "learning_rate": 8.002327561737157e-05, "loss": 0.016131222248077393, "step": 70410 }, { "epoch": 19.988646040306556, "grad_norm": 3.3145828247070312, "learning_rate": 8.00204371274482e-05, "loss": 0.013110698759555816, "step": 70420 }, { "epoch": 19.99148453022992, "grad_norm": 0.8674981594085693, "learning_rate": 8.001759863752484e-05, "loss": 0.012944674491882325, "step": 70430 }, { "epoch": 19.994323020153278, "grad_norm": 5.62935209274292, "learning_rate": 8.001476014760148e-05, "loss": 0.009259013086557388, "step": 70440 }, { "epoch": 19.99716151007664, "grad_norm": 2.8510632514953613, "learning_rate": 8.001192165767812e-05, "loss": 0.013773289322853089, "step": 70450 }, { "epoch": 20.0, "grad_norm": 0.5516908168792725, "learning_rate": 8.000908316775476e-05, "loss": 0.024528275430202483, "step": 70460 }, { "epoch": 20.00283848992336, "grad_norm": 7.436856746673584, "learning_rate": 8.00062446778314e-05, "loss": 0.016245432198047638, "step": 70470 }, { "epoch": 20.005676979846722, "grad_norm": 1.6136023998260498, "learning_rate": 8.000340618790804e-05, "loss": 0.011430624127388, "step": 70480 }, { "epoch": 20.00851546977008, "grad_norm": 0.8578248620033264, "learning_rate": 8.000056769798467e-05, "loss": 0.007121416926383972, "step": 70490 }, { "epoch": 20.011353959693444, "grad_norm": 8.089055061340332, "learning_rate": 7.999772920806131e-05, "loss": 0.016089314222335817, "step": 70500 }, { "epoch": 20.011353959693444, "eval_accuracy": 0.9650282952883577, "eval_loss": 0.112466961145401, "eval_runtime": 31.6424, "eval_samples_per_second": 497.023, "eval_steps_per_second": 7.774, "step": 70500 }, { "epoch": 20.014192449616804, "grad_norm": 0.6011362671852112, "learning_rate": 7.999489071813795e-05, "loss": 0.008259302377700806, "step": 70510 }, { "epoch": 20.017030939540163, "grad_norm": 2.6536293029785156, "learning_rate": 7.99920522282146e-05, "loss": 0.020915262401103973, "step": 70520 }, { "epoch": 20.019869429463526, "grad_norm": 5.806552410125732, "learning_rate": 7.998921373829124e-05, "loss": 0.010254844278097152, "step": 70530 }, { "epoch": 20.022707919386885, "grad_norm": 1.3889726400375366, "learning_rate": 7.998637524836788e-05, "loss": 0.008887986093759537, "step": 70540 }, { "epoch": 20.025546409310248, "grad_norm": 0.1703825443983078, "learning_rate": 7.99835367584445e-05, "loss": 0.013272418081760407, "step": 70550 }, { "epoch": 20.028384899233608, "grad_norm": 2.1090433597564697, "learning_rate": 7.998069826852115e-05, "loss": 0.006426654756069183, "step": 70560 }, { "epoch": 20.031223389156967, "grad_norm": 1.681519627571106, "learning_rate": 7.997785977859779e-05, "loss": 0.015022775530815125, "step": 70570 }, { "epoch": 20.03406187908033, "grad_norm": 4.431528091430664, "learning_rate": 7.997502128867443e-05, "loss": 0.019916574656963348, "step": 70580 }, { "epoch": 20.03690036900369, "grad_norm": 0.5745396018028259, "learning_rate": 7.997218279875107e-05, "loss": 0.011699742078781128, "step": 70590 }, { "epoch": 20.039738858927052, "grad_norm": 9.14401912689209, "learning_rate": 7.996934430882771e-05, "loss": 0.01926853507757187, "step": 70600 }, { "epoch": 20.04257734885041, "grad_norm": 9.073312759399414, "learning_rate": 7.996650581890435e-05, "loss": 0.015802636742591858, "step": 70610 }, { "epoch": 20.04541583877377, "grad_norm": 8.742959022521973, "learning_rate": 7.996366732898098e-05, "loss": 0.022052106261253358, "step": 70620 }, { "epoch": 20.048254328697134, "grad_norm": 0.25531259179115295, "learning_rate": 7.996082883905762e-05, "loss": 0.009354813396930695, "step": 70630 }, { "epoch": 20.051092818620493, "grad_norm": 1.383939504623413, "learning_rate": 7.995799034913426e-05, "loss": 0.018287202715873717, "step": 70640 }, { "epoch": 20.053931308543856, "grad_norm": 2.431122303009033, "learning_rate": 7.99551518592109e-05, "loss": 0.010712487995624543, "step": 70650 }, { "epoch": 20.056769798467215, "grad_norm": 0.8003904819488525, "learning_rate": 7.995231336928755e-05, "loss": 0.012613195180892944, "step": 70660 }, { "epoch": 20.059608288390578, "grad_norm": 4.319883346557617, "learning_rate": 7.994947487936419e-05, "loss": 0.0128926619887352, "step": 70670 }, { "epoch": 20.062446778313937, "grad_norm": 0.8970504403114319, "learning_rate": 7.994663638944082e-05, "loss": 0.008167168498039246, "step": 70680 }, { "epoch": 20.065285268237297, "grad_norm": 3.034276247024536, "learning_rate": 7.994379789951746e-05, "loss": 0.013646291196346283, "step": 70690 }, { "epoch": 20.06812375816066, "grad_norm": 12.887155532836914, "learning_rate": 7.99409594095941e-05, "loss": 0.018387052416801452, "step": 70700 }, { "epoch": 20.07096224808402, "grad_norm": 3.731816291809082, "learning_rate": 7.993812091967074e-05, "loss": 0.007870706915855407, "step": 70710 }, { "epoch": 20.07380073800738, "grad_norm": 0.980358898639679, "learning_rate": 7.993528242974738e-05, "loss": 0.00835033431649208, "step": 70720 }, { "epoch": 20.07663922793074, "grad_norm": 0.06576377153396606, "learning_rate": 7.993244393982402e-05, "loss": 0.00818023383617401, "step": 70730 }, { "epoch": 20.0794777178541, "grad_norm": 1.553149700164795, "learning_rate": 7.992960544990066e-05, "loss": 0.019675710797309877, "step": 70740 }, { "epoch": 20.082316207777463, "grad_norm": 0.5809218287467957, "learning_rate": 7.992676695997729e-05, "loss": 0.009852318465709687, "step": 70750 }, { "epoch": 20.085154697700823, "grad_norm": 5.539701461791992, "learning_rate": 7.992392847005393e-05, "loss": 0.017030355334281922, "step": 70760 }, { "epoch": 20.087993187624186, "grad_norm": 19.671899795532227, "learning_rate": 7.992108998013057e-05, "loss": 0.01973271667957306, "step": 70770 }, { "epoch": 20.090831677547545, "grad_norm": 0.4359962046146393, "learning_rate": 7.991825149020722e-05, "loss": 0.016238373517990113, "step": 70780 }, { "epoch": 20.093670167470904, "grad_norm": 1.2678791284561157, "learning_rate": 7.991541300028386e-05, "loss": 0.01931024491786957, "step": 70790 }, { "epoch": 20.096508657394267, "grad_norm": 0.18864713609218597, "learning_rate": 7.99125745103605e-05, "loss": 0.016872239112854005, "step": 70800 }, { "epoch": 20.099347147317626, "grad_norm": 1.9875191450119019, "learning_rate": 7.990973602043713e-05, "loss": 0.011762619018554688, "step": 70810 }, { "epoch": 20.10218563724099, "grad_norm": 2.685734272003174, "learning_rate": 7.990689753051377e-05, "loss": 0.012342245131731034, "step": 70820 }, { "epoch": 20.10502412716435, "grad_norm": 4.351536750793457, "learning_rate": 7.990405904059041e-05, "loss": 0.021676430106163026, "step": 70830 }, { "epoch": 20.107862617087708, "grad_norm": 3.1585872173309326, "learning_rate": 7.990122055066705e-05, "loss": 0.010295553505420685, "step": 70840 }, { "epoch": 20.11070110701107, "grad_norm": 0.46891143918037415, "learning_rate": 7.989838206074369e-05, "loss": 0.007197104394435883, "step": 70850 }, { "epoch": 20.11353959693443, "grad_norm": 9.117058753967285, "learning_rate": 7.989554357082033e-05, "loss": 0.026901519298553465, "step": 70860 }, { "epoch": 20.116378086857793, "grad_norm": 7.406280517578125, "learning_rate": 7.989270508089697e-05, "loss": 0.017835670709609987, "step": 70870 }, { "epoch": 20.119216576781152, "grad_norm": 0.4278368055820465, "learning_rate": 7.98898665909736e-05, "loss": 0.008642089366912842, "step": 70880 }, { "epoch": 20.12205506670451, "grad_norm": 4.760260105133057, "learning_rate": 7.988702810105024e-05, "loss": 0.017266520857810976, "step": 70890 }, { "epoch": 20.124893556627875, "grad_norm": 3.2486774921417236, "learning_rate": 7.988418961112689e-05, "loss": 0.021109884977340697, "step": 70900 }, { "epoch": 20.127732046551234, "grad_norm": 7.284914016723633, "learning_rate": 7.988135112120351e-05, "loss": 0.02543071210384369, "step": 70910 }, { "epoch": 20.130570536474597, "grad_norm": 3.347695827484131, "learning_rate": 7.987851263128017e-05, "loss": 0.013283728063106537, "step": 70920 }, { "epoch": 20.133409026397956, "grad_norm": 2.3983588218688965, "learning_rate": 7.987567414135681e-05, "loss": 0.010482189059257508, "step": 70930 }, { "epoch": 20.136247516321315, "grad_norm": 2.587460994720459, "learning_rate": 7.987283565143344e-05, "loss": 0.012927180528640747, "step": 70940 }, { "epoch": 20.13908600624468, "grad_norm": 1.217547059059143, "learning_rate": 7.986999716151008e-05, "loss": 0.01064080446958542, "step": 70950 }, { "epoch": 20.141924496168038, "grad_norm": 4.618635654449463, "learning_rate": 7.986715867158672e-05, "loss": 0.016633853316307068, "step": 70960 }, { "epoch": 20.1447629860914, "grad_norm": 4.599666595458984, "learning_rate": 7.986432018166336e-05, "loss": 0.007615604996681213, "step": 70970 }, { "epoch": 20.14760147601476, "grad_norm": 7.990434646606445, "learning_rate": 7.986148169174e-05, "loss": 0.035049337148666385, "step": 70980 }, { "epoch": 20.15043996593812, "grad_norm": 4.115412712097168, "learning_rate": 7.985864320181664e-05, "loss": 0.02891141176223755, "step": 70990 }, { "epoch": 20.153278455861482, "grad_norm": 9.228787422180176, "learning_rate": 7.985580471189327e-05, "loss": 0.023379792273044587, "step": 71000 }, { "epoch": 20.153278455861482, "eval_accuracy": 0.9644560310294398, "eval_loss": 0.1151220053434372, "eval_runtime": 31.5462, "eval_samples_per_second": 498.538, "eval_steps_per_second": 7.798, "step": 71000 }, { "epoch": 20.15611694578484, "grad_norm": 0.9651784300804138, "learning_rate": 7.985296622196991e-05, "loss": 0.008450717478990556, "step": 71010 }, { "epoch": 20.158955435708204, "grad_norm": 1.4010860919952393, "learning_rate": 7.985012773204655e-05, "loss": 0.026739388704299927, "step": 71020 }, { "epoch": 20.161793925631564, "grad_norm": 0.8490244746208191, "learning_rate": 7.98472892421232e-05, "loss": 0.016658060252666473, "step": 71030 }, { "epoch": 20.164632415554923, "grad_norm": 0.5647145509719849, "learning_rate": 7.984445075219982e-05, "loss": 0.005608601123094558, "step": 71040 }, { "epoch": 20.167470905478286, "grad_norm": 7.153414249420166, "learning_rate": 7.984161226227648e-05, "loss": 0.010309536755084992, "step": 71050 }, { "epoch": 20.170309395401645, "grad_norm": 4.291512966156006, "learning_rate": 7.983877377235312e-05, "loss": 0.013422916829586028, "step": 71060 }, { "epoch": 20.173147885325008, "grad_norm": 8.023852348327637, "learning_rate": 7.983593528242975e-05, "loss": 0.010602827370166778, "step": 71070 }, { "epoch": 20.175986375248367, "grad_norm": 0.68719482421875, "learning_rate": 7.983309679250639e-05, "loss": 0.0068502120673656465, "step": 71080 }, { "epoch": 20.17882486517173, "grad_norm": 8.918876647949219, "learning_rate": 7.983025830258303e-05, "loss": 0.016236656904220582, "step": 71090 }, { "epoch": 20.18166335509509, "grad_norm": 0.6813021302223206, "learning_rate": 7.982741981265966e-05, "loss": 0.023597031831741333, "step": 71100 }, { "epoch": 20.18450184501845, "grad_norm": 0.4900694191455841, "learning_rate": 7.98245813227363e-05, "loss": 0.011836731433868408, "step": 71110 }, { "epoch": 20.187340334941812, "grad_norm": 10.9681978225708, "learning_rate": 7.982174283281295e-05, "loss": 0.020834103226661682, "step": 71120 }, { "epoch": 20.19017882486517, "grad_norm": 5.577882289886475, "learning_rate": 7.981890434288958e-05, "loss": 0.0135760098695755, "step": 71130 }, { "epoch": 20.193017314788534, "grad_norm": 0.5071020722389221, "learning_rate": 7.981606585296622e-05, "loss": 0.008869817852973938, "step": 71140 }, { "epoch": 20.195855804711893, "grad_norm": 6.140706539154053, "learning_rate": 7.981322736304287e-05, "loss": 0.015114164352416993, "step": 71150 }, { "epoch": 20.198694294635253, "grad_norm": 0.5593485236167908, "learning_rate": 7.98103888731195e-05, "loss": 0.015102194249629974, "step": 71160 }, { "epoch": 20.201532784558616, "grad_norm": 1.271063208580017, "learning_rate": 7.980755038319613e-05, "loss": 0.014655356109142304, "step": 71170 }, { "epoch": 20.204371274481975, "grad_norm": 2.1790623664855957, "learning_rate": 7.980471189327279e-05, "loss": 0.007778779417276382, "step": 71180 }, { "epoch": 20.207209764405338, "grad_norm": 1.5992292165756226, "learning_rate": 7.980187340334943e-05, "loss": 0.01679815649986267, "step": 71190 }, { "epoch": 20.210048254328697, "grad_norm": 1.2398967742919922, "learning_rate": 7.979903491342606e-05, "loss": 0.01423579454421997, "step": 71200 }, { "epoch": 20.212886744252057, "grad_norm": 0.20961888134479523, "learning_rate": 7.97961964235027e-05, "loss": 0.013106675446033477, "step": 71210 }, { "epoch": 20.21572523417542, "grad_norm": 7.693957805633545, "learning_rate": 7.979335793357934e-05, "loss": 0.018496458232402802, "step": 71220 }, { "epoch": 20.21856372409878, "grad_norm": 1.2590605020523071, "learning_rate": 7.979051944365597e-05, "loss": 0.01798439621925354, "step": 71230 }, { "epoch": 20.22140221402214, "grad_norm": 3.11205792427063, "learning_rate": 7.978768095373261e-05, "loss": 0.01695810705423355, "step": 71240 }, { "epoch": 20.2242407039455, "grad_norm": 1.706710934638977, "learning_rate": 7.978484246380927e-05, "loss": 0.009011100977659225, "step": 71250 }, { "epoch": 20.22707919386886, "grad_norm": 1.7847572565078735, "learning_rate": 7.97820039738859e-05, "loss": 0.013674171268939972, "step": 71260 }, { "epoch": 20.229917683792223, "grad_norm": 0.8101648092269897, "learning_rate": 7.977916548396253e-05, "loss": 0.017009571194648743, "step": 71270 }, { "epoch": 20.232756173715583, "grad_norm": 2.692492961883545, "learning_rate": 7.977632699403918e-05, "loss": 0.009096066653728484, "step": 71280 }, { "epoch": 20.235594663638945, "grad_norm": 10.445018768310547, "learning_rate": 7.977348850411582e-05, "loss": 0.01693447232246399, "step": 71290 }, { "epoch": 20.238433153562305, "grad_norm": 9.105262756347656, "learning_rate": 7.977065001419245e-05, "loss": 0.015661458671092986, "step": 71300 }, { "epoch": 20.241271643485664, "grad_norm": 12.29133415222168, "learning_rate": 7.97678115242691e-05, "loss": 0.01562212109565735, "step": 71310 }, { "epoch": 20.244110133409027, "grad_norm": 7.209648609161377, "learning_rate": 7.976497303434574e-05, "loss": 0.014545503258705138, "step": 71320 }, { "epoch": 20.246948623332386, "grad_norm": 1.1836363077163696, "learning_rate": 7.976213454442237e-05, "loss": 0.019958382844924925, "step": 71330 }, { "epoch": 20.24978711325575, "grad_norm": 2.5779213905334473, "learning_rate": 7.975929605449901e-05, "loss": 0.008409702777862548, "step": 71340 }, { "epoch": 20.25262560317911, "grad_norm": 16.579957962036133, "learning_rate": 7.975645756457565e-05, "loss": 0.028837484121322633, "step": 71350 }, { "epoch": 20.255464093102468, "grad_norm": 0.8662018775939941, "learning_rate": 7.975361907465228e-05, "loss": 0.012503746151924133, "step": 71360 }, { "epoch": 20.25830258302583, "grad_norm": 1.3396071195602417, "learning_rate": 7.975078058472892e-05, "loss": 0.009117329120635986, "step": 71370 }, { "epoch": 20.26114107294919, "grad_norm": 6.912156105041504, "learning_rate": 7.974794209480558e-05, "loss": 0.021028077602386473, "step": 71380 }, { "epoch": 20.263979562872553, "grad_norm": 1.0493494272232056, "learning_rate": 7.97451036048822e-05, "loss": 0.008940963447093964, "step": 71390 }, { "epoch": 20.266818052795912, "grad_norm": 3.1840806007385254, "learning_rate": 7.974226511495885e-05, "loss": 0.00422750860452652, "step": 71400 }, { "epoch": 20.26965654271927, "grad_norm": 5.137458324432373, "learning_rate": 7.973942662503549e-05, "loss": 0.009950690716505051, "step": 71410 }, { "epoch": 20.272495032642635, "grad_norm": 1.6262131929397583, "learning_rate": 7.973658813511213e-05, "loss": 0.011805801093578339, "step": 71420 }, { "epoch": 20.275333522565994, "grad_norm": 10.116072654724121, "learning_rate": 7.973374964518876e-05, "loss": 0.015828925371170043, "step": 71430 }, { "epoch": 20.278172012489357, "grad_norm": 12.012731552124023, "learning_rate": 7.97309111552654e-05, "loss": 0.028188514709472656, "step": 71440 }, { "epoch": 20.281010502412716, "grad_norm": 1.8385461568832397, "learning_rate": 7.972807266534205e-05, "loss": 0.013013137876987458, "step": 71450 }, { "epoch": 20.283848992336075, "grad_norm": 1.95732581615448, "learning_rate": 7.972523417541868e-05, "loss": 0.007443028688430786, "step": 71460 }, { "epoch": 20.28668748225944, "grad_norm": 5.327107906341553, "learning_rate": 7.972239568549532e-05, "loss": 0.008409370481967927, "step": 71470 }, { "epoch": 20.289525972182798, "grad_norm": 2.0625081062316895, "learning_rate": 7.971955719557196e-05, "loss": 0.018064771592617036, "step": 71480 }, { "epoch": 20.29236446210616, "grad_norm": 10.508517265319824, "learning_rate": 7.971671870564859e-05, "loss": 0.011872991174459457, "step": 71490 }, { "epoch": 20.29520295202952, "grad_norm": 8.100379943847656, "learning_rate": 7.971388021572523e-05, "loss": 0.024738314747810363, "step": 71500 }, { "epoch": 20.29520295202952, "eval_accuracy": 0.9661728238061932, "eval_loss": 0.10654716193675995, "eval_runtime": 31.8601, "eval_samples_per_second": 493.627, "eval_steps_per_second": 7.721, "step": 71500 }, { "epoch": 20.298041441952883, "grad_norm": 6.455258369445801, "learning_rate": 7.971104172580189e-05, "loss": 0.03311143219470978, "step": 71510 }, { "epoch": 20.300879931876242, "grad_norm": 3.1452577114105225, "learning_rate": 7.970820323587851e-05, "loss": 0.016617275774478912, "step": 71520 }, { "epoch": 20.3037184217996, "grad_norm": 13.45455265045166, "learning_rate": 7.970536474595516e-05, "loss": 0.031100407242774963, "step": 71530 }, { "epoch": 20.306556911722964, "grad_norm": 2.1702699661254883, "learning_rate": 7.970281010502412e-05, "loss": 0.019925877451896667, "step": 71540 }, { "epoch": 20.309395401646324, "grad_norm": 0.5071220397949219, "learning_rate": 7.969997161510076e-05, "loss": 0.01536392718553543, "step": 71550 }, { "epoch": 20.312233891569687, "grad_norm": 3.798987865447998, "learning_rate": 7.969713312517742e-05, "loss": 0.018707436323165894, "step": 71560 }, { "epoch": 20.315072381493046, "grad_norm": 16.85640525817871, "learning_rate": 7.969429463525405e-05, "loss": 0.018195712566375734, "step": 71570 }, { "epoch": 20.317910871416405, "grad_norm": 16.111982345581055, "learning_rate": 7.969145614533069e-05, "loss": 0.017200061678886415, "step": 71580 }, { "epoch": 20.320749361339768, "grad_norm": 13.741687774658203, "learning_rate": 7.968861765540733e-05, "loss": 0.016931161284446716, "step": 71590 }, { "epoch": 20.323587851263127, "grad_norm": 6.790251731872559, "learning_rate": 7.968577916548397e-05, "loss": 0.015344303846359254, "step": 71600 }, { "epoch": 20.32642634118649, "grad_norm": 1.0825552940368652, "learning_rate": 7.96829406755606e-05, "loss": 0.01314530372619629, "step": 71610 }, { "epoch": 20.32926483110985, "grad_norm": 1.0809060335159302, "learning_rate": 7.968010218563724e-05, "loss": 0.024292610585689545, "step": 71620 }, { "epoch": 20.33210332103321, "grad_norm": 6.901791572570801, "learning_rate": 7.96772636957139e-05, "loss": 0.013001339137554168, "step": 71630 }, { "epoch": 20.334941810956572, "grad_norm": 10.481952667236328, "learning_rate": 7.967442520579052e-05, "loss": 0.02448507398366928, "step": 71640 }, { "epoch": 20.33778030087993, "grad_norm": 10.30474853515625, "learning_rate": 7.967158671586716e-05, "loss": 0.05966736674308777, "step": 71650 }, { "epoch": 20.340618790803294, "grad_norm": 1.1744650602340698, "learning_rate": 7.96687482259438e-05, "loss": 0.03822956085205078, "step": 71660 }, { "epoch": 20.343457280726653, "grad_norm": 12.141825675964355, "learning_rate": 7.966590973602043e-05, "loss": 0.01278379261493683, "step": 71670 }, { "epoch": 20.346295770650013, "grad_norm": 0.8915175795555115, "learning_rate": 7.966307124609707e-05, "loss": 0.024162687361240387, "step": 71680 }, { "epoch": 20.349134260573376, "grad_norm": 4.455752849578857, "learning_rate": 7.966023275617373e-05, "loss": 0.016721682250499727, "step": 71690 }, { "epoch": 20.351972750496735, "grad_norm": 3.03383731842041, "learning_rate": 7.965739426625036e-05, "loss": 0.028349217772483826, "step": 71700 }, { "epoch": 20.354811240420098, "grad_norm": 3.06288480758667, "learning_rate": 7.9654555776327e-05, "loss": 0.01646275520324707, "step": 71710 }, { "epoch": 20.357649730343457, "grad_norm": 1.9586575031280518, "learning_rate": 7.965171728640364e-05, "loss": 0.005636728554964066, "step": 71720 }, { "epoch": 20.360488220266816, "grad_norm": 2.7683238983154297, "learning_rate": 7.964887879648028e-05, "loss": 0.01677453815937042, "step": 71730 }, { "epoch": 20.36332671019018, "grad_norm": 0.9296435713768005, "learning_rate": 7.964604030655691e-05, "loss": 0.0143295019865036, "step": 71740 }, { "epoch": 20.36616520011354, "grad_norm": 5.427598476409912, "learning_rate": 7.964320181663355e-05, "loss": 0.015647386014461518, "step": 71750 }, { "epoch": 20.3690036900369, "grad_norm": 2.572540760040283, "learning_rate": 7.964036332671019e-05, "loss": 0.016397115588188172, "step": 71760 }, { "epoch": 20.37184217996026, "grad_norm": 14.900172233581543, "learning_rate": 7.963752483678683e-05, "loss": 0.01482781171798706, "step": 71770 }, { "epoch": 20.37468066988362, "grad_norm": 6.146739959716797, "learning_rate": 7.963468634686348e-05, "loss": 0.022622056305408478, "step": 71780 }, { "epoch": 20.377519159806983, "grad_norm": 0.31290292739868164, "learning_rate": 7.963184785694012e-05, "loss": 0.002663133107125759, "step": 71790 }, { "epoch": 20.380357649730342, "grad_norm": 0.7098491191864014, "learning_rate": 7.962900936701674e-05, "loss": 0.007530221343040466, "step": 71800 }, { "epoch": 20.383196139653705, "grad_norm": 8.688029289245605, "learning_rate": 7.962617087709339e-05, "loss": 0.02052065432071686, "step": 71810 }, { "epoch": 20.386034629577065, "grad_norm": 5.2097649574279785, "learning_rate": 7.962333238717003e-05, "loss": 0.01468944400548935, "step": 71820 }, { "epoch": 20.388873119500424, "grad_norm": 2.7574174404144287, "learning_rate": 7.962049389724667e-05, "loss": 0.008304913341999055, "step": 71830 }, { "epoch": 20.391711609423787, "grad_norm": 3.9530792236328125, "learning_rate": 7.961765540732331e-05, "loss": 0.006035934388637543, "step": 71840 }, { "epoch": 20.394550099347146, "grad_norm": 1.443293571472168, "learning_rate": 7.961481691739995e-05, "loss": 0.015250900387763977, "step": 71850 }, { "epoch": 20.39738858927051, "grad_norm": 7.124764442443848, "learning_rate": 7.961197842747658e-05, "loss": 0.009171187132596969, "step": 71860 }, { "epoch": 20.40022707919387, "grad_norm": 0.6649703979492188, "learning_rate": 7.960913993755322e-05, "loss": 0.017971611022949217, "step": 71870 }, { "epoch": 20.40306556911723, "grad_norm": 2.4708728790283203, "learning_rate": 7.960630144762986e-05, "loss": 0.018482764065265656, "step": 71880 }, { "epoch": 20.40590405904059, "grad_norm": 6.341677188873291, "learning_rate": 7.96034629577065e-05, "loss": 0.01345614492893219, "step": 71890 }, { "epoch": 20.40874254896395, "grad_norm": 1.9383199214935303, "learning_rate": 7.960062446778314e-05, "loss": 0.029498499631881715, "step": 71900 }, { "epoch": 20.411581038887313, "grad_norm": 10.272661209106445, "learning_rate": 7.959778597785979e-05, "loss": 0.019552084803581237, "step": 71910 }, { "epoch": 20.414419528810672, "grad_norm": 0.3369351327419281, "learning_rate": 7.959494748793643e-05, "loss": 0.016626468300819396, "step": 71920 }, { "epoch": 20.417258018734035, "grad_norm": 11.512466430664062, "learning_rate": 7.959210899801305e-05, "loss": 0.02207489013671875, "step": 71930 }, { "epoch": 20.420096508657394, "grad_norm": 1.1375325918197632, "learning_rate": 7.95892705080897e-05, "loss": 0.019103460013866425, "step": 71940 }, { "epoch": 20.422934998580754, "grad_norm": 9.873573303222656, "learning_rate": 7.958643201816634e-05, "loss": 0.019082090258598326, "step": 71950 }, { "epoch": 20.425773488504117, "grad_norm": 1.6722835302352905, "learning_rate": 7.958359352824298e-05, "loss": 0.017862191796302794, "step": 71960 }, { "epoch": 20.428611978427476, "grad_norm": 0.7831377387046814, "learning_rate": 7.958075503831962e-05, "loss": 0.010959525406360627, "step": 71970 }, { "epoch": 20.43145046835084, "grad_norm": 0.2760637402534485, "learning_rate": 7.957791654839626e-05, "loss": 0.016851162910461424, "step": 71980 }, { "epoch": 20.434288958274198, "grad_norm": 2.167778253555298, "learning_rate": 7.957507805847289e-05, "loss": 0.021981926262378694, "step": 71990 }, { "epoch": 20.437127448197558, "grad_norm": 1.963661789894104, "learning_rate": 7.957223956854953e-05, "loss": 0.027234962582588194, "step": 72000 }, { "epoch": 20.437127448197558, "eval_accuracy": 0.9650918802060151, "eval_loss": 0.11085671931505203, "eval_runtime": 32.2812, "eval_samples_per_second": 487.188, "eval_steps_per_second": 7.621, "step": 72000 }, { "epoch": 20.43996593812092, "grad_norm": 2.861565351486206, "learning_rate": 7.956940107862617e-05, "loss": 0.014334313571453094, "step": 72010 }, { "epoch": 20.44280442804428, "grad_norm": 0.9935620427131653, "learning_rate": 7.956656258870281e-05, "loss": 0.023954546451568602, "step": 72020 }, { "epoch": 20.445642917967643, "grad_norm": 1.6452908515930176, "learning_rate": 7.956372409877946e-05, "loss": 0.011409955471754074, "step": 72030 }, { "epoch": 20.448481407891002, "grad_norm": 3.152693510055542, "learning_rate": 7.95608856088561e-05, "loss": 0.007183098793029785, "step": 72040 }, { "epoch": 20.45131989781436, "grad_norm": 1.5322866439819336, "learning_rate": 7.955804711893274e-05, "loss": 0.007374898344278335, "step": 72050 }, { "epoch": 20.454158387737724, "grad_norm": 3.7526168823242188, "learning_rate": 7.955520862900937e-05, "loss": 0.012207742035388946, "step": 72060 }, { "epoch": 20.456996877661084, "grad_norm": 12.91372013092041, "learning_rate": 7.955237013908601e-05, "loss": 0.012936016917228699, "step": 72070 }, { "epoch": 20.459835367584446, "grad_norm": 0.5344229340553284, "learning_rate": 7.954953164916265e-05, "loss": 0.018308672308921813, "step": 72080 }, { "epoch": 20.462673857507806, "grad_norm": 1.184501051902771, "learning_rate": 7.954669315923929e-05, "loss": 0.02302212417125702, "step": 72090 }, { "epoch": 20.465512347431165, "grad_norm": 2.184994697570801, "learning_rate": 7.954385466931593e-05, "loss": 0.034254243969917296, "step": 72100 }, { "epoch": 20.468350837354528, "grad_norm": 9.629460334777832, "learning_rate": 7.954101617939257e-05, "loss": 0.0381091833114624, "step": 72110 }, { "epoch": 20.471189327277887, "grad_norm": 0.7039865255355835, "learning_rate": 7.95381776894692e-05, "loss": 0.004887149482965469, "step": 72120 }, { "epoch": 20.47402781720125, "grad_norm": 8.712130546569824, "learning_rate": 7.953533919954584e-05, "loss": 0.007143816351890564, "step": 72130 }, { "epoch": 20.47686630712461, "grad_norm": 3.7989861965179443, "learning_rate": 7.953250070962248e-05, "loss": 0.029581409692764283, "step": 72140 }, { "epoch": 20.47970479704797, "grad_norm": 2.2428572177886963, "learning_rate": 7.952966221969912e-05, "loss": 0.019566893577575684, "step": 72150 }, { "epoch": 20.48254328697133, "grad_norm": 8.025857925415039, "learning_rate": 7.952682372977577e-05, "loss": 0.023201459646224977, "step": 72160 }, { "epoch": 20.48538177689469, "grad_norm": 0.7326478958129883, "learning_rate": 7.952398523985241e-05, "loss": 0.01832791864871979, "step": 72170 }, { "epoch": 20.488220266818054, "grad_norm": 7.723518371582031, "learning_rate": 7.952114674992905e-05, "loss": 0.012235413491725921, "step": 72180 }, { "epoch": 20.491058756741413, "grad_norm": 0.8384155035018921, "learning_rate": 7.951830826000568e-05, "loss": 0.01628859043121338, "step": 72190 }, { "epoch": 20.493897246664773, "grad_norm": 0.47885945439338684, "learning_rate": 7.951546977008232e-05, "loss": 0.011692804098129273, "step": 72200 }, { "epoch": 20.496735736588136, "grad_norm": 1.621778130531311, "learning_rate": 7.951263128015896e-05, "loss": 0.01482357382774353, "step": 72210 }, { "epoch": 20.499574226511495, "grad_norm": 1.566728949546814, "learning_rate": 7.950979279023559e-05, "loss": 0.019316935539245607, "step": 72220 }, { "epoch": 20.502412716434858, "grad_norm": 8.736160278320312, "learning_rate": 7.950695430031224e-05, "loss": 0.03253134787082672, "step": 72230 }, { "epoch": 20.505251206358217, "grad_norm": 1.2695624828338623, "learning_rate": 7.950411581038888e-05, "loss": 0.012108281999826432, "step": 72240 }, { "epoch": 20.50808969628158, "grad_norm": 0.06632253527641296, "learning_rate": 7.950127732046551e-05, "loss": 0.014710676670074464, "step": 72250 }, { "epoch": 20.51092818620494, "grad_norm": 7.1121625900268555, "learning_rate": 7.949843883054215e-05, "loss": 0.014168836176395416, "step": 72260 }, { "epoch": 20.5137666761283, "grad_norm": 4.94971227645874, "learning_rate": 7.94956003406188e-05, "loss": 0.011790807545185088, "step": 72270 }, { "epoch": 20.51660516605166, "grad_norm": 19.84990119934082, "learning_rate": 7.949276185069544e-05, "loss": 0.01199587732553482, "step": 72280 }, { "epoch": 20.51944365597502, "grad_norm": 1.730820655822754, "learning_rate": 7.948992336077208e-05, "loss": 0.01737298220396042, "step": 72290 }, { "epoch": 20.522282145898384, "grad_norm": 0.3394525647163391, "learning_rate": 7.948708487084872e-05, "loss": 0.012470385432243348, "step": 72300 }, { "epoch": 20.525120635821743, "grad_norm": 2.8416941165924072, "learning_rate": 7.948424638092536e-05, "loss": 0.010157190263271332, "step": 72310 }, { "epoch": 20.527959125745102, "grad_norm": 5.284004211425781, "learning_rate": 7.948140789100199e-05, "loss": 0.011029845476150513, "step": 72320 }, { "epoch": 20.530797615668465, "grad_norm": 5.133011817932129, "learning_rate": 7.947856940107863e-05, "loss": 0.025474944710731508, "step": 72330 }, { "epoch": 20.533636105591825, "grad_norm": 0.8183083534240723, "learning_rate": 7.947573091115527e-05, "loss": 0.02821999490261078, "step": 72340 }, { "epoch": 20.536474595515188, "grad_norm": 4.689294338226318, "learning_rate": 7.94728924212319e-05, "loss": 0.033982190489768985, "step": 72350 }, { "epoch": 20.539313085438547, "grad_norm": 1.7934982776641846, "learning_rate": 7.947005393130855e-05, "loss": 0.01371917873620987, "step": 72360 }, { "epoch": 20.542151575361906, "grad_norm": 1.614597201347351, "learning_rate": 7.94672154413852e-05, "loss": 0.009933491051197053, "step": 72370 }, { "epoch": 20.54499006528527, "grad_norm": 9.98063850402832, "learning_rate": 7.946437695146182e-05, "loss": 0.03616088330745697, "step": 72380 }, { "epoch": 20.54782855520863, "grad_norm": 10.279441833496094, "learning_rate": 7.946153846153846e-05, "loss": 0.015198688209056854, "step": 72390 }, { "epoch": 20.55066704513199, "grad_norm": 7.6630730628967285, "learning_rate": 7.94586999716151e-05, "loss": 0.01767825335264206, "step": 72400 }, { "epoch": 20.55350553505535, "grad_norm": 11.035643577575684, "learning_rate": 7.945586148169175e-05, "loss": 0.011328937858343125, "step": 72410 }, { "epoch": 20.55634402497871, "grad_norm": 3.737757682800293, "learning_rate": 7.945302299176837e-05, "loss": 0.011093376576900483, "step": 72420 }, { "epoch": 20.559182514902073, "grad_norm": 5.453857898712158, "learning_rate": 7.945018450184503e-05, "loss": 0.014840850234031677, "step": 72430 }, { "epoch": 20.562021004825432, "grad_norm": 2.1554341316223145, "learning_rate": 7.944734601192167e-05, "loss": 0.009334130585193634, "step": 72440 }, { "epoch": 20.564859494748795, "grad_norm": 4.127039432525635, "learning_rate": 7.94445075219983e-05, "loss": 0.014179280400276184, "step": 72450 }, { "epoch": 20.567697984672154, "grad_norm": 1.2953083515167236, "learning_rate": 7.944166903207494e-05, "loss": 0.012179459631443023, "step": 72460 }, { "epoch": 20.570536474595514, "grad_norm": 1.1966592073440552, "learning_rate": 7.943883054215158e-05, "loss": 0.010234113037586211, "step": 72470 }, { "epoch": 20.573374964518877, "grad_norm": 1.6623189449310303, "learning_rate": 7.943599205222821e-05, "loss": 0.007945075631141663, "step": 72480 }, { "epoch": 20.576213454442236, "grad_norm": 1.375534176826477, "learning_rate": 7.943315356230486e-05, "loss": 0.0140870600938797, "step": 72490 }, { "epoch": 20.5790519443656, "grad_norm": 3.179778575897217, "learning_rate": 7.94303150723815e-05, "loss": 0.026906004548072814, "step": 72500 }, { "epoch": 20.5790519443656, "eval_accuracy": 0.9663635785591658, "eval_loss": 0.11196941882371902, "eval_runtime": 31.55, "eval_samples_per_second": 498.479, "eval_steps_per_second": 7.797, "step": 72500 }, { "epoch": 20.581890434288958, "grad_norm": 0.11507121473550797, "learning_rate": 7.942747658245813e-05, "loss": 0.011539269983768464, "step": 72510 }, { "epoch": 20.584728924212317, "grad_norm": 8.79147720336914, "learning_rate": 7.942463809253477e-05, "loss": 0.01557164490222931, "step": 72520 }, { "epoch": 20.58756741413568, "grad_norm": 0.8278812766075134, "learning_rate": 7.942179960261142e-05, "loss": 0.012269531190395356, "step": 72530 }, { "epoch": 20.59040590405904, "grad_norm": 20.315732955932617, "learning_rate": 7.941896111268806e-05, "loss": 0.021502546966075897, "step": 72540 }, { "epoch": 20.593244393982403, "grad_norm": 1.9884330034255981, "learning_rate": 7.941612262276468e-05, "loss": 0.017670203745365144, "step": 72550 }, { "epoch": 20.596082883905762, "grad_norm": 1.3758779764175415, "learning_rate": 7.941328413284134e-05, "loss": 0.018218497931957244, "step": 72560 }, { "epoch": 20.59892137382912, "grad_norm": 4.684302806854248, "learning_rate": 7.941044564291798e-05, "loss": 0.006948423385620117, "step": 72570 }, { "epoch": 20.601759863752484, "grad_norm": 4.039017200469971, "learning_rate": 7.940760715299461e-05, "loss": 0.015477192401885987, "step": 72580 }, { "epoch": 20.604598353675843, "grad_norm": 4.502068042755127, "learning_rate": 7.940476866307125e-05, "loss": 0.01728564202785492, "step": 72590 }, { "epoch": 20.607436843599206, "grad_norm": 10.67024040222168, "learning_rate": 7.940193017314789e-05, "loss": 0.014483582973480225, "step": 72600 }, { "epoch": 20.610275333522566, "grad_norm": 21.431047439575195, "learning_rate": 7.939909168322452e-05, "loss": 0.023728176951408386, "step": 72610 }, { "epoch": 20.61311382344593, "grad_norm": 2.125991106033325, "learning_rate": 7.939625319330116e-05, "loss": 0.009162011742591857, "step": 72620 }, { "epoch": 20.615952313369288, "grad_norm": 3.0115318298339844, "learning_rate": 7.939341470337782e-05, "loss": 0.010436663031578064, "step": 72630 }, { "epoch": 20.618790803292647, "grad_norm": 1.3310275077819824, "learning_rate": 7.939057621345444e-05, "loss": 0.0088840551674366, "step": 72640 }, { "epoch": 20.62162929321601, "grad_norm": 14.502790451049805, "learning_rate": 7.938773772353108e-05, "loss": 0.01096387803554535, "step": 72650 }, { "epoch": 20.62446778313937, "grad_norm": 2.0159082412719727, "learning_rate": 7.938489923360773e-05, "loss": 0.020039521157741547, "step": 72660 }, { "epoch": 20.627306273062732, "grad_norm": 11.093973159790039, "learning_rate": 7.938206074368437e-05, "loss": 0.01522703468799591, "step": 72670 }, { "epoch": 20.63014476298609, "grad_norm": 17.858243942260742, "learning_rate": 7.9379222253761e-05, "loss": 0.020603251457214356, "step": 72680 }, { "epoch": 20.63298325290945, "grad_norm": 6.4766621589660645, "learning_rate": 7.937638376383765e-05, "loss": 0.025670135021209718, "step": 72690 }, { "epoch": 20.635821742832814, "grad_norm": 9.239554405212402, "learning_rate": 7.937354527391428e-05, "loss": 0.017160232365131377, "step": 72700 }, { "epoch": 20.638660232756173, "grad_norm": 11.623858451843262, "learning_rate": 7.937070678399092e-05, "loss": 0.023585006594657898, "step": 72710 }, { "epoch": 20.641498722679536, "grad_norm": 1.536014437675476, "learning_rate": 7.936786829406756e-05, "loss": 0.014897760748863221, "step": 72720 }, { "epoch": 20.644337212602895, "grad_norm": 3.0355446338653564, "learning_rate": 7.93650298041442e-05, "loss": 0.016392450034618377, "step": 72730 }, { "epoch": 20.647175702526255, "grad_norm": 2.6414432525634766, "learning_rate": 7.936219131422083e-05, "loss": 0.008637484163045883, "step": 72740 }, { "epoch": 20.650014192449618, "grad_norm": 6.1766862869262695, "learning_rate": 7.935935282429747e-05, "loss": 0.02251293361186981, "step": 72750 }, { "epoch": 20.652852682372977, "grad_norm": 8.996399879455566, "learning_rate": 7.935651433437413e-05, "loss": 0.01978897154331207, "step": 72760 }, { "epoch": 20.65569117229634, "grad_norm": 8.1782865524292, "learning_rate": 7.935367584445075e-05, "loss": 0.01567893773317337, "step": 72770 }, { "epoch": 20.6585296622197, "grad_norm": 0.18052145838737488, "learning_rate": 7.93508373545274e-05, "loss": 0.015609405934810638, "step": 72780 }, { "epoch": 20.66136815214306, "grad_norm": 0.7489326596260071, "learning_rate": 7.934799886460404e-05, "loss": 0.016970519721508027, "step": 72790 }, { "epoch": 20.66420664206642, "grad_norm": 2.3605284690856934, "learning_rate": 7.934516037468066e-05, "loss": 0.026287132501602174, "step": 72800 }, { "epoch": 20.66704513198978, "grad_norm": 1.7063428163528442, "learning_rate": 7.93423218847573e-05, "loss": 0.018955835700035097, "step": 72810 }, { "epoch": 20.669883621913144, "grad_norm": 1.76027512550354, "learning_rate": 7.933948339483395e-05, "loss": 0.013470149040222168, "step": 72820 }, { "epoch": 20.672722111836503, "grad_norm": 0.7529674172401428, "learning_rate": 7.933664490491059e-05, "loss": 0.03139452338218689, "step": 72830 }, { "epoch": 20.675560601759862, "grad_norm": 0.45633724331855774, "learning_rate": 7.933380641498723e-05, "loss": 0.012667980790138245, "step": 72840 }, { "epoch": 20.678399091683225, "grad_norm": 0.46502992510795593, "learning_rate": 7.933096792506387e-05, "loss": 0.038240844011306764, "step": 72850 }, { "epoch": 20.681237581606585, "grad_norm": 2.5293679237365723, "learning_rate": 7.932812943514051e-05, "loss": 0.015347310900688171, "step": 72860 }, { "epoch": 20.684076071529947, "grad_norm": 0.2258562445640564, "learning_rate": 7.932529094521714e-05, "loss": 0.022741031646728516, "step": 72870 }, { "epoch": 20.686914561453307, "grad_norm": 0.45324718952178955, "learning_rate": 7.932245245529378e-05, "loss": 0.013174809515476227, "step": 72880 }, { "epoch": 20.689753051376666, "grad_norm": 4.431980133056641, "learning_rate": 7.931961396537044e-05, "loss": 0.019937348365783692, "step": 72890 }, { "epoch": 20.69259154130003, "grad_norm": 6.658142566680908, "learning_rate": 7.931677547544707e-05, "loss": 0.02352796643972397, "step": 72900 }, { "epoch": 20.69543003122339, "grad_norm": 1.5227582454681396, "learning_rate": 7.93139369855237e-05, "loss": 0.011709966510534287, "step": 72910 }, { "epoch": 20.69826852114675, "grad_norm": 2.4257354736328125, "learning_rate": 7.931109849560035e-05, "loss": 0.014188449084758758, "step": 72920 }, { "epoch": 20.70110701107011, "grad_norm": 0.863277792930603, "learning_rate": 7.930826000567698e-05, "loss": 0.014848138391971587, "step": 72930 }, { "epoch": 20.70394550099347, "grad_norm": 6.3147125244140625, "learning_rate": 7.930542151575362e-05, "loss": 0.015090684592723846, "step": 72940 }, { "epoch": 20.706783990916833, "grad_norm": 1.402535080909729, "learning_rate": 7.930258302583026e-05, "loss": 0.009539633244276046, "step": 72950 }, { "epoch": 20.709622480840192, "grad_norm": 1.1036694049835205, "learning_rate": 7.92997445359069e-05, "loss": 0.02125362902879715, "step": 72960 }, { "epoch": 20.712460970763555, "grad_norm": 10.724868774414062, "learning_rate": 7.929690604598354e-05, "loss": 0.014958055317401886, "step": 72970 }, { "epoch": 20.715299460686914, "grad_norm": 0.3165739178657532, "learning_rate": 7.929406755606018e-05, "loss": 0.01848827600479126, "step": 72980 }, { "epoch": 20.718137950610274, "grad_norm": 7.2359185218811035, "learning_rate": 7.929122906613682e-05, "loss": 0.01082410141825676, "step": 72990 }, { "epoch": 20.720976440533637, "grad_norm": 3.24294114112854, "learning_rate": 7.928839057621345e-05, "loss": 0.021345661580562593, "step": 73000 }, { "epoch": 20.720976440533637, "eval_accuracy": 0.9629299930056591, "eval_loss": 0.12116753309965134, "eval_runtime": 31.339, "eval_samples_per_second": 501.834, "eval_steps_per_second": 7.85, "step": 73000 }, { "epoch": 20.723814930456996, "grad_norm": 0.4712117910385132, "learning_rate": 7.928555208629009e-05, "loss": 0.015656279027462007, "step": 73010 }, { "epoch": 20.72665342038036, "grad_norm": 1.1789573431015015, "learning_rate": 7.928271359636675e-05, "loss": 0.013817785680294037, "step": 73020 }, { "epoch": 20.729491910303718, "grad_norm": 1.4754494428634644, "learning_rate": 7.927987510644338e-05, "loss": 0.033756738901138304, "step": 73030 }, { "epoch": 20.732330400227077, "grad_norm": 5.361126899719238, "learning_rate": 7.927703661652002e-05, "loss": 0.032455235719680786, "step": 73040 }, { "epoch": 20.73516889015044, "grad_norm": 10.095773696899414, "learning_rate": 7.927419812659666e-05, "loss": 0.03645889163017273, "step": 73050 }, { "epoch": 20.7380073800738, "grad_norm": 0.8621581196784973, "learning_rate": 7.927135963667329e-05, "loss": 0.016415764391422272, "step": 73060 }, { "epoch": 20.740845869997163, "grad_norm": 6.6164374351501465, "learning_rate": 7.926852114674993e-05, "loss": 0.014220431447029114, "step": 73070 }, { "epoch": 20.743684359920522, "grad_norm": 8.197409629821777, "learning_rate": 7.926568265682657e-05, "loss": 0.00881960690021515, "step": 73080 }, { "epoch": 20.746522849843885, "grad_norm": 13.695781707763672, "learning_rate": 7.926284416690321e-05, "loss": 0.019520682096481324, "step": 73090 }, { "epoch": 20.749361339767244, "grad_norm": 13.915450096130371, "learning_rate": 7.926000567697985e-05, "loss": 0.012441352009773254, "step": 73100 }, { "epoch": 20.752199829690603, "grad_norm": 12.172597885131836, "learning_rate": 7.92571671870565e-05, "loss": 0.027883368730545043, "step": 73110 }, { "epoch": 20.755038319613966, "grad_norm": 2.490008592605591, "learning_rate": 7.925432869713313e-05, "loss": 0.00721658319234848, "step": 73120 }, { "epoch": 20.757876809537326, "grad_norm": 0.8751887679100037, "learning_rate": 7.925149020720976e-05, "loss": 0.010434217751026154, "step": 73130 }, { "epoch": 20.76071529946069, "grad_norm": 6.381956577301025, "learning_rate": 7.92486517172864e-05, "loss": 0.008940516412258149, "step": 73140 }, { "epoch": 20.763553789384048, "grad_norm": 2.7959420680999756, "learning_rate": 7.924581322736305e-05, "loss": 0.026629617810249327, "step": 73150 }, { "epoch": 20.766392279307407, "grad_norm": 9.569304466247559, "learning_rate": 7.924297473743969e-05, "loss": 0.019592656195163725, "step": 73160 }, { "epoch": 20.76923076923077, "grad_norm": 1.1693205833435059, "learning_rate": 7.924013624751633e-05, "loss": 0.019284830987453462, "step": 73170 }, { "epoch": 20.77206925915413, "grad_norm": 1.6337071657180786, "learning_rate": 7.923729775759297e-05, "loss": 0.027567052841186525, "step": 73180 }, { "epoch": 20.774907749077492, "grad_norm": 8.254040718078613, "learning_rate": 7.92344592676696e-05, "loss": 0.007244700193405151, "step": 73190 }, { "epoch": 20.77774623900085, "grad_norm": 9.274359703063965, "learning_rate": 7.923162077774624e-05, "loss": 0.023554137349128722, "step": 73200 }, { "epoch": 20.78058472892421, "grad_norm": 0.4924163520336151, "learning_rate": 7.922878228782288e-05, "loss": 0.015244534611701966, "step": 73210 }, { "epoch": 20.783423218847574, "grad_norm": 3.1781094074249268, "learning_rate": 7.922594379789952e-05, "loss": 0.021272359788417815, "step": 73220 }, { "epoch": 20.786261708770933, "grad_norm": 10.52193832397461, "learning_rate": 7.922310530797616e-05, "loss": 0.024418361485004425, "step": 73230 }, { "epoch": 20.789100198694296, "grad_norm": 2.9453516006469727, "learning_rate": 7.92202668180528e-05, "loss": 0.030804479122161867, "step": 73240 }, { "epoch": 20.791938688617655, "grad_norm": 0.8992425203323364, "learning_rate": 7.921742832812945e-05, "loss": 0.019534553587436675, "step": 73250 }, { "epoch": 20.794777178541015, "grad_norm": 2.0761821269989014, "learning_rate": 7.921458983820607e-05, "loss": 0.01790963262319565, "step": 73260 }, { "epoch": 20.797615668464378, "grad_norm": 5.022449970245361, "learning_rate": 7.921175134828271e-05, "loss": 0.00907110795378685, "step": 73270 }, { "epoch": 20.800454158387737, "grad_norm": 2.6330373287200928, "learning_rate": 7.920891285835936e-05, "loss": 0.016720624268054964, "step": 73280 }, { "epoch": 20.8032926483111, "grad_norm": 8.316458702087402, "learning_rate": 7.9206074368436e-05, "loss": 0.01833339035511017, "step": 73290 }, { "epoch": 20.80613113823446, "grad_norm": 10.399785995483398, "learning_rate": 7.920323587851264e-05, "loss": 0.01224859207868576, "step": 73300 }, { "epoch": 20.80896962815782, "grad_norm": 3.4011497497558594, "learning_rate": 7.920039738858928e-05, "loss": 0.01687619388103485, "step": 73310 }, { "epoch": 20.81180811808118, "grad_norm": 6.77141809463501, "learning_rate": 7.919755889866591e-05, "loss": 0.032311511039733884, "step": 73320 }, { "epoch": 20.81464660800454, "grad_norm": 4.743729114532471, "learning_rate": 7.919472040874255e-05, "loss": 0.017578643560409547, "step": 73330 }, { "epoch": 20.817485097927904, "grad_norm": 12.768263816833496, "learning_rate": 7.919188191881919e-05, "loss": 0.022203050553798676, "step": 73340 }, { "epoch": 20.820323587851263, "grad_norm": 2.953514814376831, "learning_rate": 7.918904342889583e-05, "loss": 0.035276031494140624, "step": 73350 }, { "epoch": 20.823162077774622, "grad_norm": 1.0464810132980347, "learning_rate": 7.918620493897247e-05, "loss": 0.019856755435466767, "step": 73360 }, { "epoch": 20.826000567697985, "grad_norm": 0.7617159485816956, "learning_rate": 7.918336644904911e-05, "loss": 0.014259248971939087, "step": 73370 }, { "epoch": 20.828839057621344, "grad_norm": 5.238072395324707, "learning_rate": 7.918052795912576e-05, "loss": 0.028229644894599913, "step": 73380 }, { "epoch": 20.831677547544707, "grad_norm": 1.6024333238601685, "learning_rate": 7.917768946920238e-05, "loss": 0.01618278622627258, "step": 73390 }, { "epoch": 20.834516037468067, "grad_norm": 2.3969497680664062, "learning_rate": 7.917485097927903e-05, "loss": 0.02306237518787384, "step": 73400 }, { "epoch": 20.837354527391426, "grad_norm": 1.034471869468689, "learning_rate": 7.917201248935567e-05, "loss": 0.011136702448129653, "step": 73410 }, { "epoch": 20.84019301731479, "grad_norm": 4.866801738739014, "learning_rate": 7.916917399943231e-05, "loss": 0.01119556576013565, "step": 73420 }, { "epoch": 20.84303150723815, "grad_norm": 15.34523868560791, "learning_rate": 7.916633550950895e-05, "loss": 0.019770808517932892, "step": 73430 }, { "epoch": 20.84586999716151, "grad_norm": 6.16182279586792, "learning_rate": 7.916349701958559e-05, "loss": 0.03611786663532257, "step": 73440 }, { "epoch": 20.84870848708487, "grad_norm": 9.896342277526855, "learning_rate": 7.916065852966222e-05, "loss": 0.026860374212265014, "step": 73450 }, { "epoch": 20.851546977008233, "grad_norm": 3.893913984298706, "learning_rate": 7.915782003973886e-05, "loss": 0.016549192368984222, "step": 73460 }, { "epoch": 20.854385466931593, "grad_norm": 0.46682605147361755, "learning_rate": 7.91549815498155e-05, "loss": 0.022315067052841187, "step": 73470 }, { "epoch": 20.857223956854952, "grad_norm": 2.7312629222869873, "learning_rate": 7.915214305989214e-05, "loss": 0.01133921667933464, "step": 73480 }, { "epoch": 20.860062446778315, "grad_norm": 7.671429634094238, "learning_rate": 7.914930456996878e-05, "loss": 0.010831889510154725, "step": 73490 }, { "epoch": 20.862900936701674, "grad_norm": 1.0821009874343872, "learning_rate": 7.914646608004543e-05, "loss": 0.018389439582824706, "step": 73500 }, { "epoch": 20.862900936701674, "eval_accuracy": 0.9700515037833026, "eval_loss": 0.09482727199792862, "eval_runtime": 31.7055, "eval_samples_per_second": 496.033, "eval_steps_per_second": 7.759, "step": 73500 }, { "epoch": 20.865739426625037, "grad_norm": 9.925872802734375, "learning_rate": 7.914362759012207e-05, "loss": 0.027100187540054322, "step": 73510 }, { "epoch": 20.868577916548396, "grad_norm": 2.02506160736084, "learning_rate": 7.91407891001987e-05, "loss": 0.008456361293792725, "step": 73520 }, { "epoch": 20.871416406471756, "grad_norm": 2.9364569187164307, "learning_rate": 7.913795061027534e-05, "loss": 0.02375752031803131, "step": 73530 }, { "epoch": 20.87425489639512, "grad_norm": 10.55201244354248, "learning_rate": 7.913511212035198e-05, "loss": 0.01722322702407837, "step": 73540 }, { "epoch": 20.877093386318478, "grad_norm": 2.3054146766662598, "learning_rate": 7.91322736304286e-05, "loss": 0.0069063156843185425, "step": 73550 }, { "epoch": 20.87993187624184, "grad_norm": 7.215479850769043, "learning_rate": 7.912943514050526e-05, "loss": 0.012965303659439088, "step": 73560 }, { "epoch": 20.8827703661652, "grad_norm": 4.540820121765137, "learning_rate": 7.91265966505819e-05, "loss": 0.012083213031291961, "step": 73570 }, { "epoch": 20.88560885608856, "grad_norm": 0.2832660377025604, "learning_rate": 7.912375816065853e-05, "loss": 0.01356612890958786, "step": 73580 }, { "epoch": 20.888447346011922, "grad_norm": 2.2344326972961426, "learning_rate": 7.912091967073517e-05, "loss": 0.015434113144874573, "step": 73590 }, { "epoch": 20.89128583593528, "grad_norm": 9.897329330444336, "learning_rate": 7.911808118081181e-05, "loss": 0.014597289264202118, "step": 73600 }, { "epoch": 20.894124325858645, "grad_norm": 13.89268684387207, "learning_rate": 7.911524269088845e-05, "loss": 0.04495354294776917, "step": 73610 }, { "epoch": 20.896962815782004, "grad_norm": 1.9589704275131226, "learning_rate": 7.91124042009651e-05, "loss": 0.013670009374618531, "step": 73620 }, { "epoch": 20.899801305705363, "grad_norm": 0.9423201680183411, "learning_rate": 7.910956571104174e-05, "loss": 0.01837254613637924, "step": 73630 }, { "epoch": 20.902639795628726, "grad_norm": 12.29433822631836, "learning_rate": 7.910672722111836e-05, "loss": 0.02250507175922394, "step": 73640 }, { "epoch": 20.905478285552086, "grad_norm": 5.166441440582275, "learning_rate": 7.9103888731195e-05, "loss": 0.012467760592699051, "step": 73650 }, { "epoch": 20.90831677547545, "grad_norm": 4.902466773986816, "learning_rate": 7.910105024127165e-05, "loss": 0.01773456037044525, "step": 73660 }, { "epoch": 20.911155265398808, "grad_norm": 5.229700088500977, "learning_rate": 7.909821175134829e-05, "loss": 0.021667641401290894, "step": 73670 }, { "epoch": 20.913993755322167, "grad_norm": 0.29768115282058716, "learning_rate": 7.909565711041727e-05, "loss": 0.01981286108493805, "step": 73680 }, { "epoch": 20.91683224524553, "grad_norm": 8.50903034210205, "learning_rate": 7.90928186204939e-05, "loss": 0.03741942048072815, "step": 73690 }, { "epoch": 20.91967073516889, "grad_norm": 7.114592552185059, "learning_rate": 7.908998013057054e-05, "loss": 0.02943575382232666, "step": 73700 }, { "epoch": 20.922509225092252, "grad_norm": 0.8609477877616882, "learning_rate": 7.908714164064718e-05, "loss": 0.012257280200719834, "step": 73710 }, { "epoch": 20.92534771501561, "grad_norm": 4.012287616729736, "learning_rate": 7.908430315072382e-05, "loss": 0.012139906734228134, "step": 73720 }, { "epoch": 20.92818620493897, "grad_norm": 20.529264450073242, "learning_rate": 7.908146466080045e-05, "loss": 0.024718427658081056, "step": 73730 }, { "epoch": 20.931024694862334, "grad_norm": 4.075937271118164, "learning_rate": 7.90786261708771e-05, "loss": 0.02088976800441742, "step": 73740 }, { "epoch": 20.933863184785693, "grad_norm": 1.309096336364746, "learning_rate": 7.907578768095374e-05, "loss": 0.015802517533302307, "step": 73750 }, { "epoch": 20.936701674709056, "grad_norm": 7.873183727264404, "learning_rate": 7.907294919103037e-05, "loss": 0.015766820311546324, "step": 73760 }, { "epoch": 20.939540164632415, "grad_norm": 6.351379871368408, "learning_rate": 7.907011070110701e-05, "loss": 0.027875053882598876, "step": 73770 }, { "epoch": 20.942378654555775, "grad_norm": 2.4245989322662354, "learning_rate": 7.906727221118365e-05, "loss": 0.013500745594501495, "step": 73780 }, { "epoch": 20.945217144479138, "grad_norm": 5.598823547363281, "learning_rate": 7.906443372126028e-05, "loss": 0.018315429985523223, "step": 73790 }, { "epoch": 20.948055634402497, "grad_norm": 9.185812950134277, "learning_rate": 7.906159523133694e-05, "loss": 0.03035212755203247, "step": 73800 }, { "epoch": 20.95089412432586, "grad_norm": 7.553031921386719, "learning_rate": 7.905875674141358e-05, "loss": 0.018949438631534577, "step": 73810 }, { "epoch": 20.95373261424922, "grad_norm": 9.329947471618652, "learning_rate": 7.905591825149021e-05, "loss": 0.025003620982170106, "step": 73820 }, { "epoch": 20.956571104172582, "grad_norm": 6.53037691116333, "learning_rate": 7.905307976156685e-05, "loss": 0.022636470198631287, "step": 73830 }, { "epoch": 20.95940959409594, "grad_norm": 5.085198879241943, "learning_rate": 7.905024127164349e-05, "loss": 0.02745906710624695, "step": 73840 }, { "epoch": 20.9622480840193, "grad_norm": 11.349584579467773, "learning_rate": 7.904740278172013e-05, "loss": 0.017695842683315276, "step": 73850 }, { "epoch": 20.965086573942664, "grad_norm": 0.44437509775161743, "learning_rate": 7.904456429179676e-05, "loss": 0.037027180194854736, "step": 73860 }, { "epoch": 20.967925063866023, "grad_norm": 0.7721370458602905, "learning_rate": 7.904172580187341e-05, "loss": 0.03285959661006928, "step": 73870 }, { "epoch": 20.970763553789386, "grad_norm": 4.44058895111084, "learning_rate": 7.903888731195006e-05, "loss": 0.012057169526815414, "step": 73880 }, { "epoch": 20.973602043712745, "grad_norm": 12.873336791992188, "learning_rate": 7.903604882202668e-05, "loss": 0.042155617475509645, "step": 73890 }, { "epoch": 20.976440533636104, "grad_norm": 2.300752639770508, "learning_rate": 7.903321033210332e-05, "loss": 0.02772943377494812, "step": 73900 }, { "epoch": 20.979279023559467, "grad_norm": 9.823734283447266, "learning_rate": 7.903037184217997e-05, "loss": 0.0104764923453331, "step": 73910 }, { "epoch": 20.982117513482827, "grad_norm": 2.656264305114746, "learning_rate": 7.90275333522566e-05, "loss": 0.01471453458070755, "step": 73920 }, { "epoch": 20.98495600340619, "grad_norm": 1.9895806312561035, "learning_rate": 7.902469486233323e-05, "loss": 0.010947420448064803, "step": 73930 }, { "epoch": 20.98779449332955, "grad_norm": 10.514595985412598, "learning_rate": 7.902185637240989e-05, "loss": 0.014849673211574554, "step": 73940 }, { "epoch": 20.990632983252908, "grad_norm": 5.333828449249268, "learning_rate": 7.901901788248652e-05, "loss": 0.012930047512054444, "step": 73950 }, { "epoch": 20.99347147317627, "grad_norm": 3.018665313720703, "learning_rate": 7.901617939256316e-05, "loss": 0.018447376787662506, "step": 73960 }, { "epoch": 20.99630996309963, "grad_norm": 1.616911768913269, "learning_rate": 7.90133409026398e-05, "loss": 0.027783718705177308, "step": 73970 }, { "epoch": 20.999148453022993, "grad_norm": 0.4653075039386749, "learning_rate": 7.901050241271644e-05, "loss": 0.015703950822353364, "step": 73980 }, { "epoch": 21.001986942946353, "grad_norm": 0.5165928602218628, "learning_rate": 7.900766392279307e-05, "loss": 0.00799490585923195, "step": 73990 }, { "epoch": 21.004825432869712, "grad_norm": 3.2757325172424316, "learning_rate": 7.900482543286972e-05, "loss": 0.00518273264169693, "step": 74000 }, { "epoch": 21.004825432869712, "eval_accuracy": 0.9689705601831246, "eval_loss": 0.10108955949544907, "eval_runtime": 31.1336, "eval_samples_per_second": 505.146, "eval_steps_per_second": 7.901, "step": 74000 }, { "epoch": 21.007663922793075, "grad_norm": 2.6399831771850586, "learning_rate": 7.900198694294637e-05, "loss": 0.006677420437335968, "step": 74010 }, { "epoch": 21.010502412716434, "grad_norm": 1.0280345678329468, "learning_rate": 7.8999148453023e-05, "loss": 0.003554373234510422, "step": 74020 }, { "epoch": 21.013340902639797, "grad_norm": 2.7186496257781982, "learning_rate": 7.899630996309963e-05, "loss": 0.010536560416221618, "step": 74030 }, { "epoch": 21.016179392563156, "grad_norm": 2.8670501708984375, "learning_rate": 7.899347147317628e-05, "loss": 0.011020071804523468, "step": 74040 }, { "epoch": 21.019017882486516, "grad_norm": 4.692769527435303, "learning_rate": 7.89906329832529e-05, "loss": 0.008445170521736146, "step": 74050 }, { "epoch": 21.02185637240988, "grad_norm": 1.6817808151245117, "learning_rate": 7.898779449332955e-05, "loss": 0.00845225602388382, "step": 74060 }, { "epoch": 21.024694862333238, "grad_norm": 14.013290405273438, "learning_rate": 7.89849560034062e-05, "loss": 0.015586079657077789, "step": 74070 }, { "epoch": 21.0275333522566, "grad_norm": 0.6257701516151428, "learning_rate": 7.898211751348283e-05, "loss": 0.011401771008968354, "step": 74080 }, { "epoch": 21.03037184217996, "grad_norm": 1.6471327543258667, "learning_rate": 7.897927902355947e-05, "loss": 0.007209101319313049, "step": 74090 }, { "epoch": 21.03321033210332, "grad_norm": 3.179908037185669, "learning_rate": 7.897644053363611e-05, "loss": 0.011152968555688859, "step": 74100 }, { "epoch": 21.036048822026682, "grad_norm": 0.4030959904193878, "learning_rate": 7.897360204371275e-05, "loss": 0.009203150868415833, "step": 74110 }, { "epoch": 21.03888731195004, "grad_norm": 0.9386562705039978, "learning_rate": 7.897076355378938e-05, "loss": 0.010810703039169312, "step": 74120 }, { "epoch": 21.041725801873405, "grad_norm": 2.520800828933716, "learning_rate": 7.896792506386602e-05, "loss": 0.006613048911094666, "step": 74130 }, { "epoch": 21.044564291796764, "grad_norm": 6.6119866371154785, "learning_rate": 7.896508657394268e-05, "loss": 0.02997497618198395, "step": 74140 }, { "epoch": 21.047402781720123, "grad_norm": 2.750290870666504, "learning_rate": 7.89622480840193e-05, "loss": 0.01177423596382141, "step": 74150 }, { "epoch": 21.050241271643486, "grad_norm": 4.717784881591797, "learning_rate": 7.895940959409595e-05, "loss": 0.02524755597114563, "step": 74160 }, { "epoch": 21.053079761566845, "grad_norm": 9.544171333312988, "learning_rate": 7.895657110417259e-05, "loss": 0.012386641651391982, "step": 74170 }, { "epoch": 21.05591825149021, "grad_norm": 18.918048858642578, "learning_rate": 7.895373261424921e-05, "loss": 0.04360443949699402, "step": 74180 }, { "epoch": 21.058756741413568, "grad_norm": 2.1022586822509766, "learning_rate": 7.895089412432586e-05, "loss": 0.01563587784767151, "step": 74190 }, { "epoch": 21.061595231336927, "grad_norm": 0.31142160296440125, "learning_rate": 7.894805563440251e-05, "loss": 0.005332668125629425, "step": 74200 }, { "epoch": 21.06443372126029, "grad_norm": 1.6475532054901123, "learning_rate": 7.894521714447914e-05, "loss": 0.01780291348695755, "step": 74210 }, { "epoch": 21.06727221118365, "grad_norm": 6.250597953796387, "learning_rate": 7.894237865455578e-05, "loss": 0.01200171336531639, "step": 74220 }, { "epoch": 21.070110701107012, "grad_norm": 0.46693089604377747, "learning_rate": 7.893954016463242e-05, "loss": 0.007612199336290359, "step": 74230 }, { "epoch": 21.07294919103037, "grad_norm": 0.7868378758430481, "learning_rate": 7.893670167470906e-05, "loss": 0.009841600060462951, "step": 74240 }, { "epoch": 21.075787680953734, "grad_norm": 0.13300862908363342, "learning_rate": 7.893386318478569e-05, "loss": 0.014477512240409851, "step": 74250 }, { "epoch": 21.078626170877094, "grad_norm": 2.9693217277526855, "learning_rate": 7.893102469486233e-05, "loss": 0.028074046969413756, "step": 74260 }, { "epoch": 21.081464660800453, "grad_norm": 0.18693657219409943, "learning_rate": 7.892818620493899e-05, "loss": 0.012653912603855132, "step": 74270 }, { "epoch": 21.084303150723816, "grad_norm": 0.18800175189971924, "learning_rate": 7.892534771501562e-05, "loss": 0.01829739660024643, "step": 74280 }, { "epoch": 21.087141640647175, "grad_norm": 9.543168067932129, "learning_rate": 7.892250922509226e-05, "loss": 0.01922588050365448, "step": 74290 }, { "epoch": 21.089980130570538, "grad_norm": 5.368625640869141, "learning_rate": 7.89196707351689e-05, "loss": 0.022974665462970733, "step": 74300 }, { "epoch": 21.092818620493897, "grad_norm": 9.876455307006836, "learning_rate": 7.891683224524553e-05, "loss": 0.011523884534835816, "step": 74310 }, { "epoch": 21.095657110417257, "grad_norm": 3.1807303428649902, "learning_rate": 7.891399375532217e-05, "loss": 0.010734608024358749, "step": 74320 }, { "epoch": 21.09849560034062, "grad_norm": 10.888519287109375, "learning_rate": 7.891115526539881e-05, "loss": 0.018534770607948302, "step": 74330 }, { "epoch": 21.10133409026398, "grad_norm": 0.78104567527771, "learning_rate": 7.890831677547545e-05, "loss": 0.024481944739818573, "step": 74340 }, { "epoch": 21.104172580187342, "grad_norm": 6.779789924621582, "learning_rate": 7.890547828555209e-05, "loss": 0.01251528412103653, "step": 74350 }, { "epoch": 21.1070110701107, "grad_norm": 0.4543623924255371, "learning_rate": 7.890263979562873e-05, "loss": 0.0240698367357254, "step": 74360 }, { "epoch": 21.10984956003406, "grad_norm": 9.469886779785156, "learning_rate": 7.889980130570537e-05, "loss": 0.02046073377132416, "step": 74370 }, { "epoch": 21.112688049957423, "grad_norm": 14.509222984313965, "learning_rate": 7.8896962815782e-05, "loss": 0.016410830616950988, "step": 74380 }, { "epoch": 21.115526539880783, "grad_norm": 0.5220840573310852, "learning_rate": 7.889412432585864e-05, "loss": 0.009171589463949203, "step": 74390 }, { "epoch": 21.118365029804146, "grad_norm": 7.434703826904297, "learning_rate": 7.889128583593528e-05, "loss": 0.02177475243806839, "step": 74400 }, { "epoch": 21.121203519727505, "grad_norm": 1.199174404144287, "learning_rate": 7.888844734601193e-05, "loss": 0.016218411922454833, "step": 74410 }, { "epoch": 21.124042009650864, "grad_norm": 2.3787662982940674, "learning_rate": 7.888560885608857e-05, "loss": 0.007263926416635513, "step": 74420 }, { "epoch": 21.126880499574227, "grad_norm": 7.946519374847412, "learning_rate": 7.888277036616521e-05, "loss": 0.02057839035987854, "step": 74430 }, { "epoch": 21.129718989497587, "grad_norm": 8.170157432556152, "learning_rate": 7.887993187624184e-05, "loss": 0.007064300030469895, "step": 74440 }, { "epoch": 21.13255747942095, "grad_norm": 6.6253767013549805, "learning_rate": 7.887709338631848e-05, "loss": 0.020754867792129518, "step": 74450 }, { "epoch": 21.13539596934431, "grad_norm": 9.66815185546875, "learning_rate": 7.887425489639512e-05, "loss": 0.009997677057981491, "step": 74460 }, { "epoch": 21.138234459267668, "grad_norm": 4.227406978607178, "learning_rate": 7.887141640647176e-05, "loss": 0.010682257264852524, "step": 74470 }, { "epoch": 21.14107294919103, "grad_norm": 0.5994613170623779, "learning_rate": 7.88685779165484e-05, "loss": 0.013827003538608551, "step": 74480 }, { "epoch": 21.14391143911439, "grad_norm": 1.6911752223968506, "learning_rate": 7.886573942662504e-05, "loss": 0.01313171088695526, "step": 74490 }, { "epoch": 21.146749929037753, "grad_norm": 4.3524346351623535, "learning_rate": 7.886290093670168e-05, "loss": 0.008445474505424499, "step": 74500 }, { "epoch": 21.146749929037753, "eval_accuracy": 0.9672537674063713, "eval_loss": 0.10577034205198288, "eval_runtime": 31.4875, "eval_samples_per_second": 499.468, "eval_steps_per_second": 7.813, "step": 74500 }, { "epoch": 21.149588418961113, "grad_norm": 1.578529953956604, "learning_rate": 7.886006244677831e-05, "loss": 0.01444295197725296, "step": 74510 }, { "epoch": 21.152426908884472, "grad_norm": 1.9007748365402222, "learning_rate": 7.885722395685495e-05, "loss": 0.00819685235619545, "step": 74520 }, { "epoch": 21.155265398807835, "grad_norm": 2.874579906463623, "learning_rate": 7.88543854669316e-05, "loss": 0.009656573832035064, "step": 74530 }, { "epoch": 21.158103888731194, "grad_norm": 10.027885437011719, "learning_rate": 7.885154697700824e-05, "loss": 0.011105933040380479, "step": 74540 }, { "epoch": 21.160942378654557, "grad_norm": 5.321685791015625, "learning_rate": 7.884870848708488e-05, "loss": 0.007541386038064956, "step": 74550 }, { "epoch": 21.163780868577916, "grad_norm": 6.998576641082764, "learning_rate": 7.884586999716152e-05, "loss": 0.007888945937156677, "step": 74560 }, { "epoch": 21.166619358501276, "grad_norm": 11.888397216796875, "learning_rate": 7.884303150723815e-05, "loss": 0.018642354011535644, "step": 74570 }, { "epoch": 21.16945784842464, "grad_norm": 2.0892932415008545, "learning_rate": 7.884019301731479e-05, "loss": 0.011233103275299073, "step": 74580 }, { "epoch": 21.172296338347998, "grad_norm": 2.484281301498413, "learning_rate": 7.883735452739143e-05, "loss": 0.00315704345703125, "step": 74590 }, { "epoch": 21.17513482827136, "grad_norm": 5.515682697296143, "learning_rate": 7.883451603746807e-05, "loss": 0.0116035558283329, "step": 74600 }, { "epoch": 21.17797331819472, "grad_norm": 12.491768836975098, "learning_rate": 7.883167754754471e-05, "loss": 0.019374684989452363, "step": 74610 }, { "epoch": 21.18081180811808, "grad_norm": 5.670478343963623, "learning_rate": 7.882883905762135e-05, "loss": 0.008849841356277467, "step": 74620 }, { "epoch": 21.183650298041442, "grad_norm": 1.0811883211135864, "learning_rate": 7.882600056769798e-05, "loss": 0.00990903303027153, "step": 74630 }, { "epoch": 21.1864887879648, "grad_norm": 1.6259132623672485, "learning_rate": 7.882316207777462e-05, "loss": 0.008647838979959488, "step": 74640 }, { "epoch": 21.189327277888165, "grad_norm": 3.8051600456237793, "learning_rate": 7.882032358785126e-05, "loss": 0.015223966538906097, "step": 74650 }, { "epoch": 21.192165767811524, "grad_norm": 1.9164929389953613, "learning_rate": 7.88174850979279e-05, "loss": 0.030438461899757387, "step": 74660 }, { "epoch": 21.195004257734887, "grad_norm": 0.6392512321472168, "learning_rate": 7.881464660800455e-05, "loss": 0.012022168934345245, "step": 74670 }, { "epoch": 21.197842747658246, "grad_norm": 6.6565399169921875, "learning_rate": 7.881180811808119e-05, "loss": 0.01997986137866974, "step": 74680 }, { "epoch": 21.200681237581605, "grad_norm": 0.23077349364757538, "learning_rate": 7.880896962815783e-05, "loss": 0.008937107771635056, "step": 74690 }, { "epoch": 21.20351972750497, "grad_norm": 1.7519068717956543, "learning_rate": 7.880613113823446e-05, "loss": 0.01710183620452881, "step": 74700 }, { "epoch": 21.206358217428328, "grad_norm": 2.4098291397094727, "learning_rate": 7.88032926483111e-05, "loss": 0.012014935910701751, "step": 74710 }, { "epoch": 21.20919670735169, "grad_norm": 0.6433802843093872, "learning_rate": 7.880045415838774e-05, "loss": 0.029374882578849792, "step": 74720 }, { "epoch": 21.21203519727505, "grad_norm": 5.814181327819824, "learning_rate": 7.879761566846438e-05, "loss": 0.006299564242362976, "step": 74730 }, { "epoch": 21.21487368719841, "grad_norm": 6.569656848907471, "learning_rate": 7.879477717854102e-05, "loss": 0.01947879195213318, "step": 74740 }, { "epoch": 21.217712177121772, "grad_norm": 2.881561517715454, "learning_rate": 7.879193868861766e-05, "loss": 0.010992929339408875, "step": 74750 }, { "epoch": 21.22055066704513, "grad_norm": 1.2301818132400513, "learning_rate": 7.878910019869429e-05, "loss": 0.009652945399284362, "step": 74760 }, { "epoch": 21.223389156968494, "grad_norm": 4.122570037841797, "learning_rate": 7.878626170877093e-05, "loss": 0.03406794667243958, "step": 74770 }, { "epoch": 21.226227646891854, "grad_norm": 3.4331257343292236, "learning_rate": 7.878342321884758e-05, "loss": 0.008916236460208893, "step": 74780 }, { "epoch": 21.229066136815213, "grad_norm": 4.2541351318359375, "learning_rate": 7.878058472892422e-05, "loss": 0.021498420834541322, "step": 74790 }, { "epoch": 21.231904626738576, "grad_norm": 1.6433011293411255, "learning_rate": 7.877774623900086e-05, "loss": 0.009510713815689086, "step": 74800 }, { "epoch": 21.234743116661935, "grad_norm": 0.45566901564598083, "learning_rate": 7.87749077490775e-05, "loss": 0.008892115950584412, "step": 74810 }, { "epoch": 21.237581606585298, "grad_norm": 1.3600140810012817, "learning_rate": 7.877206925915414e-05, "loss": 0.012219396978616714, "step": 74820 }, { "epoch": 21.240420096508657, "grad_norm": 5.141730785369873, "learning_rate": 7.876923076923077e-05, "loss": 0.017458860576152802, "step": 74830 }, { "epoch": 21.243258586432017, "grad_norm": 0.5994536280632019, "learning_rate": 7.876639227930741e-05, "loss": 0.010760480165481567, "step": 74840 }, { "epoch": 21.24609707635538, "grad_norm": 0.10164127498865128, "learning_rate": 7.876355378938405e-05, "loss": 0.006358616054058075, "step": 74850 }, { "epoch": 21.24893556627874, "grad_norm": 0.5450071692466736, "learning_rate": 7.876071529946068e-05, "loss": 0.004813630878925323, "step": 74860 }, { "epoch": 21.251774056202102, "grad_norm": 1.0310561656951904, "learning_rate": 7.875787680953733e-05, "loss": 0.025302216410636902, "step": 74870 }, { "epoch": 21.25461254612546, "grad_norm": 5.286652088165283, "learning_rate": 7.875503831961398e-05, "loss": 0.0180318683385849, "step": 74880 }, { "epoch": 21.25745103604882, "grad_norm": 7.939635753631592, "learning_rate": 7.87521998296906e-05, "loss": 0.01227591633796692, "step": 74890 }, { "epoch": 21.260289525972183, "grad_norm": 0.2786083221435547, "learning_rate": 7.874936133976724e-05, "loss": 0.023888009786605834, "step": 74900 }, { "epoch": 21.263128015895543, "grad_norm": 4.865163326263428, "learning_rate": 7.874652284984389e-05, "loss": 0.017589816451072694, "step": 74910 }, { "epoch": 21.265966505818906, "grad_norm": 6.7929182052612305, "learning_rate": 7.874368435992053e-05, "loss": 0.022551374137401582, "step": 74920 }, { "epoch": 21.268804995742265, "grad_norm": 3.380335807800293, "learning_rate": 7.874084586999717e-05, "loss": 0.007152558863162994, "step": 74930 }, { "epoch": 21.271643485665624, "grad_norm": 7.091317653656006, "learning_rate": 7.873800738007381e-05, "loss": 0.012429316341876984, "step": 74940 }, { "epoch": 21.274481975588987, "grad_norm": 0.5314913988113403, "learning_rate": 7.873516889015045e-05, "loss": 0.010797836631536484, "step": 74950 }, { "epoch": 21.277320465512346, "grad_norm": 13.70671272277832, "learning_rate": 7.873233040022708e-05, "loss": 0.030415144562721253, "step": 74960 }, { "epoch": 21.28015895543571, "grad_norm": 5.643704891204834, "learning_rate": 7.872949191030372e-05, "loss": 0.01302773654460907, "step": 74970 }, { "epoch": 21.28299744535907, "grad_norm": 1.379393219947815, "learning_rate": 7.872665342038036e-05, "loss": 0.011286456882953644, "step": 74980 }, { "epoch": 21.285835935282428, "grad_norm": 4.563117027282715, "learning_rate": 7.872381493045699e-05, "loss": 0.02213556319475174, "step": 74990 }, { "epoch": 21.28867442520579, "grad_norm": 8.047471046447754, "learning_rate": 7.872097644053365e-05, "loss": 0.021155691146850585, "step": 75000 }, { "epoch": 21.28867442520579, "eval_accuracy": 0.9647739556177275, "eval_loss": 0.1150110587477684, "eval_runtime": 32.1865, "eval_samples_per_second": 488.62, "eval_steps_per_second": 7.643, "step": 75000 }, { "epoch": 21.29151291512915, "grad_norm": 2.0192182064056396, "learning_rate": 7.871813795061029e-05, "loss": 0.011174393445253372, "step": 75010 }, { "epoch": 21.294351405052513, "grad_norm": 12.190423011779785, "learning_rate": 7.871529946068691e-05, "loss": 0.024303445219993593, "step": 75020 }, { "epoch": 21.297189894975872, "grad_norm": 0.7670546174049377, "learning_rate": 7.871246097076356e-05, "loss": 0.010821713507175446, "step": 75030 }, { "epoch": 21.300028384899235, "grad_norm": 9.03890323638916, "learning_rate": 7.87096224808402e-05, "loss": 0.019540603458881377, "step": 75040 }, { "epoch": 21.302866874822595, "grad_norm": 2.6298537254333496, "learning_rate": 7.870678399091684e-05, "loss": 0.02876976728439331, "step": 75050 }, { "epoch": 21.305705364745954, "grad_norm": 8.473215103149414, "learning_rate": 7.870394550099347e-05, "loss": 0.01282089501619339, "step": 75060 }, { "epoch": 21.308543854669317, "grad_norm": 9.589628219604492, "learning_rate": 7.870110701107012e-05, "loss": 0.029595237970352174, "step": 75070 }, { "epoch": 21.311382344592676, "grad_norm": 0.7744223475456238, "learning_rate": 7.869826852114676e-05, "loss": 0.012272786349058151, "step": 75080 }, { "epoch": 21.31422083451604, "grad_norm": 9.22041130065918, "learning_rate": 7.869543003122339e-05, "loss": 0.010503837466239929, "step": 75090 }, { "epoch": 21.3170593244394, "grad_norm": 3.693596839904785, "learning_rate": 7.869259154130003e-05, "loss": 0.017556154727935792, "step": 75100 }, { "epoch": 21.319897814362758, "grad_norm": 1.0030608177185059, "learning_rate": 7.868975305137667e-05, "loss": 0.0142797589302063, "step": 75110 }, { "epoch": 21.32273630428612, "grad_norm": 4.09512186050415, "learning_rate": 7.86869145614533e-05, "loss": 0.017878469824790955, "step": 75120 }, { "epoch": 21.32557479420948, "grad_norm": 8.11027717590332, "learning_rate": 7.868407607152996e-05, "loss": 0.011373965442180634, "step": 75130 }, { "epoch": 21.328413284132843, "grad_norm": 8.891484260559082, "learning_rate": 7.86812375816066e-05, "loss": 0.011668568849563599, "step": 75140 }, { "epoch": 21.331251774056202, "grad_norm": 3.9640254974365234, "learning_rate": 7.867839909168323e-05, "loss": 0.020840416848659515, "step": 75150 }, { "epoch": 21.33409026397956, "grad_norm": 5.166890621185303, "learning_rate": 7.867556060175987e-05, "loss": 0.025284305214881897, "step": 75160 }, { "epoch": 21.336928753902924, "grad_norm": 0.3052218556404114, "learning_rate": 7.867272211183651e-05, "loss": 0.011096090078353882, "step": 75170 }, { "epoch": 21.339767243826284, "grad_norm": 0.2023923248052597, "learning_rate": 7.866988362191315e-05, "loss": 0.03064195215702057, "step": 75180 }, { "epoch": 21.342605733749647, "grad_norm": 0.2708665132522583, "learning_rate": 7.866704513198978e-05, "loss": 0.01771521717309952, "step": 75190 }, { "epoch": 21.345444223673006, "grad_norm": 3.1364245414733887, "learning_rate": 7.866420664206643e-05, "loss": 0.017460963129997252, "step": 75200 }, { "epoch": 21.348282713596365, "grad_norm": 0.32325777411460876, "learning_rate": 7.866136815214307e-05, "loss": 0.01415797770023346, "step": 75210 }, { "epoch": 21.351121203519728, "grad_norm": 12.002775192260742, "learning_rate": 7.86585296622197e-05, "loss": 0.016395658254623413, "step": 75220 }, { "epoch": 21.353959693443088, "grad_norm": 26.531234741210938, "learning_rate": 7.865569117229634e-05, "loss": 0.04341956377029419, "step": 75230 }, { "epoch": 21.35679818336645, "grad_norm": 0.35819247364997864, "learning_rate": 7.865285268237298e-05, "loss": 0.037037086486816403, "step": 75240 }, { "epoch": 21.35963667328981, "grad_norm": 5.870786190032959, "learning_rate": 7.865001419244961e-05, "loss": 0.01768689453601837, "step": 75250 }, { "epoch": 21.36247516321317, "grad_norm": 1.8148654699325562, "learning_rate": 7.864717570252625e-05, "loss": 0.013791662454605103, "step": 75260 }, { "epoch": 21.365313653136532, "grad_norm": 1.9761031866073608, "learning_rate": 7.864433721260291e-05, "loss": 0.012117818742990494, "step": 75270 }, { "epoch": 21.36815214305989, "grad_norm": 3.0222856998443604, "learning_rate": 7.864149872267954e-05, "loss": 0.015822088718414305, "step": 75280 }, { "epoch": 21.370990632983254, "grad_norm": 2.839536666870117, "learning_rate": 7.863866023275618e-05, "loss": 0.014929421246051788, "step": 75290 }, { "epoch": 21.373829122906614, "grad_norm": 1.188350796699524, "learning_rate": 7.863582174283282e-05, "loss": 0.011323277652263642, "step": 75300 }, { "epoch": 21.376667612829973, "grad_norm": 2.121244430541992, "learning_rate": 7.863298325290946e-05, "loss": 0.028906035423278808, "step": 75310 }, { "epoch": 21.379506102753336, "grad_norm": 0.7676653861999512, "learning_rate": 7.863014476298609e-05, "loss": 0.019589702785015106, "step": 75320 }, { "epoch": 21.382344592676695, "grad_norm": 5.872739791870117, "learning_rate": 7.862730627306274e-05, "loss": 0.01680319309234619, "step": 75330 }, { "epoch": 21.385183082600058, "grad_norm": 2.170499563217163, "learning_rate": 7.862446778313937e-05, "loss": 0.010884857922792434, "step": 75340 }, { "epoch": 21.388021572523417, "grad_norm": 4.428328037261963, "learning_rate": 7.862162929321601e-05, "loss": 0.02672119140625, "step": 75350 }, { "epoch": 21.390860062446777, "grad_norm": 1.1998437643051147, "learning_rate": 7.861879080329265e-05, "loss": 0.01797333061695099, "step": 75360 }, { "epoch": 21.39369855237014, "grad_norm": 2.891197443008423, "learning_rate": 7.86159523133693e-05, "loss": 0.009053567051887512, "step": 75370 }, { "epoch": 21.3965370422935, "grad_norm": 2.026702404022217, "learning_rate": 7.861311382344592e-05, "loss": 0.01224260926246643, "step": 75380 }, { "epoch": 21.39937553221686, "grad_norm": 2.7068800926208496, "learning_rate": 7.861027533352256e-05, "loss": 0.026593488454818726, "step": 75390 }, { "epoch": 21.40221402214022, "grad_norm": 4.440280437469482, "learning_rate": 7.860743684359922e-05, "loss": 0.017116546630859375, "step": 75400 }, { "epoch": 21.405052512063584, "grad_norm": 1.9180214405059814, "learning_rate": 7.860459835367585e-05, "loss": 0.015582957863807678, "step": 75410 }, { "epoch": 21.407891001986943, "grad_norm": 7.818751811981201, "learning_rate": 7.860175986375249e-05, "loss": 0.01647305190563202, "step": 75420 }, { "epoch": 21.410729491910303, "grad_norm": 0.6835595965385437, "learning_rate": 7.859892137382913e-05, "loss": 0.00738518089056015, "step": 75430 }, { "epoch": 21.413567981833665, "grad_norm": 3.5683164596557617, "learning_rate": 7.859608288390577e-05, "loss": 0.013958995044231415, "step": 75440 }, { "epoch": 21.416406471757025, "grad_norm": 0.7420746684074402, "learning_rate": 7.85932443939824e-05, "loss": 0.016392260789871216, "step": 75450 }, { "epoch": 21.419244961680388, "grad_norm": 14.469867706298828, "learning_rate": 7.859040590405904e-05, "loss": 0.022926747798919678, "step": 75460 }, { "epoch": 21.422083451603747, "grad_norm": 0.4592805504798889, "learning_rate": 7.858756741413568e-05, "loss": 0.011049892753362656, "step": 75470 }, { "epoch": 21.424921941527106, "grad_norm": 3.571254253387451, "learning_rate": 7.858472892421232e-05, "loss": 0.015177573263645171, "step": 75480 }, { "epoch": 21.42776043145047, "grad_norm": 3.2654356956481934, "learning_rate": 7.858189043428896e-05, "loss": 0.017203959822654723, "step": 75490 }, { "epoch": 21.43059892137383, "grad_norm": 6.545628070831299, "learning_rate": 7.85790519443656e-05, "loss": 0.014694875478744507, "step": 75500 }, { "epoch": 21.43059892137383, "eval_accuracy": 0.9673809372416863, "eval_loss": 0.10844043642282486, "eval_runtime": 31.977, "eval_samples_per_second": 491.822, "eval_steps_per_second": 7.693, "step": 75500 }, { "epoch": 21.43343741129719, "grad_norm": 3.4959089756011963, "learning_rate": 7.857621345444223e-05, "loss": 0.017986363172531127, "step": 75510 }, { "epoch": 21.43627590122055, "grad_norm": 15.093456268310547, "learning_rate": 7.857337496451887e-05, "loss": 0.024652181565761565, "step": 75520 }, { "epoch": 21.43911439114391, "grad_norm": 2.2097418308258057, "learning_rate": 7.857053647459553e-05, "loss": 0.02050362229347229, "step": 75530 }, { "epoch": 21.441952881067273, "grad_norm": 0.3023211658000946, "learning_rate": 7.856769798467216e-05, "loss": 0.009076818823814392, "step": 75540 }, { "epoch": 21.444791370990632, "grad_norm": 4.824226379394531, "learning_rate": 7.85648594947488e-05, "loss": 0.01609097868204117, "step": 75550 }, { "epoch": 21.447629860913995, "grad_norm": 11.725981712341309, "learning_rate": 7.856202100482544e-05, "loss": 0.008646267652511596, "step": 75560 }, { "epoch": 21.450468350837355, "grad_norm": 2.0100808143615723, "learning_rate": 7.855918251490207e-05, "loss": 0.01589919477701187, "step": 75570 }, { "epoch": 21.453306840760714, "grad_norm": 0.2560100257396698, "learning_rate": 7.855634402497871e-05, "loss": 0.011624252796173096, "step": 75580 }, { "epoch": 21.456145330684077, "grad_norm": 6.72741174697876, "learning_rate": 7.855350553505535e-05, "loss": 0.017058047652244567, "step": 75590 }, { "epoch": 21.458983820607436, "grad_norm": 0.8524512648582458, "learning_rate": 7.855066704513199e-05, "loss": 0.02553977072238922, "step": 75600 }, { "epoch": 21.4618223105308, "grad_norm": 1.851595163345337, "learning_rate": 7.854782855520863e-05, "loss": 0.014857091009616852, "step": 75610 }, { "epoch": 21.46466080045416, "grad_norm": 2.2471249103546143, "learning_rate": 7.854499006528527e-05, "loss": 0.0066909223794937136, "step": 75620 }, { "epoch": 21.467499290377518, "grad_norm": 1.8378700017929077, "learning_rate": 7.854215157536192e-05, "loss": 0.01038406416773796, "step": 75630 }, { "epoch": 21.47033778030088, "grad_norm": 0.9276632070541382, "learning_rate": 7.853931308543854e-05, "loss": 0.006019279360771179, "step": 75640 }, { "epoch": 21.47317627022424, "grad_norm": 4.08128547668457, "learning_rate": 7.853647459551519e-05, "loss": 0.013660694658756255, "step": 75650 }, { "epoch": 21.476014760147603, "grad_norm": 0.3786870539188385, "learning_rate": 7.853363610559183e-05, "loss": 0.013316331803798676, "step": 75660 }, { "epoch": 21.478853250070962, "grad_norm": 6.131454944610596, "learning_rate": 7.853079761566847e-05, "loss": 0.02022210359573364, "step": 75670 }, { "epoch": 21.48169173999432, "grad_norm": 18.037982940673828, "learning_rate": 7.852795912574511e-05, "loss": 0.027197617292404174, "step": 75680 }, { "epoch": 21.484530229917684, "grad_norm": 0.18578213453292847, "learning_rate": 7.852512063582175e-05, "loss": 0.02393554449081421, "step": 75690 }, { "epoch": 21.487368719841044, "grad_norm": 8.948965072631836, "learning_rate": 7.852228214589838e-05, "loss": 0.015443667769432068, "step": 75700 }, { "epoch": 21.490207209764407, "grad_norm": 1.9039208889007568, "learning_rate": 7.851944365597502e-05, "loss": 0.013566416501998902, "step": 75710 }, { "epoch": 21.493045699687766, "grad_norm": 0.17158998548984528, "learning_rate": 7.851660516605166e-05, "loss": 0.013597056269645691, "step": 75720 }, { "epoch": 21.495884189611125, "grad_norm": 8.39345645904541, "learning_rate": 7.85137666761283e-05, "loss": 0.014701043069362641, "step": 75730 }, { "epoch": 21.498722679534488, "grad_norm": 0.5644033551216125, "learning_rate": 7.851092818620494e-05, "loss": 0.010521616786718369, "step": 75740 }, { "epoch": 21.501561169457847, "grad_norm": 8.533385276794434, "learning_rate": 7.850808969628159e-05, "loss": 0.007334190607070923, "step": 75750 }, { "epoch": 21.50439965938121, "grad_norm": 3.7465977668762207, "learning_rate": 7.850525120635823e-05, "loss": 0.013029253482818604, "step": 75760 }, { "epoch": 21.50723814930457, "grad_norm": 6.337586879730225, "learning_rate": 7.850241271643485e-05, "loss": 0.024566058814525605, "step": 75770 }, { "epoch": 21.510076639227933, "grad_norm": 1.4087105989456177, "learning_rate": 7.84995742265115e-05, "loss": 0.016794337332248686, "step": 75780 }, { "epoch": 21.512915129151292, "grad_norm": 15.563828468322754, "learning_rate": 7.849673573658814e-05, "loss": 0.02238583266735077, "step": 75790 }, { "epoch": 21.51575361907465, "grad_norm": 3.300431251525879, "learning_rate": 7.849389724666478e-05, "loss": 0.011002079397439957, "step": 75800 }, { "epoch": 21.518592108998014, "grad_norm": 5.7201128005981445, "learning_rate": 7.849105875674142e-05, "loss": 0.011888142675161362, "step": 75810 }, { "epoch": 21.521430598921373, "grad_norm": 20.418607711791992, "learning_rate": 7.848822026681806e-05, "loss": 0.03444123864173889, "step": 75820 }, { "epoch": 21.524269088844733, "grad_norm": 0.3705821931362152, "learning_rate": 7.848538177689469e-05, "loss": 0.024283920228481293, "step": 75830 }, { "epoch": 21.527107578768096, "grad_norm": 3.9132163524627686, "learning_rate": 7.848254328697133e-05, "loss": 0.015902027487754822, "step": 75840 }, { "epoch": 21.529946068691455, "grad_norm": 1.588884949684143, "learning_rate": 7.847970479704797e-05, "loss": 0.00878513902425766, "step": 75850 }, { "epoch": 21.532784558614818, "grad_norm": 0.15484026074409485, "learning_rate": 7.847686630712461e-05, "loss": 0.015248262882232666, "step": 75860 }, { "epoch": 21.535623048538177, "grad_norm": 5.4234209060668945, "learning_rate": 7.847402781720125e-05, "loss": 0.0068693958222866055, "step": 75870 }, { "epoch": 21.53846153846154, "grad_norm": 4.74229097366333, "learning_rate": 7.84711893272779e-05, "loss": 0.02267167568206787, "step": 75880 }, { "epoch": 21.5413000283849, "grad_norm": 0.520041286945343, "learning_rate": 7.846835083735454e-05, "loss": 0.020007678866386415, "step": 75890 }, { "epoch": 21.54413851830826, "grad_norm": 4.989255428314209, "learning_rate": 7.846551234743117e-05, "loss": 0.016596060991287232, "step": 75900 }, { "epoch": 21.54697700823162, "grad_norm": 7.046559810638428, "learning_rate": 7.846267385750781e-05, "loss": 0.017507560551166534, "step": 75910 }, { "epoch": 21.54981549815498, "grad_norm": 0.6137560606002808, "learning_rate": 7.845983536758445e-05, "loss": 0.0036479607224464417, "step": 75920 }, { "epoch": 21.552653988078344, "grad_norm": 1.9928137063980103, "learning_rate": 7.845699687766109e-05, "loss": 0.005956060439348221, "step": 75930 }, { "epoch": 21.555492478001703, "grad_norm": 0.5439964532852173, "learning_rate": 7.845415838773773e-05, "loss": 0.00621982216835022, "step": 75940 }, { "epoch": 21.558330967925063, "grad_norm": 1.5522284507751465, "learning_rate": 7.845131989781437e-05, "loss": 0.013819333910942078, "step": 75950 }, { "epoch": 21.561169457848425, "grad_norm": 0.7793036699295044, "learning_rate": 7.8448481407891e-05, "loss": 0.013180595636367799, "step": 75960 }, { "epoch": 21.564007947771785, "grad_norm": 7.142220973968506, "learning_rate": 7.844564291796764e-05, "loss": 0.025642618536949158, "step": 75970 }, { "epoch": 21.566846437695148, "grad_norm": 0.406651109457016, "learning_rate": 7.844280442804428e-05, "loss": 0.010029391199350358, "step": 75980 }, { "epoch": 21.569684927618507, "grad_norm": 0.28016993403434753, "learning_rate": 7.843996593812092e-05, "loss": 0.009296253323554993, "step": 75990 }, { "epoch": 21.572523417541866, "grad_norm": 3.4080655574798584, "learning_rate": 7.843712744819757e-05, "loss": 0.016548486053943635, "step": 76000 }, { "epoch": 21.572523417541866, "eval_accuracy": 0.9671265975710561, "eval_loss": 0.10753749310970306, "eval_runtime": 31.5089, "eval_samples_per_second": 499.128, "eval_steps_per_second": 7.807, "step": 76000 }, { "epoch": 21.57536190746523, "grad_norm": 5.59756326675415, "learning_rate": 7.843428895827421e-05, "loss": 0.01600130498409271, "step": 76010 }, { "epoch": 21.57820039738859, "grad_norm": 9.081891059875488, "learning_rate": 7.843145046835085e-05, "loss": 0.03320396840572357, "step": 76020 }, { "epoch": 21.58103888731195, "grad_norm": 0.8349407911300659, "learning_rate": 7.842861197842748e-05, "loss": 0.032330819964408876, "step": 76030 }, { "epoch": 21.58387737723531, "grad_norm": 3.4547901153564453, "learning_rate": 7.842577348850412e-05, "loss": 0.012268371880054474, "step": 76040 }, { "epoch": 21.58671586715867, "grad_norm": 1.637893557548523, "learning_rate": 7.842293499858076e-05, "loss": 0.012229278683662415, "step": 76050 }, { "epoch": 21.589554357082033, "grad_norm": 3.288370132446289, "learning_rate": 7.84200965086574e-05, "loss": 0.010865146666765213, "step": 76060 }, { "epoch": 21.592392847005392, "grad_norm": 3.339815616607666, "learning_rate": 7.841725801873404e-05, "loss": 0.0044495463371276855, "step": 76070 }, { "epoch": 21.595231336928755, "grad_norm": 1.7163646221160889, "learning_rate": 7.841441952881068e-05, "loss": 0.013310173153877258, "step": 76080 }, { "epoch": 21.598069826852115, "grad_norm": 12.90844440460205, "learning_rate": 7.841158103888731e-05, "loss": 0.03339901566505432, "step": 76090 }, { "epoch": 21.600908316775474, "grad_norm": 10.757219314575195, "learning_rate": 7.840874254896395e-05, "loss": 0.019683653116226198, "step": 76100 }, { "epoch": 21.603746806698837, "grad_norm": 6.217966079711914, "learning_rate": 7.84059040590406e-05, "loss": 0.018769189715385437, "step": 76110 }, { "epoch": 21.606585296622196, "grad_norm": 3.0087974071502686, "learning_rate": 7.840306556911724e-05, "loss": 0.019456428289413453, "step": 76120 }, { "epoch": 21.60942378654556, "grad_norm": 0.22842000424861908, "learning_rate": 7.840022707919388e-05, "loss": 0.01441936194896698, "step": 76130 }, { "epoch": 21.61226227646892, "grad_norm": 11.709714889526367, "learning_rate": 7.839738858927052e-05, "loss": 0.009755165874958038, "step": 76140 }, { "epoch": 21.615100766392278, "grad_norm": 5.41524076461792, "learning_rate": 7.839455009934716e-05, "loss": 0.015011473000049591, "step": 76150 }, { "epoch": 21.61793925631564, "grad_norm": 0.8947502970695496, "learning_rate": 7.839171160942379e-05, "loss": 0.018856336176395417, "step": 76160 }, { "epoch": 21.620777746239, "grad_norm": 0.9825923442840576, "learning_rate": 7.838887311950043e-05, "loss": 0.02439998686313629, "step": 76170 }, { "epoch": 21.623616236162363, "grad_norm": 5.807397842407227, "learning_rate": 7.838603462957707e-05, "loss": 0.01727648973464966, "step": 76180 }, { "epoch": 21.626454726085722, "grad_norm": 0.9970225691795349, "learning_rate": 7.83831961396537e-05, "loss": 0.006820657104253769, "step": 76190 }, { "epoch": 21.62929321600908, "grad_norm": 2.366140365600586, "learning_rate": 7.838035764973035e-05, "loss": 0.017223221063613892, "step": 76200 }, { "epoch": 21.632131705932444, "grad_norm": 2.6090049743652344, "learning_rate": 7.8377519159807e-05, "loss": 0.017718872427940367, "step": 76210 }, { "epoch": 21.634970195855804, "grad_norm": 13.620538711547852, "learning_rate": 7.837468066988362e-05, "loss": 0.019850121438503267, "step": 76220 }, { "epoch": 21.637808685779166, "grad_norm": 4.612888336181641, "learning_rate": 7.837184217996026e-05, "loss": 0.013288548588752747, "step": 76230 }, { "epoch": 21.640647175702526, "grad_norm": 4.95379638671875, "learning_rate": 7.83690036900369e-05, "loss": 0.030544334650039674, "step": 76240 }, { "epoch": 21.64348566562589, "grad_norm": 15.60219669342041, "learning_rate": 7.836616520011355e-05, "loss": 0.016857418417930602, "step": 76250 }, { "epoch": 21.646324155549248, "grad_norm": 2.1820733547210693, "learning_rate": 7.836332671019019e-05, "loss": 0.021499449014663698, "step": 76260 }, { "epoch": 21.649162645472607, "grad_norm": 3.8691420555114746, "learning_rate": 7.836048822026683e-05, "loss": 0.03640146553516388, "step": 76270 }, { "epoch": 21.65200113539597, "grad_norm": 17.362730026245117, "learning_rate": 7.835764973034346e-05, "loss": 0.03667094707489014, "step": 76280 }, { "epoch": 21.65483962531933, "grad_norm": 2.120270252227783, "learning_rate": 7.83548112404201e-05, "loss": 0.024965274333953857, "step": 76290 }, { "epoch": 21.657678115242692, "grad_norm": 3.7044076919555664, "learning_rate": 7.835197275049674e-05, "loss": 0.01726778894662857, "step": 76300 }, { "epoch": 21.660516605166052, "grad_norm": 2.1240944862365723, "learning_rate": 7.834913426057338e-05, "loss": 0.028606876730918884, "step": 76310 }, { "epoch": 21.66335509508941, "grad_norm": 6.846428394317627, "learning_rate": 7.834629577065001e-05, "loss": 0.025026938319206236, "step": 76320 }, { "epoch": 21.666193585012774, "grad_norm": 0.16483888030052185, "learning_rate": 7.834345728072666e-05, "loss": 0.016480317711830138, "step": 76330 }, { "epoch": 21.669032074936133, "grad_norm": 1.0717039108276367, "learning_rate": 7.83406187908033e-05, "loss": 0.005578938871622086, "step": 76340 }, { "epoch": 21.671870564859496, "grad_norm": 3.6805405616760254, "learning_rate": 7.833778030087993e-05, "loss": 0.010481615364551545, "step": 76350 }, { "epoch": 21.674709054782856, "grad_norm": 1.2032544612884521, "learning_rate": 7.833494181095657e-05, "loss": 0.009577855467796326, "step": 76360 }, { "epoch": 21.677547544706215, "grad_norm": 1.5188395977020264, "learning_rate": 7.833210332103322e-05, "loss": 0.010832367837429047, "step": 76370 }, { "epoch": 21.680386034629578, "grad_norm": 0.5624155402183533, "learning_rate": 7.832926483110984e-05, "loss": 0.010810618102550507, "step": 76380 }, { "epoch": 21.683224524552937, "grad_norm": 3.3656253814697266, "learning_rate": 7.832642634118648e-05, "loss": 0.011495617032051087, "step": 76390 }, { "epoch": 21.6860630144763, "grad_norm": 1.0415982007980347, "learning_rate": 7.832358785126314e-05, "loss": 0.010781669616699218, "step": 76400 }, { "epoch": 21.68890150439966, "grad_norm": 2.0982673168182373, "learning_rate": 7.832074936133977e-05, "loss": 0.04109894633293152, "step": 76410 }, { "epoch": 21.69173999432302, "grad_norm": 2.817380666732788, "learning_rate": 7.831791087141641e-05, "loss": 0.016175878047943116, "step": 76420 }, { "epoch": 21.69457848424638, "grad_norm": 8.560664176940918, "learning_rate": 7.831507238149305e-05, "loss": 0.011419643461704255, "step": 76430 }, { "epoch": 21.69741697416974, "grad_norm": 7.277251720428467, "learning_rate": 7.831223389156969e-05, "loss": 0.017267069220542906, "step": 76440 }, { "epoch": 21.700255464093104, "grad_norm": 2.763134717941284, "learning_rate": 7.830939540164632e-05, "loss": 0.01617017537355423, "step": 76450 }, { "epoch": 21.703093954016463, "grad_norm": 3.0343339443206787, "learning_rate": 7.830655691172297e-05, "loss": 0.01750296652317047, "step": 76460 }, { "epoch": 21.705932443939822, "grad_norm": 3.3168580532073975, "learning_rate": 7.830371842179962e-05, "loss": 0.0069194786250591275, "step": 76470 }, { "epoch": 21.708770933863185, "grad_norm": 1.5890346765518188, "learning_rate": 7.830087993187624e-05, "loss": 0.005515266954898834, "step": 76480 }, { "epoch": 21.711609423786545, "grad_norm": 4.021758556365967, "learning_rate": 7.829804144195288e-05, "loss": 0.009052052348852157, "step": 76490 }, { "epoch": 21.714447913709908, "grad_norm": 9.180389404296875, "learning_rate": 7.829520295202953e-05, "loss": 0.02535584568977356, "step": 76500 }, { "epoch": 21.714447913709908, "eval_accuracy": 0.9638201818528646, "eval_loss": 0.12102220207452774, "eval_runtime": 31.7455, "eval_samples_per_second": 495.409, "eval_steps_per_second": 7.749, "step": 76500 }, { "epoch": 21.717286403633267, "grad_norm": 2.0180230140686035, "learning_rate": 7.829236446210615e-05, "loss": 0.02152651846408844, "step": 76510 }, { "epoch": 21.720124893556626, "grad_norm": 0.6894105076789856, "learning_rate": 7.82895259721828e-05, "loss": 0.017346447706222533, "step": 76520 }, { "epoch": 21.72296338347999, "grad_norm": 0.2653830945491791, "learning_rate": 7.828668748225945e-05, "loss": 0.006541232764720917, "step": 76530 }, { "epoch": 21.72580187340335, "grad_norm": 4.41383695602417, "learning_rate": 7.828384899233608e-05, "loss": 0.015062984824180604, "step": 76540 }, { "epoch": 21.72864036332671, "grad_norm": 16.263296127319336, "learning_rate": 7.828101050241272e-05, "loss": 0.026714929938316347, "step": 76550 }, { "epoch": 21.73147885325007, "grad_norm": 4.507694721221924, "learning_rate": 7.827817201248936e-05, "loss": 0.01858513355255127, "step": 76560 }, { "epoch": 21.73431734317343, "grad_norm": 3.594895362854004, "learning_rate": 7.8275333522566e-05, "loss": 0.010094435513019561, "step": 76570 }, { "epoch": 21.737155833096793, "grad_norm": 11.130035400390625, "learning_rate": 7.827249503264263e-05, "loss": 0.016239157319068907, "step": 76580 }, { "epoch": 21.739994323020152, "grad_norm": 12.336492538452148, "learning_rate": 7.826965654271927e-05, "loss": 0.015177685022354125, "step": 76590 }, { "epoch": 21.742832812943515, "grad_norm": 0.9573445916175842, "learning_rate": 7.826681805279593e-05, "loss": 0.015393544733524323, "step": 76600 }, { "epoch": 21.745671302866874, "grad_norm": 3.006967067718506, "learning_rate": 7.826397956287255e-05, "loss": 0.010941926389932632, "step": 76610 }, { "epoch": 21.748509792790237, "grad_norm": 8.52607250213623, "learning_rate": 7.82611410729492e-05, "loss": 0.01392655223608017, "step": 76620 }, { "epoch": 21.751348282713597, "grad_norm": 0.6521673202514648, "learning_rate": 7.825830258302584e-05, "loss": 0.016238363087177278, "step": 76630 }, { "epoch": 21.754186772636956, "grad_norm": 2.1173746585845947, "learning_rate": 7.825546409310246e-05, "loss": 0.02088761180639267, "step": 76640 }, { "epoch": 21.75702526256032, "grad_norm": 15.88673210144043, "learning_rate": 7.82526256031791e-05, "loss": 0.02807828187942505, "step": 76650 }, { "epoch": 21.759863752483678, "grad_norm": 11.676653861999512, "learning_rate": 7.825007096224809e-05, "loss": 0.0297097772359848, "step": 76660 }, { "epoch": 21.76270224240704, "grad_norm": 11.488948822021484, "learning_rate": 7.824723247232473e-05, "loss": 0.039601188898086545, "step": 76670 }, { "epoch": 21.7655407323304, "grad_norm": 1.1914842128753662, "learning_rate": 7.824439398240137e-05, "loss": 0.019672128558158874, "step": 76680 }, { "epoch": 21.76837922225376, "grad_norm": 1.257739543914795, "learning_rate": 7.8241555492478e-05, "loss": 0.007462108135223388, "step": 76690 }, { "epoch": 21.771217712177123, "grad_norm": 4.431422710418701, "learning_rate": 7.823871700255464e-05, "loss": 0.018943874537944792, "step": 76700 }, { "epoch": 21.774056202100482, "grad_norm": 2.9444966316223145, "learning_rate": 7.823587851263129e-05, "loss": 0.03041023313999176, "step": 76710 }, { "epoch": 21.776894692023845, "grad_norm": 0.32359835505485535, "learning_rate": 7.823304002270792e-05, "loss": 0.022947846353054045, "step": 76720 }, { "epoch": 21.779733181947204, "grad_norm": 2.0393660068511963, "learning_rate": 7.823020153278456e-05, "loss": 0.01531689167022705, "step": 76730 }, { "epoch": 21.782571671870564, "grad_norm": 0.7711515426635742, "learning_rate": 7.82273630428612e-05, "loss": 0.01182211861014366, "step": 76740 }, { "epoch": 21.785410161793926, "grad_norm": 2.697129249572754, "learning_rate": 7.822452455293784e-05, "loss": 0.003196723759174347, "step": 76750 }, { "epoch": 21.788248651717286, "grad_norm": 8.89659309387207, "learning_rate": 7.822168606301447e-05, "loss": 0.027492699027061463, "step": 76760 }, { "epoch": 21.79108714164065, "grad_norm": 0.5973318219184875, "learning_rate": 7.821884757309111e-05, "loss": 0.009495507925748825, "step": 76770 }, { "epoch": 21.793925631564008, "grad_norm": 3.4102282524108887, "learning_rate": 7.821600908316777e-05, "loss": 0.011741966009140015, "step": 76780 }, { "epoch": 21.796764121487367, "grad_norm": 8.232495307922363, "learning_rate": 7.82131705932444e-05, "loss": 0.018143440783023834, "step": 76790 }, { "epoch": 21.79960261141073, "grad_norm": 2.0071282386779785, "learning_rate": 7.821033210332104e-05, "loss": 0.03609492182731629, "step": 76800 }, { "epoch": 21.80244110133409, "grad_norm": 12.894474983215332, "learning_rate": 7.820749361339768e-05, "loss": 0.03749408721923828, "step": 76810 }, { "epoch": 21.805279591257452, "grad_norm": 9.95533561706543, "learning_rate": 7.820465512347431e-05, "loss": 0.037414824962615965, "step": 76820 }, { "epoch": 21.80811808118081, "grad_norm": 1.8562756776809692, "learning_rate": 7.820181663355095e-05, "loss": 0.011846522241830826, "step": 76830 }, { "epoch": 21.81095657110417, "grad_norm": 1.3606544733047485, "learning_rate": 7.81989781436276e-05, "loss": 0.015765571594238283, "step": 76840 }, { "epoch": 21.813795061027534, "grad_norm": 5.749678134918213, "learning_rate": 7.819613965370423e-05, "loss": 0.017157140374183654, "step": 76850 }, { "epoch": 21.816633550950893, "grad_norm": 10.163677215576172, "learning_rate": 7.819330116378087e-05, "loss": 0.01743210256099701, "step": 76860 }, { "epoch": 21.819472040874256, "grad_norm": 0.16774751245975494, "learning_rate": 7.819046267385751e-05, "loss": 0.004885649681091309, "step": 76870 }, { "epoch": 21.822310530797616, "grad_norm": 1.4132473468780518, "learning_rate": 7.818762418393416e-05, "loss": 0.006147371232509613, "step": 76880 }, { "epoch": 21.825149020720975, "grad_norm": 3.599931240081787, "learning_rate": 7.818478569401078e-05, "loss": 0.019440756738185884, "step": 76890 }, { "epoch": 21.827987510644338, "grad_norm": 0.451099693775177, "learning_rate": 7.818194720408742e-05, "loss": 0.012780244648456573, "step": 76900 }, { "epoch": 21.830826000567697, "grad_norm": 5.143447399139404, "learning_rate": 7.817910871416408e-05, "loss": 0.010682998597621918, "step": 76910 }, { "epoch": 21.83366449049106, "grad_norm": 4.042118549346924, "learning_rate": 7.817627022424071e-05, "loss": 0.022549442946910858, "step": 76920 }, { "epoch": 21.83650298041442, "grad_norm": 3.9703948497772217, "learning_rate": 7.817343173431735e-05, "loss": 0.02118019461631775, "step": 76930 }, { "epoch": 21.83934147033778, "grad_norm": 8.298737525939941, "learning_rate": 7.817059324439399e-05, "loss": 0.007871933281421661, "step": 76940 }, { "epoch": 21.84217996026114, "grad_norm": 1.2813085317611694, "learning_rate": 7.816775475447062e-05, "loss": 0.009709246456623077, "step": 76950 }, { "epoch": 21.8450184501845, "grad_norm": 6.624592304229736, "learning_rate": 7.816491626454726e-05, "loss": 0.019441741704940795, "step": 76960 }, { "epoch": 21.847856940107864, "grad_norm": 3.284335136413574, "learning_rate": 7.81620777746239e-05, "loss": 0.023923635482788086, "step": 76970 }, { "epoch": 21.850695430031223, "grad_norm": 0.8073540925979614, "learning_rate": 7.815923928470054e-05, "loss": 0.009224055707454682, "step": 76980 }, { "epoch": 21.853533919954586, "grad_norm": 3.46307110786438, "learning_rate": 7.815640079477718e-05, "loss": 0.015401774644851684, "step": 76990 }, { "epoch": 21.856372409877945, "grad_norm": 0.9773533940315247, "learning_rate": 7.815356230485382e-05, "loss": 0.012726616859436036, "step": 77000 }, { "epoch": 21.856372409877945, "eval_accuracy": 0.964837540535385, "eval_loss": 0.10746651887893677, "eval_runtime": 31.4459, "eval_samples_per_second": 500.129, "eval_steps_per_second": 7.823, "step": 77000 }, { "epoch": 21.859210899801305, "grad_norm": 0.2333589494228363, "learning_rate": 7.815072381493047e-05, "loss": 0.009247449785470962, "step": 77010 }, { "epoch": 21.862049389724667, "grad_norm": 12.921958923339844, "learning_rate": 7.81478853250071e-05, "loss": 0.025931236147880555, "step": 77020 }, { "epoch": 21.864887879648027, "grad_norm": 8.462388038635254, "learning_rate": 7.814504683508374e-05, "loss": 0.020560833811759948, "step": 77030 }, { "epoch": 21.86772636957139, "grad_norm": 1.0064833164215088, "learning_rate": 7.814220834516039e-05, "loss": 0.02224023938179016, "step": 77040 }, { "epoch": 21.87056485949475, "grad_norm": 4.009249210357666, "learning_rate": 7.813936985523702e-05, "loss": 0.016633112728595734, "step": 77050 }, { "epoch": 21.87340334941811, "grad_norm": 9.028077125549316, "learning_rate": 7.813653136531366e-05, "loss": 0.037223371863365176, "step": 77060 }, { "epoch": 21.87624183934147, "grad_norm": 2.489049196243286, "learning_rate": 7.81336928753903e-05, "loss": 0.020155759155750276, "step": 77070 }, { "epoch": 21.87908032926483, "grad_norm": 10.08251667022705, "learning_rate": 7.813085438546693e-05, "loss": 0.01674887239933014, "step": 77080 }, { "epoch": 21.881918819188193, "grad_norm": 0.4034408628940582, "learning_rate": 7.812801589554357e-05, "loss": 0.0044997379183769224, "step": 77090 }, { "epoch": 21.884757309111553, "grad_norm": 1.4603570699691772, "learning_rate": 7.812517740562021e-05, "loss": 0.007852521538734437, "step": 77100 }, { "epoch": 21.887595799034912, "grad_norm": 0.9303902983665466, "learning_rate": 7.812233891569685e-05, "loss": 0.027444884181022644, "step": 77110 }, { "epoch": 21.890434288958275, "grad_norm": 1.6801143884658813, "learning_rate": 7.81195004257735e-05, "loss": 0.014386960864067077, "step": 77120 }, { "epoch": 21.893272778881634, "grad_norm": 4.740293979644775, "learning_rate": 7.811666193585014e-05, "loss": 0.013454185426235199, "step": 77130 }, { "epoch": 21.896111268804997, "grad_norm": 1.0744178295135498, "learning_rate": 7.811382344592678e-05, "loss": 0.009902026504278183, "step": 77140 }, { "epoch": 21.898949758728357, "grad_norm": 11.454477310180664, "learning_rate": 7.81109849560034e-05, "loss": 0.014002050459384918, "step": 77150 }, { "epoch": 21.901788248651716, "grad_norm": 6.228581428527832, "learning_rate": 7.810814646608005e-05, "loss": 0.024948759377002715, "step": 77160 }, { "epoch": 21.90462673857508, "grad_norm": 0.2029803991317749, "learning_rate": 7.810530797615669e-05, "loss": 0.012694242596626281, "step": 77170 }, { "epoch": 21.907465228498438, "grad_norm": 0.6979201436042786, "learning_rate": 7.810246948623333e-05, "loss": 0.005986906960606575, "step": 77180 }, { "epoch": 21.9103037184218, "grad_norm": 4.384439468383789, "learning_rate": 7.809963099630997e-05, "loss": 0.016067472100257874, "step": 77190 }, { "epoch": 21.91314220834516, "grad_norm": 4.829434394836426, "learning_rate": 7.809679250638661e-05, "loss": 0.008404189348220825, "step": 77200 }, { "epoch": 21.91598069826852, "grad_norm": 0.5981588959693909, "learning_rate": 7.809395401646324e-05, "loss": 0.016064085066318512, "step": 77210 }, { "epoch": 21.918819188191883, "grad_norm": 7.731984615325928, "learning_rate": 7.809111552653988e-05, "loss": 0.02272556722164154, "step": 77220 }, { "epoch": 21.921657678115242, "grad_norm": 1.8182883262634277, "learning_rate": 7.808827703661652e-05, "loss": 0.010691514611244202, "step": 77230 }, { "epoch": 21.924496168038605, "grad_norm": 19.29737091064453, "learning_rate": 7.808543854669316e-05, "loss": 0.030672550201416016, "step": 77240 }, { "epoch": 21.927334657961964, "grad_norm": 1.276174783706665, "learning_rate": 7.80826000567698e-05, "loss": 0.010022606700658798, "step": 77250 }, { "epoch": 21.930173147885323, "grad_norm": 8.187736511230469, "learning_rate": 7.807976156684645e-05, "loss": 0.010281653702259063, "step": 77260 }, { "epoch": 21.933011637808686, "grad_norm": 11.26250171661377, "learning_rate": 7.807692307692307e-05, "loss": 0.02555321455001831, "step": 77270 }, { "epoch": 21.935850127732046, "grad_norm": 0.2935706377029419, "learning_rate": 7.807408458699972e-05, "loss": 0.016476118564605714, "step": 77280 }, { "epoch": 21.93868861765541, "grad_norm": 9.60158634185791, "learning_rate": 7.807124609707636e-05, "loss": 0.01854208707809448, "step": 77290 }, { "epoch": 21.941527107578768, "grad_norm": 1.9438365697860718, "learning_rate": 7.8068407607153e-05, "loss": 0.011578445881605148, "step": 77300 }, { "epoch": 21.944365597502127, "grad_norm": 1.3686802387237549, "learning_rate": 7.806556911722964e-05, "loss": 0.00876007303595543, "step": 77310 }, { "epoch": 21.94720408742549, "grad_norm": 1.525789499282837, "learning_rate": 7.806273062730628e-05, "loss": 0.02529314160346985, "step": 77320 }, { "epoch": 21.95004257734885, "grad_norm": 4.271810531616211, "learning_rate": 7.805989213738292e-05, "loss": 0.02377483993768692, "step": 77330 }, { "epoch": 21.952881067272212, "grad_norm": 5.852486610412598, "learning_rate": 7.805705364745955e-05, "loss": 0.034498760104179384, "step": 77340 }, { "epoch": 21.95571955719557, "grad_norm": 0.8101273775100708, "learning_rate": 7.805421515753619e-05, "loss": 0.008817987143993377, "step": 77350 }, { "epoch": 21.958558047118935, "grad_norm": 0.2558135688304901, "learning_rate": 7.805137666761283e-05, "loss": 0.024744048714637756, "step": 77360 }, { "epoch": 21.961396537042294, "grad_norm": 15.887232780456543, "learning_rate": 7.804853817768946e-05, "loss": 0.012068048864603043, "step": 77370 }, { "epoch": 21.964235026965653, "grad_norm": 2.8790669441223145, "learning_rate": 7.804569968776612e-05, "loss": 0.012857344746589661, "step": 77380 }, { "epoch": 21.967073516889016, "grad_norm": 4.863633155822754, "learning_rate": 7.804286119784276e-05, "loss": 0.036144202947616576, "step": 77390 }, { "epoch": 21.969912006812375, "grad_norm": 2.471874713897705, "learning_rate": 7.804002270791939e-05, "loss": 0.02833322584629059, "step": 77400 }, { "epoch": 21.972750496735735, "grad_norm": 5.715687274932861, "learning_rate": 7.803718421799603e-05, "loss": 0.009659874439239501, "step": 77410 }, { "epoch": 21.975588986659098, "grad_norm": 0.931130051612854, "learning_rate": 7.803434572807267e-05, "loss": 0.01696839779615402, "step": 77420 }, { "epoch": 21.978427476582457, "grad_norm": 5.212191581726074, "learning_rate": 7.803150723814931e-05, "loss": 0.026097530126571657, "step": 77430 }, { "epoch": 21.98126596650582, "grad_norm": 6.8784942626953125, "learning_rate": 7.802866874822595e-05, "loss": 0.027269402146339418, "step": 77440 }, { "epoch": 21.98410445642918, "grad_norm": 0.5826104283332825, "learning_rate": 7.802583025830259e-05, "loss": 0.008418075740337372, "step": 77450 }, { "epoch": 21.986942946352542, "grad_norm": 0.7270312905311584, "learning_rate": 7.802299176837923e-05, "loss": 0.008091999590396881, "step": 77460 }, { "epoch": 21.9897814362759, "grad_norm": 2.320132255554199, "learning_rate": 7.802015327845586e-05, "loss": 0.006875626742839813, "step": 77470 }, { "epoch": 21.99261992619926, "grad_norm": 3.2568306922912598, "learning_rate": 7.80173147885325e-05, "loss": 0.007874561101198196, "step": 77480 }, { "epoch": 21.995458416122624, "grad_norm": 0.14644795656204224, "learning_rate": 7.801447629860914e-05, "loss": 0.0060274101793766025, "step": 77490 }, { "epoch": 21.998296906045983, "grad_norm": 8.998992919921875, "learning_rate": 7.801163780868577e-05, "loss": 0.02764994204044342, "step": 77500 }, { "epoch": 21.998296906045983, "eval_accuracy": 0.9660456539708782, "eval_loss": 0.11295709013938904, "eval_runtime": 31.6057, "eval_samples_per_second": 497.6, "eval_steps_per_second": 7.783, "step": 77500 }, { "epoch": 22.001135395969346, "grad_norm": 0.8961758017539978, "learning_rate": 7.800879931876243e-05, "loss": 0.008397797495126725, "step": 77510 }, { "epoch": 22.003973885892705, "grad_norm": 1.009544849395752, "learning_rate": 7.800596082883907e-05, "loss": 0.021177010238170625, "step": 77520 }, { "epoch": 22.006812375816065, "grad_norm": 6.588967323303223, "learning_rate": 7.80031223389157e-05, "loss": 0.03147798776626587, "step": 77530 }, { "epoch": 22.009650865739427, "grad_norm": 0.9545502066612244, "learning_rate": 7.800028384899234e-05, "loss": 0.018263402581214904, "step": 77540 }, { "epoch": 22.012489355662787, "grad_norm": 2.773300886154175, "learning_rate": 7.799744535906898e-05, "loss": 0.008683116734027862, "step": 77550 }, { "epoch": 22.01532784558615, "grad_norm": 3.157545328140259, "learning_rate": 7.799460686914562e-05, "loss": 0.011756989359855651, "step": 77560 }, { "epoch": 22.01816633550951, "grad_norm": 0.31751975417137146, "learning_rate": 7.799176837922225e-05, "loss": 0.021091081202030182, "step": 77570 }, { "epoch": 22.02100482543287, "grad_norm": 0.3000395894050598, "learning_rate": 7.79889298892989e-05, "loss": 0.0038685090839862823, "step": 77580 }, { "epoch": 22.02384331535623, "grad_norm": 0.4833454489707947, "learning_rate": 7.798609139937554e-05, "loss": 0.008476876467466355, "step": 77590 }, { "epoch": 22.02668180527959, "grad_norm": 0.2859414517879486, "learning_rate": 7.798325290945217e-05, "loss": 0.010607036203145981, "step": 77600 }, { "epoch": 22.029520295202953, "grad_norm": 0.2562928795814514, "learning_rate": 7.798041441952881e-05, "loss": 0.008344996720552444, "step": 77610 }, { "epoch": 22.032358785126313, "grad_norm": 5.452681064605713, "learning_rate": 7.797757592960545e-05, "loss": 0.012287989258766174, "step": 77620 }, { "epoch": 22.035197275049672, "grad_norm": 0.38039126992225647, "learning_rate": 7.797473743968208e-05, "loss": 0.02315075695514679, "step": 77630 }, { "epoch": 22.038035764973035, "grad_norm": 8.661182403564453, "learning_rate": 7.797189894975874e-05, "loss": 0.00963817909359932, "step": 77640 }, { "epoch": 22.040874254896394, "grad_norm": 10.70436954498291, "learning_rate": 7.796906045983538e-05, "loss": 0.01076717972755432, "step": 77650 }, { "epoch": 22.043712744819757, "grad_norm": 0.6941260099411011, "learning_rate": 7.7966221969912e-05, "loss": 0.01910896450281143, "step": 77660 }, { "epoch": 22.046551234743117, "grad_norm": 13.742024421691895, "learning_rate": 7.796338347998865e-05, "loss": 0.010995467752218246, "step": 77670 }, { "epoch": 22.049389724666476, "grad_norm": 8.066434860229492, "learning_rate": 7.796054499006529e-05, "loss": 0.013952425122261048, "step": 77680 }, { "epoch": 22.05222821458984, "grad_norm": 2.0535006523132324, "learning_rate": 7.795770650014193e-05, "loss": 0.009541848301887512, "step": 77690 }, { "epoch": 22.055066704513198, "grad_norm": 1.1072839498519897, "learning_rate": 7.795486801021856e-05, "loss": 0.010959744453430176, "step": 77700 }, { "epoch": 22.05790519443656, "grad_norm": 0.14257793128490448, "learning_rate": 7.795202952029521e-05, "loss": 0.018217116594314575, "step": 77710 }, { "epoch": 22.06074368435992, "grad_norm": 1.2833832502365112, "learning_rate": 7.794919103037185e-05, "loss": 0.004118860885500908, "step": 77720 }, { "epoch": 22.06358217428328, "grad_norm": 0.1957813948392868, "learning_rate": 7.794635254044848e-05, "loss": 0.015275177359580994, "step": 77730 }, { "epoch": 22.066420664206642, "grad_norm": 1.174944519996643, "learning_rate": 7.794351405052512e-05, "loss": 0.006693316996097565, "step": 77740 }, { "epoch": 22.069259154130002, "grad_norm": 1.5817304849624634, "learning_rate": 7.794067556060177e-05, "loss": 0.02046239227056503, "step": 77750 }, { "epoch": 22.072097644053365, "grad_norm": 8.418137550354004, "learning_rate": 7.793783707067839e-05, "loss": 0.027798721194267274, "step": 77760 }, { "epoch": 22.074936133976724, "grad_norm": 5.5687336921691895, "learning_rate": 7.793499858075505e-05, "loss": 0.008561762422323227, "step": 77770 }, { "epoch": 22.077774623900083, "grad_norm": 6.29403018951416, "learning_rate": 7.793216009083169e-05, "loss": 0.025265347957611085, "step": 77780 }, { "epoch": 22.080613113823446, "grad_norm": 0.5373438000679016, "learning_rate": 7.792932160090832e-05, "loss": 0.01051487699151039, "step": 77790 }, { "epoch": 22.083451603746806, "grad_norm": 1.3243863582611084, "learning_rate": 7.792648311098496e-05, "loss": 0.006049828231334686, "step": 77800 }, { "epoch": 22.08629009367017, "grad_norm": 1.5712735652923584, "learning_rate": 7.79236446210616e-05, "loss": 0.013195675611495972, "step": 77810 }, { "epoch": 22.089128583593528, "grad_norm": 0.260774165391922, "learning_rate": 7.792080613113824e-05, "loss": 0.015640179812908172, "step": 77820 }, { "epoch": 22.09196707351689, "grad_norm": 1.150002360343933, "learning_rate": 7.791796764121487e-05, "loss": 0.012659856677055359, "step": 77830 }, { "epoch": 22.09480556344025, "grad_norm": 12.027323722839355, "learning_rate": 7.791512915129152e-05, "loss": 0.011055758595466614, "step": 77840 }, { "epoch": 22.09764405336361, "grad_norm": 0.07149851322174072, "learning_rate": 7.791229066136817e-05, "loss": 0.009861954301595689, "step": 77850 }, { "epoch": 22.100482543286972, "grad_norm": 1.3277777433395386, "learning_rate": 7.79094521714448e-05, "loss": 0.0067562595009803775, "step": 77860 }, { "epoch": 22.10332103321033, "grad_norm": 0.8369110226631165, "learning_rate": 7.790661368152143e-05, "loss": 0.012772098183631897, "step": 77870 }, { "epoch": 22.106159523133694, "grad_norm": 7.399005889892578, "learning_rate": 7.790377519159808e-05, "loss": 0.00803334340453148, "step": 77880 }, { "epoch": 22.108998013057054, "grad_norm": 10.434040069580078, "learning_rate": 7.79009367016747e-05, "loss": 0.010350019484758378, "step": 77890 }, { "epoch": 22.111836502980413, "grad_norm": 4.601600646972656, "learning_rate": 7.789809821175135e-05, "loss": 0.012400143593549729, "step": 77900 }, { "epoch": 22.114674992903776, "grad_norm": 4.953150272369385, "learning_rate": 7.7895259721828e-05, "loss": 0.017364446818828583, "step": 77910 }, { "epoch": 22.117513482827135, "grad_norm": 1.8699755668640137, "learning_rate": 7.789242123190463e-05, "loss": 0.01287461370229721, "step": 77920 }, { "epoch": 22.1203519727505, "grad_norm": 6.1669230461120605, "learning_rate": 7.788958274198127e-05, "loss": 0.018391843140125274, "step": 77930 }, { "epoch": 22.123190462673858, "grad_norm": 1.3146318197250366, "learning_rate": 7.788674425205791e-05, "loss": 0.013735891878604889, "step": 77940 }, { "epoch": 22.126028952597217, "grad_norm": 6.031404972076416, "learning_rate": 7.788390576213455e-05, "loss": 0.016026775538921356, "step": 77950 }, { "epoch": 22.12886744252058, "grad_norm": 12.811196327209473, "learning_rate": 7.788106727221118e-05, "loss": 0.018257009983062743, "step": 77960 }, { "epoch": 22.13170593244394, "grad_norm": 11.070796012878418, "learning_rate": 7.787822878228783e-05, "loss": 0.017827069759368895, "step": 77970 }, { "epoch": 22.134544422367302, "grad_norm": 4.018449306488037, "learning_rate": 7.787539029236448e-05, "loss": 0.013120408356189727, "step": 77980 }, { "epoch": 22.13738291229066, "grad_norm": 3.4805305004119873, "learning_rate": 7.78725518024411e-05, "loss": 0.009723368287086486, "step": 77990 }, { "epoch": 22.14022140221402, "grad_norm": 3.1845874786376953, "learning_rate": 7.786971331251775e-05, "loss": 0.021074341237545015, "step": 78000 }, { "epoch": 22.14022140221402, "eval_accuracy": 0.9673809372416863, "eval_loss": 0.10670884698629379, "eval_runtime": 31.4212, "eval_samples_per_second": 500.523, "eval_steps_per_second": 7.829, "step": 78000 }, { "epoch": 22.143059892137384, "grad_norm": 5.378195762634277, "learning_rate": 7.786687482259439e-05, "loss": 0.011688557267189027, "step": 78010 }, { "epoch": 22.145898382060743, "grad_norm": 0.1714269071817398, "learning_rate": 7.786403633267101e-05, "loss": 0.006844799965620041, "step": 78020 }, { "epoch": 22.148736871984106, "grad_norm": 0.8463479280471802, "learning_rate": 7.786119784274766e-05, "loss": 0.007517029345035553, "step": 78030 }, { "epoch": 22.151575361907465, "grad_norm": 3.1646523475646973, "learning_rate": 7.785835935282431e-05, "loss": 0.011973054707050323, "step": 78040 }, { "epoch": 22.154413851830824, "grad_norm": 2.057929754257202, "learning_rate": 7.785552086290094e-05, "loss": 0.00851019024848938, "step": 78050 }, { "epoch": 22.157252341754187, "grad_norm": 0.6269627213478088, "learning_rate": 7.785268237297758e-05, "loss": 0.013988684117794036, "step": 78060 }, { "epoch": 22.160090831677547, "grad_norm": 0.35166695713996887, "learning_rate": 7.784984388305422e-05, "loss": 0.020649422705173493, "step": 78070 }, { "epoch": 22.16292932160091, "grad_norm": 2.507573127746582, "learning_rate": 7.784700539313086e-05, "loss": 0.009508591145277023, "step": 78080 }, { "epoch": 22.16576781152427, "grad_norm": 0.4796569347381592, "learning_rate": 7.784416690320749e-05, "loss": 0.013066285848617553, "step": 78090 }, { "epoch": 22.16860630144763, "grad_norm": 0.13566182553768158, "learning_rate": 7.784132841328413e-05, "loss": 0.022416481375694276, "step": 78100 }, { "epoch": 22.17144479137099, "grad_norm": 10.376028060913086, "learning_rate": 7.783848992336077e-05, "loss": 0.009210991114377976, "step": 78110 }, { "epoch": 22.17428328129435, "grad_norm": 5.886260032653809, "learning_rate": 7.783565143343741e-05, "loss": 0.012082375586032867, "step": 78120 }, { "epoch": 22.177121771217713, "grad_norm": 2.1102240085601807, "learning_rate": 7.783281294351406e-05, "loss": 0.011650773137807846, "step": 78130 }, { "epoch": 22.179960261141073, "grad_norm": 1.5243607759475708, "learning_rate": 7.78299744535907e-05, "loss": 0.0038802415132522584, "step": 78140 }, { "epoch": 22.182798751064432, "grad_norm": 6.211618423461914, "learning_rate": 7.782713596366733e-05, "loss": 0.008468639105558395, "step": 78150 }, { "epoch": 22.185637240987795, "grad_norm": 4.821821212768555, "learning_rate": 7.782429747374397e-05, "loss": 0.007791467010974884, "step": 78160 }, { "epoch": 22.188475730911154, "grad_norm": 0.13932007551193237, "learning_rate": 7.782145898382062e-05, "loss": 0.00805671289563179, "step": 78170 }, { "epoch": 22.191314220834517, "grad_norm": 0.13139668107032776, "learning_rate": 7.781862049389725e-05, "loss": 0.021581993997097017, "step": 78180 }, { "epoch": 22.194152710757876, "grad_norm": 4.98887825012207, "learning_rate": 7.781578200397389e-05, "loss": 0.009293796122074127, "step": 78190 }, { "epoch": 22.19699120068124, "grad_norm": 3.6963984966278076, "learning_rate": 7.781294351405053e-05, "loss": 0.010300829261541366, "step": 78200 }, { "epoch": 22.1998296906046, "grad_norm": 4.2909722328186035, "learning_rate": 7.781010502412716e-05, "loss": 0.005210285633802414, "step": 78210 }, { "epoch": 22.202668180527958, "grad_norm": 0.45989683270454407, "learning_rate": 7.78072665342038e-05, "loss": 0.01618835926055908, "step": 78220 }, { "epoch": 22.20550667045132, "grad_norm": 7.520296573638916, "learning_rate": 7.780442804428044e-05, "loss": 0.01901959478855133, "step": 78230 }, { "epoch": 22.20834516037468, "grad_norm": 0.5820671916007996, "learning_rate": 7.780158955435708e-05, "loss": 0.003843599557876587, "step": 78240 }, { "epoch": 22.211183650298043, "grad_norm": 0.19811807572841644, "learning_rate": 7.779875106443373e-05, "loss": 0.005912791192531586, "step": 78250 }, { "epoch": 22.214022140221402, "grad_norm": 3.562718391418457, "learning_rate": 7.779591257451037e-05, "loss": 0.016668358445167543, "step": 78260 }, { "epoch": 22.21686063014476, "grad_norm": 10.126795768737793, "learning_rate": 7.779307408458701e-05, "loss": 0.01387522667646408, "step": 78270 }, { "epoch": 22.219699120068125, "grad_norm": 3.5295963287353516, "learning_rate": 7.779023559466364e-05, "loss": 0.00795711725950241, "step": 78280 }, { "epoch": 22.222537609991484, "grad_norm": 6.15455961227417, "learning_rate": 7.778739710474028e-05, "loss": 0.01316492110490799, "step": 78290 }, { "epoch": 22.225376099914847, "grad_norm": 11.097175598144531, "learning_rate": 7.778455861481692e-05, "loss": 0.013008816540241242, "step": 78300 }, { "epoch": 22.228214589838206, "grad_norm": 12.18593692779541, "learning_rate": 7.778172012489356e-05, "loss": 0.013775190711021424, "step": 78310 }, { "epoch": 22.231053079761566, "grad_norm": 8.522257804870605, "learning_rate": 7.77788816349702e-05, "loss": 0.010103340446949004, "step": 78320 }, { "epoch": 22.23389156968493, "grad_norm": 2.110915422439575, "learning_rate": 7.777604314504684e-05, "loss": 0.012848296761512756, "step": 78330 }, { "epoch": 22.236730059608288, "grad_norm": 1.4709959030151367, "learning_rate": 7.777320465512347e-05, "loss": 0.018046993017196655, "step": 78340 }, { "epoch": 22.23956854953165, "grad_norm": 9.303194046020508, "learning_rate": 7.777036616520011e-05, "loss": 0.020788265764713286, "step": 78350 }, { "epoch": 22.24240703945501, "grad_norm": 14.899702072143555, "learning_rate": 7.776752767527675e-05, "loss": 0.023821789026260375, "step": 78360 }, { "epoch": 22.24524552937837, "grad_norm": 4.227825164794922, "learning_rate": 7.77646891853534e-05, "loss": 0.009640121459960937, "step": 78370 }, { "epoch": 22.248084019301732, "grad_norm": 1.8510937690734863, "learning_rate": 7.776185069543004e-05, "loss": 0.010585528612136842, "step": 78380 }, { "epoch": 22.25092250922509, "grad_norm": 0.7175607681274414, "learning_rate": 7.775901220550668e-05, "loss": 0.008308710902929306, "step": 78390 }, { "epoch": 22.253760999148454, "grad_norm": 5.19725227355957, "learning_rate": 7.775617371558332e-05, "loss": 0.012205660343170166, "step": 78400 }, { "epoch": 22.256599489071814, "grad_norm": 0.04399823397397995, "learning_rate": 7.775333522565995e-05, "loss": 0.007741999626159668, "step": 78410 }, { "epoch": 22.259437978995173, "grad_norm": 2.996161460876465, "learning_rate": 7.775049673573659e-05, "loss": 0.024648337066173552, "step": 78420 }, { "epoch": 22.262276468918536, "grad_norm": 8.189443588256836, "learning_rate": 7.774765824581323e-05, "loss": 0.007954753190279006, "step": 78430 }, { "epoch": 22.265114958841895, "grad_norm": 0.22202037274837494, "learning_rate": 7.774481975588987e-05, "loss": 0.018301871418952943, "step": 78440 }, { "epoch": 22.267953448765258, "grad_norm": 1.2551482915878296, "learning_rate": 7.774198126596651e-05, "loss": 0.009818053245544434, "step": 78450 }, { "epoch": 22.270791938688618, "grad_norm": 0.8292374610900879, "learning_rate": 7.773914277604315e-05, "loss": 0.01662033349275589, "step": 78460 }, { "epoch": 22.273630428611977, "grad_norm": 0.10649089515209198, "learning_rate": 7.773630428611978e-05, "loss": 0.009980617463588715, "step": 78470 }, { "epoch": 22.27646891853534, "grad_norm": 5.147066593170166, "learning_rate": 7.773346579619642e-05, "loss": 0.006492535769939423, "step": 78480 }, { "epoch": 22.2793074084587, "grad_norm": 5.389790058135986, "learning_rate": 7.773062730627306e-05, "loss": 0.010049711167812347, "step": 78490 }, { "epoch": 22.282145898382062, "grad_norm": 2.226016044616699, "learning_rate": 7.77277888163497e-05, "loss": 0.005699679255485535, "step": 78500 }, { "epoch": 22.282145898382062, "eval_accuracy": 0.9643288611941248, "eval_loss": 0.11839230358600616, "eval_runtime": 31.8517, "eval_samples_per_second": 493.757, "eval_steps_per_second": 7.723, "step": 78500 }, { "epoch": 22.28498438830542, "grad_norm": 2.348543643951416, "learning_rate": 7.772495032642635e-05, "loss": 0.015471228957176208, "step": 78510 }, { "epoch": 22.28782287822878, "grad_norm": 3.2751684188842773, "learning_rate": 7.772211183650299e-05, "loss": 0.007494095712900162, "step": 78520 }, { "epoch": 22.290661368152143, "grad_norm": 5.831447601318359, "learning_rate": 7.771927334657963e-05, "loss": 0.03796030282974243, "step": 78530 }, { "epoch": 22.293499858075503, "grad_norm": 10.852279663085938, "learning_rate": 7.771643485665626e-05, "loss": 0.024220287799835205, "step": 78540 }, { "epoch": 22.296338347998866, "grad_norm": 3.8469982147216797, "learning_rate": 7.77135963667329e-05, "loss": 0.023365892469882965, "step": 78550 }, { "epoch": 22.299176837922225, "grad_norm": 2.7278900146484375, "learning_rate": 7.771075787680954e-05, "loss": 0.025482177734375, "step": 78560 }, { "epoch": 22.302015327845588, "grad_norm": 4.788492202758789, "learning_rate": 7.770791938688618e-05, "loss": 0.01673136353492737, "step": 78570 }, { "epoch": 22.304853817768947, "grad_norm": 0.524034321308136, "learning_rate": 7.770508089696282e-05, "loss": 0.013599051535129547, "step": 78580 }, { "epoch": 22.307692307692307, "grad_norm": 2.589271068572998, "learning_rate": 7.770224240703946e-05, "loss": 0.020609650015830993, "step": 78590 }, { "epoch": 22.31053079761567, "grad_norm": 0.9688955545425415, "learning_rate": 7.769940391711609e-05, "loss": 0.014199270308017731, "step": 78600 }, { "epoch": 22.31336928753903, "grad_norm": 7.938648223876953, "learning_rate": 7.769656542719273e-05, "loss": 0.009601452201604844, "step": 78610 }, { "epoch": 22.31620777746239, "grad_norm": 11.757878303527832, "learning_rate": 7.769372693726938e-05, "loss": 0.028433749079704286, "step": 78620 }, { "epoch": 22.31904626738575, "grad_norm": 15.40138053894043, "learning_rate": 7.769088844734602e-05, "loss": 0.03010331988334656, "step": 78630 }, { "epoch": 22.32188475730911, "grad_norm": 0.9149933457374573, "learning_rate": 7.768804995742266e-05, "loss": 0.014781475067138672, "step": 78640 }, { "epoch": 22.324723247232473, "grad_norm": 1.3284035921096802, "learning_rate": 7.76852114674993e-05, "loss": 0.013837674260139465, "step": 78650 }, { "epoch": 22.327561737155833, "grad_norm": 5.499685764312744, "learning_rate": 7.768237297757594e-05, "loss": 0.010042565315961838, "step": 78660 }, { "epoch": 22.330400227079195, "grad_norm": 4.1156744956970215, "learning_rate": 7.767953448765257e-05, "loss": 0.005651995167136192, "step": 78670 }, { "epoch": 22.333238717002555, "grad_norm": 9.972570419311523, "learning_rate": 7.767669599772921e-05, "loss": 0.010193221271038055, "step": 78680 }, { "epoch": 22.336077206925914, "grad_norm": 12.723579406738281, "learning_rate": 7.767385750780585e-05, "loss": 0.016563312709331514, "step": 78690 }, { "epoch": 22.338915696849277, "grad_norm": 8.280366897583008, "learning_rate": 7.767101901788248e-05, "loss": 0.011286767572164536, "step": 78700 }, { "epoch": 22.341754186772636, "grad_norm": 0.6984289288520813, "learning_rate": 7.766818052795913e-05, "loss": 0.007436839491128921, "step": 78710 }, { "epoch": 22.344592676696, "grad_norm": 3.3087446689605713, "learning_rate": 7.766534203803578e-05, "loss": 0.006595052033662796, "step": 78720 }, { "epoch": 22.34743116661936, "grad_norm": 10.459646224975586, "learning_rate": 7.76625035481124e-05, "loss": 0.028175845742225647, "step": 78730 }, { "epoch": 22.350269656542718, "grad_norm": 9.923874855041504, "learning_rate": 7.765966505818904e-05, "loss": 0.008939434587955476, "step": 78740 }, { "epoch": 22.35310814646608, "grad_norm": 1.7071095705032349, "learning_rate": 7.765682656826569e-05, "loss": 0.009886356443166733, "step": 78750 }, { "epoch": 22.35594663638944, "grad_norm": 0.7185766100883484, "learning_rate": 7.765398807834233e-05, "loss": 0.005316883325576782, "step": 78760 }, { "epoch": 22.358785126312803, "grad_norm": 12.837071418762207, "learning_rate": 7.765114958841897e-05, "loss": 0.022282302379608154, "step": 78770 }, { "epoch": 22.361623616236162, "grad_norm": 2.1667263507843018, "learning_rate": 7.764831109849561e-05, "loss": 0.011915234476327896, "step": 78780 }, { "epoch": 22.36446210615952, "grad_norm": 22.122661590576172, "learning_rate": 7.764547260857225e-05, "loss": 0.031076237559318542, "step": 78790 }, { "epoch": 22.367300596082885, "grad_norm": 0.27172213792800903, "learning_rate": 7.764291796764122e-05, "loss": 0.016910012066364288, "step": 78800 }, { "epoch": 22.370139086006244, "grad_norm": 0.5888198614120483, "learning_rate": 7.764007947771786e-05, "loss": 0.013816308975219727, "step": 78810 }, { "epoch": 22.372977575929607, "grad_norm": 10.406205177307129, "learning_rate": 7.76372409877945e-05, "loss": 0.023664560914039613, "step": 78820 }, { "epoch": 22.375816065852966, "grad_norm": 2.306295394897461, "learning_rate": 7.763440249787114e-05, "loss": 0.01593463122844696, "step": 78830 }, { "epoch": 22.378654555776325, "grad_norm": 9.898871421813965, "learning_rate": 7.763156400794778e-05, "loss": 0.012129470705986023, "step": 78840 }, { "epoch": 22.38149304569969, "grad_norm": 3.3095803260803223, "learning_rate": 7.762872551802441e-05, "loss": 0.007985438406467437, "step": 78850 }, { "epoch": 22.384331535623048, "grad_norm": 3.818220615386963, "learning_rate": 7.762588702810105e-05, "loss": 0.016553662717342377, "step": 78860 }, { "epoch": 22.38717002554641, "grad_norm": 0.08684609085321426, "learning_rate": 7.76230485381777e-05, "loss": 0.005004122480750084, "step": 78870 }, { "epoch": 22.39000851546977, "grad_norm": 8.991494178771973, "learning_rate": 7.762021004825432e-05, "loss": 0.012231387197971344, "step": 78880 }, { "epoch": 22.39284700539313, "grad_norm": 2.045503854751587, "learning_rate": 7.761737155833098e-05, "loss": 0.013507908582687378, "step": 78890 }, { "epoch": 22.395685495316492, "grad_norm": 5.527192115783691, "learning_rate": 7.761453306840762e-05, "loss": 0.027555057406425477, "step": 78900 }, { "epoch": 22.39852398523985, "grad_norm": 2.264983892440796, "learning_rate": 7.761169457848425e-05, "loss": 0.01494077742099762, "step": 78910 }, { "epoch": 22.401362475163214, "grad_norm": 10.577933311462402, "learning_rate": 7.760885608856089e-05, "loss": 0.016612935066223144, "step": 78920 }, { "epoch": 22.404200965086574, "grad_norm": 11.011502265930176, "learning_rate": 7.760601759863753e-05, "loss": 0.011415421217679977, "step": 78930 }, { "epoch": 22.407039455009933, "grad_norm": 1.5920131206512451, "learning_rate": 7.760317910871417e-05, "loss": 0.013965202867984772, "step": 78940 }, { "epoch": 22.409877944933296, "grad_norm": 2.83778977394104, "learning_rate": 7.760034061879081e-05, "loss": 0.011010801792144776, "step": 78950 }, { "epoch": 22.412716434856655, "grad_norm": 1.3154271841049194, "learning_rate": 7.759750212886745e-05, "loss": 0.010433544963598251, "step": 78960 }, { "epoch": 22.415554924780018, "grad_norm": 1.393776774406433, "learning_rate": 7.759466363894408e-05, "loss": 0.007394035905599594, "step": 78970 }, { "epoch": 22.418393414703377, "grad_norm": 2.478628158569336, "learning_rate": 7.759182514902072e-05, "loss": 0.012498383969068527, "step": 78980 }, { "epoch": 22.42123190462674, "grad_norm": 0.6744087338447571, "learning_rate": 7.758898665909736e-05, "loss": 0.007888572663068772, "step": 78990 }, { "epoch": 22.4240703945501, "grad_norm": 1.3194026947021484, "learning_rate": 7.7586148169174e-05, "loss": 0.020094528794288635, "step": 79000 }, { "epoch": 22.4240703945501, "eval_accuracy": 0.9644560310294398, "eval_loss": 0.11648497730493546, "eval_runtime": 32.4787, "eval_samples_per_second": 484.225, "eval_steps_per_second": 7.574, "step": 79000 }, { "epoch": 22.42690888447346, "grad_norm": 0.1950353980064392, "learning_rate": 7.758330967925063e-05, "loss": 0.005496396869421005, "step": 79010 }, { "epoch": 22.429747374396822, "grad_norm": 8.227417945861816, "learning_rate": 7.758047118932729e-05, "loss": 0.011399309337139129, "step": 79020 }, { "epoch": 22.43258586432018, "grad_norm": 1.3837053775787354, "learning_rate": 7.757763269940393e-05, "loss": 0.00926767811179161, "step": 79030 }, { "epoch": 22.435424354243544, "grad_norm": 5.21128511428833, "learning_rate": 7.757479420948056e-05, "loss": 0.021765249967575073, "step": 79040 }, { "epoch": 22.438262844166903, "grad_norm": 0.8473485112190247, "learning_rate": 7.75719557195572e-05, "loss": 0.024957311153411866, "step": 79050 }, { "epoch": 22.441101334090263, "grad_norm": 1.3490263223648071, "learning_rate": 7.756911722963384e-05, "loss": 0.009740283340215683, "step": 79060 }, { "epoch": 22.443939824013626, "grad_norm": 1.744531512260437, "learning_rate": 7.756627873971047e-05, "loss": 0.006131973490118981, "step": 79070 }, { "epoch": 22.446778313936985, "grad_norm": 0.1797332912683487, "learning_rate": 7.756344024978711e-05, "loss": 0.010336843132972718, "step": 79080 }, { "epoch": 22.449616803860348, "grad_norm": 1.2307103872299194, "learning_rate": 7.756060175986376e-05, "loss": 0.005235159397125244, "step": 79090 }, { "epoch": 22.452455293783707, "grad_norm": 2.4683594703674316, "learning_rate": 7.755776326994039e-05, "loss": 0.008046117424964905, "step": 79100 }, { "epoch": 22.455293783707067, "grad_norm": 1.6268317699432373, "learning_rate": 7.755492478001703e-05, "loss": 0.012852539122104645, "step": 79110 }, { "epoch": 22.45813227363043, "grad_norm": 4.865121841430664, "learning_rate": 7.755208629009367e-05, "loss": 0.011758530884981156, "step": 79120 }, { "epoch": 22.46097076355379, "grad_norm": 0.6189286112785339, "learning_rate": 7.754924780017032e-05, "loss": 0.01459445059299469, "step": 79130 }, { "epoch": 22.46380925347715, "grad_norm": 0.846178412437439, "learning_rate": 7.754640931024694e-05, "loss": 0.019893331825733183, "step": 79140 }, { "epoch": 22.46664774340051, "grad_norm": 6.491055011749268, "learning_rate": 7.75435708203236e-05, "loss": 0.007758082449436187, "step": 79150 }, { "epoch": 22.46948623332387, "grad_norm": 1.9458593130111694, "learning_rate": 7.754073233040024e-05, "loss": 0.01711057424545288, "step": 79160 }, { "epoch": 22.472324723247233, "grad_norm": 4.260765552520752, "learning_rate": 7.753789384047687e-05, "loss": 0.012027893960475922, "step": 79170 }, { "epoch": 22.475163213170593, "grad_norm": 2.313204526901245, "learning_rate": 7.753505535055351e-05, "loss": 0.0062425538897514345, "step": 79180 }, { "epoch": 22.478001703093955, "grad_norm": 0.30379486083984375, "learning_rate": 7.753221686063015e-05, "loss": 0.012911394238471985, "step": 79190 }, { "epoch": 22.480840193017315, "grad_norm": 3.386138439178467, "learning_rate": 7.752937837070678e-05, "loss": 0.007911705225706101, "step": 79200 }, { "epoch": 22.483678682940674, "grad_norm": 4.456545829772949, "learning_rate": 7.752653988078342e-05, "loss": 0.012407016754150391, "step": 79210 }, { "epoch": 22.486517172864037, "grad_norm": 0.7037520408630371, "learning_rate": 7.752370139086007e-05, "loss": 0.014980792999267578, "step": 79220 }, { "epoch": 22.489355662787396, "grad_norm": 3.615569829940796, "learning_rate": 7.75208629009367e-05, "loss": 0.013668224215507507, "step": 79230 }, { "epoch": 22.49219415271076, "grad_norm": 20.066904067993164, "learning_rate": 7.751802441101334e-05, "loss": 0.020690937340259553, "step": 79240 }, { "epoch": 22.49503264263412, "grad_norm": 5.913091659545898, "learning_rate": 7.751518592108998e-05, "loss": 0.012933464348316192, "step": 79250 }, { "epoch": 22.497871132557478, "grad_norm": 2.0230982303619385, "learning_rate": 7.751234743116663e-05, "loss": 0.013022205233573914, "step": 79260 }, { "epoch": 22.50070962248084, "grad_norm": 4.580300807952881, "learning_rate": 7.750950894124325e-05, "loss": 0.01799226552248001, "step": 79270 }, { "epoch": 22.5035481124042, "grad_norm": 2.7023584842681885, "learning_rate": 7.75066704513199e-05, "loss": 0.017762008309364318, "step": 79280 }, { "epoch": 22.506386602327563, "grad_norm": 2.3168201446533203, "learning_rate": 7.750383196139655e-05, "loss": 0.010609734058380126, "step": 79290 }, { "epoch": 22.509225092250922, "grad_norm": 0.5185960531234741, "learning_rate": 7.750099347147318e-05, "loss": 0.01253051608800888, "step": 79300 }, { "epoch": 22.51206358217428, "grad_norm": 0.44416698813438416, "learning_rate": 7.749815498154982e-05, "loss": 0.01206279695034027, "step": 79310 }, { "epoch": 22.514902072097644, "grad_norm": 8.609683990478516, "learning_rate": 7.749531649162646e-05, "loss": 0.009859508275985718, "step": 79320 }, { "epoch": 22.517740562021004, "grad_norm": 3.055227041244507, "learning_rate": 7.749247800170309e-05, "loss": 0.010765496641397476, "step": 79330 }, { "epoch": 22.520579051944367, "grad_norm": 11.13996696472168, "learning_rate": 7.748963951177973e-05, "loss": 0.01065712422132492, "step": 79340 }, { "epoch": 22.523417541867726, "grad_norm": 10.12613296508789, "learning_rate": 7.748680102185639e-05, "loss": 0.011258310079574585, "step": 79350 }, { "epoch": 22.526256031791085, "grad_norm": 0.39234694838523865, "learning_rate": 7.748396253193301e-05, "loss": 0.026453298330307008, "step": 79360 }, { "epoch": 22.52909452171445, "grad_norm": 4.60313606262207, "learning_rate": 7.748112404200965e-05, "loss": 0.008643917739391327, "step": 79370 }, { "epoch": 22.531933011637808, "grad_norm": 1.3526878356933594, "learning_rate": 7.74782855520863e-05, "loss": 0.012584066390991211, "step": 79380 }, { "epoch": 22.53477150156117, "grad_norm": 6.417731761932373, "learning_rate": 7.747544706216294e-05, "loss": 0.03642912209033966, "step": 79390 }, { "epoch": 22.53760999148453, "grad_norm": 7.170996189117432, "learning_rate": 7.747260857223956e-05, "loss": 0.007970795780420304, "step": 79400 }, { "epoch": 22.540448481407893, "grad_norm": 1.17007577419281, "learning_rate": 7.74697700823162e-05, "loss": 0.012048873305320739, "step": 79410 }, { "epoch": 22.543286971331252, "grad_norm": 4.373727798461914, "learning_rate": 7.746693159239286e-05, "loss": 0.010239073634147644, "step": 79420 }, { "epoch": 22.54612546125461, "grad_norm": 3.7238335609436035, "learning_rate": 7.746409310246949e-05, "loss": 0.010963058471679688, "step": 79430 }, { "epoch": 22.548963951177974, "grad_norm": 1.5594240427017212, "learning_rate": 7.746125461254613e-05, "loss": 0.013627929985523224, "step": 79440 }, { "epoch": 22.551802441101334, "grad_norm": 0.36623379588127136, "learning_rate": 7.745841612262277e-05, "loss": 0.01595919281244278, "step": 79450 }, { "epoch": 22.554640931024696, "grad_norm": 9.257342338562012, "learning_rate": 7.74555776326994e-05, "loss": 0.010426081717014313, "step": 79460 }, { "epoch": 22.557479420948056, "grad_norm": 2.940206289291382, "learning_rate": 7.745273914277604e-05, "loss": 0.011334303766489029, "step": 79470 }, { "epoch": 22.560317910871415, "grad_norm": 0.15435819327831268, "learning_rate": 7.74499006528527e-05, "loss": 0.02565968930721283, "step": 79480 }, { "epoch": 22.563156400794778, "grad_norm": 1.4230142831802368, "learning_rate": 7.744706216292932e-05, "loss": 0.013086535036563873, "step": 79490 }, { "epoch": 22.565994890718137, "grad_norm": 0.9359275698661804, "learning_rate": 7.744422367300597e-05, "loss": 0.04905348718166351, "step": 79500 }, { "epoch": 22.565994890718137, "eval_accuracy": 0.9649647103707001, "eval_loss": 0.11553753167390823, "eval_runtime": 31.8675, "eval_samples_per_second": 493.512, "eval_steps_per_second": 7.719, "step": 79500 }, { "epoch": 22.5688333806415, "grad_norm": 6.085573196411133, "learning_rate": 7.74413851830826e-05, "loss": 0.026369887590408325, "step": 79510 }, { "epoch": 22.57167187056486, "grad_norm": 5.694607257843018, "learning_rate": 7.743854669315925e-05, "loss": 0.0267105758190155, "step": 79520 }, { "epoch": 22.57451036048822, "grad_norm": 12.553425788879395, "learning_rate": 7.743570820323588e-05, "loss": 0.021405410766601563, "step": 79530 }, { "epoch": 22.577348850411582, "grad_norm": 1.0821653604507446, "learning_rate": 7.743286971331252e-05, "loss": 0.009584519267082214, "step": 79540 }, { "epoch": 22.58018734033494, "grad_norm": 0.2032950222492218, "learning_rate": 7.743003122338917e-05, "loss": 0.012168647348880767, "step": 79550 }, { "epoch": 22.583025830258304, "grad_norm": 2.5928611755371094, "learning_rate": 7.74271927334658e-05, "loss": 0.01846584677696228, "step": 79560 }, { "epoch": 22.585864320181663, "grad_norm": 1.6427046060562134, "learning_rate": 7.742435424354244e-05, "loss": 0.0032516315579414366, "step": 79570 }, { "epoch": 22.588702810105023, "grad_norm": 1.98064124584198, "learning_rate": 7.742151575361908e-05, "loss": 0.0148907870054245, "step": 79580 }, { "epoch": 22.591541300028386, "grad_norm": 0.0847540870308876, "learning_rate": 7.741867726369571e-05, "loss": 0.008065237104892731, "step": 79590 }, { "epoch": 22.594379789951745, "grad_norm": 2.2634873390197754, "learning_rate": 7.741583877377235e-05, "loss": 0.020466260612010956, "step": 79600 }, { "epoch": 22.597218279875108, "grad_norm": 7.790076732635498, "learning_rate": 7.741300028384899e-05, "loss": 0.011237717419862746, "step": 79610 }, { "epoch": 22.600056769798467, "grad_norm": 6.0409255027771, "learning_rate": 7.741016179392563e-05, "loss": 0.012408192455768585, "step": 79620 }, { "epoch": 22.602895259721826, "grad_norm": 3.8379149436950684, "learning_rate": 7.740732330400228e-05, "loss": 0.010703689604997634, "step": 79630 }, { "epoch": 22.60573374964519, "grad_norm": 0.6396515369415283, "learning_rate": 7.740448481407892e-05, "loss": 0.012553629279136658, "step": 79640 }, { "epoch": 22.60857223956855, "grad_norm": 2.2229232788085938, "learning_rate": 7.740164632415556e-05, "loss": 0.015364086627960205, "step": 79650 }, { "epoch": 22.61141072949191, "grad_norm": 8.918983459472656, "learning_rate": 7.739880783423219e-05, "loss": 0.01649310886859894, "step": 79660 }, { "epoch": 22.61424921941527, "grad_norm": 0.3912106454372406, "learning_rate": 7.739596934430883e-05, "loss": 0.005555889755487442, "step": 79670 }, { "epoch": 22.61708770933863, "grad_norm": 1.362387776374817, "learning_rate": 7.739313085438548e-05, "loss": 0.005939261242747307, "step": 79680 }, { "epoch": 22.619926199261993, "grad_norm": 1.3112120628356934, "learning_rate": 7.739029236446211e-05, "loss": 0.009702064096927643, "step": 79690 }, { "epoch": 22.622764689185352, "grad_norm": 2.9736695289611816, "learning_rate": 7.738745387453875e-05, "loss": 0.021935120224952698, "step": 79700 }, { "epoch": 22.625603179108715, "grad_norm": 8.930765151977539, "learning_rate": 7.73846153846154e-05, "loss": 0.01166784092783928, "step": 79710 }, { "epoch": 22.628441669032075, "grad_norm": 0.084261953830719, "learning_rate": 7.738177689469202e-05, "loss": 0.020249782502651213, "step": 79720 }, { "epoch": 22.631280158955434, "grad_norm": 7.59883975982666, "learning_rate": 7.737893840476866e-05, "loss": 0.011867715418338776, "step": 79730 }, { "epoch": 22.634118648878797, "grad_norm": 9.584271430969238, "learning_rate": 7.73760999148453e-05, "loss": 0.01911238580942154, "step": 79740 }, { "epoch": 22.636957138802156, "grad_norm": 2.8228602409362793, "learning_rate": 7.737326142492195e-05, "loss": 0.015226860344409943, "step": 79750 }, { "epoch": 22.63979562872552, "grad_norm": 1.3170843124389648, "learning_rate": 7.737042293499859e-05, "loss": 0.0049213763326406475, "step": 79760 }, { "epoch": 22.64263411864888, "grad_norm": 0.6893202066421509, "learning_rate": 7.736758444507523e-05, "loss": 0.005104249715805054, "step": 79770 }, { "epoch": 22.64547260857224, "grad_norm": 4.823670387268066, "learning_rate": 7.736474595515187e-05, "loss": 0.004693111777305603, "step": 79780 }, { "epoch": 22.6483110984956, "grad_norm": 11.441197395324707, "learning_rate": 7.73619074652285e-05, "loss": 0.012336860597133636, "step": 79790 }, { "epoch": 22.65114958841896, "grad_norm": 1.8496366739273071, "learning_rate": 7.735906897530514e-05, "loss": 0.004601847380399704, "step": 79800 }, { "epoch": 22.653988078342323, "grad_norm": 6.232671737670898, "learning_rate": 7.735623048538178e-05, "loss": 0.013932222127914428, "step": 79810 }, { "epoch": 22.656826568265682, "grad_norm": 0.7702204585075378, "learning_rate": 7.735339199545842e-05, "loss": 0.0047489553689956665, "step": 79820 }, { "epoch": 22.659665058189045, "grad_norm": 1.2361254692077637, "learning_rate": 7.735055350553506e-05, "loss": 0.007621980458498001, "step": 79830 }, { "epoch": 22.662503548112404, "grad_norm": 2.8007898330688477, "learning_rate": 7.73477150156117e-05, "loss": 0.023316282033920287, "step": 79840 }, { "epoch": 22.665342038035764, "grad_norm": 6.725745677947998, "learning_rate": 7.734487652568833e-05, "loss": 0.024811407923698424, "step": 79850 }, { "epoch": 22.668180527959127, "grad_norm": 0.1743684709072113, "learning_rate": 7.734203803576497e-05, "loss": 0.009672833979129792, "step": 79860 }, { "epoch": 22.671019017882486, "grad_norm": 3.275254011154175, "learning_rate": 7.733919954584161e-05, "loss": 0.02893396019935608, "step": 79870 }, { "epoch": 22.67385750780585, "grad_norm": 9.358637809753418, "learning_rate": 7.733636105591826e-05, "loss": 0.02172216922044754, "step": 79880 }, { "epoch": 22.676695997729208, "grad_norm": 7.674645900726318, "learning_rate": 7.73335225659949e-05, "loss": 0.008159933984279633, "step": 79890 }, { "epoch": 22.679534487652568, "grad_norm": 10.914156913757324, "learning_rate": 7.733068407607154e-05, "loss": 0.021128436923027037, "step": 79900 }, { "epoch": 22.68237297757593, "grad_norm": 0.7474072575569153, "learning_rate": 7.732784558614817e-05, "loss": 0.018513143062591553, "step": 79910 }, { "epoch": 22.68521146749929, "grad_norm": 13.689690589904785, "learning_rate": 7.732500709622481e-05, "loss": 0.01652028560638428, "step": 79920 }, { "epoch": 22.688049957422653, "grad_norm": 4.518795490264893, "learning_rate": 7.732216860630145e-05, "loss": 0.009361089020967484, "step": 79930 }, { "epoch": 22.690888447346012, "grad_norm": 16.592924118041992, "learning_rate": 7.731933011637809e-05, "loss": 0.014007166028022766, "step": 79940 }, { "epoch": 22.69372693726937, "grad_norm": 1.0795841217041016, "learning_rate": 7.731649162645473e-05, "loss": 0.00787409469485283, "step": 79950 }, { "epoch": 22.696565427192734, "grad_norm": 1.2775568962097168, "learning_rate": 7.731365313653137e-05, "loss": 0.007690049707889557, "step": 79960 }, { "epoch": 22.699403917116094, "grad_norm": 2.7688798904418945, "learning_rate": 7.731081464660801e-05, "loss": 0.03022007644176483, "step": 79970 }, { "epoch": 22.702242407039456, "grad_norm": 0.8021807074546814, "learning_rate": 7.730797615668464e-05, "loss": 0.016617977619171144, "step": 79980 }, { "epoch": 22.705080896962816, "grad_norm": 7.414029121398926, "learning_rate": 7.730513766676128e-05, "loss": 0.011338051408529282, "step": 79990 }, { "epoch": 22.707919386886175, "grad_norm": 9.380739212036133, "learning_rate": 7.730229917683793e-05, "loss": 0.009740841388702393, "step": 80000 }, { "epoch": 22.707919386886175, "eval_accuracy": 0.9673173523240287, "eval_loss": 0.10911644995212555, "eval_runtime": 31.7886, "eval_samples_per_second": 494.737, "eval_steps_per_second": 7.739, "step": 80000 }, { "epoch": 22.710757876809538, "grad_norm": 3.3060810565948486, "learning_rate": 7.729946068691455e-05, "loss": 0.02486606240272522, "step": 80010 }, { "epoch": 22.713596366732897, "grad_norm": 3.00077486038208, "learning_rate": 7.729662219699121e-05, "loss": 0.011626887321472167, "step": 80020 }, { "epoch": 22.71643485665626, "grad_norm": 9.370033264160156, "learning_rate": 7.729378370706785e-05, "loss": 0.018975050747394563, "step": 80030 }, { "epoch": 22.71927334657962, "grad_norm": 1.828291893005371, "learning_rate": 7.729094521714448e-05, "loss": 0.012643803656101228, "step": 80040 }, { "epoch": 22.72211183650298, "grad_norm": 2.541023015975952, "learning_rate": 7.728810672722112e-05, "loss": 0.010191821306943894, "step": 80050 }, { "epoch": 22.72495032642634, "grad_norm": 1.8205264806747437, "learning_rate": 7.728526823729776e-05, "loss": 0.012982244789600372, "step": 80060 }, { "epoch": 22.7277888163497, "grad_norm": 3.614253520965576, "learning_rate": 7.72824297473744e-05, "loss": 0.024577914178371428, "step": 80070 }, { "epoch": 22.730627306273064, "grad_norm": 8.686661720275879, "learning_rate": 7.727959125745104e-05, "loss": 0.010043241083621979, "step": 80080 }, { "epoch": 22.733465796196423, "grad_norm": 0.15997886657714844, "learning_rate": 7.727675276752768e-05, "loss": 0.008157923817634583, "step": 80090 }, { "epoch": 22.736304286119783, "grad_norm": 0.9059934616088867, "learning_rate": 7.727391427760433e-05, "loss": 0.018460023403167724, "step": 80100 }, { "epoch": 22.739142776043145, "grad_norm": 4.328021049499512, "learning_rate": 7.727107578768095e-05, "loss": 0.0299703449010849, "step": 80110 }, { "epoch": 22.741981265966505, "grad_norm": 14.030584335327148, "learning_rate": 7.72682372977576e-05, "loss": 0.01339331567287445, "step": 80120 }, { "epoch": 22.744819755889868, "grad_norm": 1.5305132865905762, "learning_rate": 7.726539880783424e-05, "loss": 0.008145565539598465, "step": 80130 }, { "epoch": 22.747658245813227, "grad_norm": 1.293609619140625, "learning_rate": 7.726256031791086e-05, "loss": 0.012378089874982835, "step": 80140 }, { "epoch": 22.75049673573659, "grad_norm": 2.0629069805145264, "learning_rate": 7.725972182798752e-05, "loss": 0.008963125944137573, "step": 80150 }, { "epoch": 22.75333522565995, "grad_norm": 2.436915397644043, "learning_rate": 7.725688333806416e-05, "loss": 0.01439805030822754, "step": 80160 }, { "epoch": 22.75617371558331, "grad_norm": 7.195567607879639, "learning_rate": 7.725404484814079e-05, "loss": 0.011921728402376175, "step": 80170 }, { "epoch": 22.75901220550667, "grad_norm": 10.314437866210938, "learning_rate": 7.725120635821743e-05, "loss": 0.014929772913455963, "step": 80180 }, { "epoch": 22.76185069543003, "grad_norm": 8.415282249450684, "learning_rate": 7.724836786829407e-05, "loss": 0.012924760580062866, "step": 80190 }, { "epoch": 22.764689185353394, "grad_norm": 1.7107425928115845, "learning_rate": 7.724552937837071e-05, "loss": 0.021162857115268708, "step": 80200 }, { "epoch": 22.767527675276753, "grad_norm": 0.7087059020996094, "learning_rate": 7.724269088844734e-05, "loss": 0.024776445329189302, "step": 80210 }, { "epoch": 22.770366165200112, "grad_norm": 1.5928897857666016, "learning_rate": 7.7239852398524e-05, "loss": 0.011905179917812347, "step": 80220 }, { "epoch": 22.773204655123475, "grad_norm": 4.587005138397217, "learning_rate": 7.723701390860064e-05, "loss": 0.010252158343791961, "step": 80230 }, { "epoch": 22.776043145046835, "grad_norm": 1.1359758377075195, "learning_rate": 7.723417541867726e-05, "loss": 0.008689039200544358, "step": 80240 }, { "epoch": 22.778881634970197, "grad_norm": 1.0572501420974731, "learning_rate": 7.72313369287539e-05, "loss": 0.01852968633174896, "step": 80250 }, { "epoch": 22.781720124893557, "grad_norm": 9.264588356018066, "learning_rate": 7.722849843883055e-05, "loss": 0.010110127925872802, "step": 80260 }, { "epoch": 22.784558614816916, "grad_norm": 0.22735106945037842, "learning_rate": 7.722565994890717e-05, "loss": 0.007475157082080841, "step": 80270 }, { "epoch": 22.78739710474028, "grad_norm": 11.317435264587402, "learning_rate": 7.722282145898383e-05, "loss": 0.012948741018772126, "step": 80280 }, { "epoch": 22.79023559466364, "grad_norm": 4.13758659362793, "learning_rate": 7.721998296906047e-05, "loss": 0.012177083641290665, "step": 80290 }, { "epoch": 22.793074084587, "grad_norm": 4.182050704956055, "learning_rate": 7.72171444791371e-05, "loss": 0.013347941637039184, "step": 80300 }, { "epoch": 22.79591257451036, "grad_norm": 0.21450981497764587, "learning_rate": 7.721430598921374e-05, "loss": 0.004410708323121071, "step": 80310 }, { "epoch": 22.79875106443372, "grad_norm": 12.97705078125, "learning_rate": 7.721146749929038e-05, "loss": 0.01112605631351471, "step": 80320 }, { "epoch": 22.801589554357083, "grad_norm": 13.333818435668945, "learning_rate": 7.720862900936702e-05, "loss": 0.01949972063302994, "step": 80330 }, { "epoch": 22.804428044280442, "grad_norm": 1.2529797554016113, "learning_rate": 7.720579051944365e-05, "loss": 0.009480949491262436, "step": 80340 }, { "epoch": 22.807266534203805, "grad_norm": 2.1950793266296387, "learning_rate": 7.72029520295203e-05, "loss": 0.008737755566835403, "step": 80350 }, { "epoch": 22.810105024127164, "grad_norm": 10.06026554107666, "learning_rate": 7.720011353959695e-05, "loss": 0.01917884349822998, "step": 80360 }, { "epoch": 22.812943514050524, "grad_norm": 0.45100489258766174, "learning_rate": 7.719727504967357e-05, "loss": 0.014648696780204773, "step": 80370 }, { "epoch": 22.815782003973887, "grad_norm": 10.042503356933594, "learning_rate": 7.719443655975022e-05, "loss": 0.009955741465091705, "step": 80380 }, { "epoch": 22.818620493897246, "grad_norm": 3.5598833560943604, "learning_rate": 7.719159806982686e-05, "loss": 0.027786251902580262, "step": 80390 }, { "epoch": 22.82145898382061, "grad_norm": 1.6881523132324219, "learning_rate": 7.718875957990349e-05, "loss": 0.018742060661315917, "step": 80400 }, { "epoch": 22.824297473743968, "grad_norm": 0.460720032453537, "learning_rate": 7.718592108998013e-05, "loss": 0.011539719253778457, "step": 80410 }, { "epoch": 22.827135963667327, "grad_norm": 0.5719653367996216, "learning_rate": 7.718308260005678e-05, "loss": 0.006723494082689285, "step": 80420 }, { "epoch": 22.82997445359069, "grad_norm": 10.23875617980957, "learning_rate": 7.718024411013341e-05, "loss": 0.021205466985702515, "step": 80430 }, { "epoch": 22.83281294351405, "grad_norm": 0.2245953530073166, "learning_rate": 7.717740562021005e-05, "loss": 0.010038424283266068, "step": 80440 }, { "epoch": 22.835651433437413, "grad_norm": 15.705665588378906, "learning_rate": 7.717456713028669e-05, "loss": 0.014079716801643372, "step": 80450 }, { "epoch": 22.838489923360772, "grad_norm": 0.5934808850288391, "learning_rate": 7.717172864036333e-05, "loss": 0.010980446636676789, "step": 80460 }, { "epoch": 22.84132841328413, "grad_norm": 4.694215774536133, "learning_rate": 7.716889015043996e-05, "loss": 0.004700841754674912, "step": 80470 }, { "epoch": 22.844166903207494, "grad_norm": 4.17569637298584, "learning_rate": 7.716605166051662e-05, "loss": 0.007723515480756759, "step": 80480 }, { "epoch": 22.847005393130853, "grad_norm": 0.8966252207756042, "learning_rate": 7.716321317059326e-05, "loss": 0.019952893257141113, "step": 80490 }, { "epoch": 22.849843883054216, "grad_norm": 0.3975766599178314, "learning_rate": 7.716037468066989e-05, "loss": 0.027705812454223634, "step": 80500 }, { "epoch": 22.849843883054216, "eval_accuracy": 0.9701786736186176, "eval_loss": 0.09653698652982712, "eval_runtime": 31.3457, "eval_samples_per_second": 501.727, "eval_steps_per_second": 7.848, "step": 80500 }, { "epoch": 22.852682372977576, "grad_norm": 0.948364794254303, "learning_rate": 7.715753619074653e-05, "loss": 0.023232600092887877, "step": 80510 }, { "epoch": 22.855520862900935, "grad_norm": 1.0897440910339355, "learning_rate": 7.715469770082317e-05, "loss": 0.018813584744930268, "step": 80520 }, { "epoch": 22.858359352824298, "grad_norm": 2.28645396232605, "learning_rate": 7.71518592108998e-05, "loss": 0.010080874711275101, "step": 80530 }, { "epoch": 22.861197842747657, "grad_norm": 4.140142440795898, "learning_rate": 7.714902072097644e-05, "loss": 0.009485715627670288, "step": 80540 }, { "epoch": 22.86403633267102, "grad_norm": 1.3476910591125488, "learning_rate": 7.714618223105309e-05, "loss": 0.013161341845989227, "step": 80550 }, { "epoch": 22.86687482259438, "grad_norm": 11.631701469421387, "learning_rate": 7.714334374112972e-05, "loss": 0.01815873831510544, "step": 80560 }, { "epoch": 22.86971331251774, "grad_norm": 6.01510763168335, "learning_rate": 7.714050525120636e-05, "loss": 0.026488906145095824, "step": 80570 }, { "epoch": 22.8725518024411, "grad_norm": 0.8410020470619202, "learning_rate": 7.7137666761283e-05, "loss": 0.008303754031658173, "step": 80580 }, { "epoch": 22.87539029236446, "grad_norm": 6.778632640838623, "learning_rate": 7.713482827135964e-05, "loss": 0.010488261282444001, "step": 80590 }, { "epoch": 22.878228782287824, "grad_norm": 12.870098114013672, "learning_rate": 7.713198978143627e-05, "loss": 0.021042497456073762, "step": 80600 }, { "epoch": 22.881067272211183, "grad_norm": 5.46492862701416, "learning_rate": 7.712915129151291e-05, "loss": 0.03602103590965271, "step": 80610 }, { "epoch": 22.883905762134546, "grad_norm": 7.759884357452393, "learning_rate": 7.712631280158957e-05, "loss": 0.012081490457057953, "step": 80620 }, { "epoch": 22.886744252057905, "grad_norm": 2.7611868381500244, "learning_rate": 7.71234743116662e-05, "loss": 0.021846266090869905, "step": 80630 }, { "epoch": 22.889582741981265, "grad_norm": 0.18728521466255188, "learning_rate": 7.712063582174284e-05, "loss": 0.009469299763441085, "step": 80640 }, { "epoch": 22.892421231904628, "grad_norm": 1.714592695236206, "learning_rate": 7.711779733181948e-05, "loss": 0.006551323831081391, "step": 80650 }, { "epoch": 22.895259721827987, "grad_norm": 0.7882659435272217, "learning_rate": 7.711495884189611e-05, "loss": 0.0070470161736011505, "step": 80660 }, { "epoch": 22.89809821175135, "grad_norm": 3.7027416229248047, "learning_rate": 7.711212035197275e-05, "loss": 0.011835765093564987, "step": 80670 }, { "epoch": 22.90093670167471, "grad_norm": 1.3493903875350952, "learning_rate": 7.71092818620494e-05, "loss": 0.007410074770450592, "step": 80680 }, { "epoch": 22.90377519159807, "grad_norm": 6.727886199951172, "learning_rate": 7.710644337212603e-05, "loss": 0.009225580096244811, "step": 80690 }, { "epoch": 22.90661368152143, "grad_norm": 3.2748970985412598, "learning_rate": 7.710360488220267e-05, "loss": 0.012339530885219574, "step": 80700 }, { "epoch": 22.90945217144479, "grad_norm": 0.43072709441185, "learning_rate": 7.710076639227931e-05, "loss": 0.008979567140340806, "step": 80710 }, { "epoch": 22.912290661368154, "grad_norm": 5.980490684509277, "learning_rate": 7.709792790235596e-05, "loss": 0.013184870779514312, "step": 80720 }, { "epoch": 22.915129151291513, "grad_norm": 1.4979661703109741, "learning_rate": 7.709508941243258e-05, "loss": 0.009309562295675278, "step": 80730 }, { "epoch": 22.917967641214872, "grad_norm": 19.444664001464844, "learning_rate": 7.709225092250922e-05, "loss": 0.029952794313430786, "step": 80740 }, { "epoch": 22.920806131138235, "grad_norm": 1.7137722969055176, "learning_rate": 7.708941243258587e-05, "loss": 0.02423321157693863, "step": 80750 }, { "epoch": 22.923644621061595, "grad_norm": 11.211395263671875, "learning_rate": 7.708657394266251e-05, "loss": 0.012348046153783798, "step": 80760 }, { "epoch": 22.926483110984957, "grad_norm": 7.594901084899902, "learning_rate": 7.708373545273915e-05, "loss": 0.023171013593673705, "step": 80770 }, { "epoch": 22.929321600908317, "grad_norm": 0.4991413950920105, "learning_rate": 7.708089696281579e-05, "loss": 0.01727769672870636, "step": 80780 }, { "epoch": 22.932160090831676, "grad_norm": 2.3327393531799316, "learning_rate": 7.707805847289242e-05, "loss": 0.011949050426483154, "step": 80790 }, { "epoch": 22.93499858075504, "grad_norm": 1.4672796726226807, "learning_rate": 7.707521998296906e-05, "loss": 0.012447305768728257, "step": 80800 }, { "epoch": 22.9378370706784, "grad_norm": 1.2329938411712646, "learning_rate": 7.707238149304571e-05, "loss": 0.01845271438360214, "step": 80810 }, { "epoch": 22.94067556060176, "grad_norm": 4.342756748199463, "learning_rate": 7.706954300312234e-05, "loss": 0.019174408912658692, "step": 80820 }, { "epoch": 22.94351405052512, "grad_norm": 0.26236632466316223, "learning_rate": 7.706670451319898e-05, "loss": 0.02667504847049713, "step": 80830 }, { "epoch": 22.94635254044848, "grad_norm": 0.35015568137168884, "learning_rate": 7.706386602327562e-05, "loss": 0.029777154326438904, "step": 80840 }, { "epoch": 22.949191030371843, "grad_norm": 1.522350788116455, "learning_rate": 7.706102753335225e-05, "loss": 0.03382187187671661, "step": 80850 }, { "epoch": 22.952029520295202, "grad_norm": 0.41056889295578003, "learning_rate": 7.70581890434289e-05, "loss": 0.020494188368320464, "step": 80860 }, { "epoch": 22.954868010218565, "grad_norm": 12.632144927978516, "learning_rate": 7.705535055350554e-05, "loss": 0.014747911691665649, "step": 80870 }, { "epoch": 22.957706500141924, "grad_norm": 3.3428947925567627, "learning_rate": 7.705251206358218e-05, "loss": 0.006142091006040573, "step": 80880 }, { "epoch": 22.960544990065284, "grad_norm": 12.096625328063965, "learning_rate": 7.704967357365882e-05, "loss": 0.010761167854070663, "step": 80890 }, { "epoch": 22.963383479988646, "grad_norm": 8.952106475830078, "learning_rate": 7.704683508373546e-05, "loss": 0.005761110782623291, "step": 80900 }, { "epoch": 22.966221969912006, "grad_norm": 5.995619297027588, "learning_rate": 7.70439965938121e-05, "loss": 0.006663285940885544, "step": 80910 }, { "epoch": 22.96906045983537, "grad_norm": 3.710517406463623, "learning_rate": 7.704115810388873e-05, "loss": 0.010477218031883239, "step": 80920 }, { "epoch": 22.971898949758728, "grad_norm": 0.8890381455421448, "learning_rate": 7.703831961396537e-05, "loss": 0.007119788229465485, "step": 80930 }, { "epoch": 22.974737439682087, "grad_norm": 7.485180854797363, "learning_rate": 7.703548112404201e-05, "loss": 0.010250549763441086, "step": 80940 }, { "epoch": 22.97757592960545, "grad_norm": 5.8570098876953125, "learning_rate": 7.703264263411865e-05, "loss": 0.01283009946346283, "step": 80950 }, { "epoch": 22.98041441952881, "grad_norm": 1.0446549654006958, "learning_rate": 7.70298041441953e-05, "loss": 0.012663394212722778, "step": 80960 }, { "epoch": 22.983252909452172, "grad_norm": Infinity, "learning_rate": 7.702696565427194e-05, "loss": 0.02184543162584305, "step": 80970 }, { "epoch": 22.986091399375532, "grad_norm": 1.4425677061080933, "learning_rate": 7.70244110133409e-05, "loss": 0.011099603772163392, "step": 80980 }, { "epoch": 22.988929889298895, "grad_norm": 4.527901649475098, "learning_rate": 7.702157252341754e-05, "loss": 0.011553056538105011, "step": 80990 }, { "epoch": 22.991768379222254, "grad_norm": 1.7087217569351196, "learning_rate": 7.701873403349418e-05, "loss": 0.012025050818920135, "step": 81000 }, { "epoch": 22.991768379222254, "eval_accuracy": 0.9655369746296179, "eval_loss": 0.11839976161718369, "eval_runtime": 31.5879, "eval_samples_per_second": 497.881, "eval_steps_per_second": 7.788, "step": 81000 }, { "epoch": 22.994606869145613, "grad_norm": 1.620803713798523, "learning_rate": 7.701589554357083e-05, "loss": 0.03272469043731689, "step": 81010 }, { "epoch": 22.997445359068976, "grad_norm": 14.319477081298828, "learning_rate": 7.701305705364747e-05, "loss": 0.027868452668190002, "step": 81020 }, { "epoch": 23.000283848992336, "grad_norm": 6.962742805480957, "learning_rate": 7.70102185637241e-05, "loss": 0.04558496177196503, "step": 81030 }, { "epoch": 23.0031223389157, "grad_norm": 0.3507782816886902, "learning_rate": 7.700738007380074e-05, "loss": 0.007761963456869125, "step": 81040 }, { "epoch": 23.005960828839058, "grad_norm": 3.6918137073516846, "learning_rate": 7.700454158387738e-05, "loss": 0.02295752465724945, "step": 81050 }, { "epoch": 23.008799318762417, "grad_norm": 1.276634931564331, "learning_rate": 7.700170309395402e-05, "loss": 0.006325936317443848, "step": 81060 }, { "epoch": 23.01163780868578, "grad_norm": 0.3400377631187439, "learning_rate": 7.699886460403066e-05, "loss": 0.010025095939636231, "step": 81070 }, { "epoch": 23.01447629860914, "grad_norm": 1.0349035263061523, "learning_rate": 7.69960261141073e-05, "loss": 0.0036687493324279786, "step": 81080 }, { "epoch": 23.017314788532502, "grad_norm": 0.08968742936849594, "learning_rate": 7.699318762418394e-05, "loss": 0.01273488849401474, "step": 81090 }, { "epoch": 23.02015327845586, "grad_norm": 0.22282001376152039, "learning_rate": 7.699034913426057e-05, "loss": 0.02432103157043457, "step": 81100 }, { "epoch": 23.02299176837922, "grad_norm": 0.25596579909324646, "learning_rate": 7.698751064433721e-05, "loss": 0.0030750010162591936, "step": 81110 }, { "epoch": 23.025830258302584, "grad_norm": 0.02531314454972744, "learning_rate": 7.698467215441385e-05, "loss": 0.003897952288389206, "step": 81120 }, { "epoch": 23.028668748225943, "grad_norm": 0.28874582052230835, "learning_rate": 7.69818336644905e-05, "loss": 0.004992254450917244, "step": 81130 }, { "epoch": 23.031507238149306, "grad_norm": 2.429277181625366, "learning_rate": 7.697899517456714e-05, "loss": 0.005841082334518433, "step": 81140 }, { "epoch": 23.034345728072665, "grad_norm": 2.9904825687408447, "learning_rate": 7.697615668464378e-05, "loss": 0.01131460964679718, "step": 81150 }, { "epoch": 23.037184217996025, "grad_norm": 3.2538156509399414, "learning_rate": 7.69733181947204e-05, "loss": 0.016667813062667847, "step": 81160 }, { "epoch": 23.040022707919388, "grad_norm": 0.5200117230415344, "learning_rate": 7.697047970479705e-05, "loss": 0.011638278514146805, "step": 81170 }, { "epoch": 23.042861197842747, "grad_norm": 0.9085693955421448, "learning_rate": 7.696764121487369e-05, "loss": 0.00712493434548378, "step": 81180 }, { "epoch": 23.04569968776611, "grad_norm": 0.18917958438396454, "learning_rate": 7.696480272495033e-05, "loss": 0.0066272996366024016, "step": 81190 }, { "epoch": 23.04853817768947, "grad_norm": 1.7186013460159302, "learning_rate": 7.696196423502697e-05, "loss": 0.019600804150104522, "step": 81200 }, { "epoch": 23.05137666761283, "grad_norm": 2.469651460647583, "learning_rate": 7.695912574510361e-05, "loss": 0.006852841377258301, "step": 81210 }, { "epoch": 23.05421515753619, "grad_norm": 0.803154468536377, "learning_rate": 7.695628725518025e-05, "loss": 0.008562416583299638, "step": 81220 }, { "epoch": 23.05705364745955, "grad_norm": 0.532713770866394, "learning_rate": 7.695344876525688e-05, "loss": 0.006589559465646743, "step": 81230 }, { "epoch": 23.059892137382914, "grad_norm": 0.40946492552757263, "learning_rate": 7.695061027533352e-05, "loss": 0.01059492826461792, "step": 81240 }, { "epoch": 23.062730627306273, "grad_norm": 0.6528635621070862, "learning_rate": 7.694777178541016e-05, "loss": 0.013476558029651642, "step": 81250 }, { "epoch": 23.065569117229632, "grad_norm": 9.170567512512207, "learning_rate": 7.69449332954868e-05, "loss": 0.0057659059762954715, "step": 81260 }, { "epoch": 23.068407607152995, "grad_norm": 6.036689281463623, "learning_rate": 7.694209480556345e-05, "loss": 0.010828778147697449, "step": 81270 }, { "epoch": 23.071246097076354, "grad_norm": 0.28587600588798523, "learning_rate": 7.693925631564009e-05, "loss": 0.009224681556224823, "step": 81280 }, { "epoch": 23.074084586999717, "grad_norm": 13.310474395751953, "learning_rate": 7.693641782571672e-05, "loss": 0.014253759384155273, "step": 81290 }, { "epoch": 23.076923076923077, "grad_norm": 14.087088584899902, "learning_rate": 7.693357933579336e-05, "loss": 0.020697185397148134, "step": 81300 }, { "epoch": 23.079761566846436, "grad_norm": 4.609619617462158, "learning_rate": 7.693074084587e-05, "loss": 0.004944352060556411, "step": 81310 }, { "epoch": 23.0826000567698, "grad_norm": 3.9731431007385254, "learning_rate": 7.692790235594664e-05, "loss": 0.010432165861129761, "step": 81320 }, { "epoch": 23.085438546693158, "grad_norm": 1.303425669670105, "learning_rate": 7.692506386602328e-05, "loss": 0.01547238528728485, "step": 81330 }, { "epoch": 23.08827703661652, "grad_norm": 0.16182425618171692, "learning_rate": 7.692222537609992e-05, "loss": 0.007608227431774139, "step": 81340 }, { "epoch": 23.09111552653988, "grad_norm": 0.09139491617679596, "learning_rate": 7.691938688617656e-05, "loss": 0.008348201215267182, "step": 81350 }, { "epoch": 23.093954016463243, "grad_norm": 1.6499582529067993, "learning_rate": 7.691654839625319e-05, "loss": 0.008794358372688294, "step": 81360 }, { "epoch": 23.096792506386603, "grad_norm": 2.591325283050537, "learning_rate": 7.691370990632983e-05, "loss": 0.02172408401966095, "step": 81370 }, { "epoch": 23.099630996309962, "grad_norm": 0.26487675309181213, "learning_rate": 7.691087141640648e-05, "loss": 0.016495434939861296, "step": 81380 }, { "epoch": 23.102469486233325, "grad_norm": 1.375809669494629, "learning_rate": 7.690803292648312e-05, "loss": 0.0035038746893405913, "step": 81390 }, { "epoch": 23.105307976156684, "grad_norm": 0.26093676686286926, "learning_rate": 7.690519443655976e-05, "loss": 0.00615517757833004, "step": 81400 }, { "epoch": 23.108146466080047, "grad_norm": 0.34218186140060425, "learning_rate": 7.69023559466364e-05, "loss": 0.01887122541666031, "step": 81410 }, { "epoch": 23.110984956003406, "grad_norm": 8.678123474121094, "learning_rate": 7.689951745671303e-05, "loss": 0.005620415136218071, "step": 81420 }, { "epoch": 23.113823445926766, "grad_norm": 0.11254508048295975, "learning_rate": 7.689667896678967e-05, "loss": 0.013050979375839234, "step": 81430 }, { "epoch": 23.11666193585013, "grad_norm": 1.4598323106765747, "learning_rate": 7.689384047686631e-05, "loss": 0.007474815100431442, "step": 81440 }, { "epoch": 23.119500425773488, "grad_norm": 2.1692874431610107, "learning_rate": 7.689100198694295e-05, "loss": 0.009286851435899735, "step": 81450 }, { "epoch": 23.12233891569685, "grad_norm": 2.1892526149749756, "learning_rate": 7.688816349701959e-05, "loss": 0.011077985167503357, "step": 81460 }, { "epoch": 23.12517740562021, "grad_norm": 3.690589666366577, "learning_rate": 7.688532500709623e-05, "loss": 0.00821208581328392, "step": 81470 }, { "epoch": 23.12801589554357, "grad_norm": 1.410402774810791, "learning_rate": 7.688248651717288e-05, "loss": 0.014732782542705537, "step": 81480 }, { "epoch": 23.130854385466932, "grad_norm": 17.127286911010742, "learning_rate": 7.68796480272495e-05, "loss": 0.025893032550811768, "step": 81490 }, { "epoch": 23.13369287539029, "grad_norm": 4.727377891540527, "learning_rate": 7.687680953732614e-05, "loss": 0.04660629630088806, "step": 81500 }, { "epoch": 23.13369287539029, "eval_accuracy": 0.9610860303935906, "eval_loss": 0.1265936642885208, "eval_runtime": 32.1294, "eval_samples_per_second": 489.489, "eval_steps_per_second": 7.657, "step": 81500 }, { "epoch": 23.136531365313655, "grad_norm": 3.576752185821533, "learning_rate": 7.687397104740279e-05, "loss": 0.016522106528282166, "step": 81510 }, { "epoch": 23.139369855237014, "grad_norm": 0.2805086374282837, "learning_rate": 7.687113255747941e-05, "loss": 0.006738145649433136, "step": 81520 }, { "epoch": 23.142208345160373, "grad_norm": 0.22854863107204437, "learning_rate": 7.686829406755607e-05, "loss": 0.019515399634838105, "step": 81530 }, { "epoch": 23.145046835083736, "grad_norm": 1.3174718618392944, "learning_rate": 7.686545557763271e-05, "loss": 0.00976240262389183, "step": 81540 }, { "epoch": 23.147885325007096, "grad_norm": 0.4594900906085968, "learning_rate": 7.686261708770934e-05, "loss": 0.013127994537353516, "step": 81550 }, { "epoch": 23.15072381493046, "grad_norm": 8.161598205566406, "learning_rate": 7.685977859778598e-05, "loss": 0.021325382590293884, "step": 81560 }, { "epoch": 23.153562304853818, "grad_norm": 0.4259471297264099, "learning_rate": 7.685694010786262e-05, "loss": 0.006512487679719925, "step": 81570 }, { "epoch": 23.156400794777177, "grad_norm": 1.1085305213928223, "learning_rate": 7.685410161793926e-05, "loss": 0.007955869287252426, "step": 81580 }, { "epoch": 23.15923928470054, "grad_norm": 0.5901926755905151, "learning_rate": 7.68512631280159e-05, "loss": 0.01664751172065735, "step": 81590 }, { "epoch": 23.1620777746239, "grad_norm": 1.5823017358779907, "learning_rate": 7.684842463809255e-05, "loss": 0.00969500169157982, "step": 81600 }, { "epoch": 23.164916264547262, "grad_norm": 10.561538696289062, "learning_rate": 7.684558614816917e-05, "loss": 0.02339756041765213, "step": 81610 }, { "epoch": 23.16775475447062, "grad_norm": 1.017353892326355, "learning_rate": 7.684274765824581e-05, "loss": 0.007292459905147553, "step": 81620 }, { "epoch": 23.17059324439398, "grad_norm": 1.3734478950500488, "learning_rate": 7.683990916832246e-05, "loss": 0.018131014704704285, "step": 81630 }, { "epoch": 23.173431734317344, "grad_norm": 10.98176097869873, "learning_rate": 7.68370706783991e-05, "loss": 0.01684893071651459, "step": 81640 }, { "epoch": 23.176270224240703, "grad_norm": 0.8957962989807129, "learning_rate": 7.683423218847572e-05, "loss": 0.014210113883018493, "step": 81650 }, { "epoch": 23.179108714164066, "grad_norm": 5.073215007781982, "learning_rate": 7.683139369855238e-05, "loss": 0.008887016773223877, "step": 81660 }, { "epoch": 23.181947204087425, "grad_norm": 2.3756263256073, "learning_rate": 7.682855520862902e-05, "loss": 0.00913379043340683, "step": 81670 }, { "epoch": 23.184785694010785, "grad_norm": 6.557875156402588, "learning_rate": 7.682571671870565e-05, "loss": 0.010885653644800186, "step": 81680 }, { "epoch": 23.187624183934147, "grad_norm": 13.57228946685791, "learning_rate": 7.682287822878229e-05, "loss": 0.023547247052192688, "step": 81690 }, { "epoch": 23.190462673857507, "grad_norm": 0.2959260046482086, "learning_rate": 7.682003973885893e-05, "loss": 0.02730979323387146, "step": 81700 }, { "epoch": 23.19330116378087, "grad_norm": 4.152402877807617, "learning_rate": 7.681720124893557e-05, "loss": 0.011736023426055908, "step": 81710 }, { "epoch": 23.19613965370423, "grad_norm": 1.0124796628952026, "learning_rate": 7.68143627590122e-05, "loss": 0.010967476665973664, "step": 81720 }, { "epoch": 23.19897814362759, "grad_norm": 0.7735602259635925, "learning_rate": 7.681152426908886e-05, "loss": 0.011045131087303161, "step": 81730 }, { "epoch": 23.20181663355095, "grad_norm": 2.0713863372802734, "learning_rate": 7.680868577916548e-05, "loss": 0.005089344829320908, "step": 81740 }, { "epoch": 23.20465512347431, "grad_norm": 3.1303160190582275, "learning_rate": 7.680584728924213e-05, "loss": 0.019877766072750092, "step": 81750 }, { "epoch": 23.207493613397673, "grad_norm": 13.594548225402832, "learning_rate": 7.680300879931877e-05, "loss": 0.019787566363811494, "step": 81760 }, { "epoch": 23.210332103321033, "grad_norm": 9.464159965515137, "learning_rate": 7.680017030939541e-05, "loss": 0.02696545422077179, "step": 81770 }, { "epoch": 23.213170593244396, "grad_norm": 6.040079593658447, "learning_rate": 7.679733181947204e-05, "loss": 0.012206007540225983, "step": 81780 }, { "epoch": 23.216009083167755, "grad_norm": 0.13113054633140564, "learning_rate": 7.679449332954869e-05, "loss": 0.00852060467004776, "step": 81790 }, { "epoch": 23.218847573091114, "grad_norm": 0.1292133927345276, "learning_rate": 7.679165483962533e-05, "loss": 0.007242535054683685, "step": 81800 }, { "epoch": 23.221686063014477, "grad_norm": 6.790336608886719, "learning_rate": 7.678881634970196e-05, "loss": 0.016596157848834992, "step": 81810 }, { "epoch": 23.224524552937837, "grad_norm": 6.135738849639893, "learning_rate": 7.67859778597786e-05, "loss": 0.011655677855014802, "step": 81820 }, { "epoch": 23.2273630428612, "grad_norm": 1.4554401636123657, "learning_rate": 7.678313936985524e-05, "loss": 0.01121780127286911, "step": 81830 }, { "epoch": 23.23020153278456, "grad_norm": 3.1679046154022217, "learning_rate": 7.678030087993187e-05, "loss": 0.014025050401687621, "step": 81840 }, { "epoch": 23.233040022707918, "grad_norm": 3.1480331420898438, "learning_rate": 7.677746239000851e-05, "loss": 0.014249539375305176, "step": 81850 }, { "epoch": 23.23587851263128, "grad_norm": 5.555353164672852, "learning_rate": 7.677462390008517e-05, "loss": 0.01360018253326416, "step": 81860 }, { "epoch": 23.23871700255464, "grad_norm": 0.6712464690208435, "learning_rate": 7.67717854101618e-05, "loss": 0.029358720779418944, "step": 81870 }, { "epoch": 23.241555492478003, "grad_norm": 0.22279907763004303, "learning_rate": 7.676894692023844e-05, "loss": 0.0040431000292301175, "step": 81880 }, { "epoch": 23.244393982401363, "grad_norm": 0.14023631811141968, "learning_rate": 7.676610843031508e-05, "loss": 0.024998120963573456, "step": 81890 }, { "epoch": 23.247232472324722, "grad_norm": 8.533802032470703, "learning_rate": 7.676326994039172e-05, "loss": 0.008667520433664321, "step": 81900 }, { "epoch": 23.250070962248085, "grad_norm": 0.43664854764938354, "learning_rate": 7.676043145046835e-05, "loss": 0.004219681024551392, "step": 81910 }, { "epoch": 23.252909452171444, "grad_norm": 7.283153057098389, "learning_rate": 7.675759296054499e-05, "loss": 0.01028265655040741, "step": 81920 }, { "epoch": 23.255747942094807, "grad_norm": 1.0547778606414795, "learning_rate": 7.675475447062164e-05, "loss": 0.0037733543664216994, "step": 81930 }, { "epoch": 23.258586432018166, "grad_norm": 1.406489372253418, "learning_rate": 7.675191598069827e-05, "loss": 0.009622058272361756, "step": 81940 }, { "epoch": 23.261424921941526, "grad_norm": 0.505281925201416, "learning_rate": 7.674907749077491e-05, "loss": 0.008259361237287521, "step": 81950 }, { "epoch": 23.26426341186489, "grad_norm": 0.05560813471674919, "learning_rate": 7.674623900085155e-05, "loss": 0.016153261065483093, "step": 81960 }, { "epoch": 23.267101901788248, "grad_norm": 0.7451133728027344, "learning_rate": 7.674340051092818e-05, "loss": 0.019797845184803008, "step": 81970 }, { "epoch": 23.26994039171161, "grad_norm": 0.3206908404827118, "learning_rate": 7.674056202100482e-05, "loss": 0.011257698386907577, "step": 81980 }, { "epoch": 23.27277888163497, "grad_norm": 1.20969557762146, "learning_rate": 7.673772353108148e-05, "loss": 0.01275096982717514, "step": 81990 }, { "epoch": 23.27561737155833, "grad_norm": 2.338411331176758, "learning_rate": 7.67348850411581e-05, "loss": 0.006433916836977005, "step": 82000 }, { "epoch": 23.27561737155833, "eval_accuracy": 0.9689705601831246, "eval_loss": 0.10624296218156815, "eval_runtime": 31.7804, "eval_samples_per_second": 494.865, "eval_steps_per_second": 7.741, "step": 82000 }, { "epoch": 23.278455861481692, "grad_norm": 1.3388887643814087, "learning_rate": 7.673204655123475e-05, "loss": 0.00449761301279068, "step": 82010 }, { "epoch": 23.28129435140505, "grad_norm": 11.14245891571045, "learning_rate": 7.672920806131139e-05, "loss": 0.01533648520708084, "step": 82020 }, { "epoch": 23.284132841328415, "grad_norm": 0.45252472162246704, "learning_rate": 7.672636957138803e-05, "loss": 0.01781100332736969, "step": 82030 }, { "epoch": 23.286971331251774, "grad_norm": 9.260944366455078, "learning_rate": 7.672353108146466e-05, "loss": 0.01715707629919052, "step": 82040 }, { "epoch": 23.289809821175133, "grad_norm": 3.249415159225464, "learning_rate": 7.67206925915413e-05, "loss": 0.011982394009828567, "step": 82050 }, { "epoch": 23.292648311098496, "grad_norm": 0.23406581580638885, "learning_rate": 7.671785410161795e-05, "loss": 0.019793927669525146, "step": 82060 }, { "epoch": 23.295486801021855, "grad_norm": 0.7506312131881714, "learning_rate": 7.671501561169458e-05, "loss": 0.009187953174114227, "step": 82070 }, { "epoch": 23.29832529094522, "grad_norm": 2.8571550846099854, "learning_rate": 7.671217712177122e-05, "loss": 0.010929618775844575, "step": 82080 }, { "epoch": 23.301163780868578, "grad_norm": 0.7757213115692139, "learning_rate": 7.670933863184786e-05, "loss": 0.014920903742313385, "step": 82090 }, { "epoch": 23.304002270791937, "grad_norm": 8.449934959411621, "learning_rate": 7.670650014192449e-05, "loss": 0.027359050512313843, "step": 82100 }, { "epoch": 23.3068407607153, "grad_norm": 1.6749534606933594, "learning_rate": 7.670366165200113e-05, "loss": 0.006395766139030456, "step": 82110 }, { "epoch": 23.30967925063866, "grad_norm": 5.584567546844482, "learning_rate": 7.670082316207777e-05, "loss": 0.025453031063079834, "step": 82120 }, { "epoch": 23.312517740562022, "grad_norm": 3.466580867767334, "learning_rate": 7.669798467215442e-05, "loss": 0.005917467921972275, "step": 82130 }, { "epoch": 23.31535623048538, "grad_norm": 4.849684238433838, "learning_rate": 7.669514618223106e-05, "loss": 0.010397037863731385, "step": 82140 }, { "epoch": 23.31819472040874, "grad_norm": 0.607302725315094, "learning_rate": 7.66923076923077e-05, "loss": 0.007649130374193192, "step": 82150 }, { "epoch": 23.321033210332104, "grad_norm": 0.4653611481189728, "learning_rate": 7.668946920238434e-05, "loss": 0.010015854239463806, "step": 82160 }, { "epoch": 23.323871700255463, "grad_norm": 0.5790054798126221, "learning_rate": 7.668663071246097e-05, "loss": 0.016525769233703615, "step": 82170 }, { "epoch": 23.326710190178826, "grad_norm": 0.34577104449272156, "learning_rate": 7.668379222253761e-05, "loss": 0.006699518859386444, "step": 82180 }, { "epoch": 23.329548680102185, "grad_norm": 0.6061055660247803, "learning_rate": 7.668095373261426e-05, "loss": 0.02122640162706375, "step": 82190 }, { "epoch": 23.332387170025548, "grad_norm": 5.8752121925354, "learning_rate": 7.667811524269089e-05, "loss": 0.013380686938762664, "step": 82200 }, { "epoch": 23.335225659948907, "grad_norm": 1.414131760597229, "learning_rate": 7.667527675276753e-05, "loss": 0.008331533521413803, "step": 82210 }, { "epoch": 23.338064149872267, "grad_norm": 1.5979766845703125, "learning_rate": 7.667243826284417e-05, "loss": 0.022481757402420043, "step": 82220 }, { "epoch": 23.34090263979563, "grad_norm": 1.1774921417236328, "learning_rate": 7.66695997729208e-05, "loss": 0.007056374847888946, "step": 82230 }, { "epoch": 23.34374112971899, "grad_norm": 6.198848724365234, "learning_rate": 7.666676128299744e-05, "loss": 0.008384213596582413, "step": 82240 }, { "epoch": 23.346579619642352, "grad_norm": 0.41497403383255005, "learning_rate": 7.666392279307409e-05, "loss": 0.009454632550477982, "step": 82250 }, { "epoch": 23.34941810956571, "grad_norm": 5.006596565246582, "learning_rate": 7.666108430315073e-05, "loss": 0.021467940509319307, "step": 82260 }, { "epoch": 23.35225659948907, "grad_norm": 1.8049062490463257, "learning_rate": 7.665824581322737e-05, "loss": 0.017208920419216157, "step": 82270 }, { "epoch": 23.355095089412433, "grad_norm": 0.6289519667625427, "learning_rate": 7.665540732330401e-05, "loss": 0.011082616448402405, "step": 82280 }, { "epoch": 23.357933579335793, "grad_norm": 5.964437484741211, "learning_rate": 7.665256883338065e-05, "loss": 0.01135619729757309, "step": 82290 }, { "epoch": 23.360772069259156, "grad_norm": 2.3437535762786865, "learning_rate": 7.664973034345728e-05, "loss": 0.019390083849430084, "step": 82300 }, { "epoch": 23.363610559182515, "grad_norm": 0.46483907103538513, "learning_rate": 7.664689185353392e-05, "loss": 0.007107363641262054, "step": 82310 }, { "epoch": 23.366449049105874, "grad_norm": 0.26846811175346375, "learning_rate": 7.664405336361056e-05, "loss": 0.01215272694826126, "step": 82320 }, { "epoch": 23.369287539029237, "grad_norm": 13.927918434143066, "learning_rate": 7.66412148736872e-05, "loss": 0.017893636226654054, "step": 82330 }, { "epoch": 23.372126028952596, "grad_norm": 1.4418696165084839, "learning_rate": 7.663837638376384e-05, "loss": 0.009810656309127808, "step": 82340 }, { "epoch": 23.37496451887596, "grad_norm": 13.951860427856445, "learning_rate": 7.663553789384049e-05, "loss": 0.015971386432647706, "step": 82350 }, { "epoch": 23.37780300879932, "grad_norm": 0.137238010764122, "learning_rate": 7.663269940391711e-05, "loss": 0.00865924134850502, "step": 82360 }, { "epoch": 23.380641498722678, "grad_norm": 3.564608335494995, "learning_rate": 7.662986091399375e-05, "loss": 0.01751369684934616, "step": 82370 }, { "epoch": 23.38347998864604, "grad_norm": 0.31712964177131653, "learning_rate": 7.66270224240704e-05, "loss": 0.008308090269565582, "step": 82380 }, { "epoch": 23.3863184785694, "grad_norm": 0.785385012626648, "learning_rate": 7.662418393414704e-05, "loss": 0.009592992812395095, "step": 82390 }, { "epoch": 23.389156968492763, "grad_norm": 6.185344219207764, "learning_rate": 7.662134544422368e-05, "loss": 0.01427169144153595, "step": 82400 }, { "epoch": 23.391995458416122, "grad_norm": 9.924135208129883, "learning_rate": 7.661850695430032e-05, "loss": 0.02664813995361328, "step": 82410 }, { "epoch": 23.394833948339482, "grad_norm": 7.843605041503906, "learning_rate": 7.661566846437696e-05, "loss": 0.01773083657026291, "step": 82420 }, { "epoch": 23.397672438262845, "grad_norm": 4.13249397277832, "learning_rate": 7.661282997445359e-05, "loss": 0.013245417177677155, "step": 82430 }, { "epoch": 23.400510928186204, "grad_norm": 6.57637357711792, "learning_rate": 7.660999148453023e-05, "loss": 0.007513901591300965, "step": 82440 }, { "epoch": 23.403349418109567, "grad_norm": 12.128868103027344, "learning_rate": 7.660715299460687e-05, "loss": 0.02394697517156601, "step": 82450 }, { "epoch": 23.406187908032926, "grad_norm": 0.25987422466278076, "learning_rate": 7.660431450468351e-05, "loss": 0.009269306808710099, "step": 82460 }, { "epoch": 23.409026397956286, "grad_norm": 5.1541666984558105, "learning_rate": 7.660147601476015e-05, "loss": 0.031182122230529786, "step": 82470 }, { "epoch": 23.41186488787965, "grad_norm": 0.9312660098075867, "learning_rate": 7.65986375248368e-05, "loss": 0.002501872181892395, "step": 82480 }, { "epoch": 23.414703377803008, "grad_norm": 4.570296764373779, "learning_rate": 7.659579903491342e-05, "loss": 0.02177284359931946, "step": 82490 }, { "epoch": 23.41754186772637, "grad_norm": 7.330686092376709, "learning_rate": 7.659296054499007e-05, "loss": 0.026332417130470277, "step": 82500 }, { "epoch": 23.41754186772637, "eval_accuracy": 0.9671901824887137, "eval_loss": 0.10547531396150589, "eval_runtime": 32.4629, "eval_samples_per_second": 484.46, "eval_steps_per_second": 7.578, "step": 82500 }, { "epoch": 23.42038035764973, "grad_norm": 3.181257486343384, "learning_rate": 7.659012205506671e-05, "loss": 0.029849618673324585, "step": 82510 }, { "epoch": 23.42321884757309, "grad_norm": 1.6969406604766846, "learning_rate": 7.658728356514335e-05, "loss": 0.010424987971782684, "step": 82520 }, { "epoch": 23.426057337496452, "grad_norm": 2.0591225624084473, "learning_rate": 7.658444507521999e-05, "loss": 0.00893688276410103, "step": 82530 }, { "epoch": 23.42889582741981, "grad_norm": 3.565380096435547, "learning_rate": 7.658160658529663e-05, "loss": 0.006332745403051376, "step": 82540 }, { "epoch": 23.431734317343174, "grad_norm": 1.5215063095092773, "learning_rate": 7.657876809537326e-05, "loss": 0.014168977737426758, "step": 82550 }, { "epoch": 23.434572807266534, "grad_norm": 0.7097628712654114, "learning_rate": 7.65759296054499e-05, "loss": 0.006420505046844482, "step": 82560 }, { "epoch": 23.437411297189897, "grad_norm": 0.589506208896637, "learning_rate": 7.657309111552654e-05, "loss": 0.008945582807064057, "step": 82570 }, { "epoch": 23.440249787113256, "grad_norm": 2.0901496410369873, "learning_rate": 7.657025262560318e-05, "loss": 0.006495766341686249, "step": 82580 }, { "epoch": 23.443088277036615, "grad_norm": 0.704718828201294, "learning_rate": 7.656741413567982e-05, "loss": 0.013205070793628693, "step": 82590 }, { "epoch": 23.44592676695998, "grad_norm": 8.177169799804688, "learning_rate": 7.656457564575647e-05, "loss": 0.009578696638345718, "step": 82600 }, { "epoch": 23.448765256883338, "grad_norm": 3.218440294265747, "learning_rate": 7.656173715583311e-05, "loss": 0.01101265922188759, "step": 82610 }, { "epoch": 23.4516037468067, "grad_norm": 6.081578731536865, "learning_rate": 7.655889866590973e-05, "loss": 0.023344169557094573, "step": 82620 }, { "epoch": 23.45444223673006, "grad_norm": 0.12123202532529831, "learning_rate": 7.655606017598638e-05, "loss": 0.010875825583934785, "step": 82630 }, { "epoch": 23.45728072665342, "grad_norm": 10.686674118041992, "learning_rate": 7.655322168606302e-05, "loss": 0.01307520568370819, "step": 82640 }, { "epoch": 23.460119216576782, "grad_norm": 8.951855659484863, "learning_rate": 7.655038319613965e-05, "loss": 0.019513271749019623, "step": 82650 }, { "epoch": 23.46295770650014, "grad_norm": 7.39168643951416, "learning_rate": 7.65475447062163e-05, "loss": 0.010536402463912964, "step": 82660 }, { "epoch": 23.465796196423504, "grad_norm": 1.263788104057312, "learning_rate": 7.654470621629294e-05, "loss": 0.015612158179283141, "step": 82670 }, { "epoch": 23.468634686346864, "grad_norm": 1.5506490468978882, "learning_rate": 7.654186772636957e-05, "loss": 0.006364137679338455, "step": 82680 }, { "epoch": 23.471473176270223, "grad_norm": 0.6718373894691467, "learning_rate": 7.653902923644621e-05, "loss": 0.00988832265138626, "step": 82690 }, { "epoch": 23.474311666193586, "grad_norm": 13.08192253112793, "learning_rate": 7.653619074652285e-05, "loss": 0.010516923666000367, "step": 82700 }, { "epoch": 23.477150156116945, "grad_norm": 0.6148474812507629, "learning_rate": 7.65333522565995e-05, "loss": 0.020795610547065736, "step": 82710 }, { "epoch": 23.479988646040308, "grad_norm": 1.006664514541626, "learning_rate": 7.653051376667614e-05, "loss": 0.017482875287532805, "step": 82720 }, { "epoch": 23.482827135963667, "grad_norm": 4.439058780670166, "learning_rate": 7.652767527675278e-05, "loss": 0.007084453850984574, "step": 82730 }, { "epoch": 23.485665625887027, "grad_norm": 11.166254997253418, "learning_rate": 7.652483678682942e-05, "loss": 0.036588022112846376, "step": 82740 }, { "epoch": 23.48850411581039, "grad_norm": 3.744107484817505, "learning_rate": 7.652199829690605e-05, "loss": 0.012581223249435424, "step": 82750 }, { "epoch": 23.49134260573375, "grad_norm": 3.978107213973999, "learning_rate": 7.651915980698269e-05, "loss": 0.01503506600856781, "step": 82760 }, { "epoch": 23.49418109565711, "grad_norm": 0.8000993728637695, "learning_rate": 7.651632131705933e-05, "loss": 0.006816352903842926, "step": 82770 }, { "epoch": 23.49701958558047, "grad_norm": 0.15485237538814545, "learning_rate": 7.651348282713596e-05, "loss": 0.0071754463016986845, "step": 82780 }, { "epoch": 23.49985807550383, "grad_norm": 4.076370716094971, "learning_rate": 7.651064433721261e-05, "loss": 0.01530846357345581, "step": 82790 }, { "epoch": 23.502696565427193, "grad_norm": 0.2965930104255676, "learning_rate": 7.650780584728925e-05, "loss": 0.009167153388261795, "step": 82800 }, { "epoch": 23.505535055350553, "grad_norm": 3.043975353240967, "learning_rate": 7.650496735736588e-05, "loss": 0.011548572778701782, "step": 82810 }, { "epoch": 23.508373545273916, "grad_norm": 10.682815551757812, "learning_rate": 7.650212886744252e-05, "loss": 0.02089568376541138, "step": 82820 }, { "epoch": 23.511212035197275, "grad_norm": 10.54460620880127, "learning_rate": 7.649929037751916e-05, "loss": 0.012385944277048111, "step": 82830 }, { "epoch": 23.514050525120634, "grad_norm": 5.183022499084473, "learning_rate": 7.64964518875958e-05, "loss": 0.009924705326557159, "step": 82840 }, { "epoch": 23.516889015043997, "grad_norm": 0.6913396716117859, "learning_rate": 7.649361339767243e-05, "loss": 0.010891494899988174, "step": 82850 }, { "epoch": 23.519727504967356, "grad_norm": 0.08964705467224121, "learning_rate": 7.649077490774909e-05, "loss": 0.00865996554493904, "step": 82860 }, { "epoch": 23.52256599489072, "grad_norm": 1.1144227981567383, "learning_rate": 7.648793641782573e-05, "loss": 0.009030211716890335, "step": 82870 }, { "epoch": 23.52540448481408, "grad_norm": 1.8332144021987915, "learning_rate": 7.648509792790236e-05, "loss": 0.007814111560583115, "step": 82880 }, { "epoch": 23.528242974737438, "grad_norm": 3.369371175765991, "learning_rate": 7.6482259437979e-05, "loss": 0.02521330714225769, "step": 82890 }, { "epoch": 23.5310814646608, "grad_norm": 13.359131813049316, "learning_rate": 7.647942094805564e-05, "loss": 0.009891249239444733, "step": 82900 }, { "epoch": 23.53391995458416, "grad_norm": 2.5486323833465576, "learning_rate": 7.647658245813227e-05, "loss": 0.02335374504327774, "step": 82910 }, { "epoch": 23.536758444507523, "grad_norm": 6.208741664886475, "learning_rate": 7.647374396820892e-05, "loss": 0.008053935319185256, "step": 82920 }, { "epoch": 23.539596934430882, "grad_norm": 0.5290101766586304, "learning_rate": 7.647090547828556e-05, "loss": 0.006270058453083038, "step": 82930 }, { "epoch": 23.542435424354245, "grad_norm": 6.628600120544434, "learning_rate": 7.646806698836219e-05, "loss": 0.01876838207244873, "step": 82940 }, { "epoch": 23.545273914277605, "grad_norm": 0.41733840107917786, "learning_rate": 7.646522849843883e-05, "loss": 0.006512662768363953, "step": 82950 }, { "epoch": 23.548112404200964, "grad_norm": 0.6664360761642456, "learning_rate": 7.646239000851547e-05, "loss": 0.013772030174732209, "step": 82960 }, { "epoch": 23.550950894124327, "grad_norm": 2.4472177028656006, "learning_rate": 7.645955151859212e-05, "loss": 0.006162246316671371, "step": 82970 }, { "epoch": 23.553789384047686, "grad_norm": 7.343294620513916, "learning_rate": 7.645671302866874e-05, "loss": 0.026304754614830016, "step": 82980 }, { "epoch": 23.55662787397105, "grad_norm": 0.9492427110671997, "learning_rate": 7.64538745387454e-05, "loss": 0.029026344418525696, "step": 82990 }, { "epoch": 23.55946636389441, "grad_norm": 2.043377161026001, "learning_rate": 7.645103604882204e-05, "loss": 0.014937584102153779, "step": 83000 }, { "epoch": 23.55946636389441, "eval_accuracy": 0.9701150887009601, "eval_loss": 0.09734461456537247, "eval_runtime": 31.7902, "eval_samples_per_second": 494.713, "eval_steps_per_second": 7.738, "step": 83000 }, { "epoch": 23.562304853817768, "grad_norm": 2.5440618991851807, "learning_rate": 7.644819755889867e-05, "loss": 0.006747973710298538, "step": 83010 }, { "epoch": 23.56514334374113, "grad_norm": 4.450223922729492, "learning_rate": 7.644535906897531e-05, "loss": 0.009841623902320861, "step": 83020 }, { "epoch": 23.56798183366449, "grad_norm": 8.121411323547363, "learning_rate": 7.644252057905195e-05, "loss": 0.019003406167030334, "step": 83030 }, { "epoch": 23.570820323587853, "grad_norm": 0.99057537317276, "learning_rate": 7.643968208912858e-05, "loss": 0.017102931439876557, "step": 83040 }, { "epoch": 23.573658813511212, "grad_norm": 6.358065605163574, "learning_rate": 7.643684359920522e-05, "loss": 0.0262486070394516, "step": 83050 }, { "epoch": 23.57649730343457, "grad_norm": 0.3345797657966614, "learning_rate": 7.643400510928187e-05, "loss": 0.019595248997211455, "step": 83060 }, { "epoch": 23.579335793357934, "grad_norm": 11.32795238494873, "learning_rate": 7.64311666193585e-05, "loss": 0.009392689168453216, "step": 83070 }, { "epoch": 23.582174283281294, "grad_norm": 5.22819709777832, "learning_rate": 7.642832812943514e-05, "loss": 0.012082602083683013, "step": 83080 }, { "epoch": 23.585012773204657, "grad_norm": 0.9010908603668213, "learning_rate": 7.642548963951178e-05, "loss": 0.01505519300699234, "step": 83090 }, { "epoch": 23.587851263128016, "grad_norm": 1.0775946378707886, "learning_rate": 7.642265114958843e-05, "loss": 0.007733301818370819, "step": 83100 }, { "epoch": 23.590689753051375, "grad_norm": 1.7868670225143433, "learning_rate": 7.641981265966505e-05, "loss": 0.009365884959697724, "step": 83110 }, { "epoch": 23.593528242974738, "grad_norm": 1.393751621246338, "learning_rate": 7.641697416974171e-05, "loss": 0.010777691006660461, "step": 83120 }, { "epoch": 23.596366732898097, "grad_norm": 0.1793908327817917, "learning_rate": 7.641413567981835e-05, "loss": 0.01588108390569687, "step": 83130 }, { "epoch": 23.59920522282146, "grad_norm": 2.337538480758667, "learning_rate": 7.641129718989498e-05, "loss": 0.010045894980430603, "step": 83140 }, { "epoch": 23.60204371274482, "grad_norm": 0.7882490754127502, "learning_rate": 7.640845869997162e-05, "loss": 0.007647889852523804, "step": 83150 }, { "epoch": 23.60488220266818, "grad_norm": 5.196844577789307, "learning_rate": 7.640562021004826e-05, "loss": 0.01271662414073944, "step": 83160 }, { "epoch": 23.607720692591542, "grad_norm": 2.0521433353424072, "learning_rate": 7.640278172012489e-05, "loss": 0.018307459354400635, "step": 83170 }, { "epoch": 23.6105591825149, "grad_norm": 14.771334648132324, "learning_rate": 7.639994323020153e-05, "loss": 0.034636908769607545, "step": 83180 }, { "epoch": 23.613397672438264, "grad_norm": 2.3762192726135254, "learning_rate": 7.639710474027818e-05, "loss": 0.013587671518325805, "step": 83190 }, { "epoch": 23.616236162361623, "grad_norm": 13.212779998779297, "learning_rate": 7.639426625035481e-05, "loss": 0.009491260349750518, "step": 83200 }, { "epoch": 23.619074652284983, "grad_norm": 3.265249013900757, "learning_rate": 7.639142776043145e-05, "loss": 0.017671296000480653, "step": 83210 }, { "epoch": 23.621913142208346, "grad_norm": 0.253572940826416, "learning_rate": 7.63885892705081e-05, "loss": 0.011944358795881271, "step": 83220 }, { "epoch": 23.624751632131705, "grad_norm": 10.773809432983398, "learning_rate": 7.638575078058474e-05, "loss": 0.020746909081935883, "step": 83230 }, { "epoch": 23.627590122055068, "grad_norm": 6.981840133666992, "learning_rate": 7.638291229066136e-05, "loss": 0.019993923604488373, "step": 83240 }, { "epoch": 23.630428611978427, "grad_norm": 0.41949746012687683, "learning_rate": 7.6380073800738e-05, "loss": 0.005101189762353897, "step": 83250 }, { "epoch": 23.633267101901787, "grad_norm": 2.4229013919830322, "learning_rate": 7.637723531081466e-05, "loss": 0.03231497406959534, "step": 83260 }, { "epoch": 23.63610559182515, "grad_norm": 4.9027862548828125, "learning_rate": 7.637439682089129e-05, "loss": 0.02451559603214264, "step": 83270 }, { "epoch": 23.63894408174851, "grad_norm": 0.41542407870292664, "learning_rate": 7.637155833096793e-05, "loss": 0.00812191590666771, "step": 83280 }, { "epoch": 23.64178257167187, "grad_norm": 3.698788642883301, "learning_rate": 7.636871984104457e-05, "loss": 0.003953716158866883, "step": 83290 }, { "epoch": 23.64462106159523, "grad_norm": 2.8746862411499023, "learning_rate": 7.63658813511212e-05, "loss": 0.00715617835521698, "step": 83300 }, { "epoch": 23.647459551518594, "grad_norm": 0.34903642535209656, "learning_rate": 7.636304286119784e-05, "loss": 0.0038349974900484087, "step": 83310 }, { "epoch": 23.650298041441953, "grad_norm": 0.8546062111854553, "learning_rate": 7.63602043712745e-05, "loss": 0.0067264877259731294, "step": 83320 }, { "epoch": 23.653136531365313, "grad_norm": 3.083432912826538, "learning_rate": 7.635736588135112e-05, "loss": 0.00889369398355484, "step": 83330 }, { "epoch": 23.655975021288675, "grad_norm": 4.731603145599365, "learning_rate": 7.635452739142776e-05, "loss": 0.020127904415130616, "step": 83340 }, { "epoch": 23.658813511212035, "grad_norm": 1.9799318313598633, "learning_rate": 7.63516889015044e-05, "loss": 0.008436944335699081, "step": 83350 }, { "epoch": 23.661652001135398, "grad_norm": 7.74187707901001, "learning_rate": 7.634885041158105e-05, "loss": 0.008479569852352143, "step": 83360 }, { "epoch": 23.664490491058757, "grad_norm": 0.1893157809972763, "learning_rate": 7.634601192165768e-05, "loss": 0.0064016193151474, "step": 83370 }, { "epoch": 23.667328980982116, "grad_norm": 1.0991312265396118, "learning_rate": 7.634317343173432e-05, "loss": 0.007808512449264527, "step": 83380 }, { "epoch": 23.67016747090548, "grad_norm": 0.1031964123249054, "learning_rate": 7.634033494181096e-05, "loss": 0.00815063640475273, "step": 83390 }, { "epoch": 23.67300596082884, "grad_norm": 14.569587707519531, "learning_rate": 7.63374964518876e-05, "loss": 0.01709076464176178, "step": 83400 }, { "epoch": 23.6758444507522, "grad_norm": 6.105042457580566, "learning_rate": 7.633465796196424e-05, "loss": 0.007482413202524185, "step": 83410 }, { "epoch": 23.67868294067556, "grad_norm": 8.330972671508789, "learning_rate": 7.633181947204088e-05, "loss": 0.01764088124036789, "step": 83420 }, { "epoch": 23.68152143059892, "grad_norm": 7.741170883178711, "learning_rate": 7.632898098211751e-05, "loss": 0.014732329547405243, "step": 83430 }, { "epoch": 23.684359920522283, "grad_norm": 3.4004404544830322, "learning_rate": 7.632614249219415e-05, "loss": 0.016580626368522644, "step": 83440 }, { "epoch": 23.687198410445642, "grad_norm": 1.7495700120925903, "learning_rate": 7.632330400227079e-05, "loss": 0.008380424231290817, "step": 83450 }, { "epoch": 23.690036900369005, "grad_norm": 4.892657279968262, "learning_rate": 7.632046551234743e-05, "loss": 0.014952945709228515, "step": 83460 }, { "epoch": 23.692875390292365, "grad_norm": 1.9817546606063843, "learning_rate": 7.631762702242408e-05, "loss": 0.011090509593486786, "step": 83470 }, { "epoch": 23.695713880215724, "grad_norm": 0.4966922998428345, "learning_rate": 7.631478853250072e-05, "loss": 0.015612806379795074, "step": 83480 }, { "epoch": 23.698552370139087, "grad_norm": 3.459414482116699, "learning_rate": 7.631195004257734e-05, "loss": 0.016526661813259125, "step": 83490 }, { "epoch": 23.701390860062446, "grad_norm": 0.08895218372344971, "learning_rate": 7.630911155265399e-05, "loss": 0.003358781337738037, "step": 83500 }, { "epoch": 23.701390860062446, "eval_accuracy": 0.9682075411712342, "eval_loss": 0.09861993044614792, "eval_runtime": 32.4496, "eval_samples_per_second": 484.66, "eval_steps_per_second": 7.581, "step": 83500 }, { "epoch": 23.70422934998581, "grad_norm": 12.85888671875, "learning_rate": 7.630627306273063e-05, "loss": 0.012155049294233323, "step": 83510 }, { "epoch": 23.70706783990917, "grad_norm": 2.528745174407959, "learning_rate": 7.630343457280727e-05, "loss": 0.007482944428920746, "step": 83520 }, { "epoch": 23.709906329832528, "grad_norm": 2.1727800369262695, "learning_rate": 7.630059608288391e-05, "loss": 0.004269128292798996, "step": 83530 }, { "epoch": 23.71274481975589, "grad_norm": 0.17869335412979126, "learning_rate": 7.629775759296055e-05, "loss": 0.01526753455400467, "step": 83540 }, { "epoch": 23.71558330967925, "grad_norm": 12.845808982849121, "learning_rate": 7.629491910303719e-05, "loss": 0.012925630807876587, "step": 83550 }, { "epoch": 23.718421799602613, "grad_norm": 0.3440016210079193, "learning_rate": 7.629208061311382e-05, "loss": 0.019649922847747803, "step": 83560 }, { "epoch": 23.721260289525972, "grad_norm": 2.505288600921631, "learning_rate": 7.628924212319046e-05, "loss": 0.016786137223243715, "step": 83570 }, { "epoch": 23.72409877944933, "grad_norm": 1.8294084072113037, "learning_rate": 7.62864036332671e-05, "loss": 0.013619786500930786, "step": 83580 }, { "epoch": 23.726937269372694, "grad_norm": 2.873342752456665, "learning_rate": 7.628356514334374e-05, "loss": 0.019265981018543245, "step": 83590 }, { "epoch": 23.729775759296054, "grad_norm": 0.1916784644126892, "learning_rate": 7.628072665342039e-05, "loss": 0.031936776638031, "step": 83600 }, { "epoch": 23.732614249219417, "grad_norm": 4.064939022064209, "learning_rate": 7.627788816349703e-05, "loss": 0.008780252933502198, "step": 83610 }, { "epoch": 23.735452739142776, "grad_norm": 0.621260941028595, "learning_rate": 7.627504967357366e-05, "loss": 0.015223242342472076, "step": 83620 }, { "epoch": 23.738291229066135, "grad_norm": 0.5598052144050598, "learning_rate": 7.62722111836503e-05, "loss": 0.007516104727983475, "step": 83630 }, { "epoch": 23.741129718989498, "grad_norm": 0.9211838245391846, "learning_rate": 7.626937269372694e-05, "loss": 0.00517229363322258, "step": 83640 }, { "epoch": 23.743968208912857, "grad_norm": 4.200852394104004, "learning_rate": 7.626653420380358e-05, "loss": 0.005148629099130631, "step": 83650 }, { "epoch": 23.74680669883622, "grad_norm": 5.853733062744141, "learning_rate": 7.626369571388022e-05, "loss": 0.015529228746891022, "step": 83660 }, { "epoch": 23.74964518875958, "grad_norm": 8.323141098022461, "learning_rate": 7.626085722395686e-05, "loss": 0.01382160782814026, "step": 83670 }, { "epoch": 23.75248367868294, "grad_norm": 2.699948787689209, "learning_rate": 7.62580187340335e-05, "loss": 0.020977148413658143, "step": 83680 }, { "epoch": 23.755322168606302, "grad_norm": 0.8940754532814026, "learning_rate": 7.625518024411013e-05, "loss": 0.02008947730064392, "step": 83690 }, { "epoch": 23.75816065852966, "grad_norm": 10.38927936553955, "learning_rate": 7.625234175418677e-05, "loss": 0.012113240361213685, "step": 83700 }, { "epoch": 23.760999148453024, "grad_norm": 2.431267499923706, "learning_rate": 7.624950326426341e-05, "loss": 0.004374124109745026, "step": 83710 }, { "epoch": 23.763837638376383, "grad_norm": 2.9984331130981445, "learning_rate": 7.624666477434006e-05, "loss": 0.014554980397224426, "step": 83720 }, { "epoch": 23.766676128299743, "grad_norm": 4.978708744049072, "learning_rate": 7.62438262844167e-05, "loss": 0.015243619680404663, "step": 83730 }, { "epoch": 23.769514618223106, "grad_norm": 0.329736590385437, "learning_rate": 7.624098779449334e-05, "loss": 0.01855323016643524, "step": 83740 }, { "epoch": 23.772353108146465, "grad_norm": 12.14116382598877, "learning_rate": 7.623814930456997e-05, "loss": 0.03478938341140747, "step": 83750 }, { "epoch": 23.775191598069828, "grad_norm": 2.0323081016540527, "learning_rate": 7.623531081464661e-05, "loss": 0.011597354710102082, "step": 83760 }, { "epoch": 23.778030087993187, "grad_norm": 2.683112382888794, "learning_rate": 7.623275617371559e-05, "loss": 0.015265840291976928, "step": 83770 }, { "epoch": 23.78086857791655, "grad_norm": 1.1147269010543823, "learning_rate": 7.622991768379223e-05, "loss": 0.006794743984937668, "step": 83780 }, { "epoch": 23.78370706783991, "grad_norm": 4.140469074249268, "learning_rate": 7.622707919386887e-05, "loss": 0.0138153538107872, "step": 83790 }, { "epoch": 23.78654555776327, "grad_norm": 0.9724971055984497, "learning_rate": 7.62242407039455e-05, "loss": 0.012181229144334792, "step": 83800 }, { "epoch": 23.78938404768663, "grad_norm": 1.8344225883483887, "learning_rate": 7.622140221402214e-05, "loss": 0.017166543006896972, "step": 83810 }, { "epoch": 23.79222253760999, "grad_norm": 4.065749645233154, "learning_rate": 7.621856372409878e-05, "loss": 0.025070038437843323, "step": 83820 }, { "epoch": 23.795061027533354, "grad_norm": 0.6657940745353699, "learning_rate": 7.621572523417542e-05, "loss": 0.0223751038312912, "step": 83830 }, { "epoch": 23.797899517456713, "grad_norm": 14.120619773864746, "learning_rate": 7.621288674425206e-05, "loss": 0.02350504994392395, "step": 83840 }, { "epoch": 23.800738007380073, "grad_norm": 1.6944475173950195, "learning_rate": 7.62100482543287e-05, "loss": 0.014007046818733215, "step": 83850 }, { "epoch": 23.803576497303435, "grad_norm": 6.3545732498168945, "learning_rate": 7.620720976440535e-05, "loss": 0.014083229005336761, "step": 83860 }, { "epoch": 23.806414987226795, "grad_norm": 0.9429182410240173, "learning_rate": 7.620437127448197e-05, "loss": 0.00722980797290802, "step": 83870 }, { "epoch": 23.809253477150158, "grad_norm": 1.3470393419265747, "learning_rate": 7.620153278455862e-05, "loss": 0.015278784930706025, "step": 83880 }, { "epoch": 23.812091967073517, "grad_norm": 3.320622205734253, "learning_rate": 7.619869429463526e-05, "loss": 0.004866759479045868, "step": 83890 }, { "epoch": 23.814930456996876, "grad_norm": 0.35066312551498413, "learning_rate": 7.61958558047119e-05, "loss": 0.009989409148693085, "step": 83900 }, { "epoch": 23.81776894692024, "grad_norm": 0.5345789194107056, "learning_rate": 7.619301731478854e-05, "loss": 0.003192286565899849, "step": 83910 }, { "epoch": 23.8206074368436, "grad_norm": 2.0772011280059814, "learning_rate": 7.619017882486518e-05, "loss": 0.00603991262614727, "step": 83920 }, { "epoch": 23.82344592676696, "grad_norm": 12.612524032592773, "learning_rate": 7.618734033494181e-05, "loss": 0.018510225415229797, "step": 83930 }, { "epoch": 23.82628441669032, "grad_norm": 1.5009267330169678, "learning_rate": 7.618450184501845e-05, "loss": 0.007368786633014679, "step": 83940 }, { "epoch": 23.82912290661368, "grad_norm": 2.1319992542266846, "learning_rate": 7.618166335509509e-05, "loss": 0.02300973832607269, "step": 83950 }, { "epoch": 23.831961396537043, "grad_norm": 4.3605055809021, "learning_rate": 7.617882486517173e-05, "loss": 0.007141160219907761, "step": 83960 }, { "epoch": 23.834799886460402, "grad_norm": 0.17979754507541656, "learning_rate": 7.617598637524837e-05, "loss": 0.013762891292572021, "step": 83970 }, { "epoch": 23.837638376383765, "grad_norm": 1.785412311553955, "learning_rate": 7.617314788532502e-05, "loss": 0.01592242568731308, "step": 83980 }, { "epoch": 23.840476866307124, "grad_norm": 0.3671169877052307, "learning_rate": 7.617030939540166e-05, "loss": 0.013170200586318969, "step": 83990 }, { "epoch": 23.843315356230484, "grad_norm": 6.230311870574951, "learning_rate": 7.616747090547829e-05, "loss": 0.022347329556941985, "step": 84000 }, { "epoch": 23.843315356230484, "eval_accuracy": 0.965791314300248, "eval_loss": 0.11082004010677338, "eval_runtime": 32.7698, "eval_samples_per_second": 479.924, "eval_steps_per_second": 7.507, "step": 84000 }, { "epoch": 23.846153846153847, "grad_norm": 0.759524405002594, "learning_rate": 7.616463241555493e-05, "loss": 0.008841366320848466, "step": 84010 }, { "epoch": 23.848992336077206, "grad_norm": 0.2547348141670227, "learning_rate": 7.616179392563157e-05, "loss": 0.004966495186090469, "step": 84020 }, { "epoch": 23.85183082600057, "grad_norm": 0.886723518371582, "learning_rate": 7.61589554357082e-05, "loss": 0.005022808164358139, "step": 84030 }, { "epoch": 23.85466931592393, "grad_norm": 0.229450523853302, "learning_rate": 7.615611694578485e-05, "loss": 0.01749954968690872, "step": 84040 }, { "epoch": 23.857507805847288, "grad_norm": 1.2417163848876953, "learning_rate": 7.615327845586149e-05, "loss": 0.004567822068929672, "step": 84050 }, { "epoch": 23.86034629577065, "grad_norm": 3.470907688140869, "learning_rate": 7.615043996593812e-05, "loss": 0.020869632065296174, "step": 84060 }, { "epoch": 23.86318478569401, "grad_norm": 0.49826672673225403, "learning_rate": 7.614760147601476e-05, "loss": 0.008900497853755952, "step": 84070 }, { "epoch": 23.866023275617373, "grad_norm": 2.1810293197631836, "learning_rate": 7.61447629860914e-05, "loss": 0.011838518828153611, "step": 84080 }, { "epoch": 23.868861765540732, "grad_norm": 6.328418731689453, "learning_rate": 7.614192449616804e-05, "loss": 0.011463573575019837, "step": 84090 }, { "epoch": 23.87170025546409, "grad_norm": 1.2754799127578735, "learning_rate": 7.613908600624469e-05, "loss": 0.01238202080130577, "step": 84100 }, { "epoch": 23.874538745387454, "grad_norm": 1.2204383611679077, "learning_rate": 7.613624751632133e-05, "loss": 0.010947038233280183, "step": 84110 }, { "epoch": 23.877377235310814, "grad_norm": 0.5982845425605774, "learning_rate": 7.613340902639797e-05, "loss": 0.018706832826137543, "step": 84120 }, { "epoch": 23.880215725234176, "grad_norm": 10.973430633544922, "learning_rate": 7.61305705364746e-05, "loss": 0.03058081567287445, "step": 84130 }, { "epoch": 23.883054215157536, "grad_norm": 7.948242664337158, "learning_rate": 7.612773204655124e-05, "loss": 0.0066550873219966885, "step": 84140 }, { "epoch": 23.8858927050809, "grad_norm": 1.1548420190811157, "learning_rate": 7.612489355662788e-05, "loss": 0.010352612286806107, "step": 84150 }, { "epoch": 23.888731195004258, "grad_norm": 1.1410589218139648, "learning_rate": 7.61220550667045e-05, "loss": 0.005702554434537888, "step": 84160 }, { "epoch": 23.891569684927617, "grad_norm": 8.575799942016602, "learning_rate": 7.611921657678116e-05, "loss": 0.024443334341049193, "step": 84170 }, { "epoch": 23.89440817485098, "grad_norm": 1.1583493947982788, "learning_rate": 7.61163780868578e-05, "loss": 0.0034597612917423247, "step": 84180 }, { "epoch": 23.89724666477434, "grad_norm": 9.375972747802734, "learning_rate": 7.611353959693443e-05, "loss": 0.009630904346704484, "step": 84190 }, { "epoch": 23.900085154697702, "grad_norm": 3.15425968170166, "learning_rate": 7.611070110701107e-05, "loss": 0.018268083035945893, "step": 84200 }, { "epoch": 23.902923644621062, "grad_norm": 0.2165924459695816, "learning_rate": 7.610786261708771e-05, "loss": 0.009226091951131821, "step": 84210 }, { "epoch": 23.90576213454442, "grad_norm": 0.3494337797164917, "learning_rate": 7.610502412716435e-05, "loss": 0.008549485355615616, "step": 84220 }, { "epoch": 23.908600624467784, "grad_norm": 2.8992931842803955, "learning_rate": 7.6102185637241e-05, "loss": 0.009287688881158829, "step": 84230 }, { "epoch": 23.911439114391143, "grad_norm": 4.939039707183838, "learning_rate": 7.609934714731764e-05, "loss": 0.019577112793922425, "step": 84240 }, { "epoch": 23.914277604314506, "grad_norm": 1.534653902053833, "learning_rate": 7.609650865739428e-05, "loss": 0.01008881777524948, "step": 84250 }, { "epoch": 23.917116094237866, "grad_norm": 3.809917449951172, "learning_rate": 7.60936701674709e-05, "loss": 0.02685166001319885, "step": 84260 }, { "epoch": 23.919954584161225, "grad_norm": 14.699606895446777, "learning_rate": 7.609083167754755e-05, "loss": 0.02274409234523773, "step": 84270 }, { "epoch": 23.922793074084588, "grad_norm": 14.207786560058594, "learning_rate": 7.608799318762419e-05, "loss": 0.03492521643638611, "step": 84280 }, { "epoch": 23.925631564007947, "grad_norm": 1.874414324760437, "learning_rate": 7.608515469770082e-05, "loss": 0.009038679301738739, "step": 84290 }, { "epoch": 23.92847005393131, "grad_norm": 13.659541130065918, "learning_rate": 7.608231620777747e-05, "loss": 0.011327023059129715, "step": 84300 }, { "epoch": 23.93130854385467, "grad_norm": 7.141719341278076, "learning_rate": 7.607947771785411e-05, "loss": 0.008892245590686798, "step": 84310 }, { "epoch": 23.93414703377803, "grad_norm": 1.0897291898727417, "learning_rate": 7.607663922793074e-05, "loss": 0.01939873993396759, "step": 84320 }, { "epoch": 23.93698552370139, "grad_norm": 9.336360931396484, "learning_rate": 7.607380073800738e-05, "loss": 0.012104091048240662, "step": 84330 }, { "epoch": 23.93982401362475, "grad_norm": 7.535465717315674, "learning_rate": 7.607096224808402e-05, "loss": 0.01173306182026863, "step": 84340 }, { "epoch": 23.942662503548114, "grad_norm": 1.202559471130371, "learning_rate": 7.606812375816067e-05, "loss": 0.03088620901107788, "step": 84350 }, { "epoch": 23.945500993471473, "grad_norm": 3.900709390640259, "learning_rate": 7.606528526823729e-05, "loss": 0.03329380750656128, "step": 84360 }, { "epoch": 23.948339483394832, "grad_norm": 7.0889716148376465, "learning_rate": 7.606244677831395e-05, "loss": 0.007630067318677903, "step": 84370 }, { "epoch": 23.951177973318195, "grad_norm": 0.25361502170562744, "learning_rate": 7.605960828839058e-05, "loss": 0.01660737246274948, "step": 84380 }, { "epoch": 23.954016463241555, "grad_norm": 0.27367648482322693, "learning_rate": 7.605676979846722e-05, "loss": 0.018328465521335602, "step": 84390 }, { "epoch": 23.956854953164918, "grad_norm": 2.3243510723114014, "learning_rate": 7.605393130854386e-05, "loss": 0.013505256175994873, "step": 84400 }, { "epoch": 23.959693443088277, "grad_norm": 9.808854103088379, "learning_rate": 7.60510928186205e-05, "loss": 0.027745193243026732, "step": 84410 }, { "epoch": 23.962531933011636, "grad_norm": 1.3662803173065186, "learning_rate": 7.604825432869713e-05, "loss": 0.00739012211561203, "step": 84420 }, { "epoch": 23.965370422935, "grad_norm": 5.0183281898498535, "learning_rate": 7.604541583877378e-05, "loss": 0.013295121490955353, "step": 84430 }, { "epoch": 23.96820891285836, "grad_norm": 0.8835544586181641, "learning_rate": 7.604257734885042e-05, "loss": 0.02955119013786316, "step": 84440 }, { "epoch": 23.97104740278172, "grad_norm": 13.038615226745605, "learning_rate": 7.603973885892705e-05, "loss": 0.016930481791496275, "step": 84450 }, { "epoch": 23.97388589270508, "grad_norm": 0.285456120967865, "learning_rate": 7.60369003690037e-05, "loss": 0.010375700145959853, "step": 84460 }, { "epoch": 23.97672438262844, "grad_norm": 0.826026976108551, "learning_rate": 7.603406187908033e-05, "loss": 0.005427590012550354, "step": 84470 }, { "epoch": 23.979562872551803, "grad_norm": 0.5882053971290588, "learning_rate": 7.603122338915696e-05, "loss": 0.031773269176483154, "step": 84480 }, { "epoch": 23.982401362475162, "grad_norm": 0.34798431396484375, "learning_rate": 7.60283848992336e-05, "loss": 0.017507794499397277, "step": 84490 }, { "epoch": 23.985239852398525, "grad_norm": 18.65087890625, "learning_rate": 7.602554640931026e-05, "loss": 0.010562290251255036, "step": 84500 }, { "epoch": 23.985239852398525, "eval_accuracy": 0.9675081070770013, "eval_loss": 0.10572259873151779, "eval_runtime": 31.9737, "eval_samples_per_second": 491.874, "eval_steps_per_second": 7.694, "step": 84500 }, { "epoch": 23.988078342321884, "grad_norm": 1.9048060178756714, "learning_rate": 7.602270791938689e-05, "loss": 0.021375086903572083, "step": 84510 }, { "epoch": 23.990916832245247, "grad_norm": 0.3980805575847626, "learning_rate": 7.601986942946353e-05, "loss": 0.026813524961471557, "step": 84520 }, { "epoch": 23.993755322168607, "grad_norm": 2.8301875591278076, "learning_rate": 7.601703093954017e-05, "loss": 0.012315025180578231, "step": 84530 }, { "epoch": 23.996593812091966, "grad_norm": 0.46884873509407043, "learning_rate": 7.601419244961681e-05, "loss": 0.021125704050064087, "step": 84540 }, { "epoch": 23.99943230201533, "grad_norm": 3.358616352081299, "learning_rate": 7.601135395969344e-05, "loss": 0.013842548429965972, "step": 84550 }, { "epoch": 24.002270791938688, "grad_norm": 2.1204473972320557, "learning_rate": 7.600851546977008e-05, "loss": 0.013447676599025727, "step": 84560 }, { "epoch": 24.00510928186205, "grad_norm": 0.10345333814620972, "learning_rate": 7.600567697984673e-05, "loss": 0.01847100257873535, "step": 84570 }, { "epoch": 24.00794777178541, "grad_norm": 0.26038622856140137, "learning_rate": 7.600283848992336e-05, "loss": 0.012046968936920166, "step": 84580 }, { "epoch": 24.01078626170877, "grad_norm": 3.4501569271087646, "learning_rate": 7.6e-05, "loss": 0.010232838988304137, "step": 84590 }, { "epoch": 24.013624751632133, "grad_norm": 0.2959158718585968, "learning_rate": 7.599716151007665e-05, "loss": 0.004404805973172188, "step": 84600 }, { "epoch": 24.016463241555492, "grad_norm": 0.6889228224754333, "learning_rate": 7.599432302015327e-05, "loss": 0.014449702203273773, "step": 84610 }, { "epoch": 24.019301731478855, "grad_norm": 0.9879194498062134, "learning_rate": 7.599148453022991e-05, "loss": 0.005400035530328751, "step": 84620 }, { "epoch": 24.022140221402214, "grad_norm": 3.965278148651123, "learning_rate": 7.598864604030657e-05, "loss": 0.026307129859924318, "step": 84630 }, { "epoch": 24.024978711325573, "grad_norm": 2.2091922760009766, "learning_rate": 7.59858075503832e-05, "loss": 0.01141185313463211, "step": 84640 }, { "epoch": 24.027817201248936, "grad_norm": 0.5622738003730774, "learning_rate": 7.598296906045984e-05, "loss": 0.006513230502605438, "step": 84650 }, { "epoch": 24.030655691172296, "grad_norm": 0.21021191775798798, "learning_rate": 7.598013057053648e-05, "loss": 0.0035309791564941406, "step": 84660 }, { "epoch": 24.03349418109566, "grad_norm": 2.49345064163208, "learning_rate": 7.597729208061312e-05, "loss": 0.007036945223808289, "step": 84670 }, { "epoch": 24.036332671019018, "grad_norm": 9.031028747558594, "learning_rate": 7.597445359068975e-05, "loss": 0.008424460887908936, "step": 84680 }, { "epoch": 24.039171160942377, "grad_norm": 3.8753223419189453, "learning_rate": 7.597161510076639e-05, "loss": 0.012846311926841736, "step": 84690 }, { "epoch": 24.04200965086574, "grad_norm": 3.0023934841156006, "learning_rate": 7.596877661084305e-05, "loss": 0.007488427311182022, "step": 84700 }, { "epoch": 24.0448481407891, "grad_norm": 8.386985778808594, "learning_rate": 7.596593812091967e-05, "loss": 0.019355453550815582, "step": 84710 }, { "epoch": 24.047686630712462, "grad_norm": 0.48925352096557617, "learning_rate": 7.596309963099631e-05, "loss": 0.0070985168218612674, "step": 84720 }, { "epoch": 24.05052512063582, "grad_norm": 5.458119869232178, "learning_rate": 7.596026114107296e-05, "loss": 0.009399396181106568, "step": 84730 }, { "epoch": 24.05336361055918, "grad_norm": 3.70332670211792, "learning_rate": 7.595742265114958e-05, "loss": 0.017363426089286805, "step": 84740 }, { "epoch": 24.056202100482544, "grad_norm": 5.7898430824279785, "learning_rate": 7.595458416122623e-05, "loss": 0.02179422527551651, "step": 84750 }, { "epoch": 24.059040590405903, "grad_norm": 0.608582615852356, "learning_rate": 7.595174567130287e-05, "loss": 0.009582144767045974, "step": 84760 }, { "epoch": 24.061879080329266, "grad_norm": 0.713959276676178, "learning_rate": 7.594890718137951e-05, "loss": 0.005781042575836182, "step": 84770 }, { "epoch": 24.064717570252625, "grad_norm": 14.982464790344238, "learning_rate": 7.594606869145615e-05, "loss": 0.010399052500724792, "step": 84780 }, { "epoch": 24.067556060175985, "grad_norm": 0.11601941287517548, "learning_rate": 7.594323020153279e-05, "loss": 0.0038609974086284637, "step": 84790 }, { "epoch": 24.070394550099348, "grad_norm": 0.48062312602996826, "learning_rate": 7.594039171160943e-05, "loss": 0.00273037888109684, "step": 84800 }, { "epoch": 24.073233040022707, "grad_norm": 3.332939386367798, "learning_rate": 7.593755322168606e-05, "loss": 0.006208663806319237, "step": 84810 }, { "epoch": 24.07607152994607, "grad_norm": 5.165525436401367, "learning_rate": 7.59347147317627e-05, "loss": 0.0037791263312101364, "step": 84820 }, { "epoch": 24.07891001986943, "grad_norm": 1.2610677480697632, "learning_rate": 7.593187624183936e-05, "loss": 0.006203094124794006, "step": 84830 }, { "epoch": 24.08174850979279, "grad_norm": 0.13150855898857117, "learning_rate": 7.592903775191598e-05, "loss": 0.0036239050328731536, "step": 84840 }, { "epoch": 24.08458699971615, "grad_norm": 0.5227026343345642, "learning_rate": 7.592619926199263e-05, "loss": 0.007079766690731048, "step": 84850 }, { "epoch": 24.08742548963951, "grad_norm": 8.103550910949707, "learning_rate": 7.592336077206927e-05, "loss": 0.008645490556955338, "step": 84860 }, { "epoch": 24.090263979562874, "grad_norm": 0.18947942554950714, "learning_rate": 7.59205222821459e-05, "loss": 0.011797967553138732, "step": 84870 }, { "epoch": 24.093102469486233, "grad_norm": 0.38045382499694824, "learning_rate": 7.591768379222254e-05, "loss": 0.011091934144496917, "step": 84880 }, { "epoch": 24.095940959409592, "grad_norm": 1.7371768951416016, "learning_rate": 7.591484530229918e-05, "loss": 0.010108402371406556, "step": 84890 }, { "epoch": 24.098779449332955, "grad_norm": 3.0963540077209473, "learning_rate": 7.591200681237582e-05, "loss": 0.004754865169525146, "step": 84900 }, { "epoch": 24.101617939256315, "grad_norm": 0.43404924869537354, "learning_rate": 7.590916832245246e-05, "loss": 0.004050199687480926, "step": 84910 }, { "epoch": 24.104456429179677, "grad_norm": 0.383226603269577, "learning_rate": 7.59063298325291e-05, "loss": 0.005064722150564194, "step": 84920 }, { "epoch": 24.107294919103037, "grad_norm": 5.937305450439453, "learning_rate": 7.590349134260574e-05, "loss": 0.01660972833633423, "step": 84930 }, { "epoch": 24.1101334090264, "grad_norm": 6.189237117767334, "learning_rate": 7.590065285268237e-05, "loss": 0.005902718007564545, "step": 84940 }, { "epoch": 24.11297189894976, "grad_norm": 3.1979804039001465, "learning_rate": 7.589781436275901e-05, "loss": 0.012903961539268493, "step": 84950 }, { "epoch": 24.11581038887312, "grad_norm": 1.1772693395614624, "learning_rate": 7.589497587283565e-05, "loss": 0.01269942820072174, "step": 84960 }, { "epoch": 24.11864887879648, "grad_norm": 1.1022330522537231, "learning_rate": 7.58921373829123e-05, "loss": 0.007412086427211762, "step": 84970 }, { "epoch": 24.12148736871984, "grad_norm": 1.8838292360305786, "learning_rate": 7.588929889298894e-05, "loss": 0.005235916748642921, "step": 84980 }, { "epoch": 24.124325858643203, "grad_norm": 2.305962085723877, "learning_rate": 7.588646040306558e-05, "loss": 0.01819613128900528, "step": 84990 }, { "epoch": 24.127164348566563, "grad_norm": 1.5014597177505493, "learning_rate": 7.58836219131422e-05, "loss": 0.005534688383340836, "step": 85000 }, { "epoch": 24.127164348566563, "eval_accuracy": 0.9657277293825904, "eval_loss": 0.1106887087225914, "eval_runtime": 32.0008, "eval_samples_per_second": 491.457, "eval_steps_per_second": 7.687, "step": 85000 }, { "epoch": 24.130002838489922, "grad_norm": 7.664631366729736, "learning_rate": 7.588078342321885e-05, "loss": 0.008475860953330994, "step": 85010 }, { "epoch": 24.132841328413285, "grad_norm": 7.520578384399414, "learning_rate": 7.587794493329549e-05, "loss": 0.024843305349349976, "step": 85020 }, { "epoch": 24.135679818336644, "grad_norm": 5.945399761199951, "learning_rate": 7.587510644337213e-05, "loss": 0.01612308919429779, "step": 85030 }, { "epoch": 24.138518308260007, "grad_norm": 2.361400842666626, "learning_rate": 7.587226795344877e-05, "loss": 0.01494961678981781, "step": 85040 }, { "epoch": 24.141356798183367, "grad_norm": 11.86889362335205, "learning_rate": 7.586942946352541e-05, "loss": 0.01900387406349182, "step": 85050 }, { "epoch": 24.144195288106726, "grad_norm": 0.2923731803894043, "learning_rate": 7.586659097360205e-05, "loss": 0.0044320538640022274, "step": 85060 }, { "epoch": 24.14703377803009, "grad_norm": 0.5926135182380676, "learning_rate": 7.586375248367868e-05, "loss": 0.006633836030960083, "step": 85070 }, { "epoch": 24.149872267953448, "grad_norm": 2.749441146850586, "learning_rate": 7.586091399375532e-05, "loss": 0.017066454887390135, "step": 85080 }, { "epoch": 24.15271075787681, "grad_norm": 0.2300490438938141, "learning_rate": 7.585807550383196e-05, "loss": 0.009375964105129243, "step": 85090 }, { "epoch": 24.15554924780017, "grad_norm": 1.776023507118225, "learning_rate": 7.58552370139086e-05, "loss": 0.013631683588027955, "step": 85100 }, { "epoch": 24.15838773772353, "grad_norm": 11.041183471679688, "learning_rate": 7.585239852398525e-05, "loss": 0.008951046317815781, "step": 85110 }, { "epoch": 24.161226227646893, "grad_norm": 2.2577743530273438, "learning_rate": 7.584956003406189e-05, "loss": 0.009594593942165375, "step": 85120 }, { "epoch": 24.164064717570252, "grad_norm": 1.3612792491912842, "learning_rate": 7.584672154413852e-05, "loss": 0.008569695055484772, "step": 85130 }, { "epoch": 24.166903207493615, "grad_norm": 11.403969764709473, "learning_rate": 7.584388305421516e-05, "loss": 0.0077738739550113675, "step": 85140 }, { "epoch": 24.169741697416974, "grad_norm": 2.2700397968292236, "learning_rate": 7.58410445642918e-05, "loss": 0.004367386549711227, "step": 85150 }, { "epoch": 24.172580187340333, "grad_norm": 3.2118048667907715, "learning_rate": 7.583820607436844e-05, "loss": 0.004065866023302079, "step": 85160 }, { "epoch": 24.175418677263696, "grad_norm": 1.2803608179092407, "learning_rate": 7.583536758444508e-05, "loss": 0.016695743799209593, "step": 85170 }, { "epoch": 24.178257167187056, "grad_norm": 3.2404940128326416, "learning_rate": 7.583252909452172e-05, "loss": 0.013581687211990356, "step": 85180 }, { "epoch": 24.18109565711042, "grad_norm": 2.400435209274292, "learning_rate": 7.582969060459836e-05, "loss": 0.00726730227470398, "step": 85190 }, { "epoch": 24.183934147033778, "grad_norm": 12.427816390991211, "learning_rate": 7.582685211467499e-05, "loss": 0.02270824760198593, "step": 85200 }, { "epoch": 24.186772636957137, "grad_norm": 3.7422256469726562, "learning_rate": 7.582401362475163e-05, "loss": 0.022049340605735778, "step": 85210 }, { "epoch": 24.1896111268805, "grad_norm": 0.34341269731521606, "learning_rate": 7.582117513482828e-05, "loss": 0.019231058657169342, "step": 85220 }, { "epoch": 24.19244961680386, "grad_norm": 16.335460662841797, "learning_rate": 7.581833664490492e-05, "loss": 0.03085731267929077, "step": 85230 }, { "epoch": 24.195288106727222, "grad_norm": 1.7565821409225464, "learning_rate": 7.581549815498156e-05, "loss": 0.01612974554300308, "step": 85240 }, { "epoch": 24.19812659665058, "grad_norm": 12.592902183532715, "learning_rate": 7.58126596650582e-05, "loss": 0.011208910495042801, "step": 85250 }, { "epoch": 24.20096508657394, "grad_norm": 6.833895206451416, "learning_rate": 7.580982117513483e-05, "loss": 0.010031253844499589, "step": 85260 }, { "epoch": 24.203803576497304, "grad_norm": 1.4420963525772095, "learning_rate": 7.580698268521147e-05, "loss": 0.01829663962125778, "step": 85270 }, { "epoch": 24.206642066420663, "grad_norm": 2.397555351257324, "learning_rate": 7.580414419528811e-05, "loss": 0.01912621855735779, "step": 85280 }, { "epoch": 24.209480556344026, "grad_norm": 2.0013787746429443, "learning_rate": 7.580130570536475e-05, "loss": 0.010009374469518661, "step": 85290 }, { "epoch": 24.212319046267385, "grad_norm": 13.157902717590332, "learning_rate": 7.579846721544139e-05, "loss": 0.022078612446784975, "step": 85300 }, { "epoch": 24.215157536190745, "grad_norm": 0.7067900896072388, "learning_rate": 7.579562872551803e-05, "loss": 0.015771131217479705, "step": 85310 }, { "epoch": 24.217996026114108, "grad_norm": 11.601316452026367, "learning_rate": 7.579279023559466e-05, "loss": 0.014154157042503357, "step": 85320 }, { "epoch": 24.220834516037467, "grad_norm": 2.4811770915985107, "learning_rate": 7.57899517456713e-05, "loss": 0.006140285357832909, "step": 85330 }, { "epoch": 24.22367300596083, "grad_norm": 0.33110931515693665, "learning_rate": 7.578711325574794e-05, "loss": 0.01483476310968399, "step": 85340 }, { "epoch": 24.22651149588419, "grad_norm": 0.7763314247131348, "learning_rate": 7.578427476582459e-05, "loss": 0.022756025195121765, "step": 85350 }, { "epoch": 24.229349985807552, "grad_norm": 1.5477824211120605, "learning_rate": 7.578143627590121e-05, "loss": 0.004848732054233551, "step": 85360 }, { "epoch": 24.23218847573091, "grad_norm": 0.3319757878780365, "learning_rate": 7.577859778597787e-05, "loss": 0.011671453714370728, "step": 85370 }, { "epoch": 24.23502696565427, "grad_norm": 0.3363244831562042, "learning_rate": 7.577575929605451e-05, "loss": 0.012435142695903779, "step": 85380 }, { "epoch": 24.237865455577634, "grad_norm": 0.23122678697109222, "learning_rate": 7.577292080613114e-05, "loss": 0.017498072981834412, "step": 85390 }, { "epoch": 24.240703945500993, "grad_norm": 10.567215919494629, "learning_rate": 7.577008231620778e-05, "loss": 0.007359525561332703, "step": 85400 }, { "epoch": 24.243542435424356, "grad_norm": 2.3252737522125244, "learning_rate": 7.576724382628442e-05, "loss": 0.006452349573373794, "step": 85410 }, { "epoch": 24.246380925347715, "grad_norm": 8.361401557922363, "learning_rate": 7.576440533636105e-05, "loss": 0.018063643574714662, "step": 85420 }, { "epoch": 24.249219415271074, "grad_norm": 3.543668270111084, "learning_rate": 7.57615668464377e-05, "loss": 0.015395312011241913, "step": 85430 }, { "epoch": 24.252057905194437, "grad_norm": 0.7056883573532104, "learning_rate": 7.575872835651434e-05, "loss": 0.01463746577501297, "step": 85440 }, { "epoch": 24.254896395117797, "grad_norm": 10.48837661743164, "learning_rate": 7.575588986659097e-05, "loss": 0.010624726861715316, "step": 85450 }, { "epoch": 24.25773488504116, "grad_norm": 2.8735995292663574, "learning_rate": 7.575305137666761e-05, "loss": 0.009501266479492187, "step": 85460 }, { "epoch": 24.26057337496452, "grad_norm": 0.900643527507782, "learning_rate": 7.575021288674426e-05, "loss": 0.00846024751663208, "step": 85470 }, { "epoch": 24.26341186488788, "grad_norm": 0.6239857077598572, "learning_rate": 7.57473743968209e-05, "loss": 0.007527311146259308, "step": 85480 }, { "epoch": 24.26625035481124, "grad_norm": 6.811006546020508, "learning_rate": 7.574453590689752e-05, "loss": 0.007985348254442215, "step": 85490 }, { "epoch": 24.2690888447346, "grad_norm": 0.8450534343719482, "learning_rate": 7.574169741697418e-05, "loss": 0.005784529075026512, "step": 85500 }, { "epoch": 24.2690888447346, "eval_accuracy": 0.9721498060660011, "eval_loss": 0.08942712843418121, "eval_runtime": 32.454, "eval_samples_per_second": 484.594, "eval_steps_per_second": 7.58, "step": 85500 }, { "epoch": 24.271927334657963, "grad_norm": 3.869919538497925, "learning_rate": 7.573885892705082e-05, "loss": 0.008161671459674835, "step": 85510 }, { "epoch": 24.274765824581323, "grad_norm": 2.0412440299987793, "learning_rate": 7.573602043712745e-05, "loss": 0.015182547271251678, "step": 85520 }, { "epoch": 24.277604314504682, "grad_norm": 0.23042835295200348, "learning_rate": 7.573318194720409e-05, "loss": 0.010144972801208496, "step": 85530 }, { "epoch": 24.280442804428045, "grad_norm": 3.158698081970215, "learning_rate": 7.573034345728073e-05, "loss": 0.01009099930524826, "step": 85540 }, { "epoch": 24.283281294351404, "grad_norm": 1.1883310079574585, "learning_rate": 7.572750496735736e-05, "loss": 0.006799289584159851, "step": 85550 }, { "epoch": 24.286119784274767, "grad_norm": 1.3850857019424438, "learning_rate": 7.572466647743401e-05, "loss": 0.007283708453178406, "step": 85560 }, { "epoch": 24.288958274198126, "grad_norm": 0.08487583696842194, "learning_rate": 7.572182798751066e-05, "loss": 0.012294325232505798, "step": 85570 }, { "epoch": 24.291796764121486, "grad_norm": 2.192869186401367, "learning_rate": 7.571898949758728e-05, "loss": 0.015506701171398162, "step": 85580 }, { "epoch": 24.29463525404485, "grad_norm": 0.125902459025383, "learning_rate": 7.571615100766392e-05, "loss": 0.0205858051776886, "step": 85590 }, { "epoch": 24.297473743968208, "grad_norm": 9.355557441711426, "learning_rate": 7.571331251774057e-05, "loss": 0.018552133440971376, "step": 85600 }, { "epoch": 24.30031223389157, "grad_norm": 2.395425796508789, "learning_rate": 7.571047402781721e-05, "loss": 0.004945536330342293, "step": 85610 }, { "epoch": 24.30315072381493, "grad_norm": 1.101117730140686, "learning_rate": 7.570763553789384e-05, "loss": 0.013471022248268127, "step": 85620 }, { "epoch": 24.30598921373829, "grad_norm": 0.2889711856842041, "learning_rate": 7.570479704797049e-05, "loss": 0.0149894580245018, "step": 85630 }, { "epoch": 24.308827703661652, "grad_norm": 4.994829177856445, "learning_rate": 7.570195855804713e-05, "loss": 0.009349507838487625, "step": 85640 }, { "epoch": 24.311666193585012, "grad_norm": 1.0628548860549927, "learning_rate": 7.569912006812376e-05, "loss": 0.01000620350241661, "step": 85650 }, { "epoch": 24.314504683508375, "grad_norm": 11.063276290893555, "learning_rate": 7.56962815782004e-05, "loss": 0.018959511816501618, "step": 85660 }, { "epoch": 24.317343173431734, "grad_norm": 0.08882137387990952, "learning_rate": 7.569344308827704e-05, "loss": 0.007786166667938232, "step": 85670 }, { "epoch": 24.320181663355093, "grad_norm": 2.3768861293792725, "learning_rate": 7.569060459835367e-05, "loss": 0.008636796474456787, "step": 85680 }, { "epoch": 24.323020153278456, "grad_norm": 5.162334442138672, "learning_rate": 7.568776610843031e-05, "loss": 0.011789308488368988, "step": 85690 }, { "epoch": 24.325858643201816, "grad_norm": 1.7718873023986816, "learning_rate": 7.568492761850697e-05, "loss": 0.011643117666244507, "step": 85700 }, { "epoch": 24.32869713312518, "grad_norm": 3.10215163230896, "learning_rate": 7.56820891285836e-05, "loss": 0.011649016290903091, "step": 85710 }, { "epoch": 24.331535623048538, "grad_norm": 1.8847545385360718, "learning_rate": 7.567925063866024e-05, "loss": 0.008386296778917312, "step": 85720 }, { "epoch": 24.3343741129719, "grad_norm": 0.15926982462406158, "learning_rate": 7.567641214873688e-05, "loss": 0.017521002888679506, "step": 85730 }, { "epoch": 24.33721260289526, "grad_norm": 1.864924669265747, "learning_rate": 7.567357365881352e-05, "loss": 0.009884700179100037, "step": 85740 }, { "epoch": 24.34005109281862, "grad_norm": 0.6104827523231506, "learning_rate": 7.567073516889015e-05, "loss": 0.00510796457529068, "step": 85750 }, { "epoch": 24.342889582741982, "grad_norm": 5.961177349090576, "learning_rate": 7.56678966789668e-05, "loss": 0.014258350431919097, "step": 85760 }, { "epoch": 24.34572807266534, "grad_norm": 1.9542880058288574, "learning_rate": 7.566505818904344e-05, "loss": 0.013351155817508698, "step": 85770 }, { "epoch": 24.348566562588704, "grad_norm": 1.545385718345642, "learning_rate": 7.566221969912007e-05, "loss": 0.00917430892586708, "step": 85780 }, { "epoch": 24.351405052512064, "grad_norm": 0.3673706352710724, "learning_rate": 7.565938120919671e-05, "loss": 0.008816666156053542, "step": 85790 }, { "epoch": 24.354243542435423, "grad_norm": 0.5473952889442444, "learning_rate": 7.565654271927335e-05, "loss": 0.0052477136254310604, "step": 85800 }, { "epoch": 24.357082032358786, "grad_norm": 1.6478450298309326, "learning_rate": 7.565370422934998e-05, "loss": 0.008570219576358794, "step": 85810 }, { "epoch": 24.359920522282145, "grad_norm": 1.4293564558029175, "learning_rate": 7.565086573942662e-05, "loss": 0.011023954302072526, "step": 85820 }, { "epoch": 24.362759012205508, "grad_norm": 0.10506630688905716, "learning_rate": 7.564802724950328e-05, "loss": 0.010192238539457322, "step": 85830 }, { "epoch": 24.365597502128868, "grad_norm": 0.05637767165899277, "learning_rate": 7.56451887595799e-05, "loss": 0.016657623648643493, "step": 85840 }, { "epoch": 24.368435992052227, "grad_norm": 0.24487879872322083, "learning_rate": 7.564235026965655e-05, "loss": 0.017416645586490632, "step": 85850 }, { "epoch": 24.37127448197559, "grad_norm": 0.6654809713363647, "learning_rate": 7.563951177973319e-05, "loss": 0.007884503901004791, "step": 85860 }, { "epoch": 24.37411297189895, "grad_norm": 8.374706268310547, "learning_rate": 7.563667328980983e-05, "loss": 0.014122214913368226, "step": 85870 }, { "epoch": 24.376951461822312, "grad_norm": 0.3887140452861786, "learning_rate": 7.563383479988646e-05, "loss": 0.012857778370380402, "step": 85880 }, { "epoch": 24.37978995174567, "grad_norm": 10.01579761505127, "learning_rate": 7.56309963099631e-05, "loss": 0.03637037575244904, "step": 85890 }, { "epoch": 24.38262844166903, "grad_norm": 3.512822151184082, "learning_rate": 7.562815782003975e-05, "loss": 0.005749260261654854, "step": 85900 }, { "epoch": 24.385466931592394, "grad_norm": 0.3181319832801819, "learning_rate": 7.562531933011638e-05, "loss": 0.015750199556350708, "step": 85910 }, { "epoch": 24.388305421515753, "grad_norm": 6.515655040740967, "learning_rate": 7.562248084019302e-05, "loss": 0.010526826977729798, "step": 85920 }, { "epoch": 24.391143911439116, "grad_norm": 13.040922164916992, "learning_rate": 7.561964235026966e-05, "loss": 0.01679864376783371, "step": 85930 }, { "epoch": 24.393982401362475, "grad_norm": 7.477162837982178, "learning_rate": 7.561680386034629e-05, "loss": 0.009164389967918397, "step": 85940 }, { "epoch": 24.396820891285834, "grad_norm": 1.548324704170227, "learning_rate": 7.561396537042293e-05, "loss": 0.01286749392747879, "step": 85950 }, { "epoch": 24.399659381209197, "grad_norm": 10.464897155761719, "learning_rate": 7.561112688049959e-05, "loss": 0.01696442663669586, "step": 85960 }, { "epoch": 24.402497871132557, "grad_norm": 5.819425106048584, "learning_rate": 7.560828839057622e-05, "loss": 0.0108443021774292, "step": 85970 }, { "epoch": 24.40533636105592, "grad_norm": 1.6916236877441406, "learning_rate": 7.560544990065286e-05, "loss": 0.016887083649635315, "step": 85980 }, { "epoch": 24.40817485097928, "grad_norm": 0.6641570329666138, "learning_rate": 7.56026114107295e-05, "loss": 0.008201391994953155, "step": 85990 }, { "epoch": 24.411013340902638, "grad_norm": 0.45300954580307007, "learning_rate": 7.559977292080614e-05, "loss": 0.01769823282957077, "step": 86000 }, { "epoch": 24.411013340902638, "eval_accuracy": 0.9689705601831246, "eval_loss": 0.10774319618940353, "eval_runtime": 31.7164, "eval_samples_per_second": 495.864, "eval_steps_per_second": 7.756, "step": 86000 }, { "epoch": 24.413851830826, "grad_norm": 1.4352697134017944, "learning_rate": 7.559693443088277e-05, "loss": 0.018337222933769225, "step": 86010 }, { "epoch": 24.41669032074936, "grad_norm": 2.8338675498962402, "learning_rate": 7.559409594095941e-05, "loss": 0.008044353127479554, "step": 86020 }, { "epoch": 24.419528810672723, "grad_norm": 0.3395408093929291, "learning_rate": 7.559125745103605e-05, "loss": 0.005279507488012314, "step": 86030 }, { "epoch": 24.422367300596083, "grad_norm": 0.31454989314079285, "learning_rate": 7.558841896111269e-05, "loss": 0.004294072836637497, "step": 86040 }, { "epoch": 24.425205790519442, "grad_norm": 0.6733565330505371, "learning_rate": 7.558558047118933e-05, "loss": 0.006210532039403915, "step": 86050 }, { "epoch": 24.428044280442805, "grad_norm": 1.6580836772918701, "learning_rate": 7.558274198126597e-05, "loss": 0.03039458692073822, "step": 86060 }, { "epoch": 24.430882770366164, "grad_norm": 7.571953296661377, "learning_rate": 7.55799034913426e-05, "loss": 0.016900870203971862, "step": 86070 }, { "epoch": 24.433721260289527, "grad_norm": 0.49733617901802063, "learning_rate": 7.557706500141924e-05, "loss": 0.013136042654514313, "step": 86080 }, { "epoch": 24.436559750212886, "grad_norm": 9.622514724731445, "learning_rate": 7.557422651149589e-05, "loss": 0.015646675229072572, "step": 86090 }, { "epoch": 24.43939824013625, "grad_norm": 1.1060527563095093, "learning_rate": 7.557138802157253e-05, "loss": 0.014757946133613586, "step": 86100 }, { "epoch": 24.44223673005961, "grad_norm": 7.923277854919434, "learning_rate": 7.556854953164917e-05, "loss": 0.014649665355682373, "step": 86110 }, { "epoch": 24.445075219982968, "grad_norm": 0.30258503556251526, "learning_rate": 7.556571104172581e-05, "loss": 0.008989807218313217, "step": 86120 }, { "epoch": 24.44791370990633, "grad_norm": 1.4158624410629272, "learning_rate": 7.556287255180244e-05, "loss": 0.007803232967853546, "step": 86130 }, { "epoch": 24.45075219982969, "grad_norm": 3.5515754222869873, "learning_rate": 7.556003406187908e-05, "loss": 0.010949216783046722, "step": 86140 }, { "epoch": 24.453590689753053, "grad_norm": 1.6793653964996338, "learning_rate": 7.555719557195572e-05, "loss": 0.006334037333726883, "step": 86150 }, { "epoch": 24.456429179676412, "grad_norm": 1.6839778423309326, "learning_rate": 7.555435708203236e-05, "loss": 0.01614852696657181, "step": 86160 }, { "epoch": 24.45926766959977, "grad_norm": 0.6528506278991699, "learning_rate": 7.5551518592109e-05, "loss": 0.00482315868139267, "step": 86170 }, { "epoch": 24.462106159523135, "grad_norm": 6.182528495788574, "learning_rate": 7.554868010218564e-05, "loss": 0.010558417439460755, "step": 86180 }, { "epoch": 24.464944649446494, "grad_norm": 0.49513840675354004, "learning_rate": 7.554584161226229e-05, "loss": 0.005963114276528359, "step": 86190 }, { "epoch": 24.467783139369857, "grad_norm": 0.9860437512397766, "learning_rate": 7.554300312233891e-05, "loss": 0.010787472873926163, "step": 86200 }, { "epoch": 24.470621629293216, "grad_norm": 10.727680206298828, "learning_rate": 7.554016463241555e-05, "loss": 0.02604767680168152, "step": 86210 }, { "epoch": 24.473460119216575, "grad_norm": 0.22291924059391022, "learning_rate": 7.55373261424922e-05, "loss": 0.007957303524017334, "step": 86220 }, { "epoch": 24.47629860913994, "grad_norm": 4.049028396606445, "learning_rate": 7.553448765256884e-05, "loss": 0.0058005165308713915, "step": 86230 }, { "epoch": 24.479137099063298, "grad_norm": 2.097445487976074, "learning_rate": 7.553164916264548e-05, "loss": 0.009019745886325837, "step": 86240 }, { "epoch": 24.48197558898666, "grad_norm": 0.2356591373682022, "learning_rate": 7.552881067272212e-05, "loss": 0.012569110095500945, "step": 86250 }, { "epoch": 24.48481407891002, "grad_norm": 1.7650319337844849, "learning_rate": 7.552597218279875e-05, "loss": 0.018574845790863038, "step": 86260 }, { "epoch": 24.48765256883338, "grad_norm": 0.629223644733429, "learning_rate": 7.552313369287539e-05, "loss": 0.015525272488594056, "step": 86270 }, { "epoch": 24.490491058756742, "grad_norm": 0.13371798396110535, "learning_rate": 7.552029520295203e-05, "loss": 0.0018377602100372314, "step": 86280 }, { "epoch": 24.4933295486801, "grad_norm": 0.17614972591400146, "learning_rate": 7.551745671302867e-05, "loss": 0.001714678481221199, "step": 86290 }, { "epoch": 24.496168038603464, "grad_norm": 0.1152481734752655, "learning_rate": 7.551461822310531e-05, "loss": 0.010191304236650467, "step": 86300 }, { "epoch": 24.499006528526824, "grad_norm": 1.1194592714309692, "learning_rate": 7.551177973318195e-05, "loss": 0.0097679503262043, "step": 86310 }, { "epoch": 24.501845018450183, "grad_norm": 1.1617859601974487, "learning_rate": 7.55089412432586e-05, "loss": 0.008482585102319718, "step": 86320 }, { "epoch": 24.504683508373546, "grad_norm": 0.48891130089759827, "learning_rate": 7.550610275333522e-05, "loss": 0.013673518598079682, "step": 86330 }, { "epoch": 24.507521998296905, "grad_norm": 0.48462042212486267, "learning_rate": 7.550326426341187e-05, "loss": 0.005935866013169288, "step": 86340 }, { "epoch": 24.510360488220268, "grad_norm": 7.163588047027588, "learning_rate": 7.55004257734885e-05, "loss": 0.0077618852257728575, "step": 86350 }, { "epoch": 24.513198978143627, "grad_norm": 0.7101739645004272, "learning_rate": 7.549758728356515e-05, "loss": 0.004610227048397064, "step": 86360 }, { "epoch": 24.516037468066987, "grad_norm": 3.297109603881836, "learning_rate": 7.549474879364179e-05, "loss": 0.011734139919281007, "step": 86370 }, { "epoch": 24.51887595799035, "grad_norm": 3.4683430194854736, "learning_rate": 7.549191030371843e-05, "loss": 0.008321138471364975, "step": 86380 }, { "epoch": 24.52171444791371, "grad_norm": 0.12119369953870773, "learning_rate": 7.548907181379506e-05, "loss": 0.006652224808931351, "step": 86390 }, { "epoch": 24.524552937837072, "grad_norm": 0.6051621437072754, "learning_rate": 7.54862333238717e-05, "loss": 0.012994924187660217, "step": 86400 }, { "epoch": 24.52739142776043, "grad_norm": 2.225515127182007, "learning_rate": 7.548339483394834e-05, "loss": 0.014857205748558044, "step": 86410 }, { "epoch": 24.53022991768379, "grad_norm": 6.695326805114746, "learning_rate": 7.548055634402498e-05, "loss": 0.007468993961811066, "step": 86420 }, { "epoch": 24.533068407607153, "grad_norm": 2.067754030227661, "learning_rate": 7.547771785410162e-05, "loss": 0.004249143972992897, "step": 86430 }, { "epoch": 24.535906897530513, "grad_norm": 3.1433768272399902, "learning_rate": 7.547516321317059e-05, "loss": 0.009402300417423248, "step": 86440 }, { "epoch": 24.538745387453876, "grad_norm": 1.1617540121078491, "learning_rate": 7.547232472324723e-05, "loss": 0.008847872912883758, "step": 86450 }, { "epoch": 24.541583877377235, "grad_norm": 0.5742031931877136, "learning_rate": 7.546948623332387e-05, "loss": 0.012604688107967377, "step": 86460 }, { "epoch": 24.544422367300598, "grad_norm": 0.5153142213821411, "learning_rate": 7.546664774340051e-05, "loss": 0.008601532131433488, "step": 86470 }, { "epoch": 24.547260857223957, "grad_norm": 5.297637939453125, "learning_rate": 7.546380925347716e-05, "loss": 0.008239265531301498, "step": 86480 }, { "epoch": 24.550099347147317, "grad_norm": 1.1708295345306396, "learning_rate": 7.54609707635538e-05, "loss": 0.012036576122045516, "step": 86490 }, { "epoch": 24.55293783707068, "grad_norm": 1.2955753803253174, "learning_rate": 7.545813227363044e-05, "loss": 0.007438194751739502, "step": 86500 }, { "epoch": 24.55293783707068, "eval_accuracy": 0.967826031665289, "eval_loss": 0.1085750162601471, "eval_runtime": 32.1874, "eval_samples_per_second": 488.607, "eval_steps_per_second": 7.643, "step": 86500 }, { "epoch": 24.55577632699404, "grad_norm": 4.013537883758545, "learning_rate": 7.545529378370707e-05, "loss": 0.020248191058635713, "step": 86510 }, { "epoch": 24.558614816917398, "grad_norm": 3.099640369415283, "learning_rate": 7.545245529378371e-05, "loss": 0.01388494074344635, "step": 86520 }, { "epoch": 24.56145330684076, "grad_norm": 8.307168006896973, "learning_rate": 7.544961680386035e-05, "loss": 0.012331354618072509, "step": 86530 }, { "epoch": 24.56429179676412, "grad_norm": 11.281428337097168, "learning_rate": 7.544677831393699e-05, "loss": 0.015896648168563843, "step": 86540 }, { "epoch": 24.567130286687483, "grad_norm": 4.183461666107178, "learning_rate": 7.544393982401363e-05, "loss": 0.01052619218826294, "step": 86550 }, { "epoch": 24.569968776610843, "grad_norm": 1.3841561079025269, "learning_rate": 7.544110133409027e-05, "loss": 0.013000310957431793, "step": 86560 }, { "epoch": 24.572807266534205, "grad_norm": 0.22603771090507507, "learning_rate": 7.54382628441669e-05, "loss": 0.02425757497549057, "step": 86570 }, { "epoch": 24.575645756457565, "grad_norm": 1.0649877786636353, "learning_rate": 7.543542435424354e-05, "loss": 0.018128839135169984, "step": 86580 }, { "epoch": 24.578484246380924, "grad_norm": 0.38050633668899536, "learning_rate": 7.543258586432018e-05, "loss": 0.010467040538787841, "step": 86590 }, { "epoch": 24.581322736304287, "grad_norm": 1.8373584747314453, "learning_rate": 7.542974737439683e-05, "loss": 0.004486091062426567, "step": 86600 }, { "epoch": 24.584161226227646, "grad_norm": 0.9621447920799255, "learning_rate": 7.542690888447347e-05, "loss": 0.014976632595062257, "step": 86610 }, { "epoch": 24.58699971615101, "grad_norm": 3.740422487258911, "learning_rate": 7.542407039455011e-05, "loss": 0.006465326249599457, "step": 86620 }, { "epoch": 24.58983820607437, "grad_norm": 7.454321384429932, "learning_rate": 7.542123190462675e-05, "loss": 0.012022820860147476, "step": 86630 }, { "epoch": 24.592676695997728, "grad_norm": 12.778980255126953, "learning_rate": 7.541839341470338e-05, "loss": 0.012108631432056427, "step": 86640 }, { "epoch": 24.59551518592109, "grad_norm": 10.574054718017578, "learning_rate": 7.541555492478002e-05, "loss": 0.023545390367507933, "step": 86650 }, { "epoch": 24.59835367584445, "grad_norm": 0.3041205406188965, "learning_rate": 7.541271643485666e-05, "loss": 0.009798835963010788, "step": 86660 }, { "epoch": 24.601192165767813, "grad_norm": 5.590033531188965, "learning_rate": 7.540987794493329e-05, "loss": 0.01951398253440857, "step": 86670 }, { "epoch": 24.604030655691172, "grad_norm": 0.42919260263442993, "learning_rate": 7.540703945500994e-05, "loss": 0.012195106595754623, "step": 86680 }, { "epoch": 24.60686914561453, "grad_norm": 10.508100509643555, "learning_rate": 7.540420096508658e-05, "loss": 0.026058754324913024, "step": 86690 }, { "epoch": 24.609707635537895, "grad_norm": 1.0591177940368652, "learning_rate": 7.540136247516321e-05, "loss": 0.01330510675907135, "step": 86700 }, { "epoch": 24.612546125461254, "grad_norm": 0.8153659701347351, "learning_rate": 7.539852398523985e-05, "loss": 0.011104600131511688, "step": 86710 }, { "epoch": 24.615384615384617, "grad_norm": 0.6737568974494934, "learning_rate": 7.53956854953165e-05, "loss": 0.0065693393349647525, "step": 86720 }, { "epoch": 24.618223105307976, "grad_norm": 16.24028205871582, "learning_rate": 7.539284700539314e-05, "loss": 0.02482734024524689, "step": 86730 }, { "epoch": 24.621061595231335, "grad_norm": 8.424254417419434, "learning_rate": 7.539000851546978e-05, "loss": 0.01015494391322136, "step": 86740 }, { "epoch": 24.6239000851547, "grad_norm": 0.5216388702392578, "learning_rate": 7.538717002554642e-05, "loss": 0.007353351265192032, "step": 86750 }, { "epoch": 24.626738575078058, "grad_norm": 0.18451029062271118, "learning_rate": 7.538433153562306e-05, "loss": 0.010811682790517807, "step": 86760 }, { "epoch": 24.62957706500142, "grad_norm": 3.698284387588501, "learning_rate": 7.538149304569969e-05, "loss": 0.012761187553405762, "step": 86770 }, { "epoch": 24.63241555492478, "grad_norm": 1.8929272890090942, "learning_rate": 7.537865455577633e-05, "loss": 0.003917547687888145, "step": 86780 }, { "epoch": 24.63525404484814, "grad_norm": 3.80242919921875, "learning_rate": 7.537581606585297e-05, "loss": 0.010679098963737487, "step": 86790 }, { "epoch": 24.638092534771502, "grad_norm": 8.86025333404541, "learning_rate": 7.53729775759296e-05, "loss": 0.014045929908752442, "step": 86800 }, { "epoch": 24.64093102469486, "grad_norm": 9.523038864135742, "learning_rate": 7.537013908600625e-05, "loss": 0.006660276651382446, "step": 86810 }, { "epoch": 24.643769514618224, "grad_norm": 14.693415641784668, "learning_rate": 7.53673005960829e-05, "loss": 0.009134665876626969, "step": 86820 }, { "epoch": 24.646608004541584, "grad_norm": 3.314321279525757, "learning_rate": 7.536446210615952e-05, "loss": 0.014549160003662109, "step": 86830 }, { "epoch": 24.649446494464943, "grad_norm": 0.21771349012851715, "learning_rate": 7.536162361623616e-05, "loss": 0.010577455163002014, "step": 86840 }, { "epoch": 24.652284984388306, "grad_norm": 1.2537258863449097, "learning_rate": 7.53587851263128e-05, "loss": 0.006577695906162262, "step": 86850 }, { "epoch": 24.655123474311665, "grad_norm": 0.24269087612628937, "learning_rate": 7.535594663638945e-05, "loss": 0.008163337409496308, "step": 86860 }, { "epoch": 24.657961964235028, "grad_norm": 2.7925302982330322, "learning_rate": 7.535310814646607e-05, "loss": 0.006752020120620728, "step": 86870 }, { "epoch": 24.660800454158387, "grad_norm": 0.33916938304901123, "learning_rate": 7.535026965654273e-05, "loss": 0.008962725847959518, "step": 86880 }, { "epoch": 24.663638944081747, "grad_norm": 0.2434057593345642, "learning_rate": 7.534743116661937e-05, "loss": 0.014952491223812103, "step": 86890 }, { "epoch": 24.66647743400511, "grad_norm": 0.4776949882507324, "learning_rate": 7.5344592676696e-05, "loss": 0.00804145187139511, "step": 86900 }, { "epoch": 24.66931592392847, "grad_norm": 0.43567776679992676, "learning_rate": 7.534175418677264e-05, "loss": 0.004113298654556274, "step": 86910 }, { "epoch": 24.672154413851832, "grad_norm": 0.11134946346282959, "learning_rate": 7.533891569684928e-05, "loss": 0.003929298743605614, "step": 86920 }, { "epoch": 24.67499290377519, "grad_norm": 1.0919134616851807, "learning_rate": 7.533607720692591e-05, "loss": 0.005093041807413101, "step": 86930 }, { "epoch": 24.677831393698554, "grad_norm": 0.731681764125824, "learning_rate": 7.533323871700256e-05, "loss": 0.009381788223981858, "step": 86940 }, { "epoch": 24.680669883621913, "grad_norm": 2.7037811279296875, "learning_rate": 7.53304002270792e-05, "loss": 0.007820196449756622, "step": 86950 }, { "epoch": 24.683508373545273, "grad_norm": 1.4343582391738892, "learning_rate": 7.532756173715583e-05, "loss": 0.006610675156116486, "step": 86960 }, { "epoch": 24.686346863468636, "grad_norm": 2.605112314224243, "learning_rate": 7.532472324723247e-05, "loss": 0.004300115257501602, "step": 86970 }, { "epoch": 24.689185353391995, "grad_norm": 7.131167411804199, "learning_rate": 7.532188475730912e-05, "loss": 0.011153773218393326, "step": 86980 }, { "epoch": 24.692023843315358, "grad_norm": 1.4395395517349243, "learning_rate": 7.531904626738576e-05, "loss": 0.020947591960430147, "step": 86990 }, { "epoch": 24.694862333238717, "grad_norm": 18.303829193115234, "learning_rate": 7.531620777746239e-05, "loss": 0.047566074132919314, "step": 87000 }, { "epoch": 24.694862333238717, "eval_accuracy": 0.9602594264640427, "eval_loss": 0.1317024827003479, "eval_runtime": 31.7111, "eval_samples_per_second": 495.946, "eval_steps_per_second": 7.758, "step": 87000 }, { "epoch": 24.697700823162076, "grad_norm": 2.497554063796997, "learning_rate": 7.531336928753904e-05, "loss": 0.016774526238441466, "step": 87010 }, { "epoch": 24.70053931308544, "grad_norm": 0.4982735514640808, "learning_rate": 7.531053079761567e-05, "loss": 0.011550667881965637, "step": 87020 }, { "epoch": 24.7033778030088, "grad_norm": 1.5645840167999268, "learning_rate": 7.530769230769231e-05, "loss": 0.012465214729309082, "step": 87030 }, { "epoch": 24.70621629293216, "grad_norm": 0.8627891540527344, "learning_rate": 7.530485381776895e-05, "loss": 0.008143007755279541, "step": 87040 }, { "epoch": 24.70905478285552, "grad_norm": 0.4362296462059021, "learning_rate": 7.530201532784559e-05, "loss": 0.007987241446971893, "step": 87050 }, { "epoch": 24.71189327277888, "grad_norm": 0.1727224439382553, "learning_rate": 7.529917683792222e-05, "loss": 0.012553539872169495, "step": 87060 }, { "epoch": 24.714731762702243, "grad_norm": 0.4889027178287506, "learning_rate": 7.529633834799886e-05, "loss": 0.008621846884489059, "step": 87070 }, { "epoch": 24.717570252625602, "grad_norm": 11.209785461425781, "learning_rate": 7.529349985807552e-05, "loss": 0.020931655168533327, "step": 87080 }, { "epoch": 24.720408742548965, "grad_norm": 14.347349166870117, "learning_rate": 7.529066136815214e-05, "loss": 0.034314191341400145, "step": 87090 }, { "epoch": 24.723247232472325, "grad_norm": 0.2758462429046631, "learning_rate": 7.528782287822879e-05, "loss": 0.009108667075634003, "step": 87100 }, { "epoch": 24.726085722395684, "grad_norm": 0.9600092172622681, "learning_rate": 7.528498438830543e-05, "loss": 0.009231778979301452, "step": 87110 }, { "epoch": 24.728924212319047, "grad_norm": 0.2973717451095581, "learning_rate": 7.528214589838205e-05, "loss": 0.008982735872268676, "step": 87120 }, { "epoch": 24.731762702242406, "grad_norm": 4.576530456542969, "learning_rate": 7.52793074084587e-05, "loss": 0.013734231889247894, "step": 87130 }, { "epoch": 24.73460119216577, "grad_norm": 0.5495737791061401, "learning_rate": 7.527646891853535e-05, "loss": 0.008714700490236283, "step": 87140 }, { "epoch": 24.73743968208913, "grad_norm": 13.219279289245605, "learning_rate": 7.527363042861198e-05, "loss": 0.008818300068378448, "step": 87150 }, { "epoch": 24.740278172012488, "grad_norm": 2.2898061275482178, "learning_rate": 7.527079193868862e-05, "loss": 0.005736418813467026, "step": 87160 }, { "epoch": 24.74311666193585, "grad_norm": 0.3078395128250122, "learning_rate": 7.526795344876526e-05, "loss": 0.007464403659105301, "step": 87170 }, { "epoch": 24.74595515185921, "grad_norm": 0.4191669523715973, "learning_rate": 7.52651149588419e-05, "loss": 0.019647635519504547, "step": 87180 }, { "epoch": 24.748793641782573, "grad_norm": 0.894784688949585, "learning_rate": 7.526227646891853e-05, "loss": 0.015010680258274078, "step": 87190 }, { "epoch": 24.751632131705932, "grad_norm": 0.36110028624534607, "learning_rate": 7.525943797899517e-05, "loss": 0.005522892251610756, "step": 87200 }, { "epoch": 24.75447062162929, "grad_norm": 0.1097046509385109, "learning_rate": 7.525659948907183e-05, "loss": 0.013405486941337585, "step": 87210 }, { "epoch": 24.757309111552654, "grad_norm": 8.6362886428833, "learning_rate": 7.525376099914846e-05, "loss": 0.008329806476831436, "step": 87220 }, { "epoch": 24.760147601476014, "grad_norm": 2.5056357383728027, "learning_rate": 7.52509225092251e-05, "loss": 0.016384705901145935, "step": 87230 }, { "epoch": 24.762986091399377, "grad_norm": 3.5006537437438965, "learning_rate": 7.524808401930174e-05, "loss": 0.01273140162229538, "step": 87240 }, { "epoch": 24.765824581322736, "grad_norm": 3.437854528427124, "learning_rate": 7.524524552937837e-05, "loss": 0.007156369090080261, "step": 87250 }, { "epoch": 24.768663071246095, "grad_norm": 3.800734043121338, "learning_rate": 7.524240703945501e-05, "loss": 0.03642425537109375, "step": 87260 }, { "epoch": 24.77150156116946, "grad_norm": 5.916914939880371, "learning_rate": 7.523956854953166e-05, "loss": 0.02490185797214508, "step": 87270 }, { "epoch": 24.774340051092818, "grad_norm": 14.271767616271973, "learning_rate": 7.523673005960829e-05, "loss": 0.0245846226811409, "step": 87280 }, { "epoch": 24.77717854101618, "grad_norm": 6.42150354385376, "learning_rate": 7.523389156968493e-05, "loss": 0.009956157207489014, "step": 87290 }, { "epoch": 24.78001703093954, "grad_norm": 9.239266395568848, "learning_rate": 7.523105307976157e-05, "loss": 0.014026257395744323, "step": 87300 }, { "epoch": 24.782855520862903, "grad_norm": 5.918678283691406, "learning_rate": 7.522821458983821e-05, "loss": 0.015704232454299926, "step": 87310 }, { "epoch": 24.785694010786262, "grad_norm": 6.271981239318848, "learning_rate": 7.522537609991484e-05, "loss": 0.01067735105752945, "step": 87320 }, { "epoch": 24.78853250070962, "grad_norm": 1.248551368713379, "learning_rate": 7.522253760999148e-05, "loss": 0.0186002254486084, "step": 87330 }, { "epoch": 24.791370990632984, "grad_norm": 0.4113011062145233, "learning_rate": 7.521969912006814e-05, "loss": 0.007843995094299316, "step": 87340 }, { "epoch": 24.794209480556344, "grad_norm": 0.37523746490478516, "learning_rate": 7.521686063014477e-05, "loss": 0.011750002205371857, "step": 87350 }, { "epoch": 24.797047970479706, "grad_norm": 7.022216796875, "learning_rate": 7.521402214022141e-05, "loss": 0.007191299647092819, "step": 87360 }, { "epoch": 24.799886460403066, "grad_norm": 1.983604907989502, "learning_rate": 7.521118365029805e-05, "loss": 0.005740867555141449, "step": 87370 }, { "epoch": 24.802724950326425, "grad_norm": 2.143331289291382, "learning_rate": 7.520834516037468e-05, "loss": 0.005615520477294922, "step": 87380 }, { "epoch": 24.805563440249788, "grad_norm": 1.0639060735702515, "learning_rate": 7.520550667045132e-05, "loss": 0.009142071008682251, "step": 87390 }, { "epoch": 24.808401930173147, "grad_norm": 8.75391674041748, "learning_rate": 7.520266818052796e-05, "loss": 0.005440958589315414, "step": 87400 }, { "epoch": 24.81124042009651, "grad_norm": 4.223254680633545, "learning_rate": 7.51998296906046e-05, "loss": 0.03077690601348877, "step": 87410 }, { "epoch": 24.81407891001987, "grad_norm": 4.01494836807251, "learning_rate": 7.519699120068124e-05, "loss": 0.008579573035240174, "step": 87420 }, { "epoch": 24.81691739994323, "grad_norm": 3.50024676322937, "learning_rate": 7.519415271075788e-05, "loss": 0.02002474069595337, "step": 87430 }, { "epoch": 24.81975588986659, "grad_norm": 0.917782187461853, "learning_rate": 7.519131422083452e-05, "loss": 0.013988485932350159, "step": 87440 }, { "epoch": 24.82259437978995, "grad_norm": 1.4966775178909302, "learning_rate": 7.518847573091115e-05, "loss": 0.026089850068092345, "step": 87450 }, { "epoch": 24.825432869713314, "grad_norm": 8.906994819641113, "learning_rate": 7.51856372409878e-05, "loss": 0.025699731707572938, "step": 87460 }, { "epoch": 24.828271359636673, "grad_norm": 1.5569522380828857, "learning_rate": 7.518279875106445e-05, "loss": 0.020185469090938567, "step": 87470 }, { "epoch": 24.831109849560033, "grad_norm": 16.2556095123291, "learning_rate": 7.517996026114108e-05, "loss": 0.015798908472061158, "step": 87480 }, { "epoch": 24.833948339483396, "grad_norm": 0.08828189224004745, "learning_rate": 7.517712177121772e-05, "loss": 0.006485775113105774, "step": 87490 }, { "epoch": 24.836786829406755, "grad_norm": 0.5685157179832458, "learning_rate": 7.517428328129436e-05, "loss": 0.01690186411142349, "step": 87500 }, { "epoch": 24.836786829406755, "eval_accuracy": 0.9669994277357411, "eval_loss": 0.1103290244936943, "eval_runtime": 31.9648, "eval_samples_per_second": 492.009, "eval_steps_per_second": 7.696, "step": 87500 }, { "epoch": 24.839625319330118, "grad_norm": 0.6033041477203369, "learning_rate": 7.517144479137099e-05, "loss": 0.012880977988243104, "step": 87510 }, { "epoch": 24.842463809253477, "grad_norm": 0.755061149597168, "learning_rate": 7.516860630144763e-05, "loss": 0.022327718138694764, "step": 87520 }, { "epoch": 24.845302299176836, "grad_norm": 6.705821990966797, "learning_rate": 7.516576781152427e-05, "loss": 0.0173881396651268, "step": 87530 }, { "epoch": 24.8481407891002, "grad_norm": 0.8221394419670105, "learning_rate": 7.516292932160091e-05, "loss": 0.004737811535596848, "step": 87540 }, { "epoch": 24.85097927902356, "grad_norm": 14.285408020019531, "learning_rate": 7.516009083167755e-05, "loss": 0.011455235630273819, "step": 87550 }, { "epoch": 24.85381776894692, "grad_norm": 4.1569504737854, "learning_rate": 7.51572523417542e-05, "loss": 0.026419785618782044, "step": 87560 }, { "epoch": 24.85665625887028, "grad_norm": 2.0025784969329834, "learning_rate": 7.515441385183084e-05, "loss": 0.02092153877019882, "step": 87570 }, { "epoch": 24.85949474879364, "grad_norm": 0.9641667008399963, "learning_rate": 7.515157536190746e-05, "loss": 0.02503083348274231, "step": 87580 }, { "epoch": 24.862333238717003, "grad_norm": 7.7629923820495605, "learning_rate": 7.51487368719841e-05, "loss": 0.014247119426727295, "step": 87590 }, { "epoch": 24.865171728640362, "grad_norm": 12.19746208190918, "learning_rate": 7.514589838206075e-05, "loss": 0.024246279895305634, "step": 87600 }, { "epoch": 24.868010218563725, "grad_norm": 4.187434196472168, "learning_rate": 7.514305989213739e-05, "loss": 0.01857968270778656, "step": 87610 }, { "epoch": 24.870848708487085, "grad_norm": 7.167263984680176, "learning_rate": 7.514022140221403e-05, "loss": 0.006836596131324768, "step": 87620 }, { "epoch": 24.873687198410444, "grad_norm": 0.4774487018585205, "learning_rate": 7.513738291229067e-05, "loss": 0.007520785927772522, "step": 87630 }, { "epoch": 24.876525688333807, "grad_norm": 12.680917739868164, "learning_rate": 7.51345444223673e-05, "loss": 0.010874718427658081, "step": 87640 }, { "epoch": 24.879364178257166, "grad_norm": 1.2957574129104614, "learning_rate": 7.513170593244394e-05, "loss": 0.01659461557865143, "step": 87650 }, { "epoch": 24.88220266818053, "grad_norm": 2.0649008750915527, "learning_rate": 7.512886744252058e-05, "loss": 0.012110823392868042, "step": 87660 }, { "epoch": 24.88504115810389, "grad_norm": 0.5235140323638916, "learning_rate": 7.512602895259722e-05, "loss": 0.00854002609848976, "step": 87670 }, { "epoch": 24.88787964802725, "grad_norm": 1.5249601602554321, "learning_rate": 7.512319046267386e-05, "loss": 0.015473225712776184, "step": 87680 }, { "epoch": 24.89071813795061, "grad_norm": 0.29476678371429443, "learning_rate": 7.51203519727505e-05, "loss": 0.021198800206184386, "step": 87690 }, { "epoch": 24.89355662787397, "grad_norm": 12.52983283996582, "learning_rate": 7.511751348282715e-05, "loss": 0.013080716133117676, "step": 87700 }, { "epoch": 24.896395117797333, "grad_norm": 9.88040828704834, "learning_rate": 7.511467499290377e-05, "loss": 0.019578444957733154, "step": 87710 }, { "epoch": 24.899233607720692, "grad_norm": 1.0633220672607422, "learning_rate": 7.511183650298042e-05, "loss": 0.018388885259628295, "step": 87720 }, { "epoch": 24.902072097644055, "grad_norm": 2.0224289894104004, "learning_rate": 7.510899801305706e-05, "loss": 0.019063127040863038, "step": 87730 }, { "epoch": 24.904910587567414, "grad_norm": 13.229560852050781, "learning_rate": 7.51061595231337e-05, "loss": 0.02253347635269165, "step": 87740 }, { "epoch": 24.907749077490774, "grad_norm": 1.544021725654602, "learning_rate": 7.510332103321034e-05, "loss": 0.011013183742761612, "step": 87750 }, { "epoch": 24.910587567414137, "grad_norm": 0.08206012845039368, "learning_rate": 7.510048254328698e-05, "loss": 0.009166860580444336, "step": 87760 }, { "epoch": 24.913426057337496, "grad_norm": 4.057959079742432, "learning_rate": 7.509764405336361e-05, "loss": 0.006913983821868896, "step": 87770 }, { "epoch": 24.91626454726086, "grad_norm": 3.6529219150543213, "learning_rate": 7.509480556344025e-05, "loss": 0.007982385903596878, "step": 87780 }, { "epoch": 24.919103037184218, "grad_norm": 3.55979061126709, "learning_rate": 7.509196707351689e-05, "loss": 0.006905844062566757, "step": 87790 }, { "epoch": 24.921941527107577, "grad_norm": 4.727871417999268, "learning_rate": 7.508912858359353e-05, "loss": 0.008420443534851075, "step": 87800 }, { "epoch": 24.92478001703094, "grad_norm": 2.0958633422851562, "learning_rate": 7.508629009367017e-05, "loss": 0.016092444956302642, "step": 87810 }, { "epoch": 24.9276185069543, "grad_norm": 0.15478816628456116, "learning_rate": 7.508345160374682e-05, "loss": 0.02559421956539154, "step": 87820 }, { "epoch": 24.930456996877663, "grad_norm": 0.5389772653579712, "learning_rate": 7.508061311382346e-05, "loss": 0.017667108774185182, "step": 87830 }, { "epoch": 24.933295486801022, "grad_norm": 18.557151794433594, "learning_rate": 7.507777462390008e-05, "loss": 0.02704583704471588, "step": 87840 }, { "epoch": 24.93613397672438, "grad_norm": 12.760278701782227, "learning_rate": 7.507493613397673e-05, "loss": 0.03062024712562561, "step": 87850 }, { "epoch": 24.938972466647744, "grad_norm": 1.5880898237228394, "learning_rate": 7.507209764405337e-05, "loss": 0.052779597043991086, "step": 87860 }, { "epoch": 24.941810956571103, "grad_norm": 0.4904594123363495, "learning_rate": 7.506925915413001e-05, "loss": 0.033505970239639284, "step": 87870 }, { "epoch": 24.944649446494466, "grad_norm": 0.7134040594100952, "learning_rate": 7.506642066420665e-05, "loss": 0.018302568793296815, "step": 87880 }, { "epoch": 24.947487936417826, "grad_norm": 1.6387419700622559, "learning_rate": 7.506358217428329e-05, "loss": 0.009172136336565018, "step": 87890 }, { "epoch": 24.950326426341185, "grad_norm": 10.225381851196289, "learning_rate": 7.506074368435992e-05, "loss": 0.015201254189014435, "step": 87900 }, { "epoch": 24.953164916264548, "grad_norm": 11.05975341796875, "learning_rate": 7.505790519443656e-05, "loss": 0.013270682096481324, "step": 87910 }, { "epoch": 24.956003406187907, "grad_norm": 0.05077755078673363, "learning_rate": 7.50550667045132e-05, "loss": 0.010212510824203491, "step": 87920 }, { "epoch": 24.95884189611127, "grad_norm": 1.3177310228347778, "learning_rate": 7.505222821458984e-05, "loss": 0.007656729966402054, "step": 87930 }, { "epoch": 24.96168038603463, "grad_norm": 6.527772903442383, "learning_rate": 7.504938972466649e-05, "loss": 0.011381358653306962, "step": 87940 }, { "epoch": 24.96451887595799, "grad_norm": 1.5577088594436646, "learning_rate": 7.504655123474313e-05, "loss": 0.011264510452747345, "step": 87950 }, { "epoch": 24.96735736588135, "grad_norm": 1.3065193891525269, "learning_rate": 7.504371274481975e-05, "loss": 0.009608546644449234, "step": 87960 }, { "epoch": 24.97019585580471, "grad_norm": 1.0861492156982422, "learning_rate": 7.50408742548964e-05, "loss": 0.007268063724040985, "step": 87970 }, { "epoch": 24.973034345728074, "grad_norm": 4.9401021003723145, "learning_rate": 7.503803576497304e-05, "loss": 0.012372027337551116, "step": 87980 }, { "epoch": 24.975872835651433, "grad_norm": 3.4493188858032227, "learning_rate": 7.503519727504968e-05, "loss": 0.02445839047431946, "step": 87990 }, { "epoch": 24.978711325574793, "grad_norm": 1.6203080415725708, "learning_rate": 7.50323587851263e-05, "loss": 0.006531777232885361, "step": 88000 }, { "epoch": 24.978711325574793, "eval_accuracy": 0.9694156546067273, "eval_loss": 0.10140075534582138, "eval_runtime": 31.776, "eval_samples_per_second": 494.933, "eval_steps_per_second": 7.742, "step": 88000 }, { "epoch": 24.981549815498155, "grad_norm": 0.5066255927085876, "learning_rate": 7.502952029520296e-05, "loss": 0.009429792314767838, "step": 88010 }, { "epoch": 24.984388305421515, "grad_norm": 3.279327392578125, "learning_rate": 7.50266818052796e-05, "loss": 0.0029561664909124375, "step": 88020 }, { "epoch": 24.987226795344878, "grad_norm": 0.3823559880256653, "learning_rate": 7.502384331535623e-05, "loss": 0.005222257226705551, "step": 88030 }, { "epoch": 24.990065285268237, "grad_norm": 0.5083175897598267, "learning_rate": 7.502100482543287e-05, "loss": 0.012538023293018341, "step": 88040 }, { "epoch": 24.9929037751916, "grad_norm": 2.0581490993499756, "learning_rate": 7.501816633550951e-05, "loss": 0.014437662065029144, "step": 88050 }, { "epoch": 24.99574226511496, "grad_norm": 2.9934422969818115, "learning_rate": 7.501532784558614e-05, "loss": 0.01310369074344635, "step": 88060 }, { "epoch": 24.99858075503832, "grad_norm": 0.717076301574707, "learning_rate": 7.50124893556628e-05, "loss": 0.004772055149078369, "step": 88070 }, { "epoch": 25.00141924496168, "grad_norm": 0.9498530030250549, "learning_rate": 7.500965086573944e-05, "loss": 0.01948847621679306, "step": 88080 }, { "epoch": 25.00425773488504, "grad_norm": 3.247642755508423, "learning_rate": 7.500681237581606e-05, "loss": 0.014774627983570099, "step": 88090 }, { "epoch": 25.007096224808404, "grad_norm": 0.13476575911045074, "learning_rate": 7.50039738858927e-05, "loss": 0.007771135866641998, "step": 88100 }, { "epoch": 25.009934714731763, "grad_norm": 0.2716066837310791, "learning_rate": 7.500113539596935e-05, "loss": 0.012376495450735093, "step": 88110 }, { "epoch": 25.012773204655122, "grad_norm": 3.709446668624878, "learning_rate": 7.499829690604599e-05, "loss": 0.011676845699548721, "step": 88120 }, { "epoch": 25.015611694578485, "grad_norm": 0.708116888999939, "learning_rate": 7.499545841612262e-05, "loss": 0.01151844710111618, "step": 88130 }, { "epoch": 25.018450184501845, "grad_norm": 1.56290602684021, "learning_rate": 7.499261992619927e-05, "loss": 0.015797659754753113, "step": 88140 }, { "epoch": 25.021288674425207, "grad_norm": 0.46768295764923096, "learning_rate": 7.498978143627591e-05, "loss": 0.012793682515621185, "step": 88150 }, { "epoch": 25.024127164348567, "grad_norm": 2.678234815597534, "learning_rate": 7.498694294635254e-05, "loss": 0.00877302885055542, "step": 88160 }, { "epoch": 25.026965654271926, "grad_norm": 2.4953036308288574, "learning_rate": 7.498410445642918e-05, "loss": 0.004702713340520859, "step": 88170 }, { "epoch": 25.02980414419529, "grad_norm": 0.37608787417411804, "learning_rate": 7.498126596650582e-05, "loss": 0.008555131405591965, "step": 88180 }, { "epoch": 25.03264263411865, "grad_norm": 0.352338045835495, "learning_rate": 7.497842747658245e-05, "loss": 0.007688891887664795, "step": 88190 }, { "epoch": 25.03548112404201, "grad_norm": 0.968505322933197, "learning_rate": 7.497558898665909e-05, "loss": 0.027637457847595213, "step": 88200 }, { "epoch": 25.03831961396537, "grad_norm": 3.6665990352630615, "learning_rate": 7.497275049673575e-05, "loss": 0.004941928759217262, "step": 88210 }, { "epoch": 25.04115810388873, "grad_norm": 0.2770722508430481, "learning_rate": 7.496991200681238e-05, "loss": 0.00776611715555191, "step": 88220 }, { "epoch": 25.043996593812093, "grad_norm": 12.582157135009766, "learning_rate": 7.496707351688902e-05, "loss": 0.013689103722572326, "step": 88230 }, { "epoch": 25.046835083735452, "grad_norm": 0.20681189000606537, "learning_rate": 7.496423502696566e-05, "loss": 0.00542277954518795, "step": 88240 }, { "epoch": 25.049673573658815, "grad_norm": 0.433368980884552, "learning_rate": 7.49613965370423e-05, "loss": 0.0042979147285223005, "step": 88250 }, { "epoch": 25.052512063582174, "grad_norm": 1.0875904560089111, "learning_rate": 7.495855804711893e-05, "loss": 0.0037341035902500154, "step": 88260 }, { "epoch": 25.055350553505534, "grad_norm": 0.049867842346429825, "learning_rate": 7.495571955719558e-05, "loss": 0.00936647430062294, "step": 88270 }, { "epoch": 25.058189043428897, "grad_norm": 0.23676122725009918, "learning_rate": 7.495288106727222e-05, "loss": 0.004690884053707123, "step": 88280 }, { "epoch": 25.061027533352256, "grad_norm": 0.16013887524604797, "learning_rate": 7.495004257734885e-05, "loss": 0.003253427892923355, "step": 88290 }, { "epoch": 25.06386602327562, "grad_norm": 0.33957648277282715, "learning_rate": 7.494720408742549e-05, "loss": 0.005886750295758247, "step": 88300 }, { "epoch": 25.066704513198978, "grad_norm": 0.0957634374499321, "learning_rate": 7.494436559750213e-05, "loss": 0.007570482790470123, "step": 88310 }, { "epoch": 25.069543003122337, "grad_norm": 0.1333116888999939, "learning_rate": 7.494152710757876e-05, "loss": 0.01260690689086914, "step": 88320 }, { "epoch": 25.0723814930457, "grad_norm": 0.7813993692398071, "learning_rate": 7.49386886176554e-05, "loss": 0.004560505971312523, "step": 88330 }, { "epoch": 25.07521998296906, "grad_norm": 0.28840896487236023, "learning_rate": 7.493585012773206e-05, "loss": 0.01157001256942749, "step": 88340 }, { "epoch": 25.078058472892423, "grad_norm": 6.713897228240967, "learning_rate": 7.493301163780869e-05, "loss": 0.009289898723363877, "step": 88350 }, { "epoch": 25.080896962815782, "grad_norm": 3.9016716480255127, "learning_rate": 7.493017314788533e-05, "loss": 0.0029821325093507766, "step": 88360 }, { "epoch": 25.08373545273914, "grad_norm": 0.06385046988725662, "learning_rate": 7.492733465796197e-05, "loss": 0.00582755021750927, "step": 88370 }, { "epoch": 25.086573942662504, "grad_norm": 0.6981302499771118, "learning_rate": 7.492449616803861e-05, "loss": 0.0291093111038208, "step": 88380 }, { "epoch": 25.089412432585863, "grad_norm": 2.3380746841430664, "learning_rate": 7.492165767811524e-05, "loss": 0.003324177861213684, "step": 88390 }, { "epoch": 25.092250922509226, "grad_norm": 12.919342041015625, "learning_rate": 7.49188191881919e-05, "loss": 0.019175425171852112, "step": 88400 }, { "epoch": 25.095089412432586, "grad_norm": 0.44890886545181274, "learning_rate": 7.491598069826853e-05, "loss": 0.010593345761299134, "step": 88410 }, { "epoch": 25.097927902355945, "grad_norm": 2.9193708896636963, "learning_rate": 7.491314220834516e-05, "loss": 0.015274964272975922, "step": 88420 }, { "epoch": 25.100766392279308, "grad_norm": 0.8115370273590088, "learning_rate": 7.49103037184218e-05, "loss": 0.010110214352607727, "step": 88430 }, { "epoch": 25.103604882202667, "grad_norm": 3.675935745239258, "learning_rate": 7.490746522849845e-05, "loss": 0.008279177546501159, "step": 88440 }, { "epoch": 25.10644337212603, "grad_norm": 1.4785747528076172, "learning_rate": 7.490462673857507e-05, "loss": 0.00602576658129692, "step": 88450 }, { "epoch": 25.10928186204939, "grad_norm": 3.8742661476135254, "learning_rate": 7.490178824865171e-05, "loss": 0.004725802689790726, "step": 88460 }, { "epoch": 25.11212035197275, "grad_norm": 2.847621440887451, "learning_rate": 7.489894975872837e-05, "loss": 0.008736441284418106, "step": 88470 }, { "epoch": 25.11495884189611, "grad_norm": 3.212477445602417, "learning_rate": 7.4896111268805e-05, "loss": 0.010048354417085648, "step": 88480 }, { "epoch": 25.11779733181947, "grad_norm": 0.6197217106819153, "learning_rate": 7.489327277888164e-05, "loss": 0.013915348052978515, "step": 88490 }, { "epoch": 25.120635821742834, "grad_norm": 1.7774198055267334, "learning_rate": 7.489043428895828e-05, "loss": 0.01153482049703598, "step": 88500 }, { "epoch": 25.120635821742834, "eval_accuracy": 0.970687352959878, "eval_loss": 0.09431147575378418, "eval_runtime": 31.6779, "eval_samples_per_second": 496.466, "eval_steps_per_second": 7.766, "step": 88500 }, { "epoch": 25.123474311666193, "grad_norm": 16.670610427856445, "learning_rate": 7.488759579903492e-05, "loss": 0.013590759038925171, "step": 88510 }, { "epoch": 25.126312801589556, "grad_norm": 1.0112979412078857, "learning_rate": 7.488475730911155e-05, "loss": 0.012445001304149628, "step": 88520 }, { "epoch": 25.129151291512915, "grad_norm": 0.8229496479034424, "learning_rate": 7.488191881918819e-05, "loss": 0.013478408753871917, "step": 88530 }, { "epoch": 25.131989781436275, "grad_norm": 0.2367406189441681, "learning_rate": 7.487908032926485e-05, "loss": 0.004234018921852112, "step": 88540 }, { "epoch": 25.134828271359638, "grad_norm": 9.110761642456055, "learning_rate": 7.487624183934147e-05, "loss": 0.021476611495018005, "step": 88550 }, { "epoch": 25.137666761282997, "grad_norm": 1.043081283569336, "learning_rate": 7.487340334941811e-05, "loss": 0.021034453809261323, "step": 88560 }, { "epoch": 25.14050525120636, "grad_norm": 0.14409944415092468, "learning_rate": 7.487056485949476e-05, "loss": 0.005038371682167054, "step": 88570 }, { "epoch": 25.14334374112972, "grad_norm": 0.3085535764694214, "learning_rate": 7.486772636957138e-05, "loss": 0.009268808364868163, "step": 88580 }, { "epoch": 25.14618223105308, "grad_norm": 0.8214060068130493, "learning_rate": 7.486488787964803e-05, "loss": 0.005521495640277862, "step": 88590 }, { "epoch": 25.14902072097644, "grad_norm": 0.34801214933395386, "learning_rate": 7.486204938972468e-05, "loss": 0.007418334484100342, "step": 88600 }, { "epoch": 25.1518592108998, "grad_norm": 0.8509289026260376, "learning_rate": 7.485921089980131e-05, "loss": 0.010123103857040405, "step": 88610 }, { "epoch": 25.154697700823164, "grad_norm": 6.638089656829834, "learning_rate": 7.485637240987795e-05, "loss": 0.009859584271907806, "step": 88620 }, { "epoch": 25.157536190746523, "grad_norm": 1.032214641571045, "learning_rate": 7.485353391995459e-05, "loss": 0.007451426237821579, "step": 88630 }, { "epoch": 25.160374680669882, "grad_norm": 10.31036376953125, "learning_rate": 7.485069543003123e-05, "loss": 0.009381026029586792, "step": 88640 }, { "epoch": 25.163213170593245, "grad_norm": 0.5044496059417725, "learning_rate": 7.484785694010786e-05, "loss": 0.004036794230341911, "step": 88650 }, { "epoch": 25.166051660516604, "grad_norm": 0.5299307107925415, "learning_rate": 7.48450184501845e-05, "loss": 0.0037077143788337708, "step": 88660 }, { "epoch": 25.168890150439967, "grad_norm": 0.14194254577159882, "learning_rate": 7.484217996026116e-05, "loss": 0.009735964238643646, "step": 88670 }, { "epoch": 25.171728640363327, "grad_norm": 0.8430927395820618, "learning_rate": 7.483934147033778e-05, "loss": 0.017762945592403413, "step": 88680 }, { "epoch": 25.174567130286686, "grad_norm": 7.64051628112793, "learning_rate": 7.483650298041443e-05, "loss": 0.016383124887943266, "step": 88690 }, { "epoch": 25.17740562021005, "grad_norm": 0.7499608397483826, "learning_rate": 7.483366449049107e-05, "loss": 0.004283434897661209, "step": 88700 }, { "epoch": 25.18024411013341, "grad_norm": 1.6921086311340332, "learning_rate": 7.48308260005677e-05, "loss": 0.013006976246833802, "step": 88710 }, { "epoch": 25.18308260005677, "grad_norm": 0.12373252213001251, "learning_rate": 7.482798751064434e-05, "loss": 0.004560244455933571, "step": 88720 }, { "epoch": 25.18592108998013, "grad_norm": 0.4559331238269806, "learning_rate": 7.482514902072098e-05, "loss": 0.011958794295787811, "step": 88730 }, { "epoch": 25.18875957990349, "grad_norm": 8.311619758605957, "learning_rate": 7.482231053079762e-05, "loss": 0.010458940267562866, "step": 88740 }, { "epoch": 25.191598069826853, "grad_norm": 6.972181797027588, "learning_rate": 7.481947204087426e-05, "loss": 0.01580372601747513, "step": 88750 }, { "epoch": 25.194436559750212, "grad_norm": 0.32099586725234985, "learning_rate": 7.48166335509509e-05, "loss": 0.009928276389837265, "step": 88760 }, { "epoch": 25.197275049673575, "grad_norm": 0.2579554319381714, "learning_rate": 7.481379506102754e-05, "loss": 0.006549473851919174, "step": 88770 }, { "epoch": 25.200113539596934, "grad_norm": 0.3874220550060272, "learning_rate": 7.481095657110417e-05, "loss": 0.007567853480577469, "step": 88780 }, { "epoch": 25.202952029520294, "grad_norm": 0.2313355654478073, "learning_rate": 7.480811808118081e-05, "loss": 0.0017976256087422371, "step": 88790 }, { "epoch": 25.205790519443656, "grad_norm": 17.096664428710938, "learning_rate": 7.480527959125745e-05, "loss": 0.014226937294006347, "step": 88800 }, { "epoch": 25.208629009367016, "grad_norm": 1.0677471160888672, "learning_rate": 7.48024411013341e-05, "loss": 0.008298756182193756, "step": 88810 }, { "epoch": 25.21146749929038, "grad_norm": 0.26424217224121094, "learning_rate": 7.479960261141074e-05, "loss": 0.007903221249580383, "step": 88820 }, { "epoch": 25.214305989213738, "grad_norm": 5.477983474731445, "learning_rate": 7.479676412148738e-05, "loss": 0.006830044090747833, "step": 88830 }, { "epoch": 25.217144479137097, "grad_norm": 0.6820372343063354, "learning_rate": 7.4793925631564e-05, "loss": 0.0071021273732185366, "step": 88840 }, { "epoch": 25.21998296906046, "grad_norm": 0.3963971436023712, "learning_rate": 7.479108714164065e-05, "loss": 0.005699237436056137, "step": 88850 }, { "epoch": 25.22282145898382, "grad_norm": 3.97796893119812, "learning_rate": 7.478824865171729e-05, "loss": 0.009984759241342544, "step": 88860 }, { "epoch": 25.225659948907182, "grad_norm": 3.2888407707214355, "learning_rate": 7.478541016179393e-05, "loss": 0.012810561060905456, "step": 88870 }, { "epoch": 25.22849843883054, "grad_norm": 10.101506233215332, "learning_rate": 7.478257167187057e-05, "loss": 0.006711839139461518, "step": 88880 }, { "epoch": 25.231336928753905, "grad_norm": 0.14629694819450378, "learning_rate": 7.477973318194721e-05, "loss": 0.008156283944845199, "step": 88890 }, { "epoch": 25.234175418677264, "grad_norm": 0.4291971027851105, "learning_rate": 7.477689469202384e-05, "loss": 0.010949918627738952, "step": 88900 }, { "epoch": 25.237013908600623, "grad_norm": 0.7845060229301453, "learning_rate": 7.477405620210048e-05, "loss": 0.003921416774392128, "step": 88910 }, { "epoch": 25.239852398523986, "grad_norm": 2.5105197429656982, "learning_rate": 7.477121771217712e-05, "loss": 0.023205177485942842, "step": 88920 }, { "epoch": 25.242690888447346, "grad_norm": 0.8168386816978455, "learning_rate": 7.476837922225376e-05, "loss": 0.0068645179271698, "step": 88930 }, { "epoch": 25.24552937837071, "grad_norm": 7.170585632324219, "learning_rate": 7.47655407323304e-05, "loss": 0.017696772515773774, "step": 88940 }, { "epoch": 25.248367868294068, "grad_norm": 1.9250679016113281, "learning_rate": 7.476270224240705e-05, "loss": 0.00790131688117981, "step": 88950 }, { "epoch": 25.251206358217427, "grad_norm": 0.5847057104110718, "learning_rate": 7.475986375248369e-05, "loss": 0.005492392182350159, "step": 88960 }, { "epoch": 25.25404484814079, "grad_norm": 0.412710964679718, "learning_rate": 7.475702526256032e-05, "loss": 0.008156956732273101, "step": 88970 }, { "epoch": 25.25688333806415, "grad_norm": 3.0751490592956543, "learning_rate": 7.475418677263696e-05, "loss": 0.006053932383656502, "step": 88980 }, { "epoch": 25.259721827987512, "grad_norm": 2.2361323833465576, "learning_rate": 7.47513482827136e-05, "loss": 0.010091498494148254, "step": 88990 }, { "epoch": 25.26256031791087, "grad_norm": 2.423135995864868, "learning_rate": 7.474850979279024e-05, "loss": 0.007437251508235931, "step": 89000 }, { "epoch": 25.26256031791087, "eval_accuracy": 0.9684618808418644, "eval_loss": 0.1019994467496872, "eval_runtime": 31.1684, "eval_samples_per_second": 504.582, "eval_steps_per_second": 7.893, "step": 89000 }, { "epoch": 25.26539880783423, "grad_norm": 1.7267898321151733, "learning_rate": 7.474567130286688e-05, "loss": 0.008317448943853379, "step": 89010 }, { "epoch": 25.268237297757594, "grad_norm": 8.916865348815918, "learning_rate": 7.474283281294352e-05, "loss": 0.012136688083410263, "step": 89020 }, { "epoch": 25.271075787680953, "grad_norm": 0.3587147295475006, "learning_rate": 7.473999432302015e-05, "loss": 0.015295733511447907, "step": 89030 }, { "epoch": 25.273914277604316, "grad_norm": 13.232930183410645, "learning_rate": 7.473715583309679e-05, "loss": 0.008462396264076234, "step": 89040 }, { "epoch": 25.276752767527675, "grad_norm": 2.6565041542053223, "learning_rate": 7.473431734317343e-05, "loss": 0.006883489340543747, "step": 89050 }, { "epoch": 25.279591257451035, "grad_norm": 1.7084414958953857, "learning_rate": 7.473147885325008e-05, "loss": 0.019288837909698486, "step": 89060 }, { "epoch": 25.282429747374398, "grad_norm": 0.9162612557411194, "learning_rate": 7.472864036332672e-05, "loss": 0.01957191973924637, "step": 89070 }, { "epoch": 25.285268237297757, "grad_norm": 0.14706148207187653, "learning_rate": 7.472580187340336e-05, "loss": 0.010636127740144729, "step": 89080 }, { "epoch": 25.28810672722112, "grad_norm": 0.7329239845275879, "learning_rate": 7.472296338348e-05, "loss": 0.003111882507801056, "step": 89090 }, { "epoch": 25.29094521714448, "grad_norm": 1.0475722551345825, "learning_rate": 7.472012489355663e-05, "loss": 0.006907813996076584, "step": 89100 }, { "epoch": 25.29378370706784, "grad_norm": 3.6275408267974854, "learning_rate": 7.471728640363327e-05, "loss": 0.006338354200124741, "step": 89110 }, { "epoch": 25.2966221969912, "grad_norm": 0.8380832076072693, "learning_rate": 7.471444791370991e-05, "loss": 0.008261603116989136, "step": 89120 }, { "epoch": 25.29946068691456, "grad_norm": 8.231410026550293, "learning_rate": 7.471160942378654e-05, "loss": 0.018148744106292726, "step": 89130 }, { "epoch": 25.302299176837924, "grad_norm": 9.821887016296387, "learning_rate": 7.470877093386319e-05, "loss": 0.01976998746395111, "step": 89140 }, { "epoch": 25.305137666761283, "grad_norm": 2.143378973007202, "learning_rate": 7.470593244393983e-05, "loss": 0.014833828806877137, "step": 89150 }, { "epoch": 25.307976156684642, "grad_norm": 0.8944421410560608, "learning_rate": 7.470309395401646e-05, "loss": 0.008533579856157303, "step": 89160 }, { "epoch": 25.310814646608005, "grad_norm": 4.758836269378662, "learning_rate": 7.47002554640931e-05, "loss": 0.010344582051038742, "step": 89170 }, { "epoch": 25.313653136531364, "grad_norm": 5.470677375793457, "learning_rate": 7.469741697416974e-05, "loss": 0.008317559957504272, "step": 89180 }, { "epoch": 25.316491626454727, "grad_norm": 5.148582935333252, "learning_rate": 7.469457848424639e-05, "loss": 0.00568288154900074, "step": 89190 }, { "epoch": 25.319330116378087, "grad_norm": 0.5280429720878601, "learning_rate": 7.469173999432303e-05, "loss": 0.014339965581893922, "step": 89200 }, { "epoch": 25.322168606301446, "grad_norm": 0.13519060611724854, "learning_rate": 7.468890150439967e-05, "loss": 0.0030381591990590096, "step": 89210 }, { "epoch": 25.32500709622481, "grad_norm": 0.805263876914978, "learning_rate": 7.468606301447631e-05, "loss": 0.006089410930871964, "step": 89220 }, { "epoch": 25.327845586148168, "grad_norm": 10.490825653076172, "learning_rate": 7.468322452455294e-05, "loss": 0.00943363755941391, "step": 89230 }, { "epoch": 25.33068407607153, "grad_norm": 9.392050743103027, "learning_rate": 7.468038603462958e-05, "loss": 0.019899250566959382, "step": 89240 }, { "epoch": 25.33352256599489, "grad_norm": 0.14821508526802063, "learning_rate": 7.467754754470622e-05, "loss": 0.01045924872159958, "step": 89250 }, { "epoch": 25.336361055918253, "grad_norm": 11.088156700134277, "learning_rate": 7.467470905478285e-05, "loss": 0.01059696450829506, "step": 89260 }, { "epoch": 25.339199545841613, "grad_norm": 2.051734209060669, "learning_rate": 7.46718705648595e-05, "loss": 0.004252380132675171, "step": 89270 }, { "epoch": 25.342038035764972, "grad_norm": 5.370089054107666, "learning_rate": 7.466903207493614e-05, "loss": 0.0049885731190443035, "step": 89280 }, { "epoch": 25.344876525688335, "grad_norm": 0.7628191113471985, "learning_rate": 7.466619358501277e-05, "loss": 0.017790311574935914, "step": 89290 }, { "epoch": 25.347715015611694, "grad_norm": 2.1906838417053223, "learning_rate": 7.466335509508941e-05, "loss": 0.009002625942230225, "step": 89300 }, { "epoch": 25.350553505535057, "grad_norm": 0.9409029483795166, "learning_rate": 7.466051660516606e-05, "loss": 0.010196210443973541, "step": 89310 }, { "epoch": 25.353391995458416, "grad_norm": 10.652228355407715, "learning_rate": 7.46576781152427e-05, "loss": 0.009210561960935592, "step": 89320 }, { "epoch": 25.356230485381776, "grad_norm": 0.5737007260322571, "learning_rate": 7.465483962531932e-05, "loss": 0.027372828125953673, "step": 89330 }, { "epoch": 25.35906897530514, "grad_norm": 1.1164965629577637, "learning_rate": 7.46522849843883e-05, "loss": 0.027994710206985473, "step": 89340 }, { "epoch": 25.361907465228498, "grad_norm": 1.7253034114837646, "learning_rate": 7.464944649446495e-05, "loss": 0.013909734785556793, "step": 89350 }, { "epoch": 25.36474595515186, "grad_norm": 6.285429954528809, "learning_rate": 7.464660800454159e-05, "loss": 0.0035476937890052797, "step": 89360 }, { "epoch": 25.36758444507522, "grad_norm": 0.17200380563735962, "learning_rate": 7.464376951461823e-05, "loss": 0.004537386819720268, "step": 89370 }, { "epoch": 25.37042293499858, "grad_norm": 0.19730165600776672, "learning_rate": 7.464093102469487e-05, "loss": 0.011376288533210755, "step": 89380 }, { "epoch": 25.373261424921942, "grad_norm": 1.7278491258621216, "learning_rate": 7.463809253477151e-05, "loss": 0.01840505748987198, "step": 89390 }, { "epoch": 25.3760999148453, "grad_norm": 6.291707515716553, "learning_rate": 7.463525404484815e-05, "loss": 0.02354396879673004, "step": 89400 }, { "epoch": 25.378938404768665, "grad_norm": 0.22226478159427643, "learning_rate": 7.463241555492478e-05, "loss": 0.004655932635068893, "step": 89410 }, { "epoch": 25.381776894692024, "grad_norm": 0.7492541670799255, "learning_rate": 7.462957706500142e-05, "loss": 0.010103525966405869, "step": 89420 }, { "epoch": 25.384615384615383, "grad_norm": 10.672445297241211, "learning_rate": 7.462673857507806e-05, "loss": 0.005995461717247963, "step": 89430 }, { "epoch": 25.387453874538746, "grad_norm": 0.5955822467803955, "learning_rate": 7.462390008515469e-05, "loss": 0.0016033813357353211, "step": 89440 }, { "epoch": 25.390292364462105, "grad_norm": 10.716609001159668, "learning_rate": 7.462106159523135e-05, "loss": 0.008373571932315827, "step": 89450 }, { "epoch": 25.39313085438547, "grad_norm": 0.0685468316078186, "learning_rate": 7.461822310530799e-05, "loss": 0.01330282837152481, "step": 89460 }, { "epoch": 25.395969344308828, "grad_norm": 0.5304935574531555, "learning_rate": 7.461538461538462e-05, "loss": 0.007710562646389007, "step": 89470 }, { "epoch": 25.398807834232187, "grad_norm": 2.178284168243408, "learning_rate": 7.461254612546126e-05, "loss": 0.012113523483276368, "step": 89480 }, { "epoch": 25.40164632415555, "grad_norm": 2.3107974529266357, "learning_rate": 7.46097076355379e-05, "loss": 0.004521171748638153, "step": 89490 }, { "epoch": 25.40448481407891, "grad_norm": 1.8620744943618774, "learning_rate": 7.460686914561454e-05, "loss": 0.011105257272720336, "step": 89500 }, { "epoch": 25.40448481407891, "eval_accuracy": 0.968779805430152, "eval_loss": 0.09675829112529755, "eval_runtime": 32.0178, "eval_samples_per_second": 491.196, "eval_steps_per_second": 7.683, "step": 89500 }, { "epoch": 25.407323304002272, "grad_norm": 0.47470375895500183, "learning_rate": 7.460403065569117e-05, "loss": 0.014409217238426208, "step": 89510 }, { "epoch": 25.41016179392563, "grad_norm": 9.425232887268066, "learning_rate": 7.460119216576782e-05, "loss": 0.018158724904060362, "step": 89520 }, { "epoch": 25.41300028384899, "grad_norm": 0.690945029258728, "learning_rate": 7.459835367584446e-05, "loss": 0.008444498479366302, "step": 89530 }, { "epoch": 25.415838773772354, "grad_norm": 0.20545555651187897, "learning_rate": 7.459551518592109e-05, "loss": 0.008665291965007782, "step": 89540 }, { "epoch": 25.418677263695713, "grad_norm": 0.12948764860630035, "learning_rate": 7.459267669599773e-05, "loss": 0.014690090715885163, "step": 89550 }, { "epoch": 25.421515753619076, "grad_norm": 0.17304100096225739, "learning_rate": 7.458983820607437e-05, "loss": 0.007998335361480712, "step": 89560 }, { "epoch": 25.424354243542435, "grad_norm": 0.2681643068790436, "learning_rate": 7.4586999716151e-05, "loss": 0.012142705917358398, "step": 89570 }, { "epoch": 25.427192733465795, "grad_norm": 1.4312011003494263, "learning_rate": 7.458416122622766e-05, "loss": 0.013508948683738708, "step": 89580 }, { "epoch": 25.430031223389157, "grad_norm": 4.251752853393555, "learning_rate": 7.45813227363043e-05, "loss": 0.03405413031578064, "step": 89590 }, { "epoch": 25.432869713312517, "grad_norm": 0.7419965267181396, "learning_rate": 7.457848424638093e-05, "loss": 0.021492759883403777, "step": 89600 }, { "epoch": 25.43570820323588, "grad_norm": 12.000972747802734, "learning_rate": 7.457564575645757e-05, "loss": 0.00889514535665512, "step": 89610 }, { "epoch": 25.43854669315924, "grad_norm": 0.7974718809127808, "learning_rate": 7.457280726653421e-05, "loss": 0.007174207270145417, "step": 89620 }, { "epoch": 25.4413851830826, "grad_norm": 1.3605254888534546, "learning_rate": 7.456996877661085e-05, "loss": 0.024742935597896577, "step": 89630 }, { "epoch": 25.44422367300596, "grad_norm": 2.2025234699249268, "learning_rate": 7.456713028668748e-05, "loss": 0.009747720509767532, "step": 89640 }, { "epoch": 25.44706216292932, "grad_norm": 13.632218360900879, "learning_rate": 7.456429179676413e-05, "loss": 0.016755563020706177, "step": 89650 }, { "epoch": 25.449900652852683, "grad_norm": 5.678091049194336, "learning_rate": 7.456145330684076e-05, "loss": 0.00833926498889923, "step": 89660 }, { "epoch": 25.452739142776043, "grad_norm": 0.250432550907135, "learning_rate": 7.45586148169174e-05, "loss": 0.00542074404656887, "step": 89670 }, { "epoch": 25.455577632699406, "grad_norm": 0.35750293731689453, "learning_rate": 7.455577632699404e-05, "loss": 0.004305268079042435, "step": 89680 }, { "epoch": 25.458416122622765, "grad_norm": 1.9380242824554443, "learning_rate": 7.455293783707068e-05, "loss": 0.0034865133464336397, "step": 89690 }, { "epoch": 25.461254612546124, "grad_norm": 1.7275127172470093, "learning_rate": 7.455009934714731e-05, "loss": 0.01373683512210846, "step": 89700 }, { "epoch": 25.464093102469487, "grad_norm": 2.633692502975464, "learning_rate": 7.454726085722395e-05, "loss": 0.007697141170501709, "step": 89710 }, { "epoch": 25.466931592392847, "grad_norm": 0.37864720821380615, "learning_rate": 7.454442236730061e-05, "loss": 0.006617267429828644, "step": 89720 }, { "epoch": 25.46977008231621, "grad_norm": 0.46586552262306213, "learning_rate": 7.454158387737724e-05, "loss": 0.015822188556194307, "step": 89730 }, { "epoch": 25.47260857223957, "grad_norm": 6.255487442016602, "learning_rate": 7.453874538745388e-05, "loss": 0.019852426648139954, "step": 89740 }, { "epoch": 25.475447062162928, "grad_norm": 1.8029837608337402, "learning_rate": 7.453590689753052e-05, "loss": 0.007124315202236176, "step": 89750 }, { "epoch": 25.47828555208629, "grad_norm": 5.727311134338379, "learning_rate": 7.453306840760715e-05, "loss": 0.007385125011205673, "step": 89760 }, { "epoch": 25.48112404200965, "grad_norm": 1.282725214958191, "learning_rate": 7.453022991768379e-05, "loss": 0.004679073393344879, "step": 89770 }, { "epoch": 25.483962531933013, "grad_norm": 0.2311718761920929, "learning_rate": 7.452739142776044e-05, "loss": 0.0028640016913414, "step": 89780 }, { "epoch": 25.486801021856373, "grad_norm": 0.18721166253089905, "learning_rate": 7.452455293783707e-05, "loss": 0.002139225974678993, "step": 89790 }, { "epoch": 25.489639511779732, "grad_norm": 0.42017048597335815, "learning_rate": 7.452171444791371e-05, "loss": 0.010600708425045013, "step": 89800 }, { "epoch": 25.492478001703095, "grad_norm": 0.6089770197868347, "learning_rate": 7.451887595799035e-05, "loss": 0.003857284039258957, "step": 89810 }, { "epoch": 25.495316491626454, "grad_norm": 12.837237358093262, "learning_rate": 7.4516037468067e-05, "loss": 0.011503615230321885, "step": 89820 }, { "epoch": 25.498154981549817, "grad_norm": 2.9708023071289062, "learning_rate": 7.451319897814362e-05, "loss": 0.010358942300081253, "step": 89830 }, { "epoch": 25.500993471473176, "grad_norm": 0.6784881949424744, "learning_rate": 7.451036048822026e-05, "loss": 0.012020077556371689, "step": 89840 }, { "epoch": 25.503831961396536, "grad_norm": 5.127619743347168, "learning_rate": 7.450752199829692e-05, "loss": 0.017242418229579927, "step": 89850 }, { "epoch": 25.5066704513199, "grad_norm": 6.843567848205566, "learning_rate": 7.450468350837355e-05, "loss": 0.018272146582603455, "step": 89860 }, { "epoch": 25.509508941243258, "grad_norm": 2.332331895828247, "learning_rate": 7.450184501845019e-05, "loss": 0.007542164623737335, "step": 89870 }, { "epoch": 25.51234743116662, "grad_norm": 4.345371246337891, "learning_rate": 7.449900652852683e-05, "loss": 0.009639744460582734, "step": 89880 }, { "epoch": 25.51518592108998, "grad_norm": 12.709461212158203, "learning_rate": 7.449616803860346e-05, "loss": 0.014072233438491821, "step": 89890 }, { "epoch": 25.51802441101334, "grad_norm": 0.20448452234268188, "learning_rate": 7.44933295486801e-05, "loss": 0.007963302731513976, "step": 89900 }, { "epoch": 25.520862900936702, "grad_norm": 13.476292610168457, "learning_rate": 7.449049105875674e-05, "loss": 0.019671632349491118, "step": 89910 }, { "epoch": 25.52370139086006, "grad_norm": 0.4545043110847473, "learning_rate": 7.448765256883338e-05, "loss": 0.0026844840496778486, "step": 89920 }, { "epoch": 25.526539880783425, "grad_norm": 0.33921900391578674, "learning_rate": 7.448481407891002e-05, "loss": 0.010314853489398956, "step": 89930 }, { "epoch": 25.529378370706784, "grad_norm": 4.510886192321777, "learning_rate": 7.448197558898666e-05, "loss": 0.009906703233718872, "step": 89940 }, { "epoch": 25.532216860630143, "grad_norm": 4.679142951965332, "learning_rate": 7.44791370990633e-05, "loss": 0.006335891783237457, "step": 89950 }, { "epoch": 25.535055350553506, "grad_norm": 10.095640182495117, "learning_rate": 7.447629860913993e-05, "loss": 0.006000934541225434, "step": 89960 }, { "epoch": 25.537893840476865, "grad_norm": 2.538888692855835, "learning_rate": 7.447346011921658e-05, "loss": 0.010891618579626084, "step": 89970 }, { "epoch": 25.54073233040023, "grad_norm": 1.4497588872909546, "learning_rate": 7.447062162929323e-05, "loss": 0.013632899522781372, "step": 89980 }, { "epoch": 25.543570820323588, "grad_norm": 14.750211715698242, "learning_rate": 7.446778313936986e-05, "loss": 0.021562840044498443, "step": 89990 }, { "epoch": 25.546409310246947, "grad_norm": 0.5189008712768555, "learning_rate": 7.44649446494465e-05, "loss": 0.008615458756685257, "step": 90000 }, { "epoch": 25.546409310246947, "eval_accuracy": 0.9682711260888918, "eval_loss": 0.10716607421636581, "eval_runtime": 32.1401, "eval_samples_per_second": 489.327, "eval_steps_per_second": 7.654, "step": 90000 }, { "epoch": 25.54924780017031, "grad_norm": 0.38540369272232056, "learning_rate": 7.446210615952314e-05, "loss": 0.011609244346618652, "step": 90010 }, { "epoch": 25.55208629009367, "grad_norm": 1.0785837173461914, "learning_rate": 7.445926766959977e-05, "loss": 0.008320166915655135, "step": 90020 }, { "epoch": 25.554924780017032, "grad_norm": 2.063633441925049, "learning_rate": 7.445642917967641e-05, "loss": 0.010965348035097123, "step": 90030 }, { "epoch": 25.55776326994039, "grad_norm": 1.066653847694397, "learning_rate": 7.445359068975305e-05, "loss": 0.004671579971909523, "step": 90040 }, { "epoch": 25.56060175986375, "grad_norm": 8.457413673400879, "learning_rate": 7.445075219982969e-05, "loss": 0.021729104220867157, "step": 90050 }, { "epoch": 25.563440249787114, "grad_norm": 8.392313003540039, "learning_rate": 7.444791370990633e-05, "loss": 0.013003981113433838, "step": 90060 }, { "epoch": 25.566278739710473, "grad_norm": 4.624108791351318, "learning_rate": 7.444507521998298e-05, "loss": 0.013246238231658936, "step": 90070 }, { "epoch": 25.569117229633836, "grad_norm": 9.892694473266602, "learning_rate": 7.444223673005962e-05, "loss": 0.01721630394458771, "step": 90080 }, { "epoch": 25.571955719557195, "grad_norm": 1.4449228048324585, "learning_rate": 7.443939824013624e-05, "loss": 0.010924828052520753, "step": 90090 }, { "epoch": 25.574794209480558, "grad_norm": 2.0210015773773193, "learning_rate": 7.443655975021289e-05, "loss": 0.008521143347024918, "step": 90100 }, { "epoch": 25.577632699403917, "grad_norm": 0.676954448223114, "learning_rate": 7.443372126028954e-05, "loss": 0.01687990576028824, "step": 90110 }, { "epoch": 25.580471189327277, "grad_norm": 0.1836266964673996, "learning_rate": 7.443088277036617e-05, "loss": 0.001654258742928505, "step": 90120 }, { "epoch": 25.58330967925064, "grad_norm": 2.2601585388183594, "learning_rate": 7.442804428044281e-05, "loss": 0.02085067331790924, "step": 90130 }, { "epoch": 25.586148169174, "grad_norm": 0.49517562985420227, "learning_rate": 7.442520579051945e-05, "loss": 0.00798189789056778, "step": 90140 }, { "epoch": 25.588986659097362, "grad_norm": 1.1464717388153076, "learning_rate": 7.442236730059608e-05, "loss": 0.003812873363494873, "step": 90150 }, { "epoch": 25.59182514902072, "grad_norm": 0.11964661628007889, "learning_rate": 7.441952881067272e-05, "loss": 0.0021910477429628373, "step": 90160 }, { "epoch": 25.59466363894408, "grad_norm": 7.753471374511719, "learning_rate": 7.441669032074936e-05, "loss": 0.0059756219387054445, "step": 90170 }, { "epoch": 25.597502128867443, "grad_norm": 0.2821861505508423, "learning_rate": 7.4413851830826e-05, "loss": 0.004846497625112534, "step": 90180 }, { "epoch": 25.600340618790803, "grad_norm": 3.1929540634155273, "learning_rate": 7.441101334090265e-05, "loss": 0.013284030556678771, "step": 90190 }, { "epoch": 25.603179108714166, "grad_norm": 2.5116124153137207, "learning_rate": 7.440817485097929e-05, "loss": 0.004954066127538681, "step": 90200 }, { "epoch": 25.606017598637525, "grad_norm": 0.8399869203567505, "learning_rate": 7.440533636105593e-05, "loss": 0.003288187086582184, "step": 90210 }, { "epoch": 25.608856088560884, "grad_norm": 3.6743085384368896, "learning_rate": 7.440249787113256e-05, "loss": 0.007133663445711136, "step": 90220 }, { "epoch": 25.611694578484247, "grad_norm": 0.6636117696762085, "learning_rate": 7.43996593812092e-05, "loss": 0.003915078938007355, "step": 90230 }, { "epoch": 25.614533068407606, "grad_norm": 2.7770304679870605, "learning_rate": 7.439682089128584e-05, "loss": 0.006891221553087234, "step": 90240 }, { "epoch": 25.61737155833097, "grad_norm": 12.426870346069336, "learning_rate": 7.439398240136248e-05, "loss": 0.013340204954147339, "step": 90250 }, { "epoch": 25.62021004825433, "grad_norm": 4.590137481689453, "learning_rate": 7.439114391143912e-05, "loss": 0.020398013293743134, "step": 90260 }, { "epoch": 25.623048538177688, "grad_norm": 0.472206711769104, "learning_rate": 7.438830542151576e-05, "loss": 0.009042491763830185, "step": 90270 }, { "epoch": 25.62588702810105, "grad_norm": 1.9922901391983032, "learning_rate": 7.438546693159239e-05, "loss": 0.009527622908353805, "step": 90280 }, { "epoch": 25.62872551802441, "grad_norm": 0.19202233850955963, "learning_rate": 7.438262844166903e-05, "loss": 0.016148528456687926, "step": 90290 }, { "epoch": 25.631564007947773, "grad_norm": 0.6510984897613525, "learning_rate": 7.437978995174567e-05, "loss": 0.011894938349723817, "step": 90300 }, { "epoch": 25.634402497871132, "grad_norm": 9.376468658447266, "learning_rate": 7.437695146182231e-05, "loss": 0.02315758615732193, "step": 90310 }, { "epoch": 25.637240987794492, "grad_norm": 1.9915847778320312, "learning_rate": 7.437411297189896e-05, "loss": 0.013033168017864227, "step": 90320 }, { "epoch": 25.640079477717855, "grad_norm": 0.18826636672019958, "learning_rate": 7.43712744819756e-05, "loss": 0.004606899619102478, "step": 90330 }, { "epoch": 25.642917967641214, "grad_norm": 2.8383915424346924, "learning_rate": 7.436843599205224e-05, "loss": 0.015871141850948334, "step": 90340 }, { "epoch": 25.645756457564577, "grad_norm": 3.208726644515991, "learning_rate": 7.436559750212887e-05, "loss": 0.009737040847539902, "step": 90350 }, { "epoch": 25.648594947487936, "grad_norm": 0.5642299056053162, "learning_rate": 7.436275901220551e-05, "loss": 0.0032096564769744872, "step": 90360 }, { "epoch": 25.651433437411296, "grad_norm": 2.0800364017486572, "learning_rate": 7.435992052228215e-05, "loss": 0.005873514711856842, "step": 90370 }, { "epoch": 25.65427192733466, "grad_norm": 1.5962680578231812, "learning_rate": 7.435708203235879e-05, "loss": 0.007971050590276718, "step": 90380 }, { "epoch": 25.657110417258018, "grad_norm": 1.4835530519485474, "learning_rate": 7.435424354243543e-05, "loss": 0.019242051243782043, "step": 90390 }, { "epoch": 25.65994890718138, "grad_norm": 0.503690242767334, "learning_rate": 7.435140505251207e-05, "loss": 0.0186344712972641, "step": 90400 }, { "epoch": 25.66278739710474, "grad_norm": 1.3899058103561401, "learning_rate": 7.43485665625887e-05, "loss": 0.008043307811021805, "step": 90410 }, { "epoch": 25.6656258870281, "grad_norm": 0.07085864245891571, "learning_rate": 7.434572807266534e-05, "loss": 0.003644735366106033, "step": 90420 }, { "epoch": 25.668464376951462, "grad_norm": 0.8334052562713623, "learning_rate": 7.434288958274198e-05, "loss": 0.007996916770935059, "step": 90430 }, { "epoch": 25.67130286687482, "grad_norm": 5.77651309967041, "learning_rate": 7.434005109281863e-05, "loss": 0.014565524458885194, "step": 90440 }, { "epoch": 25.674141356798184, "grad_norm": 3.1890127658843994, "learning_rate": 7.433721260289527e-05, "loss": 0.003922684863209724, "step": 90450 }, { "epoch": 25.676979846721544, "grad_norm": 14.084465980529785, "learning_rate": 7.433437411297191e-05, "loss": 0.015540170669555663, "step": 90460 }, { "epoch": 25.679818336644907, "grad_norm": 0.9925637245178223, "learning_rate": 7.433153562304855e-05, "loss": 0.007200963795185089, "step": 90470 }, { "epoch": 25.682656826568266, "grad_norm": 1.2706853151321411, "learning_rate": 7.432869713312518e-05, "loss": 0.013453133404254913, "step": 90480 }, { "epoch": 25.685495316491625, "grad_norm": 2.0845508575439453, "learning_rate": 7.432585864320182e-05, "loss": 0.017751027643680573, "step": 90490 }, { "epoch": 25.688333806414988, "grad_norm": 1.5505220890045166, "learning_rate": 7.432302015327846e-05, "loss": 0.00464121550321579, "step": 90500 }, { "epoch": 25.688333806414988, "eval_accuracy": 0.9701786736186176, "eval_loss": 0.10010858625173569, "eval_runtime": 31.4936, "eval_samples_per_second": 499.371, "eval_steps_per_second": 7.811, "step": 90500 }, { "epoch": 25.691172296338348, "grad_norm": 0.6962722539901733, "learning_rate": 7.43201816633551e-05, "loss": 0.007226504385471344, "step": 90510 }, { "epoch": 25.69401078626171, "grad_norm": 14.827392578125, "learning_rate": 7.431734317343174e-05, "loss": 0.012569423019886016, "step": 90520 }, { "epoch": 25.69684927618507, "grad_norm": 0.6234973669052124, "learning_rate": 7.431450468350838e-05, "loss": 0.005498065799474716, "step": 90530 }, { "epoch": 25.69968776610843, "grad_norm": 4.237894058227539, "learning_rate": 7.431166619358501e-05, "loss": 0.007270877063274383, "step": 90540 }, { "epoch": 25.702526256031792, "grad_norm": 3.3369369506835938, "learning_rate": 7.430882770366165e-05, "loss": 0.010328102111816406, "step": 90550 }, { "epoch": 25.70536474595515, "grad_norm": 0.22364307940006256, "learning_rate": 7.43059892137383e-05, "loss": 0.010292800515890122, "step": 90560 }, { "epoch": 25.708203235878514, "grad_norm": 0.38995879888534546, "learning_rate": 7.430315072381494e-05, "loss": 0.009616436064243316, "step": 90570 }, { "epoch": 25.711041725801874, "grad_norm": 0.827457070350647, "learning_rate": 7.430031223389158e-05, "loss": 0.011696966737508774, "step": 90580 }, { "epoch": 25.713880215725233, "grad_norm": 12.106224060058594, "learning_rate": 7.429747374396822e-05, "loss": 0.014079192280769348, "step": 90590 }, { "epoch": 25.716718705648596, "grad_norm": 3.9957568645477295, "learning_rate": 7.429463525404485e-05, "loss": 0.014243847131729126, "step": 90600 }, { "epoch": 25.719557195571955, "grad_norm": 0.22086453437805176, "learning_rate": 7.429179676412149e-05, "loss": 0.012242984771728516, "step": 90610 }, { "epoch": 25.722395685495318, "grad_norm": 0.5101515054702759, "learning_rate": 7.428895827419813e-05, "loss": 0.007430297881364822, "step": 90620 }, { "epoch": 25.725234175418677, "grad_norm": 3.7444708347320557, "learning_rate": 7.428611978427477e-05, "loss": 0.007301381975412369, "step": 90630 }, { "epoch": 25.728072665342037, "grad_norm": 8.901936531066895, "learning_rate": 7.42832812943514e-05, "loss": 0.025249657034873963, "step": 90640 }, { "epoch": 25.7309111552654, "grad_norm": 3.650114059448242, "learning_rate": 7.428044280442805e-05, "loss": 0.012544295191764832, "step": 90650 }, { "epoch": 25.73374964518876, "grad_norm": 9.632060050964355, "learning_rate": 7.42776043145047e-05, "loss": 0.017318019270896913, "step": 90660 }, { "epoch": 25.73658813511212, "grad_norm": 6.719638347625732, "learning_rate": 7.427476582458132e-05, "loss": 0.017960447072982787, "step": 90670 }, { "epoch": 25.73942662503548, "grad_norm": 2.021409511566162, "learning_rate": 7.427192733465796e-05, "loss": 0.013045002520084382, "step": 90680 }, { "epoch": 25.74226511495884, "grad_norm": 1.2915898561477661, "learning_rate": 7.42690888447346e-05, "loss": 0.008352828025817872, "step": 90690 }, { "epoch": 25.745103604882203, "grad_norm": 10.643007278442383, "learning_rate": 7.426625035481123e-05, "loss": 0.012357166409492493, "step": 90700 }, { "epoch": 25.747942094805563, "grad_norm": 0.26296836137771606, "learning_rate": 7.426341186488789e-05, "loss": 0.01130620613694191, "step": 90710 }, { "epoch": 25.750780584728926, "grad_norm": 1.054850459098816, "learning_rate": 7.426057337496453e-05, "loss": 0.006417820602655411, "step": 90720 }, { "epoch": 25.753619074652285, "grad_norm": 2.2031800746917725, "learning_rate": 7.425773488504116e-05, "loss": 0.010071641206741333, "step": 90730 }, { "epoch": 25.756457564575644, "grad_norm": 3.632169246673584, "learning_rate": 7.42548963951178e-05, "loss": 0.004650980234146118, "step": 90740 }, { "epoch": 25.759296054499007, "grad_norm": 4.0324625968933105, "learning_rate": 7.425205790519444e-05, "loss": 0.004930272698402405, "step": 90750 }, { "epoch": 25.762134544422366, "grad_norm": 0.16914105415344238, "learning_rate": 7.424921941527108e-05, "loss": 0.0028475372120738028, "step": 90760 }, { "epoch": 25.76497303434573, "grad_norm": 0.7319206595420837, "learning_rate": 7.424638092534771e-05, "loss": 0.003315746784210205, "step": 90770 }, { "epoch": 25.76781152426909, "grad_norm": 0.6170145869255066, "learning_rate": 7.424354243542436e-05, "loss": 0.006154341995716095, "step": 90780 }, { "epoch": 25.770650014192448, "grad_norm": 3.030618667602539, "learning_rate": 7.4240703945501e-05, "loss": 0.009194561839103698, "step": 90790 }, { "epoch": 25.77348850411581, "grad_norm": 8.204113006591797, "learning_rate": 7.423786545557763e-05, "loss": 0.012365908920764923, "step": 90800 }, { "epoch": 25.77632699403917, "grad_norm": 0.29321184754371643, "learning_rate": 7.423502696565427e-05, "loss": 0.01490156501531601, "step": 90810 }, { "epoch": 25.779165483962533, "grad_norm": 2.304220199584961, "learning_rate": 7.423218847573092e-05, "loss": 0.008847502619028091, "step": 90820 }, { "epoch": 25.782003973885892, "grad_norm": 0.2627812325954437, "learning_rate": 7.422934998580754e-05, "loss": 0.0071276560425758365, "step": 90830 }, { "epoch": 25.784842463809255, "grad_norm": 2.2214231491088867, "learning_rate": 7.422651149588419e-05, "loss": 0.017530369758605956, "step": 90840 }, { "epoch": 25.787680953732615, "grad_norm": 0.30617737770080566, "learning_rate": 7.422367300596084e-05, "loss": 0.015485936403274536, "step": 90850 }, { "epoch": 25.790519443655974, "grad_norm": 7.192709445953369, "learning_rate": 7.422083451603747e-05, "loss": 0.021380624175071715, "step": 90860 }, { "epoch": 25.793357933579337, "grad_norm": 0.9066954851150513, "learning_rate": 7.421799602611411e-05, "loss": 0.03239285945892334, "step": 90870 }, { "epoch": 25.796196423502696, "grad_norm": 0.6990506052970886, "learning_rate": 7.421515753619075e-05, "loss": 0.018890266120433808, "step": 90880 }, { "epoch": 25.79903491342606, "grad_norm": 9.746612548828125, "learning_rate": 7.421231904626739e-05, "loss": 0.01640550196170807, "step": 90890 }, { "epoch": 25.80187340334942, "grad_norm": 0.8654089570045471, "learning_rate": 7.420948055634402e-05, "loss": 0.004177383705973625, "step": 90900 }, { "epoch": 25.804711893272778, "grad_norm": 6.421217918395996, "learning_rate": 7.420664206642067e-05, "loss": 0.005990825593471527, "step": 90910 }, { "epoch": 25.80755038319614, "grad_norm": 0.46521976590156555, "learning_rate": 7.420380357649732e-05, "loss": 0.019810928404331206, "step": 90920 }, { "epoch": 25.8103888731195, "grad_norm": 7.606039524078369, "learning_rate": 7.420096508657394e-05, "loss": 0.01322605162858963, "step": 90930 }, { "epoch": 25.813227363042863, "grad_norm": 18.673248291015625, "learning_rate": 7.419812659665059e-05, "loss": 0.013413110375404358, "step": 90940 }, { "epoch": 25.816065852966222, "grad_norm": 4.864353656768799, "learning_rate": 7.419528810672723e-05, "loss": 0.01897050142288208, "step": 90950 }, { "epoch": 25.81890434288958, "grad_norm": 2.738835096359253, "learning_rate": 7.419244961680385e-05, "loss": 0.005047548562288284, "step": 90960 }, { "epoch": 25.821742832812944, "grad_norm": 2.254920244216919, "learning_rate": 7.41896111268805e-05, "loss": 0.02924007773399353, "step": 90970 }, { "epoch": 25.824581322736304, "grad_norm": 7.6654863357543945, "learning_rate": 7.418677263695715e-05, "loss": 0.019563350081443786, "step": 90980 }, { "epoch": 25.827419812659667, "grad_norm": 1.1490751504898071, "learning_rate": 7.418393414703378e-05, "loss": 0.017334794998168944, "step": 90990 }, { "epoch": 25.830258302583026, "grad_norm": 3.7733311653137207, "learning_rate": 7.418109565711042e-05, "loss": 0.004975069314241409, "step": 91000 }, { "epoch": 25.830258302583026, "eval_accuracy": 0.967953201500604, "eval_loss": 0.10677766054868698, "eval_runtime": 32.3298, "eval_samples_per_second": 486.456, "eval_steps_per_second": 7.609, "step": 91000 }, { "epoch": 25.833096792506385, "grad_norm": 6.386855125427246, "learning_rate": 7.417825716718706e-05, "loss": 0.01648752987384796, "step": 91010 }, { "epoch": 25.835935282429748, "grad_norm": 10.907753944396973, "learning_rate": 7.41754186772637e-05, "loss": 0.009514281153678894, "step": 91020 }, { "epoch": 25.838773772353107, "grad_norm": 0.3697199821472168, "learning_rate": 7.417258018734033e-05, "loss": 0.008957795798778534, "step": 91030 }, { "epoch": 25.84161226227647, "grad_norm": 23.928796768188477, "learning_rate": 7.416974169741697e-05, "loss": 0.026893222332000734, "step": 91040 }, { "epoch": 25.84445075219983, "grad_norm": 14.434755325317383, "learning_rate": 7.416690320749363e-05, "loss": 0.013743598759174348, "step": 91050 }, { "epoch": 25.84728924212319, "grad_norm": 1.8203034400939941, "learning_rate": 7.416406471757025e-05, "loss": 0.012014111876487732, "step": 91060 }, { "epoch": 25.850127732046552, "grad_norm": 4.547553062438965, "learning_rate": 7.41612262276469e-05, "loss": 0.008239923417568207, "step": 91070 }, { "epoch": 25.85296622196991, "grad_norm": 0.6611328125, "learning_rate": 7.415838773772354e-05, "loss": 0.007966189086437226, "step": 91080 }, { "epoch": 25.855804711893274, "grad_norm": 0.7000671029090881, "learning_rate": 7.415554924780017e-05, "loss": 0.015919029712677002, "step": 91090 }, { "epoch": 25.858643201816633, "grad_norm": 2.305426597595215, "learning_rate": 7.415271075787681e-05, "loss": 0.030028027296066285, "step": 91100 }, { "epoch": 25.861481691739993, "grad_norm": 15.570962905883789, "learning_rate": 7.414987226795346e-05, "loss": 0.011820004880428314, "step": 91110 }, { "epoch": 25.864320181663356, "grad_norm": 4.1458258628845215, "learning_rate": 7.414703377803009e-05, "loss": 0.013002432882785797, "step": 91120 }, { "epoch": 25.867158671586715, "grad_norm": 1.4667894840240479, "learning_rate": 7.414419528810673e-05, "loss": 0.007879237830638885, "step": 91130 }, { "epoch": 25.869997161510078, "grad_norm": 0.21614038944244385, "learning_rate": 7.414135679818337e-05, "loss": 0.021773914992809295, "step": 91140 }, { "epoch": 25.872835651433437, "grad_norm": 0.7875774502754211, "learning_rate": 7.413851830826001e-05, "loss": 0.013640430569648743, "step": 91150 }, { "epoch": 25.875674141356797, "grad_norm": 3.9141488075256348, "learning_rate": 7.413567981833664e-05, "loss": 0.00488295815885067, "step": 91160 }, { "epoch": 25.87851263128016, "grad_norm": 0.9543119072914124, "learning_rate": 7.413284132841328e-05, "loss": 0.00411306694149971, "step": 91170 }, { "epoch": 25.88135112120352, "grad_norm": 0.23275023698806763, "learning_rate": 7.413000283848994e-05, "loss": 0.010814174264669418, "step": 91180 }, { "epoch": 25.88418961112688, "grad_norm": 0.147043839097023, "learning_rate": 7.412716434856657e-05, "loss": 0.0070279560983181, "step": 91190 }, { "epoch": 25.88702810105024, "grad_norm": 1.1091636419296265, "learning_rate": 7.412432585864321e-05, "loss": 0.010818889737129212, "step": 91200 }, { "epoch": 25.8898665909736, "grad_norm": 10.605774879455566, "learning_rate": 7.412148736871985e-05, "loss": 0.023306429386138916, "step": 91210 }, { "epoch": 25.892705080896963, "grad_norm": 0.2075026035308838, "learning_rate": 7.411864887879648e-05, "loss": 0.007010768353939057, "step": 91220 }, { "epoch": 25.895543570820323, "grad_norm": 1.0023187398910522, "learning_rate": 7.411581038887312e-05, "loss": 0.0034771181643009188, "step": 91230 }, { "epoch": 25.898382060743685, "grad_norm": 9.463793754577637, "learning_rate": 7.411297189894976e-05, "loss": 0.007321682572364807, "step": 91240 }, { "epoch": 25.901220550667045, "grad_norm": 8.556441307067871, "learning_rate": 7.41101334090264e-05, "loss": 0.016090008616447448, "step": 91250 }, { "epoch": 25.904059040590404, "grad_norm": 2.1930532455444336, "learning_rate": 7.410729491910304e-05, "loss": 0.02759735882282257, "step": 91260 }, { "epoch": 25.906897530513767, "grad_norm": 0.5320106744766235, "learning_rate": 7.410445642917968e-05, "loss": 0.0117863230407238, "step": 91270 }, { "epoch": 25.909736020437126, "grad_norm": 7.868771553039551, "learning_rate": 7.410161793925632e-05, "loss": 0.022496986389160156, "step": 91280 }, { "epoch": 25.91257451036049, "grad_norm": 2.2077040672302246, "learning_rate": 7.409877944933295e-05, "loss": 0.009529747068881989, "step": 91290 }, { "epoch": 25.91541300028385, "grad_norm": 0.7100173830986023, "learning_rate": 7.40959409594096e-05, "loss": 0.0024851012974977495, "step": 91300 }, { "epoch": 25.91825149020721, "grad_norm": 4.5609869956970215, "learning_rate": 7.409310246948625e-05, "loss": 0.006191249564290046, "step": 91310 }, { "epoch": 25.92108998013057, "grad_norm": 0.27032625675201416, "learning_rate": 7.409026397956288e-05, "loss": 0.008818237483501435, "step": 91320 }, { "epoch": 25.92392847005393, "grad_norm": 12.286656379699707, "learning_rate": 7.408742548963952e-05, "loss": 0.013966916501522065, "step": 91330 }, { "epoch": 25.926766959977293, "grad_norm": 0.16965515911579132, "learning_rate": 7.408458699971616e-05, "loss": 0.013837824761867522, "step": 91340 }, { "epoch": 25.929605449900652, "grad_norm": 3.1021642684936523, "learning_rate": 7.408174850979279e-05, "loss": 0.012317148596048355, "step": 91350 }, { "epoch": 25.932443939824015, "grad_norm": 0.5863439440727234, "learning_rate": 7.407891001986943e-05, "loss": 0.017285771667957306, "step": 91360 }, { "epoch": 25.935282429747375, "grad_norm": 3.3516483306884766, "learning_rate": 7.407607152994607e-05, "loss": 0.019114471971988678, "step": 91370 }, { "epoch": 25.938120919670734, "grad_norm": 12.952999114990234, "learning_rate": 7.407323304002271e-05, "loss": 0.02923743426799774, "step": 91380 }, { "epoch": 25.940959409594097, "grad_norm": 10.030466079711914, "learning_rate": 7.407039455009935e-05, "loss": 0.013706576824188233, "step": 91390 }, { "epoch": 25.943797899517456, "grad_norm": 2.9724695682525635, "learning_rate": 7.4067556060176e-05, "loss": 0.005817212909460068, "step": 91400 }, { "epoch": 25.94663638944082, "grad_norm": 3.005795478820801, "learning_rate": 7.406471757025264e-05, "loss": 0.00826607272028923, "step": 91410 }, { "epoch": 25.94947487936418, "grad_norm": 6.576315879821777, "learning_rate": 7.406187908032926e-05, "loss": 0.009742242097854615, "step": 91420 }, { "epoch": 25.952313369287538, "grad_norm": 1.3519794940948486, "learning_rate": 7.40590405904059e-05, "loss": 0.016540224850177764, "step": 91430 }, { "epoch": 25.9551518592109, "grad_norm": 0.4042956829071045, "learning_rate": 7.405620210048255e-05, "loss": 0.009927792102098465, "step": 91440 }, { "epoch": 25.95799034913426, "grad_norm": 8.763338088989258, "learning_rate": 7.405336361055919e-05, "loss": 0.00847296342253685, "step": 91450 }, { "epoch": 25.960828839057623, "grad_norm": 2.534849166870117, "learning_rate": 7.405052512063583e-05, "loss": 0.007318933308124542, "step": 91460 }, { "epoch": 25.963667328980982, "grad_norm": 1.0958232879638672, "learning_rate": 7.404768663071247e-05, "loss": 0.0032197430729866026, "step": 91470 }, { "epoch": 25.96650581890434, "grad_norm": 1.2913508415222168, "learning_rate": 7.40448481407891e-05, "loss": 0.009135988354682923, "step": 91480 }, { "epoch": 25.969344308827704, "grad_norm": 15.695176124572754, "learning_rate": 7.404200965086574e-05, "loss": 0.01782803386449814, "step": 91490 }, { "epoch": 25.972182798751064, "grad_norm": 21.704504013061523, "learning_rate": 7.403917116094238e-05, "loss": 0.028137663006782533, "step": 91500 }, { "epoch": 25.972182798751064, "eval_accuracy": 0.9702422585362752, "eval_loss": 0.09414780884981155, "eval_runtime": 31.6903, "eval_samples_per_second": 496.272, "eval_steps_per_second": 7.763, "step": 91500 }, { "epoch": 25.975021288674427, "grad_norm": 0.9988896250724792, "learning_rate": 7.403633267101902e-05, "loss": 0.01075546145439148, "step": 91510 }, { "epoch": 25.977859778597786, "grad_norm": 0.19585177302360535, "learning_rate": 7.403349418109566e-05, "loss": 0.02346855103969574, "step": 91520 }, { "epoch": 25.980698268521145, "grad_norm": 1.9649263620376587, "learning_rate": 7.40306556911723e-05, "loss": 0.009406203776597977, "step": 91530 }, { "epoch": 25.983536758444508, "grad_norm": 11.722123146057129, "learning_rate": 7.402781720124893e-05, "loss": 0.013320703804492951, "step": 91540 }, { "epoch": 25.986375248367867, "grad_norm": 0.3345646560192108, "learning_rate": 7.402497871132557e-05, "loss": 0.005006463080644607, "step": 91550 }, { "epoch": 25.98921373829123, "grad_norm": 0.31834325194358826, "learning_rate": 7.402214022140222e-05, "loss": 0.003969578072428703, "step": 91560 }, { "epoch": 25.99205222821459, "grad_norm": 14.432562828063965, "learning_rate": 7.401930173147886e-05, "loss": 0.024895551800727844, "step": 91570 }, { "epoch": 25.99489071813795, "grad_norm": 6.374305248260498, "learning_rate": 7.40164632415555e-05, "loss": 0.0059796363115310665, "step": 91580 }, { "epoch": 25.997729208061312, "grad_norm": 6.072107791900635, "learning_rate": 7.401362475163214e-05, "loss": 0.010428088903427123, "step": 91590 }, { "epoch": 26.00056769798467, "grad_norm": 2.508728504180908, "learning_rate": 7.40110701107011e-05, "loss": 0.026856860518455504, "step": 91600 }, { "epoch": 26.003406187908034, "grad_norm": 0.47993969917297363, "learning_rate": 7.400823162077775e-05, "loss": 0.011805834621191025, "step": 91610 }, { "epoch": 26.006244677831393, "grad_norm": 4.719772815704346, "learning_rate": 7.400539313085439e-05, "loss": 0.01024458035826683, "step": 91620 }, { "epoch": 26.009083167754753, "grad_norm": 0.16518598794937134, "learning_rate": 7.400255464093103e-05, "loss": 0.016178898513317108, "step": 91630 }, { "epoch": 26.011921657678116, "grad_norm": 0.16494183242321014, "learning_rate": 7.399971615100767e-05, "loss": 0.00660051554441452, "step": 91640 }, { "epoch": 26.014760147601475, "grad_norm": 1.6402392387390137, "learning_rate": 7.399687766108431e-05, "loss": 0.012804946303367615, "step": 91650 }, { "epoch": 26.017598637524838, "grad_norm": 6.249334812164307, "learning_rate": 7.399403917116094e-05, "loss": 0.004940034821629524, "step": 91660 }, { "epoch": 26.020437127448197, "grad_norm": 0.16886022686958313, "learning_rate": 7.399120068123758e-05, "loss": 0.004770774394273758, "step": 91670 }, { "epoch": 26.02327561737156, "grad_norm": 0.3433725833892822, "learning_rate": 7.398836219131422e-05, "loss": 0.0034532219171524046, "step": 91680 }, { "epoch": 26.02611410729492, "grad_norm": 0.6225762367248535, "learning_rate": 7.398552370139086e-05, "loss": 0.006844887882471085, "step": 91690 }, { "epoch": 26.02895259721828, "grad_norm": 8.108752250671387, "learning_rate": 7.39826852114675e-05, "loss": 0.006304467469453812, "step": 91700 }, { "epoch": 26.03179108714164, "grad_norm": 0.42007920145988464, "learning_rate": 7.397984672154415e-05, "loss": 0.023453757166862488, "step": 91710 }, { "epoch": 26.034629577065, "grad_norm": 1.4228780269622803, "learning_rate": 7.397700823162078e-05, "loss": 0.004242456704378128, "step": 91720 }, { "epoch": 26.037468066988364, "grad_norm": 6.449555397033691, "learning_rate": 7.397416974169742e-05, "loss": 0.007444438338279724, "step": 91730 }, { "epoch": 26.040306556911723, "grad_norm": 0.423074334859848, "learning_rate": 7.397133125177406e-05, "loss": 0.00402703583240509, "step": 91740 }, { "epoch": 26.043145046835082, "grad_norm": 3.628984212875366, "learning_rate": 7.39684927618507e-05, "loss": 0.008005397766828537, "step": 91750 }, { "epoch": 26.045983536758445, "grad_norm": 1.4003269672393799, "learning_rate": 7.396565427192734e-05, "loss": 0.0035007480531930924, "step": 91760 }, { "epoch": 26.048822026681805, "grad_norm": 16.51129722595215, "learning_rate": 7.396281578200398e-05, "loss": 0.015429350733757018, "step": 91770 }, { "epoch": 26.051660516605168, "grad_norm": 0.6514703035354614, "learning_rate": 7.395997729208062e-05, "loss": 0.0046922177076339725, "step": 91780 }, { "epoch": 26.054499006528527, "grad_norm": 0.48423317074775696, "learning_rate": 7.395713880215725e-05, "loss": 0.008311672508716584, "step": 91790 }, { "epoch": 26.057337496451886, "grad_norm": 2.4846277236938477, "learning_rate": 7.395430031223389e-05, "loss": 0.012605038285255433, "step": 91800 }, { "epoch": 26.06017598637525, "grad_norm": 10.705912590026855, "learning_rate": 7.395146182231053e-05, "loss": 0.007300843298435211, "step": 91810 }, { "epoch": 26.06301447629861, "grad_norm": 6.153450965881348, "learning_rate": 7.394862333238718e-05, "loss": 0.012901200354099274, "step": 91820 }, { "epoch": 26.06585296622197, "grad_norm": 1.408206820487976, "learning_rate": 7.394578484246382e-05, "loss": 0.0029639896005392075, "step": 91830 }, { "epoch": 26.06869145614533, "grad_norm": 4.78284215927124, "learning_rate": 7.394294635254046e-05, "loss": 0.015924523770809173, "step": 91840 }, { "epoch": 26.07152994606869, "grad_norm": 4.065502643585205, "learning_rate": 7.394010786261709e-05, "loss": 0.015618054568767548, "step": 91850 }, { "epoch": 26.074368435992053, "grad_norm": 0.44722357392311096, "learning_rate": 7.393726937269373e-05, "loss": 0.005499398708343506, "step": 91860 }, { "epoch": 26.077206925915412, "grad_norm": 1.260283350944519, "learning_rate": 7.393443088277037e-05, "loss": 0.004512446001172066, "step": 91870 }, { "epoch": 26.080045415838775, "grad_norm": 3.7341675758361816, "learning_rate": 7.393159239284701e-05, "loss": 0.008506526798009872, "step": 91880 }, { "epoch": 26.082883905762134, "grad_norm": 2.3325865268707275, "learning_rate": 7.392875390292365e-05, "loss": 0.005642106756567955, "step": 91890 }, { "epoch": 26.085722395685494, "grad_norm": 12.285056114196777, "learning_rate": 7.392591541300029e-05, "loss": 0.010328821837902069, "step": 91900 }, { "epoch": 26.088560885608857, "grad_norm": 1.7200316190719604, "learning_rate": 7.392307692307693e-05, "loss": 0.01295812577009201, "step": 91910 }, { "epoch": 26.091399375532216, "grad_norm": 1.1561529636383057, "learning_rate": 7.392023843315356e-05, "loss": 0.021177276968955994, "step": 91920 }, { "epoch": 26.09423786545558, "grad_norm": 0.7578274011611938, "learning_rate": 7.39173999432302e-05, "loss": 0.0057920094579458235, "step": 91930 }, { "epoch": 26.097076355378938, "grad_norm": 5.519168376922607, "learning_rate": 7.391456145330684e-05, "loss": 0.005496088415384293, "step": 91940 }, { "epoch": 26.099914845302298, "grad_norm": 7.024682998657227, "learning_rate": 7.391172296338347e-05, "loss": 0.012977916002273559, "step": 91950 }, { "epoch": 26.10275333522566, "grad_norm": 0.22377611696720123, "learning_rate": 7.390888447346013e-05, "loss": 0.010054262727499009, "step": 91960 }, { "epoch": 26.10559182514902, "grad_norm": 0.5555391907691956, "learning_rate": 7.390604598353677e-05, "loss": 0.006316271424293518, "step": 91970 }, { "epoch": 26.108430315072383, "grad_norm": 0.6258983612060547, "learning_rate": 7.39032074936134e-05, "loss": 0.006910897791385651, "step": 91980 }, { "epoch": 26.111268804995742, "grad_norm": 2.155669927597046, "learning_rate": 7.390036900369004e-05, "loss": 0.006577058136463166, "step": 91990 }, { "epoch": 26.1141072949191, "grad_norm": 8.577763557434082, "learning_rate": 7.389753051376668e-05, "loss": 0.007209164649248123, "step": 92000 }, { "epoch": 26.1141072949191, "eval_accuracy": 0.9699243339479875, "eval_loss": 0.0958784893155098, "eval_runtime": 31.4863, "eval_samples_per_second": 499.487, "eval_steps_per_second": 7.813, "step": 92000 }, { "epoch": 26.116945784842464, "grad_norm": 0.41550689935684204, "learning_rate": 7.389469202384332e-05, "loss": 0.017158791422843933, "step": 92010 }, { "epoch": 26.119784274765824, "grad_norm": 8.108226776123047, "learning_rate": 7.389185353391996e-05, "loss": 0.007350982725620269, "step": 92020 }, { "epoch": 26.122622764689186, "grad_norm": 0.9515445232391357, "learning_rate": 7.38890150439966e-05, "loss": 0.006752476841211319, "step": 92030 }, { "epoch": 26.125461254612546, "grad_norm": 2.1425278186798096, "learning_rate": 7.388617655407324e-05, "loss": 0.010766242444515229, "step": 92040 }, { "epoch": 26.12829974453591, "grad_norm": 0.5390787720680237, "learning_rate": 7.388333806414987e-05, "loss": 0.0026796359568834307, "step": 92050 }, { "epoch": 26.131138234459268, "grad_norm": 1.4580254554748535, "learning_rate": 7.388049957422651e-05, "loss": 0.007669779658317566, "step": 92060 }, { "epoch": 26.133976724382627, "grad_norm": 0.5257467031478882, "learning_rate": 7.387766108430316e-05, "loss": 0.002690298855304718, "step": 92070 }, { "epoch": 26.13681521430599, "grad_norm": 1.2211860418319702, "learning_rate": 7.387482259437978e-05, "loss": 0.008357351273298263, "step": 92080 }, { "epoch": 26.13965370422935, "grad_norm": 0.7747055888175964, "learning_rate": 7.387198410445644e-05, "loss": 0.010955923795700073, "step": 92090 }, { "epoch": 26.142492194152712, "grad_norm": 0.18109197914600372, "learning_rate": 7.386914561453308e-05, "loss": 0.00810195952653885, "step": 92100 }, { "epoch": 26.14533068407607, "grad_norm": 5.393846035003662, "learning_rate": 7.386630712460971e-05, "loss": 0.010102920979261399, "step": 92110 }, { "epoch": 26.14816917399943, "grad_norm": 0.796149492263794, "learning_rate": 7.386346863468635e-05, "loss": 0.007447591423988343, "step": 92120 }, { "epoch": 26.151007663922794, "grad_norm": 0.0904274582862854, "learning_rate": 7.386063014476299e-05, "loss": 0.01581808179616928, "step": 92130 }, { "epoch": 26.153846153846153, "grad_norm": 1.426159143447876, "learning_rate": 7.385779165483963e-05, "loss": 0.017684692144393922, "step": 92140 }, { "epoch": 26.156684643769516, "grad_norm": 10.58442211151123, "learning_rate": 7.385495316491626e-05, "loss": 0.010682049393653869, "step": 92150 }, { "epoch": 26.159523133692876, "grad_norm": 0.8104249238967896, "learning_rate": 7.385211467499291e-05, "loss": 0.005755631625652314, "step": 92160 }, { "epoch": 26.162361623616235, "grad_norm": 11.096292495727539, "learning_rate": 7.384927618506956e-05, "loss": 0.011455899477005005, "step": 92170 }, { "epoch": 26.165200113539598, "grad_norm": 0.4139510989189148, "learning_rate": 7.384643769514618e-05, "loss": 0.016855058073997498, "step": 92180 }, { "epoch": 26.168038603462957, "grad_norm": 14.637990951538086, "learning_rate": 7.384359920522282e-05, "loss": 0.016529493033885956, "step": 92190 }, { "epoch": 26.17087709338632, "grad_norm": 3.83351993560791, "learning_rate": 7.384076071529947e-05, "loss": 0.007639339566230774, "step": 92200 }, { "epoch": 26.17371558330968, "grad_norm": 0.5502344369888306, "learning_rate": 7.38379222253761e-05, "loss": 0.006361270695924759, "step": 92210 }, { "epoch": 26.17655407323304, "grad_norm": 0.3495018184185028, "learning_rate": 7.383508373545275e-05, "loss": 0.009624318033456803, "step": 92220 }, { "epoch": 26.1793925631564, "grad_norm": 3.0911316871643066, "learning_rate": 7.383224524552939e-05, "loss": 0.004666218906641007, "step": 92230 }, { "epoch": 26.18223105307976, "grad_norm": 0.8983182311058044, "learning_rate": 7.382940675560602e-05, "loss": 0.017222529649734496, "step": 92240 }, { "epoch": 26.185069543003124, "grad_norm": 8.949037551879883, "learning_rate": 7.382656826568266e-05, "loss": 0.01280592679977417, "step": 92250 }, { "epoch": 26.187908032926483, "grad_norm": 0.21194879710674286, "learning_rate": 7.38237297757593e-05, "loss": 0.006116367876529694, "step": 92260 }, { "epoch": 26.190746522849842, "grad_norm": 0.7844346761703491, "learning_rate": 7.382089128583594e-05, "loss": 0.020743103325366975, "step": 92270 }, { "epoch": 26.193585012773205, "grad_norm": 6.67525577545166, "learning_rate": 7.381805279591257e-05, "loss": 0.010026507079601288, "step": 92280 }, { "epoch": 26.196423502696565, "grad_norm": 0.6282540559768677, "learning_rate": 7.381521430598923e-05, "loss": 0.002783839404582977, "step": 92290 }, { "epoch": 26.199261992619927, "grad_norm": 0.37662559747695923, "learning_rate": 7.381237581606585e-05, "loss": 0.00458422601222992, "step": 92300 }, { "epoch": 26.202100482543287, "grad_norm": 0.2306385189294815, "learning_rate": 7.38095373261425e-05, "loss": 0.002305847965180874, "step": 92310 }, { "epoch": 26.204938972466646, "grad_norm": 2.8592166900634766, "learning_rate": 7.380669883621914e-05, "loss": 0.006137551739811897, "step": 92320 }, { "epoch": 26.20777746239001, "grad_norm": 2.4240221977233887, "learning_rate": 7.380386034629578e-05, "loss": 0.009489794820547104, "step": 92330 }, { "epoch": 26.21061595231337, "grad_norm": 0.41663840413093567, "learning_rate": 7.38010218563724e-05, "loss": 0.012977346777915955, "step": 92340 }, { "epoch": 26.21345444223673, "grad_norm": 0.6482568383216858, "learning_rate": 7.379818336644905e-05, "loss": 0.0019741088151931764, "step": 92350 }, { "epoch": 26.21629293216009, "grad_norm": 1.2553253173828125, "learning_rate": 7.37953448765257e-05, "loss": 0.03158543407917023, "step": 92360 }, { "epoch": 26.21913142208345, "grad_norm": 1.1307393312454224, "learning_rate": 7.379250638660233e-05, "loss": 0.01929493099451065, "step": 92370 }, { "epoch": 26.221969912006813, "grad_norm": 0.2109479010105133, "learning_rate": 7.378966789667897e-05, "loss": 0.005219285190105438, "step": 92380 }, { "epoch": 26.224808401930172, "grad_norm": 6.8526716232299805, "learning_rate": 7.378682940675561e-05, "loss": 0.007986192405223847, "step": 92390 }, { "epoch": 26.227646891853535, "grad_norm": 0.7104370594024658, "learning_rate": 7.378399091683225e-05, "loss": 0.010788311064243317, "step": 92400 }, { "epoch": 26.230485381776894, "grad_norm": 7.885170936584473, "learning_rate": 7.378115242690888e-05, "loss": 0.007539589703083038, "step": 92410 }, { "epoch": 26.233323871700254, "grad_norm": 1.2500799894332886, "learning_rate": 7.377831393698554e-05, "loss": 0.002973645552992821, "step": 92420 }, { "epoch": 26.236162361623617, "grad_norm": 10.949283599853516, "learning_rate": 7.377547544706216e-05, "loss": 0.014808160066604615, "step": 92430 }, { "epoch": 26.239000851546976, "grad_norm": 0.8921816349029541, "learning_rate": 7.37726369571388e-05, "loss": 0.010337941348552704, "step": 92440 }, { "epoch": 26.24183934147034, "grad_norm": 1.4611961841583252, "learning_rate": 7.376979846721545e-05, "loss": 0.015223929286003112, "step": 92450 }, { "epoch": 26.244677831393698, "grad_norm": 1.7603025436401367, "learning_rate": 7.376695997729209e-05, "loss": 0.009551189839839935, "step": 92460 }, { "epoch": 26.24751632131706, "grad_norm": 5.045895099639893, "learning_rate": 7.376412148736872e-05, "loss": 0.020971786975860596, "step": 92470 }, { "epoch": 26.25035481124042, "grad_norm": 0.08690071851015091, "learning_rate": 7.376128299744536e-05, "loss": 0.00686119869351387, "step": 92480 }, { "epoch": 26.25319330116378, "grad_norm": 0.9654287695884705, "learning_rate": 7.375844450752201e-05, "loss": 0.006131289154291153, "step": 92490 }, { "epoch": 26.256031791087143, "grad_norm": 0.0503830760717392, "learning_rate": 7.375560601759864e-05, "loss": 0.012299502640962601, "step": 92500 }, { "epoch": 26.256031791087143, "eval_accuracy": 0.9703058434539328, "eval_loss": 0.09468699246644974, "eval_runtime": 31.8091, "eval_samples_per_second": 494.419, "eval_steps_per_second": 7.734, "step": 92500 }, { "epoch": 26.258870281010502, "grad_norm": 3.025846004486084, "learning_rate": 7.375276752767528e-05, "loss": 0.004975470900535584, "step": 92510 }, { "epoch": 26.261708770933865, "grad_norm": 0.2321932464838028, "learning_rate": 7.374992903775192e-05, "loss": 0.006643175333738327, "step": 92520 }, { "epoch": 26.264547260857224, "grad_norm": 2.2461955547332764, "learning_rate": 7.374709054782855e-05, "loss": 0.011097977310419083, "step": 92530 }, { "epoch": 26.267385750780583, "grad_norm": 3.1445255279541016, "learning_rate": 7.374425205790519e-05, "loss": 0.005190606415271759, "step": 92540 }, { "epoch": 26.270224240703946, "grad_norm": 0.3949303925037384, "learning_rate": 7.374141356798183e-05, "loss": 0.012363631278276443, "step": 92550 }, { "epoch": 26.273062730627306, "grad_norm": 1.0306131839752197, "learning_rate": 7.373857507805847e-05, "loss": 0.007492886483669281, "step": 92560 }, { "epoch": 26.27590122055067, "grad_norm": 6.044046401977539, "learning_rate": 7.373573658813512e-05, "loss": 0.00794539675116539, "step": 92570 }, { "epoch": 26.278739710474028, "grad_norm": 0.3525402843952179, "learning_rate": 7.373289809821176e-05, "loss": 0.009858734905719757, "step": 92580 }, { "epoch": 26.281578200397387, "grad_norm": 3.033472776412964, "learning_rate": 7.37300596082884e-05, "loss": 0.015811192989349365, "step": 92590 }, { "epoch": 26.28441669032075, "grad_norm": 1.378052830696106, "learning_rate": 7.372722111836503e-05, "loss": 0.006582745164632797, "step": 92600 }, { "epoch": 26.28725518024411, "grad_norm": 0.9024652242660522, "learning_rate": 7.372438262844167e-05, "loss": 0.028896653652191163, "step": 92610 }, { "epoch": 26.290093670167472, "grad_norm": 0.32481107115745544, "learning_rate": 7.372154413851832e-05, "loss": 0.026616260409355164, "step": 92620 }, { "epoch": 26.29293216009083, "grad_norm": 5.929981708526611, "learning_rate": 7.371870564859495e-05, "loss": 0.019439248740673064, "step": 92630 }, { "epoch": 26.29577065001419, "grad_norm": 2.0967047214508057, "learning_rate": 7.371586715867159e-05, "loss": 0.005662263557314873, "step": 92640 }, { "epoch": 26.298609139937554, "grad_norm": 11.044548988342285, "learning_rate": 7.371302866874823e-05, "loss": 0.03227064609527588, "step": 92650 }, { "epoch": 26.301447629860913, "grad_norm": 0.08486620336771011, "learning_rate": 7.371019017882486e-05, "loss": 0.005920019745826721, "step": 92660 }, { "epoch": 26.304286119784276, "grad_norm": 6.801551342010498, "learning_rate": 7.37073516889015e-05, "loss": 0.012377683073282242, "step": 92670 }, { "epoch": 26.307124609707635, "grad_norm": 2.0847713947296143, "learning_rate": 7.370451319897814e-05, "loss": 0.021713931858539582, "step": 92680 }, { "epoch": 26.309963099630995, "grad_norm": 1.3447253704071045, "learning_rate": 7.370167470905479e-05, "loss": 0.006275822967290878, "step": 92690 }, { "epoch": 26.312801589554358, "grad_norm": 5.674437046051025, "learning_rate": 7.369883621913143e-05, "loss": 0.01807156056165695, "step": 92700 }, { "epoch": 26.315640079477717, "grad_norm": 1.2775100469589233, "learning_rate": 7.369599772920807e-05, "loss": 0.030078983306884764, "step": 92710 }, { "epoch": 26.31847856940108, "grad_norm": 7.216073036193848, "learning_rate": 7.369315923928471e-05, "loss": 0.02365185022354126, "step": 92720 }, { "epoch": 26.32131705932444, "grad_norm": 14.173765182495117, "learning_rate": 7.369032074936134e-05, "loss": 0.025605848431587218, "step": 92730 }, { "epoch": 26.3241555492478, "grad_norm": 2.7671217918395996, "learning_rate": 7.368748225943798e-05, "loss": 0.013354629278182983, "step": 92740 }, { "epoch": 26.32699403917116, "grad_norm": 4.251099109649658, "learning_rate": 7.368464376951462e-05, "loss": 0.02026963084936142, "step": 92750 }, { "epoch": 26.32983252909452, "grad_norm": 3.0159120559692383, "learning_rate": 7.368180527959126e-05, "loss": 0.010386209189891814, "step": 92760 }, { "epoch": 26.332671019017884, "grad_norm": 3.779182195663452, "learning_rate": 7.36789667896679e-05, "loss": 0.004496930167078972, "step": 92770 }, { "epoch": 26.335509508941243, "grad_norm": 12.402325630187988, "learning_rate": 7.367612829974454e-05, "loss": 0.025400730967521667, "step": 92780 }, { "epoch": 26.338347998864602, "grad_norm": 8.638562202453613, "learning_rate": 7.367328980982117e-05, "loss": 0.013978950679302216, "step": 92790 }, { "epoch": 26.341186488787965, "grad_norm": 9.683276176452637, "learning_rate": 7.367045131989781e-05, "loss": 0.007374817878007889, "step": 92800 }, { "epoch": 26.344024978711325, "grad_norm": 0.45772796869277954, "learning_rate": 7.366761282997445e-05, "loss": 0.006616039574146271, "step": 92810 }, { "epoch": 26.346863468634687, "grad_norm": 3.851959228515625, "learning_rate": 7.36647743400511e-05, "loss": 0.012069138884544372, "step": 92820 }, { "epoch": 26.349701958558047, "grad_norm": 14.085392951965332, "learning_rate": 7.366193585012774e-05, "loss": 0.01765160858631134, "step": 92830 }, { "epoch": 26.352540448481406, "grad_norm": 1.4482502937316895, "learning_rate": 7.365909736020438e-05, "loss": 0.004070385918021202, "step": 92840 }, { "epoch": 26.35537893840477, "grad_norm": 2.6822054386138916, "learning_rate": 7.365625887028102e-05, "loss": 0.013836394250392913, "step": 92850 }, { "epoch": 26.35821742832813, "grad_norm": 0.6110975742340088, "learning_rate": 7.365342038035765e-05, "loss": 0.005951684340834618, "step": 92860 }, { "epoch": 26.36105591825149, "grad_norm": 0.13471630215644836, "learning_rate": 7.365058189043429e-05, "loss": 0.0041008956730365755, "step": 92870 }, { "epoch": 26.36389440817485, "grad_norm": 1.1595858335494995, "learning_rate": 7.364774340051093e-05, "loss": 0.005022244900465012, "step": 92880 }, { "epoch": 26.366732898098213, "grad_norm": 5.212616443634033, "learning_rate": 7.364490491058757e-05, "loss": 0.012870852649211884, "step": 92890 }, { "epoch": 26.369571388021573, "grad_norm": 0.7942333221435547, "learning_rate": 7.364206642066421e-05, "loss": 0.004189717397093773, "step": 92900 }, { "epoch": 26.372409877944932, "grad_norm": 0.6024128794670105, "learning_rate": 7.363922793074085e-05, "loss": 0.007008887082338333, "step": 92910 }, { "epoch": 26.375248367868295, "grad_norm": 0.3002496063709259, "learning_rate": 7.363638944081748e-05, "loss": 0.009779290109872819, "step": 92920 }, { "epoch": 26.378086857791654, "grad_norm": 0.3000771403312683, "learning_rate": 7.363355095089412e-05, "loss": 0.00793919637799263, "step": 92930 }, { "epoch": 26.380925347715017, "grad_norm": 1.5796071290969849, "learning_rate": 7.363071246097077e-05, "loss": 0.014450131356716156, "step": 92940 }, { "epoch": 26.383763837638377, "grad_norm": 1.6405445337295532, "learning_rate": 7.36278739710474e-05, "loss": 0.008795601129531861, "step": 92950 }, { "epoch": 26.386602327561736, "grad_norm": 0.2510904371738434, "learning_rate": 7.362503548112405e-05, "loss": 0.005699639394879341, "step": 92960 }, { "epoch": 26.3894408174851, "grad_norm": 0.09752581268548965, "learning_rate": 7.362219699120069e-05, "loss": 0.020186567306518556, "step": 92970 }, { "epoch": 26.392279307408458, "grad_norm": 0.5659540295600891, "learning_rate": 7.361935850127733e-05, "loss": 0.010608004033565521, "step": 92980 }, { "epoch": 26.39511779733182, "grad_norm": 1.1968358755111694, "learning_rate": 7.361652001135396e-05, "loss": 0.006119243800640106, "step": 92990 }, { "epoch": 26.39795628725518, "grad_norm": 3.1396477222442627, "learning_rate": 7.36136815214306e-05, "loss": 0.004807562381029129, "step": 93000 }, { "epoch": 26.39795628725518, "eval_accuracy": 0.9650282952883577, "eval_loss": 0.1161697655916214, "eval_runtime": 32.189, "eval_samples_per_second": 488.584, "eval_steps_per_second": 7.642, "step": 93000 }, { "epoch": 26.40079477717854, "grad_norm": 1.2163498401641846, "learning_rate": 7.361084303150724e-05, "loss": 0.008039128780364991, "step": 93010 }, { "epoch": 26.403633267101903, "grad_norm": 2.8897817134857178, "learning_rate": 7.360800454158388e-05, "loss": 0.0057464692741632465, "step": 93020 }, { "epoch": 26.406471757025262, "grad_norm": 1.4638886451721191, "learning_rate": 7.360516605166052e-05, "loss": 0.003186637908220291, "step": 93030 }, { "epoch": 26.409310246948625, "grad_norm": 0.8826212286949158, "learning_rate": 7.360232756173717e-05, "loss": 0.010672713071107865, "step": 93040 }, { "epoch": 26.412148736871984, "grad_norm": 3.4137396812438965, "learning_rate": 7.35994890718138e-05, "loss": 0.00811089649796486, "step": 93050 }, { "epoch": 26.414987226795343, "grad_norm": 1.3350623846054077, "learning_rate": 7.359665058189043e-05, "loss": 0.017855803668498992, "step": 93060 }, { "epoch": 26.417825716718706, "grad_norm": 4.353815078735352, "learning_rate": 7.359381209196708e-05, "loss": 0.006519340723752975, "step": 93070 }, { "epoch": 26.420664206642066, "grad_norm": 2.5659937858581543, "learning_rate": 7.359097360204372e-05, "loss": 0.005372990667819977, "step": 93080 }, { "epoch": 26.42350269656543, "grad_norm": 3.4872448444366455, "learning_rate": 7.358813511212036e-05, "loss": 0.010218745470046997, "step": 93090 }, { "epoch": 26.426341186488788, "grad_norm": 1.6558328866958618, "learning_rate": 7.3585296622197e-05, "loss": 0.011450881510972977, "step": 93100 }, { "epoch": 26.429179676412147, "grad_norm": 0.8963851928710938, "learning_rate": 7.358245813227364e-05, "loss": 0.009582974761724473, "step": 93110 }, { "epoch": 26.43201816633551, "grad_norm": 2.569427013397217, "learning_rate": 7.357961964235027e-05, "loss": 0.005919871479272842, "step": 93120 }, { "epoch": 26.43485665625887, "grad_norm": 2.3696396350860596, "learning_rate": 7.357678115242691e-05, "loss": 0.007329928129911423, "step": 93130 }, { "epoch": 26.437695146182232, "grad_norm": 0.1995571106672287, "learning_rate": 7.357394266250355e-05, "loss": 0.01079520806670189, "step": 93140 }, { "epoch": 26.44053363610559, "grad_norm": 0.35779473185539246, "learning_rate": 7.35711041725802e-05, "loss": 0.01520516574382782, "step": 93150 }, { "epoch": 26.44337212602895, "grad_norm": 0.9381704330444336, "learning_rate": 7.356826568265683e-05, "loss": 0.01937687546014786, "step": 93160 }, { "epoch": 26.446210615952314, "grad_norm": 0.7077054381370544, "learning_rate": 7.356542719273348e-05, "loss": 0.010111543536186218, "step": 93170 }, { "epoch": 26.449049105875673, "grad_norm": 4.909936904907227, "learning_rate": 7.35625887028101e-05, "loss": 0.008020897209644318, "step": 93180 }, { "epoch": 26.451887595799036, "grad_norm": 1.5370436906814575, "learning_rate": 7.355975021288675e-05, "loss": 0.006582596898078918, "step": 93190 }, { "epoch": 26.454726085722395, "grad_norm": 2.325643301010132, "learning_rate": 7.355691172296339e-05, "loss": 0.007035617530345917, "step": 93200 }, { "epoch": 26.457564575645755, "grad_norm": 1.2454723119735718, "learning_rate": 7.355407323304003e-05, "loss": 0.008420885354280472, "step": 93210 }, { "epoch": 26.460403065569118, "grad_norm": 1.3416330814361572, "learning_rate": 7.355123474311667e-05, "loss": 0.005542892217636109, "step": 93220 }, { "epoch": 26.463241555492477, "grad_norm": 4.251852512359619, "learning_rate": 7.354839625319331e-05, "loss": 0.01014450117945671, "step": 93230 }, { "epoch": 26.46608004541584, "grad_norm": 0.07669439166784286, "learning_rate": 7.354555776326994e-05, "loss": 0.010270357877016068, "step": 93240 }, { "epoch": 26.4689185353392, "grad_norm": 10.482711791992188, "learning_rate": 7.354271927334658e-05, "loss": 0.012404431402683259, "step": 93250 }, { "epoch": 26.471757025262562, "grad_norm": 1.408994197845459, "learning_rate": 7.353988078342322e-05, "loss": 0.012409591674804687, "step": 93260 }, { "epoch": 26.47459551518592, "grad_norm": 5.073132514953613, "learning_rate": 7.353704229349986e-05, "loss": 0.0165861576795578, "step": 93270 }, { "epoch": 26.47743400510928, "grad_norm": 0.9293218851089478, "learning_rate": 7.353420380357649e-05, "loss": 0.022351078689098358, "step": 93280 }, { "epoch": 26.480272495032644, "grad_norm": 0.6898579597473145, "learning_rate": 7.353136531365315e-05, "loss": 0.023683179914951325, "step": 93290 }, { "epoch": 26.483110984956003, "grad_norm": 3.2098238468170166, "learning_rate": 7.352852682372979e-05, "loss": 0.006058626249432564, "step": 93300 }, { "epoch": 26.485949474879366, "grad_norm": 5.955200672149658, "learning_rate": 7.352568833380641e-05, "loss": 0.005017219111323357, "step": 93310 }, { "epoch": 26.488787964802725, "grad_norm": 2.9740405082702637, "learning_rate": 7.352284984388306e-05, "loss": 0.007307432591915131, "step": 93320 }, { "epoch": 26.491626454726084, "grad_norm": 4.516443729400635, "learning_rate": 7.35200113539597e-05, "loss": 0.02282538264989853, "step": 93330 }, { "epoch": 26.494464944649447, "grad_norm": 0.15894350409507751, "learning_rate": 7.351717286403633e-05, "loss": 0.002300383523106575, "step": 93340 }, { "epoch": 26.497303434572807, "grad_norm": 2.02738618850708, "learning_rate": 7.351433437411298e-05, "loss": 0.01436062753200531, "step": 93350 }, { "epoch": 26.50014192449617, "grad_norm": 1.5969582796096802, "learning_rate": 7.351149588418962e-05, "loss": 0.017239491641521453, "step": 93360 }, { "epoch": 26.50298041441953, "grad_norm": 5.777594089508057, "learning_rate": 7.350865739426625e-05, "loss": 0.01588103175163269, "step": 93370 }, { "epoch": 26.50581890434289, "grad_norm": 6.048971652984619, "learning_rate": 7.350581890434289e-05, "loss": 0.015367710590362548, "step": 93380 }, { "epoch": 26.50865739426625, "grad_norm": 6.3774800300598145, "learning_rate": 7.350298041441953e-05, "loss": 0.01360400766134262, "step": 93390 }, { "epoch": 26.51149588418961, "grad_norm": 7.105537414550781, "learning_rate": 7.350014192449617e-05, "loss": 0.01069067120552063, "step": 93400 }, { "epoch": 26.514334374112973, "grad_norm": 4.433438301086426, "learning_rate": 7.34973034345728e-05, "loss": 0.01765124499797821, "step": 93410 }, { "epoch": 26.517172864036333, "grad_norm": 3.893505573272705, "learning_rate": 7.349446494464946e-05, "loss": 0.009969960153102874, "step": 93420 }, { "epoch": 26.520011353959692, "grad_norm": 0.6545042395591736, "learning_rate": 7.34916264547261e-05, "loss": 0.014891240000724792, "step": 93430 }, { "epoch": 26.522849843883055, "grad_norm": 6.841866493225098, "learning_rate": 7.348878796480273e-05, "loss": 0.00988268256187439, "step": 93440 }, { "epoch": 26.525688333806414, "grad_norm": 0.42013347148895264, "learning_rate": 7.348594947487937e-05, "loss": 0.022642302513122558, "step": 93450 }, { "epoch": 26.528526823729777, "grad_norm": 6.23939323425293, "learning_rate": 7.348311098495601e-05, "loss": 0.019336767494678497, "step": 93460 }, { "epoch": 26.531365313653136, "grad_norm": 0.7334332466125488, "learning_rate": 7.348027249503264e-05, "loss": 0.011577697098255157, "step": 93470 }, { "epoch": 26.534203803576496, "grad_norm": 0.7589626312255859, "learning_rate": 7.347743400510928e-05, "loss": 0.013959628343582154, "step": 93480 }, { "epoch": 26.53704229349986, "grad_norm": 0.8063339591026306, "learning_rate": 7.347459551518593e-05, "loss": 0.020280875265598297, "step": 93490 }, { "epoch": 26.539880783423218, "grad_norm": 4.919368267059326, "learning_rate": 7.347175702526256e-05, "loss": 0.011336325854063033, "step": 93500 }, { "epoch": 26.539880783423218, "eval_accuracy": 0.9685890506771794, "eval_loss": 0.09968402981758118, "eval_runtime": 32.4782, "eval_samples_per_second": 484.232, "eval_steps_per_second": 7.574, "step": 93500 }, { "epoch": 26.54271927334658, "grad_norm": 2.1325113773345947, "learning_rate": 7.34689185353392e-05, "loss": 0.018076063692569734, "step": 93510 }, { "epoch": 26.54555776326994, "grad_norm": 0.5887179970741272, "learning_rate": 7.346608004541584e-05, "loss": 0.011239643394947051, "step": 93520 }, { "epoch": 26.5483962531933, "grad_norm": 7.1910481452941895, "learning_rate": 7.346324155549248e-05, "loss": 0.006685967743396759, "step": 93530 }, { "epoch": 26.551234743116662, "grad_norm": 4.799663543701172, "learning_rate": 7.346040306556911e-05, "loss": 0.007635510712862015, "step": 93540 }, { "epoch": 26.55407323304002, "grad_norm": 9.017477035522461, "learning_rate": 7.345756457564577e-05, "loss": 0.011153483390808105, "step": 93550 }, { "epoch": 26.556911722963385, "grad_norm": 2.9574196338653564, "learning_rate": 7.345472608572241e-05, "loss": 0.005689339712262154, "step": 93560 }, { "epoch": 26.559750212886744, "grad_norm": 4.893571853637695, "learning_rate": 7.345188759579904e-05, "loss": 0.020778751373291014, "step": 93570 }, { "epoch": 26.562588702810103, "grad_norm": 0.6927777528762817, "learning_rate": 7.344904910587568e-05, "loss": 0.02228154391050339, "step": 93580 }, { "epoch": 26.565427192733466, "grad_norm": 1.3354644775390625, "learning_rate": 7.344621061595232e-05, "loss": 0.010807336121797562, "step": 93590 }, { "epoch": 26.568265682656826, "grad_norm": 0.41370514035224915, "learning_rate": 7.344337212602895e-05, "loss": 0.01049211323261261, "step": 93600 }, { "epoch": 26.57110417258019, "grad_norm": 8.193758964538574, "learning_rate": 7.344053363610559e-05, "loss": 0.023290959000587464, "step": 93610 }, { "epoch": 26.573942662503548, "grad_norm": 16.485464096069336, "learning_rate": 7.343797899517457e-05, "loss": 0.01803134083747864, "step": 93620 }, { "epoch": 26.57678115242691, "grad_norm": 1.6572365760803223, "learning_rate": 7.343514050525121e-05, "loss": 0.009075594693422317, "step": 93630 }, { "epoch": 26.57961964235027, "grad_norm": 0.6884151697158813, "learning_rate": 7.343230201532785e-05, "loss": 0.006136306002736092, "step": 93640 }, { "epoch": 26.58245813227363, "grad_norm": 4.5465826988220215, "learning_rate": 7.342946352540448e-05, "loss": 0.003441450744867325, "step": 93650 }, { "epoch": 26.585296622196992, "grad_norm": 1.3059022426605225, "learning_rate": 7.342662503548112e-05, "loss": 0.006587804853916168, "step": 93660 }, { "epoch": 26.58813511212035, "grad_norm": 1.2390974760055542, "learning_rate": 7.342378654555778e-05, "loss": 0.007151448726654052, "step": 93670 }, { "epoch": 26.590973602043714, "grad_norm": 6.941291809082031, "learning_rate": 7.34209480556344e-05, "loss": 0.009516268968582153, "step": 93680 }, { "epoch": 26.593812091967074, "grad_norm": 0.8465082049369812, "learning_rate": 7.341810956571104e-05, "loss": 0.01101771742105484, "step": 93690 }, { "epoch": 26.596650581890433, "grad_norm": 0.4537831246852875, "learning_rate": 7.341527107578769e-05, "loss": 0.008472254127264022, "step": 93700 }, { "epoch": 26.599489071813796, "grad_norm": 0.1561071276664734, "learning_rate": 7.341243258586433e-05, "loss": 0.008398085832595825, "step": 93710 }, { "epoch": 26.602327561737155, "grad_norm": 0.3406132161617279, "learning_rate": 7.340959409594095e-05, "loss": 0.005939728766679764, "step": 93720 }, { "epoch": 26.605166051660518, "grad_norm": 0.6351897716522217, "learning_rate": 7.340675560601761e-05, "loss": 0.019451333582401274, "step": 93730 }, { "epoch": 26.608004541583878, "grad_norm": 1.1513665914535522, "learning_rate": 7.340391711609425e-05, "loss": 0.021116769313812254, "step": 93740 }, { "epoch": 26.610843031507237, "grad_norm": 4.844072341918945, "learning_rate": 7.340107862617088e-05, "loss": 0.007307896018028259, "step": 93750 }, { "epoch": 26.6136815214306, "grad_norm": 0.8704673051834106, "learning_rate": 7.339824013624752e-05, "loss": 0.0052663017064332966, "step": 93760 }, { "epoch": 26.61652001135396, "grad_norm": 0.5843260884284973, "learning_rate": 7.339540164632416e-05, "loss": 0.00935194343328476, "step": 93770 }, { "epoch": 26.619358501277322, "grad_norm": 0.16241100430488586, "learning_rate": 7.339256315640079e-05, "loss": 0.00642252042889595, "step": 93780 }, { "epoch": 26.62219699120068, "grad_norm": 2.4427473545074463, "learning_rate": 7.338972466647743e-05, "loss": 0.006610368192195892, "step": 93790 }, { "epoch": 26.62503548112404, "grad_norm": 0.04928050562739372, "learning_rate": 7.338688617655409e-05, "loss": 0.02486005425453186, "step": 93800 }, { "epoch": 26.627873971047404, "grad_norm": 3.9031119346618652, "learning_rate": 7.338404768663071e-05, "loss": 0.008999011665582656, "step": 93810 }, { "epoch": 26.630712460970763, "grad_norm": 0.9540936946868896, "learning_rate": 7.338120919670736e-05, "loss": 0.005932574719190597, "step": 93820 }, { "epoch": 26.633550950894126, "grad_norm": 5.391296863555908, "learning_rate": 7.3378370706784e-05, "loss": 0.01105673685669899, "step": 93830 }, { "epoch": 26.636389440817485, "grad_norm": 1.161635160446167, "learning_rate": 7.337553221686064e-05, "loss": 0.01124252676963806, "step": 93840 }, { "epoch": 26.639227930740844, "grad_norm": 0.6210116744041443, "learning_rate": 7.337269372693727e-05, "loss": 0.008226653933525086, "step": 93850 }, { "epoch": 26.642066420664207, "grad_norm": 4.83935546875, "learning_rate": 7.336985523701391e-05, "loss": 0.02346726953983307, "step": 93860 }, { "epoch": 26.644904910587567, "grad_norm": 0.6457901000976562, "learning_rate": 7.336701674709056e-05, "loss": 0.006627421826124191, "step": 93870 }, { "epoch": 26.64774340051093, "grad_norm": 0.08087478578090668, "learning_rate": 7.336417825716719e-05, "loss": 0.0095731683075428, "step": 93880 }, { "epoch": 26.65058189043429, "grad_norm": 0.9883803129196167, "learning_rate": 7.336133976724383e-05, "loss": 0.00721057578921318, "step": 93890 }, { "epoch": 26.653420380357648, "grad_norm": 7.240014553070068, "learning_rate": 7.335850127732047e-05, "loss": 0.010792773962020875, "step": 93900 }, { "epoch": 26.65625887028101, "grad_norm": 0.3851408064365387, "learning_rate": 7.33556627873971e-05, "loss": 0.005291806533932686, "step": 93910 }, { "epoch": 26.65909736020437, "grad_norm": 0.4661642909049988, "learning_rate": 7.335282429747374e-05, "loss": 0.005909577012062073, "step": 93920 }, { "epoch": 26.661935850127733, "grad_norm": 0.12833087146282196, "learning_rate": 7.33499858075504e-05, "loss": 0.008726454526185989, "step": 93930 }, { "epoch": 26.664774340051093, "grad_norm": 0.6580816507339478, "learning_rate": 7.334714731762702e-05, "loss": 0.011630985140800475, "step": 93940 }, { "epoch": 26.667612829974452, "grad_norm": 3.5707263946533203, "learning_rate": 7.334430882770367e-05, "loss": 0.010473309457302094, "step": 93950 }, { "epoch": 26.670451319897815, "grad_norm": 0.9508419036865234, "learning_rate": 7.334147033778031e-05, "loss": 0.005357644706964493, "step": 93960 }, { "epoch": 26.673289809821174, "grad_norm": 12.329163551330566, "learning_rate": 7.333863184785695e-05, "loss": 0.03086712956428528, "step": 93970 }, { "epoch": 26.676128299744537, "grad_norm": 2.383697271347046, "learning_rate": 7.333579335793358e-05, "loss": 0.020872431993484496, "step": 93980 }, { "epoch": 26.678966789667896, "grad_norm": 1.512812852859497, "learning_rate": 7.333295486801022e-05, "loss": 0.016036182641983032, "step": 93990 }, { "epoch": 26.68180527959126, "grad_norm": 0.9780462384223938, "learning_rate": 7.333011637808687e-05, "loss": 0.010519023239612579, "step": 94000 }, { "epoch": 26.68180527959126, "eval_accuracy": 0.9692248998537547, "eval_loss": 0.1048494279384613, "eval_runtime": 31.7743, "eval_samples_per_second": 494.96, "eval_steps_per_second": 7.742, "step": 94000 }, { "epoch": 26.68464376951462, "grad_norm": 2.6364002227783203, "learning_rate": 7.33272778881635e-05, "loss": 0.013288049399852753, "step": 94010 }, { "epoch": 26.687482259437978, "grad_norm": 7.415493965148926, "learning_rate": 7.332443939824014e-05, "loss": 0.02603104114532471, "step": 94020 }, { "epoch": 26.69032074936134, "grad_norm": 0.13083277642726898, "learning_rate": 7.332160090831678e-05, "loss": 0.013575693964958191, "step": 94030 }, { "epoch": 26.6931592392847, "grad_norm": 2.9462966918945312, "learning_rate": 7.331876241839341e-05, "loss": 0.007019470632076264, "step": 94040 }, { "epoch": 26.695997729208063, "grad_norm": 1.0577255487442017, "learning_rate": 7.331592392847005e-05, "loss": 0.006899809837341309, "step": 94050 }, { "epoch": 26.698836219131422, "grad_norm": 1.240348219871521, "learning_rate": 7.33130854385467e-05, "loss": 0.0093466617166996, "step": 94060 }, { "epoch": 26.70167470905478, "grad_norm": 0.13549132645130157, "learning_rate": 7.331024694862334e-05, "loss": 0.01021496281027794, "step": 94070 }, { "epoch": 26.704513198978145, "grad_norm": 0.4940224885940552, "learning_rate": 7.330740845869998e-05, "loss": 0.0046342164278030396, "step": 94080 }, { "epoch": 26.707351688901504, "grad_norm": 0.7037240266799927, "learning_rate": 7.330456996877662e-05, "loss": 0.007728366553783417, "step": 94090 }, { "epoch": 26.710190178824867, "grad_norm": 1.1176986694335938, "learning_rate": 7.330173147885326e-05, "loss": 0.0026960244402289392, "step": 94100 }, { "epoch": 26.713028668748226, "grad_norm": 0.25860780477523804, "learning_rate": 7.329889298892989e-05, "loss": 0.0065338902175426485, "step": 94110 }, { "epoch": 26.715867158671585, "grad_norm": 2.9327640533447266, "learning_rate": 7.329605449900653e-05, "loss": 0.0034998197108507155, "step": 94120 }, { "epoch": 26.71870564859495, "grad_norm": 0.9977213144302368, "learning_rate": 7.329321600908317e-05, "loss": 0.01593065559864044, "step": 94130 }, { "epoch": 26.721544138518308, "grad_norm": 3.589890718460083, "learning_rate": 7.329037751915981e-05, "loss": 0.005348076671361923, "step": 94140 }, { "epoch": 26.72438262844167, "grad_norm": 1.153486728668213, "learning_rate": 7.328753902923645e-05, "loss": 0.016149234771728516, "step": 94150 }, { "epoch": 26.72722111836503, "grad_norm": 0.31281599402427673, "learning_rate": 7.32847005393131e-05, "loss": 0.0035915620625019073, "step": 94160 }, { "epoch": 26.73005960828839, "grad_norm": 2.5461089611053467, "learning_rate": 7.328186204938972e-05, "loss": 0.0040773067623376845, "step": 94170 }, { "epoch": 26.732898098211752, "grad_norm": 4.219906330108643, "learning_rate": 7.327902355946636e-05, "loss": 0.009428070485591888, "step": 94180 }, { "epoch": 26.73573658813511, "grad_norm": 1.0715082883834839, "learning_rate": 7.3276185069543e-05, "loss": 0.003371432051062584, "step": 94190 }, { "epoch": 26.738575078058474, "grad_norm": 0.19039398431777954, "learning_rate": 7.327334657961965e-05, "loss": 0.00784885436296463, "step": 94200 }, { "epoch": 26.741413567981834, "grad_norm": 7.357936859130859, "learning_rate": 7.327050808969629e-05, "loss": 0.011466562747955322, "step": 94210 }, { "epoch": 26.744252057905193, "grad_norm": 1.2628207206726074, "learning_rate": 7.326766959977293e-05, "loss": 0.0031861763447523115, "step": 94220 }, { "epoch": 26.747090547828556, "grad_norm": 0.24524575471878052, "learning_rate": 7.326483110984956e-05, "loss": 0.0031773947179317475, "step": 94230 }, { "epoch": 26.749929037751915, "grad_norm": 1.2926634550094604, "learning_rate": 7.32619926199262e-05, "loss": 0.010897711664438248, "step": 94240 }, { "epoch": 26.752767527675278, "grad_norm": 5.313648223876953, "learning_rate": 7.325915413000284e-05, "loss": 0.0052309799939394, "step": 94250 }, { "epoch": 26.755606017598637, "grad_norm": 11.837434768676758, "learning_rate": 7.325631564007948e-05, "loss": 0.006376129388809204, "step": 94260 }, { "epoch": 26.758444507521997, "grad_norm": 1.498656153678894, "learning_rate": 7.325347715015612e-05, "loss": 0.020749929547309875, "step": 94270 }, { "epoch": 26.76128299744536, "grad_norm": 1.3998619318008423, "learning_rate": 7.325063866023276e-05, "loss": 0.005323236435651779, "step": 94280 }, { "epoch": 26.76412148736872, "grad_norm": 10.666291236877441, "learning_rate": 7.32478001703094e-05, "loss": 0.010934248566627502, "step": 94290 }, { "epoch": 26.766959977292082, "grad_norm": 15.526649475097656, "learning_rate": 7.324496168038603e-05, "loss": 0.01293966919183731, "step": 94300 }, { "epoch": 26.76979846721544, "grad_norm": 6.155138969421387, "learning_rate": 7.324212319046267e-05, "loss": 0.011608079075813293, "step": 94310 }, { "epoch": 26.7726369571388, "grad_norm": 0.2257644385099411, "learning_rate": 7.323928470053932e-05, "loss": 0.01860720068216324, "step": 94320 }, { "epoch": 26.775475447062163, "grad_norm": 0.31138718128204346, "learning_rate": 7.323644621061596e-05, "loss": 0.017933444678783418, "step": 94330 }, { "epoch": 26.778313936985523, "grad_norm": 0.22316892445087433, "learning_rate": 7.32336077206926e-05, "loss": 0.01873452216386795, "step": 94340 }, { "epoch": 26.781152426908886, "grad_norm": 1.287440299987793, "learning_rate": 7.323076923076924e-05, "loss": 0.0024506187066435812, "step": 94350 }, { "epoch": 26.783990916832245, "grad_norm": 6.362514019012451, "learning_rate": 7.322793074084587e-05, "loss": 0.012238384038209916, "step": 94360 }, { "epoch": 26.786829406755604, "grad_norm": 1.06751549243927, "learning_rate": 7.322509225092251e-05, "loss": 0.009376539289951325, "step": 94370 }, { "epoch": 26.789667896678967, "grad_norm": 2.0011606216430664, "learning_rate": 7.322225376099915e-05, "loss": 0.010755889117717743, "step": 94380 }, { "epoch": 26.792506386602327, "grad_norm": 0.18323780596256256, "learning_rate": 7.321941527107579e-05, "loss": 0.0080840103328228, "step": 94390 }, { "epoch": 26.79534487652569, "grad_norm": 0.35479533672332764, "learning_rate": 7.321657678115243e-05, "loss": 0.002134854905307293, "step": 94400 }, { "epoch": 26.79818336644905, "grad_norm": 4.576927185058594, "learning_rate": 7.321373829122907e-05, "loss": 0.01145692765712738, "step": 94410 }, { "epoch": 26.801021856372408, "grad_norm": 0.4212914705276489, "learning_rate": 7.321089980130572e-05, "loss": 0.005034103989601135, "step": 94420 }, { "epoch": 26.80386034629577, "grad_norm": 0.6421583890914917, "learning_rate": 7.320806131138234e-05, "loss": 0.00903690904378891, "step": 94430 }, { "epoch": 26.80669883621913, "grad_norm": 12.132715225219727, "learning_rate": 7.320522282145898e-05, "loss": 0.016330939531326295, "step": 94440 }, { "epoch": 26.809537326142493, "grad_norm": 10.826515197753906, "learning_rate": 7.320238433153563e-05, "loss": 0.01073497161269188, "step": 94450 }, { "epoch": 26.812375816065853, "grad_norm": 1.2761679887771606, "learning_rate": 7.319954584161225e-05, "loss": 0.021025915443897248, "step": 94460 }, { "epoch": 26.815214305989215, "grad_norm": 0.7034673094749451, "learning_rate": 7.319670735168891e-05, "loss": 0.017810511589050292, "step": 94470 }, { "epoch": 26.818052795912575, "grad_norm": 0.815653920173645, "learning_rate": 7.319386886176555e-05, "loss": 0.005450914800167084, "step": 94480 }, { "epoch": 26.820891285835934, "grad_norm": 1.0747979879379272, "learning_rate": 7.319103037184218e-05, "loss": 0.009490350633859635, "step": 94490 }, { "epoch": 26.823729775759297, "grad_norm": 0.16881529986858368, "learning_rate": 7.318819188191882e-05, "loss": 0.009227895736694336, "step": 94500 }, { "epoch": 26.823729775759297, "eval_accuracy": 0.9666815031474534, "eval_loss": 0.11548859626054764, "eval_runtime": 31.8408, "eval_samples_per_second": 493.925, "eval_steps_per_second": 7.726, "step": 94500 }, { "epoch": 26.826568265682656, "grad_norm": 6.397447109222412, "learning_rate": 7.318535339199546e-05, "loss": 0.020643207430839538, "step": 94510 }, { "epoch": 26.82940675560602, "grad_norm": 0.5587457418441772, "learning_rate": 7.31825149020721e-05, "loss": 0.011985748261213302, "step": 94520 }, { "epoch": 26.83224524552938, "grad_norm": 1.2035893201828003, "learning_rate": 7.317967641214874e-05, "loss": 0.008251894265413284, "step": 94530 }, { "epoch": 26.835083735452738, "grad_norm": 0.9046289324760437, "learning_rate": 7.317683792222539e-05, "loss": 0.01099552884697914, "step": 94540 }, { "epoch": 26.8379222253761, "grad_norm": 9.948662757873535, "learning_rate": 7.317399943230203e-05, "loss": 0.016390039026737212, "step": 94550 }, { "epoch": 26.84076071529946, "grad_norm": 2.8673548698425293, "learning_rate": 7.317116094237865e-05, "loss": 0.0057214625179767605, "step": 94560 }, { "epoch": 26.843599205222823, "grad_norm": 16.781118392944336, "learning_rate": 7.31683224524553e-05, "loss": 0.0250559002161026, "step": 94570 }, { "epoch": 26.846437695146182, "grad_norm": 6.041494369506836, "learning_rate": 7.316548396253194e-05, "loss": 0.005941551178693771, "step": 94580 }, { "epoch": 26.84927618506954, "grad_norm": 2.3877108097076416, "learning_rate": 7.316264547260856e-05, "loss": 0.007182009518146515, "step": 94590 }, { "epoch": 26.852114674992904, "grad_norm": 1.4107893705368042, "learning_rate": 7.315980698268522e-05, "loss": 0.004748644679784775, "step": 94600 }, { "epoch": 26.854953164916264, "grad_norm": 2.2655131816864014, "learning_rate": 7.315696849276186e-05, "loss": 0.020558658242225646, "step": 94610 }, { "epoch": 26.857791654839627, "grad_norm": 0.6342401504516602, "learning_rate": 7.315413000283849e-05, "loss": 0.01361614465713501, "step": 94620 }, { "epoch": 26.860630144762986, "grad_norm": 14.508710861206055, "learning_rate": 7.315129151291513e-05, "loss": 0.02947966754436493, "step": 94630 }, { "epoch": 26.863468634686345, "grad_norm": 4.156998157501221, "learning_rate": 7.314845302299177e-05, "loss": 0.006452225148677826, "step": 94640 }, { "epoch": 26.86630712460971, "grad_norm": 1.8338624238967896, "learning_rate": 7.314561453306841e-05, "loss": 0.012524004280567168, "step": 94650 }, { "epoch": 26.869145614533068, "grad_norm": 1.187557339668274, "learning_rate": 7.314277604314504e-05, "loss": 0.02244972586631775, "step": 94660 }, { "epoch": 26.87198410445643, "grad_norm": 8.91844367980957, "learning_rate": 7.31399375532217e-05, "loss": 0.01820616126060486, "step": 94670 }, { "epoch": 26.87482259437979, "grad_norm": 9.856206893920898, "learning_rate": 7.313709906329834e-05, "loss": 0.012397131323814392, "step": 94680 }, { "epoch": 26.87766108430315, "grad_norm": 8.526193618774414, "learning_rate": 7.313426057337497e-05, "loss": 0.01205759346485138, "step": 94690 }, { "epoch": 26.880499574226512, "grad_norm": 1.1396818161010742, "learning_rate": 7.31314220834516e-05, "loss": 0.008211804926395417, "step": 94700 }, { "epoch": 26.88333806414987, "grad_norm": 5.030377388000488, "learning_rate": 7.312858359352825e-05, "loss": 0.010646968334913253, "step": 94710 }, { "epoch": 26.886176554073234, "grad_norm": 0.252395898103714, "learning_rate": 7.312574510360488e-05, "loss": 0.004068059474229812, "step": 94720 }, { "epoch": 26.889015043996594, "grad_norm": 0.7781500816345215, "learning_rate": 7.312290661368153e-05, "loss": 0.008444656431674958, "step": 94730 }, { "epoch": 26.891853533919953, "grad_norm": 4.12952184677124, "learning_rate": 7.312006812375817e-05, "loss": 0.004265264049172402, "step": 94740 }, { "epoch": 26.894692023843316, "grad_norm": 0.4212832748889923, "learning_rate": 7.31172296338348e-05, "loss": 0.012350338697433471, "step": 94750 }, { "epoch": 26.897530513766675, "grad_norm": 0.3568253219127655, "learning_rate": 7.311439114391144e-05, "loss": 0.013381332159042358, "step": 94760 }, { "epoch": 26.900369003690038, "grad_norm": 0.46841901540756226, "learning_rate": 7.311155265398808e-05, "loss": 0.014715728163719178, "step": 94770 }, { "epoch": 26.903207493613397, "grad_norm": 3.224759578704834, "learning_rate": 7.310871416406472e-05, "loss": 0.006854061782360077, "step": 94780 }, { "epoch": 26.906045983536757, "grad_norm": 0.8850780725479126, "learning_rate": 7.310587567414135e-05, "loss": 0.017140981554985047, "step": 94790 }, { "epoch": 26.90888447346012, "grad_norm": 2.9493415355682373, "learning_rate": 7.3103037184218e-05, "loss": 0.006350439041852951, "step": 94800 }, { "epoch": 26.91172296338348, "grad_norm": 0.2581605315208435, "learning_rate": 7.310019869429465e-05, "loss": 0.009014574438333511, "step": 94810 }, { "epoch": 26.914561453306842, "grad_norm": 7.52855920791626, "learning_rate": 7.309736020437128e-05, "loss": 0.01122516170144081, "step": 94820 }, { "epoch": 26.9173999432302, "grad_norm": 9.842864036560059, "learning_rate": 7.309452171444792e-05, "loss": 0.010161265730857849, "step": 94830 }, { "epoch": 26.920238433153564, "grad_norm": 8.342184066772461, "learning_rate": 7.309168322452456e-05, "loss": 0.025299814343452454, "step": 94840 }, { "epoch": 26.923076923076923, "grad_norm": 14.028326988220215, "learning_rate": 7.308884473460119e-05, "loss": 0.010789446532726288, "step": 94850 }, { "epoch": 26.925915413000283, "grad_norm": 5.466632843017578, "learning_rate": 7.308600624467784e-05, "loss": 0.02698662579059601, "step": 94860 }, { "epoch": 26.928753902923646, "grad_norm": 9.788919448852539, "learning_rate": 7.308316775475448e-05, "loss": 0.008938751369714736, "step": 94870 }, { "epoch": 26.931592392847005, "grad_norm": 3.878929376602173, "learning_rate": 7.308032926483111e-05, "loss": 0.0148302361369133, "step": 94880 }, { "epoch": 26.934430882770368, "grad_norm": 0.4376879334449768, "learning_rate": 7.307749077490775e-05, "loss": 0.015847574174404144, "step": 94890 }, { "epoch": 26.937269372693727, "grad_norm": 10.621622085571289, "learning_rate": 7.307465228498439e-05, "loss": 0.01660303622484207, "step": 94900 }, { "epoch": 26.940107862617086, "grad_norm": 0.3777320981025696, "learning_rate": 7.307181379506103e-05, "loss": 0.008840611577033997, "step": 94910 }, { "epoch": 26.94294635254045, "grad_norm": 1.948773741722107, "learning_rate": 7.306897530513766e-05, "loss": 0.011807572841644288, "step": 94920 }, { "epoch": 26.94578484246381, "grad_norm": 11.493670463562012, "learning_rate": 7.306613681521432e-05, "loss": 0.013804540038108826, "step": 94930 }, { "epoch": 26.94862333238717, "grad_norm": 0.6795633435249329, "learning_rate": 7.306329832529096e-05, "loss": 0.031068021059036256, "step": 94940 }, { "epoch": 26.95146182231053, "grad_norm": 12.200681686401367, "learning_rate": 7.306045983536759e-05, "loss": 0.015989182889461516, "step": 94950 }, { "epoch": 26.95430031223389, "grad_norm": 5.475698947906494, "learning_rate": 7.305762134544423e-05, "loss": 0.018589550256729127, "step": 94960 }, { "epoch": 26.957138802157253, "grad_norm": 10.37171459197998, "learning_rate": 7.305478285552087e-05, "loss": 0.014926382899284362, "step": 94970 }, { "epoch": 26.959977292080612, "grad_norm": 1.591007947921753, "learning_rate": 7.30519443655975e-05, "loss": 0.00547194704413414, "step": 94980 }, { "epoch": 26.962815782003975, "grad_norm": 1.946585774421692, "learning_rate": 7.304910587567414e-05, "loss": 0.010621996968984604, "step": 94990 }, { "epoch": 26.965654271927335, "grad_norm": 3.2840471267700195, "learning_rate": 7.30462673857508e-05, "loss": 0.022071157395839692, "step": 95000 }, { "epoch": 26.965654271927335, "eval_accuracy": 0.9651554651236727, "eval_loss": 0.1152285560965538, "eval_runtime": 31.8897, "eval_samples_per_second": 493.169, "eval_steps_per_second": 7.714, "step": 95000 }, { "epoch": 26.968492761850694, "grad_norm": 5.742368698120117, "learning_rate": 7.304342889582742e-05, "loss": 0.009496699273586272, "step": 95010 }, { "epoch": 26.971331251774057, "grad_norm": 0.09833352267742157, "learning_rate": 7.304059040590406e-05, "loss": 0.0046347551047801975, "step": 95020 }, { "epoch": 26.974169741697416, "grad_norm": 0.25711163878440857, "learning_rate": 7.30377519159807e-05, "loss": 0.017611576616764067, "step": 95030 }, { "epoch": 26.97700823162078, "grad_norm": 8.331297874450684, "learning_rate": 7.303491342605735e-05, "loss": 0.008422775566577912, "step": 95040 }, { "epoch": 26.97984672154414, "grad_norm": 1.734641671180725, "learning_rate": 7.303207493613397e-05, "loss": 0.01211327239871025, "step": 95050 }, { "epoch": 26.982685211467498, "grad_norm": 0.5438372492790222, "learning_rate": 7.302923644621063e-05, "loss": 0.008285184949636459, "step": 95060 }, { "epoch": 26.98552370139086, "grad_norm": 13.321969032287598, "learning_rate": 7.302639795628726e-05, "loss": 0.008095437288284301, "step": 95070 }, { "epoch": 26.98836219131422, "grad_norm": 5.428834438323975, "learning_rate": 7.30235594663639e-05, "loss": 0.014718478918075562, "step": 95080 }, { "epoch": 26.991200681237583, "grad_norm": 1.988488793373108, "learning_rate": 7.302072097644054e-05, "loss": 0.014892590045928956, "step": 95090 }, { "epoch": 26.994039171160942, "grad_norm": 0.21988387405872345, "learning_rate": 7.301788248651718e-05, "loss": 0.015247519314289092, "step": 95100 }, { "epoch": 26.9968776610843, "grad_norm": 4.11231803894043, "learning_rate": 7.301504399659381e-05, "loss": 0.012525960803031921, "step": 95110 }, { "epoch": 26.999716151007664, "grad_norm": 1.8023399114608765, "learning_rate": 7.301220550667045e-05, "loss": 0.01647813767194748, "step": 95120 }, { "epoch": 27.002554640931024, "grad_norm": 7.9026780128479, "learning_rate": 7.30093670167471e-05, "loss": 0.014436399936676026, "step": 95130 }, { "epoch": 27.005393130854387, "grad_norm": 1.0764693021774292, "learning_rate": 7.300652852682373e-05, "loss": 0.007888266444206237, "step": 95140 }, { "epoch": 27.008231620777746, "grad_norm": 0.08524254709482193, "learning_rate": 7.300369003690037e-05, "loss": 0.01325371116399765, "step": 95150 }, { "epoch": 27.011070110701105, "grad_norm": 0.18291603028774261, "learning_rate": 7.300085154697701e-05, "loss": 0.014587007462978363, "step": 95160 }, { "epoch": 27.013908600624468, "grad_norm": 0.2014232873916626, "learning_rate": 7.299801305705364e-05, "loss": 0.007328209280967712, "step": 95170 }, { "epoch": 27.016747090547828, "grad_norm": 0.8993664979934692, "learning_rate": 7.299517456713028e-05, "loss": 0.006159491091966629, "step": 95180 }, { "epoch": 27.01958558047119, "grad_norm": 7.0965495109558105, "learning_rate": 7.299233607720693e-05, "loss": 0.0031824085861444475, "step": 95190 }, { "epoch": 27.02242407039455, "grad_norm": 9.556612014770508, "learning_rate": 7.298949758728357e-05, "loss": 0.012323324382305146, "step": 95200 }, { "epoch": 27.025262560317913, "grad_norm": 0.1286274492740631, "learning_rate": 7.298665909736021e-05, "loss": 0.00985255092382431, "step": 95210 }, { "epoch": 27.028101050241272, "grad_norm": 7.862353801727295, "learning_rate": 7.298382060743685e-05, "loss": 0.0120895616710186, "step": 95220 }, { "epoch": 27.03093954016463, "grad_norm": 8.758325576782227, "learning_rate": 7.298098211751349e-05, "loss": 0.006636605411767959, "step": 95230 }, { "epoch": 27.033778030087994, "grad_norm": 1.5534446239471436, "learning_rate": 7.297814362759012e-05, "loss": 0.002974515035748482, "step": 95240 }, { "epoch": 27.036616520011354, "grad_norm": 0.06047556549310684, "learning_rate": 7.297530513766676e-05, "loss": 0.002357245981693268, "step": 95250 }, { "epoch": 27.039455009934716, "grad_norm": 0.07717142254114151, "learning_rate": 7.297246664774341e-05, "loss": 0.0028319453820586206, "step": 95260 }, { "epoch": 27.042293499858076, "grad_norm": 1.5141997337341309, "learning_rate": 7.296962815782004e-05, "loss": 0.012891288101673126, "step": 95270 }, { "epoch": 27.045131989781435, "grad_norm": 2.8872857093811035, "learning_rate": 7.296678966789668e-05, "loss": 0.005435745045542717, "step": 95280 }, { "epoch": 27.047970479704798, "grad_norm": 7.0280961990356445, "learning_rate": 7.296395117797333e-05, "loss": 0.012304079532623292, "step": 95290 }, { "epoch": 27.050808969628157, "grad_norm": 1.1121705770492554, "learning_rate": 7.296111268804995e-05, "loss": 0.003240145742893219, "step": 95300 }, { "epoch": 27.05364745955152, "grad_norm": 0.7756054997444153, "learning_rate": 7.29582741981266e-05, "loss": 0.00594131276011467, "step": 95310 }, { "epoch": 27.05648594947488, "grad_norm": 0.334761381149292, "learning_rate": 7.295543570820324e-05, "loss": 0.019674669206142425, "step": 95320 }, { "epoch": 27.05932443939824, "grad_norm": 0.1335884928703308, "learning_rate": 7.295259721827988e-05, "loss": 0.0056454546749591826, "step": 95330 }, { "epoch": 27.0621629293216, "grad_norm": 1.4134819507598877, "learning_rate": 7.294975872835652e-05, "loss": 0.014025644958019256, "step": 95340 }, { "epoch": 27.06500141924496, "grad_norm": 0.21472875773906708, "learning_rate": 7.294692023843316e-05, "loss": 0.00498523935675621, "step": 95350 }, { "epoch": 27.067839909168324, "grad_norm": 2.6188158988952637, "learning_rate": 7.29440817485098e-05, "loss": 0.015001380443572998, "step": 95360 }, { "epoch": 27.070678399091683, "grad_norm": 0.24918845295906067, "learning_rate": 7.294124325858643e-05, "loss": 0.0036949679255485536, "step": 95370 }, { "epoch": 27.073516889015043, "grad_norm": 3.7165369987487793, "learning_rate": 7.293840476866307e-05, "loss": 0.0031681805849075317, "step": 95380 }, { "epoch": 27.076355378938405, "grad_norm": 0.15804407000541687, "learning_rate": 7.293556627873971e-05, "loss": 0.0019795143976807593, "step": 95390 }, { "epoch": 27.079193868861765, "grad_norm": 0.23503343760967255, "learning_rate": 7.293272778881635e-05, "loss": 0.004706960916519165, "step": 95400 }, { "epoch": 27.082032358785128, "grad_norm": 0.255169153213501, "learning_rate": 7.2929889298893e-05, "loss": 0.004814198240637779, "step": 95410 }, { "epoch": 27.084870848708487, "grad_norm": 2.0719711780548096, "learning_rate": 7.292705080896964e-05, "loss": 0.009840811043977738, "step": 95420 }, { "epoch": 27.087709338631846, "grad_norm": 1.1336169242858887, "learning_rate": 7.292421231904626e-05, "loss": 0.0035200797021389006, "step": 95430 }, { "epoch": 27.09054782855521, "grad_norm": 1.4868299961090088, "learning_rate": 7.29213738291229e-05, "loss": 0.006213813275098801, "step": 95440 }, { "epoch": 27.09338631847857, "grad_norm": 0.1265774965286255, "learning_rate": 7.291853533919955e-05, "loss": 0.021602602303028108, "step": 95450 }, { "epoch": 27.09622480840193, "grad_norm": 3.412343978881836, "learning_rate": 7.291569684927619e-05, "loss": 0.003488282114267349, "step": 95460 }, { "epoch": 27.09906329832529, "grad_norm": 4.044987201690674, "learning_rate": 7.291285835935283e-05, "loss": 0.005457519739866257, "step": 95470 }, { "epoch": 27.10190178824865, "grad_norm": 8.39084243774414, "learning_rate": 7.291001986942947e-05, "loss": 0.015611542761325837, "step": 95480 }, { "epoch": 27.104740278172013, "grad_norm": 4.072351932525635, "learning_rate": 7.290718137950611e-05, "loss": 0.00747731626033783, "step": 95490 }, { "epoch": 27.107578768095372, "grad_norm": 0.2045382261276245, "learning_rate": 7.290434288958274e-05, "loss": 0.005929672718048095, "step": 95500 }, { "epoch": 27.107578768095372, "eval_accuracy": 0.96986074903033, "eval_loss": 0.10046815872192383, "eval_runtime": 31.8048, "eval_samples_per_second": 494.485, "eval_steps_per_second": 7.735, "step": 95500 }, { "epoch": 27.110417258018735, "grad_norm": 3.092857837677002, "learning_rate": 7.290150439965938e-05, "loss": 0.007351582497358322, "step": 95510 }, { "epoch": 27.113255747942095, "grad_norm": 0.35463252663612366, "learning_rate": 7.289866590973602e-05, "loss": 0.0043140958994627, "step": 95520 }, { "epoch": 27.116094237865454, "grad_norm": 0.03940659761428833, "learning_rate": 7.289582741981266e-05, "loss": 0.005683646351099014, "step": 95530 }, { "epoch": 27.118932727788817, "grad_norm": 0.4777376055717468, "learning_rate": 7.28929889298893e-05, "loss": 0.002036338672041893, "step": 95540 }, { "epoch": 27.121771217712176, "grad_norm": 2.1356241703033447, "learning_rate": 7.289015043996595e-05, "loss": 0.0062846653163433075, "step": 95550 }, { "epoch": 27.12460970763554, "grad_norm": 0.8136447668075562, "learning_rate": 7.288731195004257e-05, "loss": 0.018706507980823517, "step": 95560 }, { "epoch": 27.1274481975589, "grad_norm": 7.297195911407471, "learning_rate": 7.288447346011922e-05, "loss": 0.010456858575344086, "step": 95570 }, { "epoch": 27.130286687482258, "grad_norm": 3.5316708087921143, "learning_rate": 7.288163497019586e-05, "loss": 0.02014935314655304, "step": 95580 }, { "epoch": 27.13312517740562, "grad_norm": 1.3246920108795166, "learning_rate": 7.28787964802725e-05, "loss": 0.012094598263502121, "step": 95590 }, { "epoch": 27.13596366732898, "grad_norm": 0.6021990776062012, "learning_rate": 7.287595799034914e-05, "loss": 0.0029250387102365493, "step": 95600 }, { "epoch": 27.138802157252343, "grad_norm": 16.04469871520996, "learning_rate": 7.287311950042578e-05, "loss": 0.005827989429235458, "step": 95610 }, { "epoch": 27.141640647175702, "grad_norm": 8.3602933883667, "learning_rate": 7.287028101050242e-05, "loss": 0.011805259436368943, "step": 95620 }, { "epoch": 27.144479137099065, "grad_norm": 0.07793623954057693, "learning_rate": 7.286744252057905e-05, "loss": 0.006363669037818908, "step": 95630 }, { "epoch": 27.147317627022424, "grad_norm": 0.9708279371261597, "learning_rate": 7.286460403065569e-05, "loss": 0.002565213479101658, "step": 95640 }, { "epoch": 27.150156116945784, "grad_norm": 11.768292427062988, "learning_rate": 7.286176554073233e-05, "loss": 0.012795543670654297, "step": 95650 }, { "epoch": 27.152994606869147, "grad_norm": 4.34949254989624, "learning_rate": 7.285892705080898e-05, "loss": 0.007452624291181565, "step": 95660 }, { "epoch": 27.155833096792506, "grad_norm": 0.5058379173278809, "learning_rate": 7.285608856088562e-05, "loss": 0.0036213286221027374, "step": 95670 }, { "epoch": 27.15867158671587, "grad_norm": 0.32105812430381775, "learning_rate": 7.285325007096226e-05, "loss": 0.0041717179119586945, "step": 95680 }, { "epoch": 27.161510076639228, "grad_norm": 14.32698917388916, "learning_rate": 7.285041158103889e-05, "loss": 0.012550786137580872, "step": 95690 }, { "epoch": 27.164348566562587, "grad_norm": 4.324656009674072, "learning_rate": 7.284757309111553e-05, "loss": 0.011185474693775177, "step": 95700 }, { "epoch": 27.16718705648595, "grad_norm": 6.045734882354736, "learning_rate": 7.284473460119217e-05, "loss": 0.008705034852027893, "step": 95710 }, { "epoch": 27.17002554640931, "grad_norm": 3.5558059215545654, "learning_rate": 7.284189611126881e-05, "loss": 0.006051670387387275, "step": 95720 }, { "epoch": 27.172864036332673, "grad_norm": 3.1671321392059326, "learning_rate": 7.283905762134545e-05, "loss": 0.01106426641345024, "step": 95730 }, { "epoch": 27.175702526256032, "grad_norm": 0.912066638469696, "learning_rate": 7.283621913142209e-05, "loss": 0.006127223372459412, "step": 95740 }, { "epoch": 27.17854101617939, "grad_norm": 14.557927131652832, "learning_rate": 7.283338064149873e-05, "loss": 0.016689641773700713, "step": 95750 }, { "epoch": 27.181379506102754, "grad_norm": 0.20242708921432495, "learning_rate": 7.283054215157536e-05, "loss": 0.010514412820339204, "step": 95760 }, { "epoch": 27.184217996026113, "grad_norm": 0.16144487261772156, "learning_rate": 7.2827703661652e-05, "loss": 0.0036367315798997877, "step": 95770 }, { "epoch": 27.187056485949476, "grad_norm": 9.16048526763916, "learning_rate": 7.282486517172864e-05, "loss": 0.008228586614131927, "step": 95780 }, { "epoch": 27.189894975872836, "grad_norm": 0.05781473591923714, "learning_rate": 7.282202668180527e-05, "loss": 0.0021053383126854895, "step": 95790 }, { "epoch": 27.192733465796195, "grad_norm": 0.7909797430038452, "learning_rate": 7.281918819188193e-05, "loss": 0.004358413070440293, "step": 95800 }, { "epoch": 27.195571955719558, "grad_norm": 1.9347373247146606, "learning_rate": 7.281634970195857e-05, "loss": 0.01045796126127243, "step": 95810 }, { "epoch": 27.198410445642917, "grad_norm": 1.1858774423599243, "learning_rate": 7.28135112120352e-05, "loss": 0.008381951600313187, "step": 95820 }, { "epoch": 27.20124893556628, "grad_norm": 7.8907270431518555, "learning_rate": 7.281067272211184e-05, "loss": 0.009133132547140122, "step": 95830 }, { "epoch": 27.20408742548964, "grad_norm": 5.41282844543457, "learning_rate": 7.280783423218848e-05, "loss": 0.016681912541389465, "step": 95840 }, { "epoch": 27.206925915413, "grad_norm": 1.2343826293945312, "learning_rate": 7.280499574226512e-05, "loss": 0.005586374178528785, "step": 95850 }, { "epoch": 27.20976440533636, "grad_norm": 0.14001768827438354, "learning_rate": 7.280215725234176e-05, "loss": 0.004759392887353897, "step": 95860 }, { "epoch": 27.21260289525972, "grad_norm": 0.3972637951374054, "learning_rate": 7.27993187624184e-05, "loss": 0.0048266701400279995, "step": 95870 }, { "epoch": 27.215441385183084, "grad_norm": 1.9308353662490845, "learning_rate": 7.279648027249504e-05, "loss": 0.003910179436206818, "step": 95880 }, { "epoch": 27.218279875106443, "grad_norm": 2.325427770614624, "learning_rate": 7.279364178257167e-05, "loss": 0.01410355269908905, "step": 95890 }, { "epoch": 27.221118365029803, "grad_norm": 0.1319456845521927, "learning_rate": 7.279080329264831e-05, "loss": 0.00962822139263153, "step": 95900 }, { "epoch": 27.223956854953165, "grad_norm": 0.4218675196170807, "learning_rate": 7.278796480272496e-05, "loss": 0.02063506543636322, "step": 95910 }, { "epoch": 27.226795344876525, "grad_norm": 3.877767562866211, "learning_rate": 7.278512631280158e-05, "loss": 0.003137446567416191, "step": 95920 }, { "epoch": 27.229633834799888, "grad_norm": 1.1283106803894043, "learning_rate": 7.278228782287824e-05, "loss": 0.0037359926849603654, "step": 95930 }, { "epoch": 27.232472324723247, "grad_norm": 0.5127775073051453, "learning_rate": 7.277944933295488e-05, "loss": 0.006305716931819916, "step": 95940 }, { "epoch": 27.235310814646606, "grad_norm": 0.24632003903388977, "learning_rate": 7.277661084303151e-05, "loss": 0.019400307536125184, "step": 95950 }, { "epoch": 27.23814930456997, "grad_norm": 1.3541440963745117, "learning_rate": 7.277377235310815e-05, "loss": 0.0021309757605195044, "step": 95960 }, { "epoch": 27.24098779449333, "grad_norm": 1.2853630781173706, "learning_rate": 7.277093386318479e-05, "loss": 0.006213001161813736, "step": 95970 }, { "epoch": 27.24382628441669, "grad_norm": 0.6624603271484375, "learning_rate": 7.276809537326143e-05, "loss": 0.005244841054081917, "step": 95980 }, { "epoch": 27.24666477434005, "grad_norm": 1.2028666734695435, "learning_rate": 7.276525688333806e-05, "loss": 0.0059256177395582196, "step": 95990 }, { "epoch": 27.249503264263414, "grad_norm": 1.048482060432434, "learning_rate": 7.276241839341471e-05, "loss": 0.02292678952217102, "step": 96000 }, { "epoch": 27.249503264263414, "eval_accuracy": 0.9691613149360971, "eval_loss": 0.1033172532916069, "eval_runtime": 31.4942, "eval_samples_per_second": 499.362, "eval_steps_per_second": 7.811, "step": 96000 }, { "epoch": 27.252341754186773, "grad_norm": 13.947169303894043, "learning_rate": 7.275957990349134e-05, "loss": 0.012080571055412293, "step": 96010 }, { "epoch": 27.255180244110132, "grad_norm": 4.547450065612793, "learning_rate": 7.275674141356798e-05, "loss": 0.006088655069470405, "step": 96020 }, { "epoch": 27.258018734033495, "grad_norm": 0.33254486322402954, "learning_rate": 7.275390292364462e-05, "loss": 0.01574893295764923, "step": 96030 }, { "epoch": 27.260857223956855, "grad_norm": 0.09293801337480545, "learning_rate": 7.275106443372127e-05, "loss": 0.009039023518562317, "step": 96040 }, { "epoch": 27.263695713880217, "grad_norm": 1.611240029335022, "learning_rate": 7.27482259437979e-05, "loss": 0.010182559490203857, "step": 96050 }, { "epoch": 27.266534203803577, "grad_norm": 0.7767118811607361, "learning_rate": 7.274538745387455e-05, "loss": 0.0025655966252088546, "step": 96060 }, { "epoch": 27.269372693726936, "grad_norm": 7.622793197631836, "learning_rate": 7.274254896395119e-05, "loss": 0.005044136568903923, "step": 96070 }, { "epoch": 27.2722111836503, "grad_norm": 0.8604416847229004, "learning_rate": 7.273971047402782e-05, "loss": 0.0035683788359165193, "step": 96080 }, { "epoch": 27.27504967357366, "grad_norm": 0.7299647927284241, "learning_rate": 7.273687198410446e-05, "loss": 0.01007835790514946, "step": 96090 }, { "epoch": 27.27788816349702, "grad_norm": 0.3364797830581665, "learning_rate": 7.27340334941811e-05, "loss": 0.0036778807640075683, "step": 96100 }, { "epoch": 27.28072665342038, "grad_norm": 0.30109700560569763, "learning_rate": 7.273119500425773e-05, "loss": 0.007915738970041275, "step": 96110 }, { "epoch": 27.28356514334374, "grad_norm": 6.812478065490723, "learning_rate": 7.272835651433437e-05, "loss": 0.007474875450134278, "step": 96120 }, { "epoch": 27.286403633267103, "grad_norm": 1.2262887954711914, "learning_rate": 7.272551802441102e-05, "loss": 0.0050614744424819945, "step": 96130 }, { "epoch": 27.289242123190462, "grad_norm": 9.31180191040039, "learning_rate": 7.272267953448765e-05, "loss": 0.0058542326092720035, "step": 96140 }, { "epoch": 27.292080613113825, "grad_norm": 0.45088517665863037, "learning_rate": 7.27198410445643e-05, "loss": 0.013272187113761902, "step": 96150 }, { "epoch": 27.294919103037184, "grad_norm": 0.4800405502319336, "learning_rate": 7.271700255464094e-05, "loss": 0.004820101708173752, "step": 96160 }, { "epoch": 27.297757592960544, "grad_norm": 0.3897879123687744, "learning_rate": 7.271416406471758e-05, "loss": 0.005838307738304138, "step": 96170 }, { "epoch": 27.300596082883906, "grad_norm": 3.394780158996582, "learning_rate": 7.27113255747942e-05, "loss": 0.003043442964553833, "step": 96180 }, { "epoch": 27.303434572807266, "grad_norm": 2.188339948654175, "learning_rate": 7.270848708487086e-05, "loss": 0.004591661319136619, "step": 96190 }, { "epoch": 27.30627306273063, "grad_norm": 3.340996026992798, "learning_rate": 7.27056485949475e-05, "loss": 0.0065595611929893495, "step": 96200 }, { "epoch": 27.309111552653988, "grad_norm": 1.8544424772262573, "learning_rate": 7.270281010502413e-05, "loss": 0.022549913823604585, "step": 96210 }, { "epoch": 27.311950042577347, "grad_norm": 1.800384521484375, "learning_rate": 7.269997161510077e-05, "loss": 0.00701739564538002, "step": 96220 }, { "epoch": 27.31478853250071, "grad_norm": 0.809044361114502, "learning_rate": 7.269713312517741e-05, "loss": 0.005051366239786148, "step": 96230 }, { "epoch": 27.31762702242407, "grad_norm": 0.43807655572891235, "learning_rate": 7.269429463525404e-05, "loss": 0.014744056761264801, "step": 96240 }, { "epoch": 27.320465512347432, "grad_norm": 0.2330809086561203, "learning_rate": 7.269145614533068e-05, "loss": 0.008742526173591614, "step": 96250 }, { "epoch": 27.323304002270792, "grad_norm": 0.2933183014392853, "learning_rate": 7.268861765540734e-05, "loss": 0.0083245649933815, "step": 96260 }, { "epoch": 27.32614249219415, "grad_norm": 2.33992862701416, "learning_rate": 7.268577916548396e-05, "loss": 0.009417203068733216, "step": 96270 }, { "epoch": 27.328980982117514, "grad_norm": 1.5231910943984985, "learning_rate": 7.26829406755606e-05, "loss": 0.009498282521963119, "step": 96280 }, { "epoch": 27.331819472040873, "grad_norm": 0.9619318246841431, "learning_rate": 7.268010218563725e-05, "loss": 0.012492598593235016, "step": 96290 }, { "epoch": 27.334657961964236, "grad_norm": 3.6967458724975586, "learning_rate": 7.267726369571389e-05, "loss": 0.007103332877159118, "step": 96300 }, { "epoch": 27.337496451887596, "grad_norm": 18.862524032592773, "learning_rate": 7.267442520579052e-05, "loss": 0.006693925708532333, "step": 96310 }, { "epoch": 27.340334941810955, "grad_norm": 0.29030734300613403, "learning_rate": 7.267158671586716e-05, "loss": 0.007195537537336349, "step": 96320 }, { "epoch": 27.343173431734318, "grad_norm": 1.9510637521743774, "learning_rate": 7.266874822594381e-05, "loss": 0.012335879355669021, "step": 96330 }, { "epoch": 27.346011921657677, "grad_norm": 0.15836979448795319, "learning_rate": 7.266590973602044e-05, "loss": 0.0038990002125501634, "step": 96340 }, { "epoch": 27.34885041158104, "grad_norm": 0.12012756615877151, "learning_rate": 7.266307124609708e-05, "loss": 0.007017889618873596, "step": 96350 }, { "epoch": 27.3516889015044, "grad_norm": 0.25763124227523804, "learning_rate": 7.266023275617372e-05, "loss": 0.016184821724891663, "step": 96360 }, { "epoch": 27.35452739142776, "grad_norm": 0.27734827995300293, "learning_rate": 7.265739426625035e-05, "loss": 0.0028782114386558533, "step": 96370 }, { "epoch": 27.35736588135112, "grad_norm": 5.8750200271606445, "learning_rate": 7.265455577632699e-05, "loss": 0.011175237596035004, "step": 96380 }, { "epoch": 27.36020437127448, "grad_norm": 0.6503676772117615, "learning_rate": 7.265171728640365e-05, "loss": 0.0018696736544370652, "step": 96390 }, { "epoch": 27.363042861197844, "grad_norm": 1.7437611818313599, "learning_rate": 7.264887879648027e-05, "loss": 0.009328977018594743, "step": 96400 }, { "epoch": 27.365881351121203, "grad_norm": 3.5311877727508545, "learning_rate": 7.264604030655692e-05, "loss": 0.013546699285507202, "step": 96410 }, { "epoch": 27.368719841044566, "grad_norm": 1.6429803371429443, "learning_rate": 7.264320181663356e-05, "loss": 0.00697207897901535, "step": 96420 }, { "epoch": 27.371558330967925, "grad_norm": 4.738337993621826, "learning_rate": 7.26403633267102e-05, "loss": 0.005013350024819374, "step": 96430 }, { "epoch": 27.374396820891285, "grad_norm": 0.4073253870010376, "learning_rate": 7.263752483678683e-05, "loss": 0.006968820840120316, "step": 96440 }, { "epoch": 27.377235310814648, "grad_norm": 2.3746659755706787, "learning_rate": 7.263468634686347e-05, "loss": 0.0054799336940050125, "step": 96450 }, { "epoch": 27.380073800738007, "grad_norm": 1.6901803016662598, "learning_rate": 7.263184785694012e-05, "loss": 0.009037511050701141, "step": 96460 }, { "epoch": 27.38291229066137, "grad_norm": 2.0042309761047363, "learning_rate": 7.262900936701675e-05, "loss": 0.022585903108119965, "step": 96470 }, { "epoch": 27.38575078058473, "grad_norm": 7.392411231994629, "learning_rate": 7.262617087709339e-05, "loss": 0.01852399706840515, "step": 96480 }, { "epoch": 27.38858927050809, "grad_norm": 1.6687690019607544, "learning_rate": 7.262361623616236e-05, "loss": 0.03147360980510712, "step": 96490 }, { "epoch": 27.39142776043145, "grad_norm": 8.044534683227539, "learning_rate": 7.2620777746239e-05, "loss": 0.013714486360549926, "step": 96500 }, { "epoch": 27.39142776043145, "eval_accuracy": 0.9504037642271254, "eval_loss": 0.17266052961349487, "eval_runtime": 31.8253, "eval_samples_per_second": 494.166, "eval_steps_per_second": 7.73, "step": 96500 }, { "epoch": 27.39426625035481, "grad_norm": 2.395951509475708, "learning_rate": 7.261793925631565e-05, "loss": 0.012301383912563324, "step": 96510 }, { "epoch": 27.397104740278174, "grad_norm": 12.739520072937012, "learning_rate": 7.261510076639228e-05, "loss": 0.007749860733747482, "step": 96520 }, { "epoch": 27.399943230201533, "grad_norm": 7.558526039123535, "learning_rate": 7.261226227646892e-05, "loss": 0.00884302332997322, "step": 96530 }, { "epoch": 27.402781720124892, "grad_norm": 3.3410837650299072, "learning_rate": 7.260942378654556e-05, "loss": 0.0058293651789426805, "step": 96540 }, { "epoch": 27.405620210048255, "grad_norm": 1.2156295776367188, "learning_rate": 7.260658529662219e-05, "loss": 0.010729898512363435, "step": 96550 }, { "epoch": 27.408458699971614, "grad_norm": 2.8206207752227783, "learning_rate": 7.260374680669883e-05, "loss": 0.003312871605157852, "step": 96560 }, { "epoch": 27.411297189894977, "grad_norm": 0.6392220258712769, "learning_rate": 7.260090831677549e-05, "loss": 0.004106239229440689, "step": 96570 }, { "epoch": 27.414135679818337, "grad_norm": 0.2573968768119812, "learning_rate": 7.259806982685212e-05, "loss": 0.002267581783235073, "step": 96580 }, { "epoch": 27.416974169741696, "grad_norm": 1.4451062679290771, "learning_rate": 7.259523133692876e-05, "loss": 0.011018510162830352, "step": 96590 }, { "epoch": 27.41981265966506, "grad_norm": 1.4824426174163818, "learning_rate": 7.25923928470054e-05, "loss": 0.013237187266349792, "step": 96600 }, { "epoch": 27.422651149588418, "grad_norm": 4.649725437164307, "learning_rate": 7.258955435708204e-05, "loss": 0.007501885294914246, "step": 96610 }, { "epoch": 27.42548963951178, "grad_norm": 0.8873780369758606, "learning_rate": 7.258671586715867e-05, "loss": 0.003422384709119797, "step": 96620 }, { "epoch": 27.42832812943514, "grad_norm": 1.164281964302063, "learning_rate": 7.258387737723531e-05, "loss": 0.011814720183610915, "step": 96630 }, { "epoch": 27.4311666193585, "grad_norm": 0.29692983627319336, "learning_rate": 7.258103888731197e-05, "loss": 0.012984825670719147, "step": 96640 }, { "epoch": 27.434005109281863, "grad_norm": 0.19270507991313934, "learning_rate": 7.257820039738859e-05, "loss": 0.0064699709415435794, "step": 96650 }, { "epoch": 27.436843599205222, "grad_norm": 1.0714445114135742, "learning_rate": 7.257536190746523e-05, "loss": 0.005831456929445267, "step": 96660 }, { "epoch": 27.439682089128585, "grad_norm": 0.09459691494703293, "learning_rate": 7.257252341754188e-05, "loss": 0.007966187596321107, "step": 96670 }, { "epoch": 27.442520579051944, "grad_norm": 3.170504570007324, "learning_rate": 7.25696849276185e-05, "loss": 0.009935998916625976, "step": 96680 }, { "epoch": 27.445359068975304, "grad_norm": 0.16680097579956055, "learning_rate": 7.256684643769514e-05, "loss": 0.007530912756919861, "step": 96690 }, { "epoch": 27.448197558898666, "grad_norm": 0.6157312393188477, "learning_rate": 7.256400794777179e-05, "loss": 0.004374706372618675, "step": 96700 }, { "epoch": 27.451036048822026, "grad_norm": 1.2958533763885498, "learning_rate": 7.256116945784843e-05, "loss": 0.0033854272216558455, "step": 96710 }, { "epoch": 27.45387453874539, "grad_norm": 3.427262306213379, "learning_rate": 7.255833096792507e-05, "loss": 0.008452561497688294, "step": 96720 }, { "epoch": 27.456713028668748, "grad_norm": 3.8613510131835938, "learning_rate": 7.255549247800171e-05, "loss": 0.02232562154531479, "step": 96730 }, { "epoch": 27.459551518592107, "grad_norm": 0.5082535147666931, "learning_rate": 7.255265398807835e-05, "loss": 0.012889789044857025, "step": 96740 }, { "epoch": 27.46239000851547, "grad_norm": 3.989868640899658, "learning_rate": 7.254981549815498e-05, "loss": 0.011557345092296601, "step": 96750 }, { "epoch": 27.46522849843883, "grad_norm": 4.326202392578125, "learning_rate": 7.254697700823162e-05, "loss": 0.011441760510206223, "step": 96760 }, { "epoch": 27.468066988362192, "grad_norm": 7.229315280914307, "learning_rate": 7.254413851830826e-05, "loss": 0.013257811963558196, "step": 96770 }, { "epoch": 27.47090547828555, "grad_norm": 5.238214492797852, "learning_rate": 7.25413000283849e-05, "loss": 0.006614356487989426, "step": 96780 }, { "epoch": 27.473743968208915, "grad_norm": 1.003938913345337, "learning_rate": 7.253846153846155e-05, "loss": 0.005573833733797074, "step": 96790 }, { "epoch": 27.476582458132274, "grad_norm": 1.3977997303009033, "learning_rate": 7.253562304853819e-05, "loss": 0.012365708500146866, "step": 96800 }, { "epoch": 27.479420948055633, "grad_norm": 0.26060131192207336, "learning_rate": 7.253278455861481e-05, "loss": 0.01858411878347397, "step": 96810 }, { "epoch": 27.482259437978996, "grad_norm": 0.3627377152442932, "learning_rate": 7.252994606869146e-05, "loss": 0.00517183244228363, "step": 96820 }, { "epoch": 27.485097927902356, "grad_norm": 1.1966164112091064, "learning_rate": 7.25271075787681e-05, "loss": 0.013920573890209198, "step": 96830 }, { "epoch": 27.48793641782572, "grad_norm": 0.3464977741241455, "learning_rate": 7.252426908884474e-05, "loss": 0.013054509460926057, "step": 96840 }, { "epoch": 27.490774907749078, "grad_norm": 0.2093365341424942, "learning_rate": 7.252143059892138e-05, "loss": 0.012960982322692872, "step": 96850 }, { "epoch": 27.493613397672437, "grad_norm": 0.6205979585647583, "learning_rate": 7.251859210899802e-05, "loss": 0.004815002530813217, "step": 96860 }, { "epoch": 27.4964518875958, "grad_norm": 0.08560879528522491, "learning_rate": 7.251575361907465e-05, "loss": 0.00298139713704586, "step": 96870 }, { "epoch": 27.49929037751916, "grad_norm": 21.027992248535156, "learning_rate": 7.251291512915129e-05, "loss": 0.01402672678232193, "step": 96880 }, { "epoch": 27.502128867442522, "grad_norm": 6.23898458480835, "learning_rate": 7.251007663922793e-05, "loss": 0.006502272188663482, "step": 96890 }, { "epoch": 27.50496735736588, "grad_norm": 0.41462770104408264, "learning_rate": 7.250723814930457e-05, "loss": 0.005933268740773201, "step": 96900 }, { "epoch": 27.50780584728924, "grad_norm": 9.051358222961426, "learning_rate": 7.250439965938121e-05, "loss": 0.0063718177378177645, "step": 96910 }, { "epoch": 27.510644337212604, "grad_norm": 2.805727481842041, "learning_rate": 7.250156116945786e-05, "loss": 0.023745051026344298, "step": 96920 }, { "epoch": 27.513482827135963, "grad_norm": 0.05529703572392464, "learning_rate": 7.24987226795345e-05, "loss": 0.006505998969078064, "step": 96930 }, { "epoch": 27.516321317059326, "grad_norm": 1.3401886224746704, "learning_rate": 7.249588418961112e-05, "loss": 0.002583126723766327, "step": 96940 }, { "epoch": 27.519159806982685, "grad_norm": 18.591110229492188, "learning_rate": 7.249304569968777e-05, "loss": 0.007697752863168717, "step": 96950 }, { "epoch": 27.521998296906045, "grad_norm": 11.396615028381348, "learning_rate": 7.249020720976441e-05, "loss": 0.0189493864774704, "step": 96960 }, { "epoch": 27.524836786829407, "grad_norm": 0.5214499831199646, "learning_rate": 7.248736871984105e-05, "loss": 0.011895643919706345, "step": 96970 }, { "epoch": 27.527675276752767, "grad_norm": 6.899120807647705, "learning_rate": 7.248453022991769e-05, "loss": 0.013189561665058136, "step": 96980 }, { "epoch": 27.53051376667613, "grad_norm": 0.1787477284669876, "learning_rate": 7.248169173999433e-05, "loss": 0.008279111981391907, "step": 96990 }, { "epoch": 27.53335225659949, "grad_norm": 4.751768112182617, "learning_rate": 7.247885325007096e-05, "loss": 0.0037852950394153596, "step": 97000 }, { "epoch": 27.53335225659949, "eval_accuracy": 0.9696064093596999, "eval_loss": 0.10103029012680054, "eval_runtime": 31.6708, "eval_samples_per_second": 496.577, "eval_steps_per_second": 7.767, "step": 97000 }, { "epoch": 27.53619074652285, "grad_norm": 18.309608459472656, "learning_rate": 7.24760147601476e-05, "loss": 0.005369041860103607, "step": 97010 }, { "epoch": 27.53902923644621, "grad_norm": 0.3141149580478668, "learning_rate": 7.247317627022424e-05, "loss": 0.008644135296344757, "step": 97020 }, { "epoch": 27.54186772636957, "grad_norm": 1.2588982582092285, "learning_rate": 7.247033778030088e-05, "loss": 0.009364360570907592, "step": 97030 }, { "epoch": 27.544706216292933, "grad_norm": 2.368772268295288, "learning_rate": 7.246749929037753e-05, "loss": 0.008384592831134796, "step": 97040 }, { "epoch": 27.547544706216293, "grad_norm": 0.9589358568191528, "learning_rate": 7.246466080045417e-05, "loss": 0.015506300330162048, "step": 97050 }, { "epoch": 27.550383196139652, "grad_norm": 4.463979244232178, "learning_rate": 7.246182231053081e-05, "loss": 0.009839032590389252, "step": 97060 }, { "epoch": 27.553221686063015, "grad_norm": 1.2548481225967407, "learning_rate": 7.245898382060744e-05, "loss": 0.008721734583377837, "step": 97070 }, { "epoch": 27.556060175986374, "grad_norm": 0.9108835458755493, "learning_rate": 7.245614533068408e-05, "loss": 0.007576054334640503, "step": 97080 }, { "epoch": 27.558898665909737, "grad_norm": 0.2655707597732544, "learning_rate": 7.245330684076072e-05, "loss": 0.017939600348472595, "step": 97090 }, { "epoch": 27.561737155833097, "grad_norm": 3.9697265625, "learning_rate": 7.245046835083735e-05, "loss": 0.004625979065895081, "step": 97100 }, { "epoch": 27.564575645756456, "grad_norm": 15.890666961669922, "learning_rate": 7.2447629860914e-05, "loss": 0.0073037870228290554, "step": 97110 }, { "epoch": 27.56741413567982, "grad_norm": 0.27114689350128174, "learning_rate": 7.244479137099064e-05, "loss": 0.004322763532400131, "step": 97120 }, { "epoch": 27.570252625603178, "grad_norm": 0.1193581074476242, "learning_rate": 7.244195288106727e-05, "loss": 0.016020789742469788, "step": 97130 }, { "epoch": 27.57309111552654, "grad_norm": 3.3606345653533936, "learning_rate": 7.243911439114391e-05, "loss": 0.005299486964941025, "step": 97140 }, { "epoch": 27.5759296054499, "grad_norm": 0.4245653450489044, "learning_rate": 7.243627590122055e-05, "loss": 0.01874551922082901, "step": 97150 }, { "epoch": 27.578768095373263, "grad_norm": 0.6381334662437439, "learning_rate": 7.24334374112972e-05, "loss": 0.0030078617855906487, "step": 97160 }, { "epoch": 27.581606585296623, "grad_norm": 3.3288047313690186, "learning_rate": 7.243059892137384e-05, "loss": 0.002357511967420578, "step": 97170 }, { "epoch": 27.584445075219982, "grad_norm": 0.6359197497367859, "learning_rate": 7.242776043145048e-05, "loss": 0.006189919263124466, "step": 97180 }, { "epoch": 27.587283565143345, "grad_norm": 3.2643861770629883, "learning_rate": 7.242492194152712e-05, "loss": 0.0044521156698465346, "step": 97190 }, { "epoch": 27.590122055066704, "grad_norm": 3.789705276489258, "learning_rate": 7.242208345160375e-05, "loss": 0.006668069958686828, "step": 97200 }, { "epoch": 27.592960544990063, "grad_norm": 0.311458945274353, "learning_rate": 7.241924496168039e-05, "loss": 0.010240865498781204, "step": 97210 }, { "epoch": 27.595799034913426, "grad_norm": 6.262993812561035, "learning_rate": 7.241640647175703e-05, "loss": 0.01112165004014969, "step": 97220 }, { "epoch": 27.598637524836786, "grad_norm": 1.1777467727661133, "learning_rate": 7.241356798183366e-05, "loss": 0.008434039354324342, "step": 97230 }, { "epoch": 27.60147601476015, "grad_norm": 8.886853218078613, "learning_rate": 7.241072949191031e-05, "loss": 0.009245367348194122, "step": 97240 }, { "epoch": 27.604314504683508, "grad_norm": 7.137858867645264, "learning_rate": 7.240789100198695e-05, "loss": 0.009209196269512176, "step": 97250 }, { "epoch": 27.60715299460687, "grad_norm": 1.8496016263961792, "learning_rate": 7.240505251206358e-05, "loss": 0.008311323821544647, "step": 97260 }, { "epoch": 27.60999148453023, "grad_norm": 2.5030462741851807, "learning_rate": 7.240221402214022e-05, "loss": 0.00514330193400383, "step": 97270 }, { "epoch": 27.61282997445359, "grad_norm": 2.34675931930542, "learning_rate": 7.239937553221686e-05, "loss": 0.007117930054664612, "step": 97280 }, { "epoch": 27.615668464376952, "grad_norm": 2.6385257244110107, "learning_rate": 7.23965370422935e-05, "loss": 0.014711697399616242, "step": 97290 }, { "epoch": 27.61850695430031, "grad_norm": 12.356476783752441, "learning_rate": 7.239369855237013e-05, "loss": 0.01791999936103821, "step": 97300 }, { "epoch": 27.621345444223675, "grad_norm": 12.133975982666016, "learning_rate": 7.239086006244679e-05, "loss": 0.015779873728752135, "step": 97310 }, { "epoch": 27.624183934147034, "grad_norm": 8.408624649047852, "learning_rate": 7.238802157252343e-05, "loss": 0.016854201257228852, "step": 97320 }, { "epoch": 27.627022424070393, "grad_norm": 1.9350236654281616, "learning_rate": 7.238518308260006e-05, "loss": 0.02021499127149582, "step": 97330 }, { "epoch": 27.629860913993756, "grad_norm": 0.8486632704734802, "learning_rate": 7.23823445926767e-05, "loss": 0.009477576613426209, "step": 97340 }, { "epoch": 27.632699403917115, "grad_norm": 9.853727340698242, "learning_rate": 7.237950610275334e-05, "loss": 0.007922462373971938, "step": 97350 }, { "epoch": 27.63553789384048, "grad_norm": 13.207910537719727, "learning_rate": 7.237666761282997e-05, "loss": 0.012919804453849793, "step": 97360 }, { "epoch": 27.638376383763838, "grad_norm": 13.077064514160156, "learning_rate": 7.237382912290662e-05, "loss": 0.025103533267974855, "step": 97370 }, { "epoch": 27.641214873687197, "grad_norm": 2.627812385559082, "learning_rate": 7.237099063298326e-05, "loss": 0.005594475194811821, "step": 97380 }, { "epoch": 27.64405336361056, "grad_norm": 0.34933528304100037, "learning_rate": 7.236815214305989e-05, "loss": 0.009236873686313629, "step": 97390 }, { "epoch": 27.64689185353392, "grad_norm": 0.6580194234848022, "learning_rate": 7.236531365313653e-05, "loss": 0.004521681368350983, "step": 97400 }, { "epoch": 27.649730343457282, "grad_norm": 11.513921737670898, "learning_rate": 7.236247516321317e-05, "loss": 0.007708260416984558, "step": 97410 }, { "epoch": 27.65256883338064, "grad_norm": 5.798037528991699, "learning_rate": 7.235963667328982e-05, "loss": 0.020144805312156677, "step": 97420 }, { "epoch": 27.655407323304, "grad_norm": 5.320600986480713, "learning_rate": 7.235679818336644e-05, "loss": 0.006030211597681046, "step": 97430 }, { "epoch": 27.658245813227364, "grad_norm": 4.088475704193115, "learning_rate": 7.23539596934431e-05, "loss": 0.014188669621944427, "step": 97440 }, { "epoch": 27.661084303150723, "grad_norm": 0.15569989383220673, "learning_rate": 7.235112120351974e-05, "loss": 0.017789986729621888, "step": 97450 }, { "epoch": 27.663922793074086, "grad_norm": 3.991321086883545, "learning_rate": 7.234828271359637e-05, "loss": 0.007727222144603729, "step": 97460 }, { "epoch": 27.666761282997445, "grad_norm": 0.4398612678050995, "learning_rate": 7.234544422367301e-05, "loss": 0.028758913278579712, "step": 97470 }, { "epoch": 27.669599772920805, "grad_norm": 0.6409338116645813, "learning_rate": 7.234260573374965e-05, "loss": 0.00842859074473381, "step": 97480 }, { "epoch": 27.672438262844167, "grad_norm": 3.174285650253296, "learning_rate": 7.233976724382628e-05, "loss": 0.012332449853420257, "step": 97490 }, { "epoch": 27.675276752767527, "grad_norm": 10.691227912902832, "learning_rate": 7.233692875390292e-05, "loss": 0.02360384613275528, "step": 97500 }, { "epoch": 27.675276752767527, "eval_accuracy": 0.9672537674063713, "eval_loss": 0.10689795017242432, "eval_runtime": 31.9797, "eval_samples_per_second": 491.78, "eval_steps_per_second": 7.692, "step": 97500 }, { "epoch": 27.67811524269089, "grad_norm": 2.829873561859131, "learning_rate": 7.233409026397957e-05, "loss": 0.030706974864006042, "step": 97510 }, { "epoch": 27.68095373261425, "grad_norm": 0.17624731361865997, "learning_rate": 7.23312517740562e-05, "loss": 0.003634941950440407, "step": 97520 }, { "epoch": 27.68379222253761, "grad_norm": 9.34074592590332, "learning_rate": 7.232841328413284e-05, "loss": 0.021944473683834075, "step": 97530 }, { "epoch": 27.68663071246097, "grad_norm": 0.44940418004989624, "learning_rate": 7.232557479420949e-05, "loss": 0.013528761267662049, "step": 97540 }, { "epoch": 27.68946920238433, "grad_norm": 0.11034413427114487, "learning_rate": 7.232273630428613e-05, "loss": 0.009645035117864608, "step": 97550 }, { "epoch": 27.692307692307693, "grad_norm": 4.107700347900391, "learning_rate": 7.231989781436275e-05, "loss": 0.016584350168704985, "step": 97560 }, { "epoch": 27.695146182231053, "grad_norm": 3.4550509452819824, "learning_rate": 7.231705932443941e-05, "loss": 0.00828484669327736, "step": 97570 }, { "epoch": 27.697984672154412, "grad_norm": 13.39214038848877, "learning_rate": 7.231422083451605e-05, "loss": 0.009620623290538787, "step": 97580 }, { "epoch": 27.700823162077775, "grad_norm": 2.5609512329101562, "learning_rate": 7.231138234459268e-05, "loss": 0.005959572270512581, "step": 97590 }, { "epoch": 27.703661652001134, "grad_norm": 4.887593746185303, "learning_rate": 7.230854385466932e-05, "loss": 0.013908010721206666, "step": 97600 }, { "epoch": 27.706500141924497, "grad_norm": 1.9685144424438477, "learning_rate": 7.230570536474596e-05, "loss": 0.005428294837474823, "step": 97610 }, { "epoch": 27.709338631847857, "grad_norm": 6.592779636383057, "learning_rate": 7.230286687482259e-05, "loss": 0.006298057734966278, "step": 97620 }, { "epoch": 27.71217712177122, "grad_norm": 2.8147764205932617, "learning_rate": 7.230002838489923e-05, "loss": 0.007245076447725296, "step": 97630 }, { "epoch": 27.71501561169458, "grad_norm": 12.905450820922852, "learning_rate": 7.229718989497589e-05, "loss": 0.019087037444114684, "step": 97640 }, { "epoch": 27.717854101617938, "grad_norm": 3.3887698650360107, "learning_rate": 7.229435140505251e-05, "loss": 0.01091318279504776, "step": 97650 }, { "epoch": 27.7206925915413, "grad_norm": 8.123041152954102, "learning_rate": 7.229151291512915e-05, "loss": 0.006569728255271912, "step": 97660 }, { "epoch": 27.72353108146466, "grad_norm": 0.401538610458374, "learning_rate": 7.22886744252058e-05, "loss": 0.005447173863649369, "step": 97670 }, { "epoch": 27.726369571388023, "grad_norm": 1.5301367044448853, "learning_rate": 7.228583593528244e-05, "loss": 0.004692605882883072, "step": 97680 }, { "epoch": 27.729208061311382, "grad_norm": 1.930240273475647, "learning_rate": 7.228299744535907e-05, "loss": 0.013348780572414398, "step": 97690 }, { "epoch": 27.732046551234742, "grad_norm": 3.3197882175445557, "learning_rate": 7.228015895543571e-05, "loss": 0.011002518236637115, "step": 97700 }, { "epoch": 27.734885041158105, "grad_norm": 1.3957546949386597, "learning_rate": 7.227732046551235e-05, "loss": 0.008149507641792297, "step": 97710 }, { "epoch": 27.737723531081464, "grad_norm": 1.0319628715515137, "learning_rate": 7.227448197558899e-05, "loss": 0.0028079690411686896, "step": 97720 }, { "epoch": 27.740562021004827, "grad_norm": 0.13723233342170715, "learning_rate": 7.227164348566563e-05, "loss": 0.004231739044189453, "step": 97730 }, { "epoch": 27.743400510928186, "grad_norm": 1.7648617029190063, "learning_rate": 7.226880499574227e-05, "loss": 0.0059420645236969, "step": 97740 }, { "epoch": 27.746239000851546, "grad_norm": 0.7952282428741455, "learning_rate": 7.22659665058189e-05, "loss": 0.00470990426838398, "step": 97750 }, { "epoch": 27.74907749077491, "grad_norm": 0.6019049286842346, "learning_rate": 7.226312801589554e-05, "loss": 0.005932306498289108, "step": 97760 }, { "epoch": 27.751915980698268, "grad_norm": 8.901731491088867, "learning_rate": 7.22602895259722e-05, "loss": 0.007958200573921204, "step": 97770 }, { "epoch": 27.75475447062163, "grad_norm": 1.9121177196502686, "learning_rate": 7.225745103604882e-05, "loss": 0.006304142624139785, "step": 97780 }, { "epoch": 27.75759296054499, "grad_norm": 1.8885475397109985, "learning_rate": 7.225461254612547e-05, "loss": 0.00800783783197403, "step": 97790 }, { "epoch": 27.76043145046835, "grad_norm": 0.6312928795814514, "learning_rate": 7.225177405620211e-05, "loss": 0.013156653940677642, "step": 97800 }, { "epoch": 27.763269940391712, "grad_norm": 0.661371648311615, "learning_rate": 7.224893556627873e-05, "loss": 0.013505423069000244, "step": 97810 }, { "epoch": 27.76610843031507, "grad_norm": 2.2893810272216797, "learning_rate": 7.224609707635538e-05, "loss": 0.005285640805959701, "step": 97820 }, { "epoch": 27.768946920238434, "grad_norm": 0.1757335662841797, "learning_rate": 7.224325858643202e-05, "loss": 0.0034962423145771027, "step": 97830 }, { "epoch": 27.771785410161794, "grad_norm": 11.619396209716797, "learning_rate": 7.224042009650866e-05, "loss": 0.009888204932212829, "step": 97840 }, { "epoch": 27.774623900085153, "grad_norm": 11.598549842834473, "learning_rate": 7.22375816065853e-05, "loss": 0.007115699350833893, "step": 97850 }, { "epoch": 27.777462390008516, "grad_norm": 3.7941131591796875, "learning_rate": 7.223474311666194e-05, "loss": 0.005165163800120354, "step": 97860 }, { "epoch": 27.780300879931875, "grad_norm": 9.735224723815918, "learning_rate": 7.223190462673858e-05, "loss": 0.009645286202430724, "step": 97870 }, { "epoch": 27.78313936985524, "grad_norm": 4.729522228240967, "learning_rate": 7.222906613681521e-05, "loss": 0.01726793944835663, "step": 97880 }, { "epoch": 27.785977859778598, "grad_norm": 1.9284528493881226, "learning_rate": 7.222622764689185e-05, "loss": 0.014305844902992249, "step": 97890 }, { "epoch": 27.788816349701957, "grad_norm": 0.05202716961503029, "learning_rate": 7.222338915696851e-05, "loss": 0.010578495264053345, "step": 97900 }, { "epoch": 27.79165483962532, "grad_norm": 0.9352652430534363, "learning_rate": 7.222055066704514e-05, "loss": 0.010015155375003814, "step": 97910 }, { "epoch": 27.79449332954868, "grad_norm": 5.734329700469971, "learning_rate": 7.221771217712178e-05, "loss": 0.008656066656112672, "step": 97920 }, { "epoch": 27.797331819472042, "grad_norm": 1.7903918027877808, "learning_rate": 7.221487368719842e-05, "loss": 0.012787604331970214, "step": 97930 }, { "epoch": 27.8001703093954, "grad_norm": 3.4531805515289307, "learning_rate": 7.221203519727505e-05, "loss": 0.028377822041511534, "step": 97940 }, { "epoch": 27.80300879931876, "grad_norm": 0.14791043102741241, "learning_rate": 7.220919670735169e-05, "loss": 0.008879422396421432, "step": 97950 }, { "epoch": 27.805847289242124, "grad_norm": 0.09765651077032089, "learning_rate": 7.220635821742833e-05, "loss": 0.02921227514743805, "step": 97960 }, { "epoch": 27.808685779165483, "grad_norm": 10.292367935180664, "learning_rate": 7.220351972750497e-05, "loss": 0.013348039984703065, "step": 97970 }, { "epoch": 27.811524269088846, "grad_norm": 2.124948024749756, "learning_rate": 7.220068123758161e-05, "loss": 0.009312741458415985, "step": 97980 }, { "epoch": 27.814362759012205, "grad_norm": 3.3016645908355713, "learning_rate": 7.219784274765825e-05, "loss": 0.008218172937631607, "step": 97990 }, { "epoch": 27.817201248935568, "grad_norm": 0.9870637059211731, "learning_rate": 7.21950042577349e-05, "loss": 0.008779406547546387, "step": 98000 }, { "epoch": 27.817201248935568, "eval_accuracy": 0.9703058434539328, "eval_loss": 0.10532072931528091, "eval_runtime": 31.3547, "eval_samples_per_second": 501.583, "eval_steps_per_second": 7.846, "step": 98000 }, { "epoch": 27.820039738858927, "grad_norm": 2.913940191268921, "learning_rate": 7.219216576781152e-05, "loss": 0.010377797484397887, "step": 98010 }, { "epoch": 27.822878228782287, "grad_norm": 1.9477972984313965, "learning_rate": 7.218932727788816e-05, "loss": 0.011614929884672165, "step": 98020 }, { "epoch": 27.82571671870565, "grad_norm": 0.7664371132850647, "learning_rate": 7.21864887879648e-05, "loss": 0.011967381834983826, "step": 98030 }, { "epoch": 27.82855520862901, "grad_norm": 0.37905406951904297, "learning_rate": 7.218365029804145e-05, "loss": 0.014957410097122193, "step": 98040 }, { "epoch": 27.83139369855237, "grad_norm": 0.3093266785144806, "learning_rate": 7.218081180811809e-05, "loss": 0.004982839524745941, "step": 98050 }, { "epoch": 27.83423218847573, "grad_norm": 0.48155394196510315, "learning_rate": 7.217797331819473e-05, "loss": 0.007398340106010437, "step": 98060 }, { "epoch": 27.83707067839909, "grad_norm": 2.741950511932373, "learning_rate": 7.217513482827136e-05, "loss": 0.01253604143857956, "step": 98070 }, { "epoch": 27.839909168322453, "grad_norm": 13.374955177307129, "learning_rate": 7.2172296338348e-05, "loss": 0.01176898181438446, "step": 98080 }, { "epoch": 27.842747658245813, "grad_norm": 2.3230221271514893, "learning_rate": 7.216945784842464e-05, "loss": 0.008635807037353515, "step": 98090 }, { "epoch": 27.845586148169176, "grad_norm": 4.1186676025390625, "learning_rate": 7.216661935850128e-05, "loss": 0.004837186262011528, "step": 98100 }, { "epoch": 27.848424638092535, "grad_norm": 4.813791275024414, "learning_rate": 7.216378086857792e-05, "loss": 0.03244186639785766, "step": 98110 }, { "epoch": 27.851263128015894, "grad_norm": 4.337273120880127, "learning_rate": 7.216094237865456e-05, "loss": 0.00572916567325592, "step": 98120 }, { "epoch": 27.854101617939257, "grad_norm": 1.7730696201324463, "learning_rate": 7.21581038887312e-05, "loss": 0.011466015875339509, "step": 98130 }, { "epoch": 27.856940107862616, "grad_norm": 0.16428756713867188, "learning_rate": 7.215526539880783e-05, "loss": 0.015670016407966614, "step": 98140 }, { "epoch": 27.85977859778598, "grad_norm": 0.12462593615055084, "learning_rate": 7.215242690888447e-05, "loss": 0.013828511536121368, "step": 98150 }, { "epoch": 27.86261708770934, "grad_norm": 0.19113485515117645, "learning_rate": 7.214958841896112e-05, "loss": 0.0016321105882525443, "step": 98160 }, { "epoch": 27.865455577632698, "grad_norm": 8.633868217468262, "learning_rate": 7.214674992903776e-05, "loss": 0.012891767919063568, "step": 98170 }, { "epoch": 27.86829406755606, "grad_norm": 3.787313461303711, "learning_rate": 7.21439114391144e-05, "loss": 0.01167978197336197, "step": 98180 }, { "epoch": 27.87113255747942, "grad_norm": 1.3536145687103271, "learning_rate": 7.214107294919104e-05, "loss": 0.0035085935145616532, "step": 98190 }, { "epoch": 27.873971047402783, "grad_norm": 13.450508117675781, "learning_rate": 7.213823445926767e-05, "loss": 0.008535811305046081, "step": 98200 }, { "epoch": 27.876809537326142, "grad_norm": 1.455994725227356, "learning_rate": 7.213539596934431e-05, "loss": 0.013846945762634278, "step": 98210 }, { "epoch": 27.8796480272495, "grad_norm": 6.51689338684082, "learning_rate": 7.213255747942095e-05, "loss": 0.005934932082891464, "step": 98220 }, { "epoch": 27.882486517172865, "grad_norm": 0.15285904705524445, "learning_rate": 7.212971898949759e-05, "loss": 0.015312400460243226, "step": 98230 }, { "epoch": 27.885325007096224, "grad_norm": 0.06120777502655983, "learning_rate": 7.212688049957423e-05, "loss": 0.01133604347705841, "step": 98240 }, { "epoch": 27.888163497019587, "grad_norm": 0.14342893660068512, "learning_rate": 7.212404200965087e-05, "loss": 0.008290559798479081, "step": 98250 }, { "epoch": 27.891001986942946, "grad_norm": 0.7989756464958191, "learning_rate": 7.212120351972752e-05, "loss": 0.009576807916164397, "step": 98260 }, { "epoch": 27.893840476866306, "grad_norm": 4.805694103240967, "learning_rate": 7.211836502980414e-05, "loss": 0.009475824981927871, "step": 98270 }, { "epoch": 27.89667896678967, "grad_norm": 3.4726603031158447, "learning_rate": 7.211552653988078e-05, "loss": 0.013142059743404388, "step": 98280 }, { "epoch": 27.899517456713028, "grad_norm": 6.406253337860107, "learning_rate": 7.211268804995743e-05, "loss": 0.017322225868701933, "step": 98290 }, { "epoch": 27.90235594663639, "grad_norm": 0.1765398532152176, "learning_rate": 7.210984956003407e-05, "loss": 0.01699725687503815, "step": 98300 }, { "epoch": 27.90519443655975, "grad_norm": 1.9625648260116577, "learning_rate": 7.210701107011071e-05, "loss": 0.00621083602309227, "step": 98310 }, { "epoch": 27.90803292648311, "grad_norm": 2.850149631500244, "learning_rate": 7.210417258018735e-05, "loss": 0.010471602529287338, "step": 98320 }, { "epoch": 27.910871416406472, "grad_norm": 9.402115821838379, "learning_rate": 7.210133409026398e-05, "loss": 0.020138588547706605, "step": 98330 }, { "epoch": 27.91370990632983, "grad_norm": 0.6400034427642822, "learning_rate": 7.209849560034062e-05, "loss": 0.011958037316799165, "step": 98340 }, { "epoch": 27.916548396253194, "grad_norm": 0.5266924500465393, "learning_rate": 7.209565711041726e-05, "loss": 0.005979631841182709, "step": 98350 }, { "epoch": 27.919386886176554, "grad_norm": 0.10253424197435379, "learning_rate": 7.20928186204939e-05, "loss": 0.022770293056964874, "step": 98360 }, { "epoch": 27.922225376099917, "grad_norm": 11.751627922058105, "learning_rate": 7.208998013057054e-05, "loss": 0.01326298713684082, "step": 98370 }, { "epoch": 27.925063866023276, "grad_norm": 12.277976989746094, "learning_rate": 7.208714164064718e-05, "loss": 0.018603825569152833, "step": 98380 }, { "epoch": 27.927902355946635, "grad_norm": 0.29820170998573303, "learning_rate": 7.208430315072383e-05, "loss": 0.009783060103654862, "step": 98390 }, { "epoch": 27.930740845869998, "grad_norm": 0.40491244196891785, "learning_rate": 7.208146466080045e-05, "loss": 0.00326567143201828, "step": 98400 }, { "epoch": 27.933579335793358, "grad_norm": 0.34837955236434937, "learning_rate": 7.20786261708771e-05, "loss": 0.006888265907764435, "step": 98410 }, { "epoch": 27.93641782571672, "grad_norm": 0.22316290438175201, "learning_rate": 7.207578768095374e-05, "loss": 0.008432324230670928, "step": 98420 }, { "epoch": 27.93925631564008, "grad_norm": 11.993837356567383, "learning_rate": 7.207294919103036e-05, "loss": 0.007068382948637009, "step": 98430 }, { "epoch": 27.94209480556344, "grad_norm": 0.780241072177887, "learning_rate": 7.207011070110702e-05, "loss": 0.001531766913831234, "step": 98440 }, { "epoch": 27.944933295486802, "grad_norm": 0.7169052958488464, "learning_rate": 7.206727221118366e-05, "loss": 0.004088509827852249, "step": 98450 }, { "epoch": 27.94777178541016, "grad_norm": 9.375650405883789, "learning_rate": 7.206443372126029e-05, "loss": 0.009386953711509705, "step": 98460 }, { "epoch": 27.950610275333524, "grad_norm": 0.7772210836410522, "learning_rate": 7.206159523133693e-05, "loss": 0.004189445823431015, "step": 98470 }, { "epoch": 27.953448765256883, "grad_norm": 1.6636050939559937, "learning_rate": 7.205875674141357e-05, "loss": 0.0035268016159534453, "step": 98480 }, { "epoch": 27.956287255180243, "grad_norm": 3.341454029083252, "learning_rate": 7.205591825149021e-05, "loss": 0.009532211720943451, "step": 98490 }, { "epoch": 27.959125745103606, "grad_norm": 0.30353203415870667, "learning_rate": 7.205307976156685e-05, "loss": 0.009427633136510849, "step": 98500 }, { "epoch": 27.959125745103606, "eval_accuracy": 0.9717682965600559, "eval_loss": 0.09503769874572754, "eval_runtime": 32.6519, "eval_samples_per_second": 481.657, "eval_steps_per_second": 7.534, "step": 98500 }, { "epoch": 27.961964235026965, "grad_norm": 12.016179084777832, "learning_rate": 7.20502412716435e-05, "loss": 0.006822566688060761, "step": 98510 }, { "epoch": 27.964802724950328, "grad_norm": 17.09442901611328, "learning_rate": 7.204740278172014e-05, "loss": 0.012226901203393935, "step": 98520 }, { "epoch": 27.967641214873687, "grad_norm": 6.03265380859375, "learning_rate": 7.204456429179676e-05, "loss": 0.0039820164442062374, "step": 98530 }, { "epoch": 27.970479704797047, "grad_norm": 0.15899983048439026, "learning_rate": 7.20417258018734e-05, "loss": 0.004237930104136467, "step": 98540 }, { "epoch": 27.97331819472041, "grad_norm": 5.050915718078613, "learning_rate": 7.203888731195005e-05, "loss": 0.005622537806630135, "step": 98550 }, { "epoch": 27.97615668464377, "grad_norm": 12.573478698730469, "learning_rate": 7.203604882202668e-05, "loss": 0.009363728761672973, "step": 98560 }, { "epoch": 27.97899517456713, "grad_norm": 2.065699577331543, "learning_rate": 7.203321033210333e-05, "loss": 0.013362047076225281, "step": 98570 }, { "epoch": 27.98183366449049, "grad_norm": 1.4128072261810303, "learning_rate": 7.203037184217997e-05, "loss": 0.011735916137695312, "step": 98580 }, { "epoch": 27.98467215441385, "grad_norm": 0.9049136638641357, "learning_rate": 7.20275333522566e-05, "loss": 0.008742931485176086, "step": 98590 }, { "epoch": 27.987510644337213, "grad_norm": 5.3398118019104, "learning_rate": 7.202469486233324e-05, "loss": 0.012394602596759795, "step": 98600 }, { "epoch": 27.990349134260573, "grad_norm": 0.850437581539154, "learning_rate": 7.202185637240988e-05, "loss": 0.013493858277797699, "step": 98610 }, { "epoch": 27.993187624183935, "grad_norm": 0.22714051604270935, "learning_rate": 7.201901788248652e-05, "loss": 0.011869265139102936, "step": 98620 }, { "epoch": 27.996026114107295, "grad_norm": 1.0199172496795654, "learning_rate": 7.201617939256315e-05, "loss": 0.013123774528503418, "step": 98630 }, { "epoch": 27.998864604030654, "grad_norm": 12.334456443786621, "learning_rate": 7.20133409026398e-05, "loss": 0.017952007055282593, "step": 98640 }, { "epoch": 28.001703093954017, "grad_norm": 7.467791557312012, "learning_rate": 7.201050241271643e-05, "loss": 0.006950414925813675, "step": 98650 }, { "epoch": 28.004541583877376, "grad_norm": 0.29806914925575256, "learning_rate": 7.200766392279308e-05, "loss": 0.0026668280363082884, "step": 98660 }, { "epoch": 28.00738007380074, "grad_norm": 8.481799125671387, "learning_rate": 7.200482543286972e-05, "loss": 0.007975976169109344, "step": 98670 }, { "epoch": 28.0102185637241, "grad_norm": 1.0360645055770874, "learning_rate": 7.200198694294636e-05, "loss": 0.003578939288854599, "step": 98680 }, { "epoch": 28.013057053647458, "grad_norm": 0.4537346363067627, "learning_rate": 7.199914845302299e-05, "loss": 0.01181703433394432, "step": 98690 }, { "epoch": 28.01589554357082, "grad_norm": 4.873838424682617, "learning_rate": 7.199630996309964e-05, "loss": 0.0031110072508454324, "step": 98700 }, { "epoch": 28.01873403349418, "grad_norm": 0.26027458906173706, "learning_rate": 7.199347147317628e-05, "loss": 0.0170804962515831, "step": 98710 }, { "epoch": 28.021572523417543, "grad_norm": 4.413666248321533, "learning_rate": 7.199063298325291e-05, "loss": 0.0014163525775074959, "step": 98720 }, { "epoch": 28.024411013340902, "grad_norm": 1.3233695030212402, "learning_rate": 7.198779449332955e-05, "loss": 0.007451024651527405, "step": 98730 }, { "epoch": 28.02724950326426, "grad_norm": 2.9688849449157715, "learning_rate": 7.198495600340619e-05, "loss": 0.006008873134851456, "step": 98740 }, { "epoch": 28.030087993187625, "grad_norm": 0.539840817451477, "learning_rate": 7.198211751348282e-05, "loss": 0.009261567890644074, "step": 98750 }, { "epoch": 28.032926483110984, "grad_norm": 0.17760953307151794, "learning_rate": 7.197927902355946e-05, "loss": 0.010997213423252106, "step": 98760 }, { "epoch": 28.035764973034347, "grad_norm": 1.8217352628707886, "learning_rate": 7.197644053363612e-05, "loss": 0.004322588071227073, "step": 98770 }, { "epoch": 28.038603462957706, "grad_norm": 9.33586311340332, "learning_rate": 7.197360204371274e-05, "loss": 0.004479963704943657, "step": 98780 }, { "epoch": 28.04144195288107, "grad_norm": 0.9295791387557983, "learning_rate": 7.197076355378939e-05, "loss": 0.00568314716219902, "step": 98790 }, { "epoch": 28.04428044280443, "grad_norm": 15.011871337890625, "learning_rate": 7.196792506386603e-05, "loss": 0.016332793235778808, "step": 98800 }, { "epoch": 28.047118932727788, "grad_norm": 2.3577892780303955, "learning_rate": 7.196508657394267e-05, "loss": 0.001977499388158321, "step": 98810 }, { "epoch": 28.04995742265115, "grad_norm": 1.93413507938385, "learning_rate": 7.19622480840193e-05, "loss": 0.004534290730953216, "step": 98820 }, { "epoch": 28.05279591257451, "grad_norm": 2.9221365451812744, "learning_rate": 7.195940959409594e-05, "loss": 0.0015928322449326516, "step": 98830 }, { "epoch": 28.055634402497873, "grad_norm": 1.1011254787445068, "learning_rate": 7.195657110417259e-05, "loss": 0.004823616147041321, "step": 98840 }, { "epoch": 28.058472892421232, "grad_norm": 0.6268956661224365, "learning_rate": 7.195373261424922e-05, "loss": 0.0031440861523151396, "step": 98850 }, { "epoch": 28.06131138234459, "grad_norm": 0.30382683873176575, "learning_rate": 7.195089412432586e-05, "loss": 0.009927687793970108, "step": 98860 }, { "epoch": 28.064149872267954, "grad_norm": 14.53017807006836, "learning_rate": 7.19480556344025e-05, "loss": 0.012180811911821365, "step": 98870 }, { "epoch": 28.066988362191314, "grad_norm": 2.0771942138671875, "learning_rate": 7.194521714447913e-05, "loss": 0.013554766774177551, "step": 98880 }, { "epoch": 28.069826852114677, "grad_norm": 2.0917203426361084, "learning_rate": 7.194237865455577e-05, "loss": 0.0029291097074747086, "step": 98890 }, { "epoch": 28.072665342038036, "grad_norm": 6.1794233322143555, "learning_rate": 7.193954016463243e-05, "loss": 0.005445221066474914, "step": 98900 }, { "epoch": 28.075503831961395, "grad_norm": 2.51206111907959, "learning_rate": 7.193670167470906e-05, "loss": 0.0028542133048176767, "step": 98910 }, { "epoch": 28.078342321884758, "grad_norm": 0.158002108335495, "learning_rate": 7.19338631847857e-05, "loss": 0.005274620652198791, "step": 98920 }, { "epoch": 28.081180811808117, "grad_norm": 1.1941492557525635, "learning_rate": 7.193102469486234e-05, "loss": 0.0024107675999403, "step": 98930 }, { "epoch": 28.08401930173148, "grad_norm": 0.46413490176200867, "learning_rate": 7.192818620493898e-05, "loss": 0.01038382202386856, "step": 98940 }, { "epoch": 28.08685779165484, "grad_norm": 0.3940821886062622, "learning_rate": 7.192534771501561e-05, "loss": 0.0020679386332631113, "step": 98950 }, { "epoch": 28.0896962815782, "grad_norm": 0.22385403513908386, "learning_rate": 7.192250922509225e-05, "loss": 0.005021730810403824, "step": 98960 }, { "epoch": 28.092534771501562, "grad_norm": 11.810433387756348, "learning_rate": 7.19196707351689e-05, "loss": 0.03277445435523987, "step": 98970 }, { "epoch": 28.09537326142492, "grad_norm": 2.3139777183532715, "learning_rate": 7.191683224524553e-05, "loss": 0.011056835949420928, "step": 98980 }, { "epoch": 28.098211751348284, "grad_norm": 3.5697226524353027, "learning_rate": 7.191399375532217e-05, "loss": 0.008641229569911956, "step": 98990 }, { "epoch": 28.101050241271643, "grad_norm": 0.3516702651977539, "learning_rate": 7.191115526539881e-05, "loss": 0.004821983352303505, "step": 99000 }, { "epoch": 28.101050241271643, "eval_accuracy": 0.9721498060660011, "eval_loss": 0.09106661379337311, "eval_runtime": 31.6125, "eval_samples_per_second": 497.492, "eval_steps_per_second": 7.782, "step": 99000 }, { "epoch": 28.103888731195003, "grad_norm": 0.166743665933609, "learning_rate": 7.190831677547544e-05, "loss": 0.00967477709054947, "step": 99010 }, { "epoch": 28.106727221118366, "grad_norm": 0.21311303973197937, "learning_rate": 7.190547828555208e-05, "loss": 0.0070309355854988095, "step": 99020 }, { "epoch": 28.109565711041725, "grad_norm": 0.09932377189397812, "learning_rate": 7.190263979562873e-05, "loss": 0.004808880388736725, "step": 99030 }, { "epoch": 28.112404200965088, "grad_norm": 0.29097533226013184, "learning_rate": 7.189980130570537e-05, "loss": 0.010361646115779877, "step": 99040 }, { "epoch": 28.115242690888447, "grad_norm": 0.4040045440196991, "learning_rate": 7.189696281578201e-05, "loss": 0.009962106496095658, "step": 99050 }, { "epoch": 28.118081180811807, "grad_norm": 6.014442443847656, "learning_rate": 7.189412432585865e-05, "loss": 0.005878487229347229, "step": 99060 }, { "epoch": 28.12091967073517, "grad_norm": 0.35094133019447327, "learning_rate": 7.189128583593529e-05, "loss": 0.013683578372001648, "step": 99070 }, { "epoch": 28.12375816065853, "grad_norm": 8.667387962341309, "learning_rate": 7.188844734601192e-05, "loss": 0.005853508412837982, "step": 99080 }, { "epoch": 28.12659665058189, "grad_norm": 11.186172485351562, "learning_rate": 7.188560885608856e-05, "loss": 0.013310211896896362, "step": 99090 }, { "epoch": 28.12943514050525, "grad_norm": 0.38267460465431213, "learning_rate": 7.188277036616521e-05, "loss": 0.0037995431572198867, "step": 99100 }, { "epoch": 28.13227363042861, "grad_norm": 0.9870344400405884, "learning_rate": 7.187993187624184e-05, "loss": 0.0035861700773239134, "step": 99110 }, { "epoch": 28.135112120351973, "grad_norm": 11.831226348876953, "learning_rate": 7.187709338631848e-05, "loss": 0.01008450835943222, "step": 99120 }, { "epoch": 28.137950610275333, "grad_norm": 4.381542205810547, "learning_rate": 7.187425489639513e-05, "loss": 0.0028457075357437136, "step": 99130 }, { "epoch": 28.140789100198695, "grad_norm": 0.5518834590911865, "learning_rate": 7.187141640647175e-05, "loss": 0.0023403670638799667, "step": 99140 }, { "epoch": 28.143627590122055, "grad_norm": 2.9439587593078613, "learning_rate": 7.18685779165484e-05, "loss": 0.005348050594329834, "step": 99150 }, { "epoch": 28.146466080045414, "grad_norm": 0.191433385014534, "learning_rate": 7.186573942662504e-05, "loss": 0.005311688035726547, "step": 99160 }, { "epoch": 28.149304569968777, "grad_norm": 0.3984471261501312, "learning_rate": 7.186290093670168e-05, "loss": 0.004995758458971977, "step": 99170 }, { "epoch": 28.152143059892136, "grad_norm": 1.2335752248764038, "learning_rate": 7.186006244677832e-05, "loss": 0.010822852700948715, "step": 99180 }, { "epoch": 28.1549815498155, "grad_norm": 2.2628772258758545, "learning_rate": 7.185722395685496e-05, "loss": 0.016571898758411408, "step": 99190 }, { "epoch": 28.15782003973886, "grad_norm": 1.8707209825515747, "learning_rate": 7.18543854669316e-05, "loss": 0.0039032064378261566, "step": 99200 }, { "epoch": 28.16065852966222, "grad_norm": 0.0559735931456089, "learning_rate": 7.185154697700823e-05, "loss": 0.0021983252838253973, "step": 99210 }, { "epoch": 28.16349701958558, "grad_norm": 2.437621593475342, "learning_rate": 7.184870848708487e-05, "loss": 0.0038735304027795792, "step": 99220 }, { "epoch": 28.16633550950894, "grad_norm": 0.7910727262496948, "learning_rate": 7.184586999716153e-05, "loss": 0.0058883953839540485, "step": 99230 }, { "epoch": 28.169173999432303, "grad_norm": 4.698671817779541, "learning_rate": 7.184303150723815e-05, "loss": 0.00824366882443428, "step": 99240 }, { "epoch": 28.172012489355662, "grad_norm": 0.28635090589523315, "learning_rate": 7.18401930173148e-05, "loss": 0.00510806143283844, "step": 99250 }, { "epoch": 28.174850979279025, "grad_norm": 0.18305616080760956, "learning_rate": 7.183735452739144e-05, "loss": 0.0059801720082759855, "step": 99260 }, { "epoch": 28.177689469202384, "grad_norm": 0.2710353434085846, "learning_rate": 7.183451603746806e-05, "loss": 0.016687440872192382, "step": 99270 }, { "epoch": 28.180527959125744, "grad_norm": 0.15624389052391052, "learning_rate": 7.18316775475447e-05, "loss": 0.0055293112993240355, "step": 99280 }, { "epoch": 28.183366449049107, "grad_norm": 0.9305402040481567, "learning_rate": 7.182883905762135e-05, "loss": 0.0020359382033348083, "step": 99290 }, { "epoch": 28.186204938972466, "grad_norm": 5.156400680541992, "learning_rate": 7.182600056769799e-05, "loss": 0.006112484633922577, "step": 99300 }, { "epoch": 28.18904342889583, "grad_norm": 0.15358850359916687, "learning_rate": 7.182316207777463e-05, "loss": 0.004187551885843277, "step": 99310 }, { "epoch": 28.19188191881919, "grad_norm": 1.0000665187835693, "learning_rate": 7.182032358785127e-05, "loss": 0.010005500912666321, "step": 99320 }, { "epoch": 28.194720408742548, "grad_norm": 0.31868642568588257, "learning_rate": 7.181748509792791e-05, "loss": 0.004903925210237503, "step": 99330 }, { "epoch": 28.19755889866591, "grad_norm": 7.087325096130371, "learning_rate": 7.181464660800454e-05, "loss": 0.013305026292800903, "step": 99340 }, { "epoch": 28.20039738858927, "grad_norm": 0.7735273838043213, "learning_rate": 7.181180811808118e-05, "loss": 0.00845407098531723, "step": 99350 }, { "epoch": 28.203235878512633, "grad_norm": Infinity, "learning_rate": 7.180896962815782e-05, "loss": 0.02649623155593872, "step": 99360 }, { "epoch": 28.206074368435992, "grad_norm": 2.1063737869262695, "learning_rate": 7.18064149872268e-05, "loss": 0.019975988566875456, "step": 99370 }, { "epoch": 28.20891285835935, "grad_norm": 1.5339776277542114, "learning_rate": 7.180357649730344e-05, "loss": 0.00294656902551651, "step": 99380 }, { "epoch": 28.211751348282714, "grad_norm": 0.7404527068138123, "learning_rate": 7.180073800738007e-05, "loss": 0.008500242233276367, "step": 99390 }, { "epoch": 28.214589838206074, "grad_norm": 9.017537117004395, "learning_rate": 7.179789951745671e-05, "loss": 0.003944528102874756, "step": 99400 }, { "epoch": 28.217428328129436, "grad_norm": 0.5409374237060547, "learning_rate": 7.179506102753335e-05, "loss": 0.003008117713034153, "step": 99410 }, { "epoch": 28.220266818052796, "grad_norm": 1.012901782989502, "learning_rate": 7.179222253761e-05, "loss": 0.0022559275850653647, "step": 99420 }, { "epoch": 28.223105307976155, "grad_norm": 0.2942754328250885, "learning_rate": 7.178938404768664e-05, "loss": 0.004658424854278564, "step": 99430 }, { "epoch": 28.225943797899518, "grad_norm": 3.034888744354248, "learning_rate": 7.178654555776328e-05, "loss": 0.0037041105329990386, "step": 99440 }, { "epoch": 28.228782287822877, "grad_norm": 1.0000689029693604, "learning_rate": 7.17837070678399e-05, "loss": 0.007320758700370788, "step": 99450 }, { "epoch": 28.23162077774624, "grad_norm": 0.6151852607727051, "learning_rate": 7.178086857791655e-05, "loss": 0.008309341967105865, "step": 99460 }, { "epoch": 28.2344592676696, "grad_norm": 8.539571762084961, "learning_rate": 7.177803008799319e-05, "loss": 0.007549205422401428, "step": 99470 }, { "epoch": 28.23729775759296, "grad_norm": 0.3434697985649109, "learning_rate": 7.177519159806983e-05, "loss": 0.014373625814914703, "step": 99480 }, { "epoch": 28.240136247516322, "grad_norm": 0.06811416894197464, "learning_rate": 7.177235310814647e-05, "loss": 0.004527425020933151, "step": 99490 }, { "epoch": 28.24297473743968, "grad_norm": 0.18530352413654327, "learning_rate": 7.176951461822311e-05, "loss": 0.010271275043487548, "step": 99500 }, { "epoch": 28.24297473743968, "eval_accuracy": 0.9710688624658231, "eval_loss": 0.10043025016784668, "eval_runtime": 32.0465, "eval_samples_per_second": 490.756, "eval_steps_per_second": 7.676, "step": 99500 }, { "epoch": 28.245813227363044, "grad_norm": 7.870345592498779, "learning_rate": 7.176667612829974e-05, "loss": 0.0062620677053928375, "step": 99510 }, { "epoch": 28.248651717286403, "grad_norm": 2.1217336654663086, "learning_rate": 7.176383763837638e-05, "loss": 0.013517190515995026, "step": 99520 }, { "epoch": 28.251490207209763, "grad_norm": 0.18019378185272217, "learning_rate": 7.176099914845302e-05, "loss": 0.02984822392463684, "step": 99530 }, { "epoch": 28.254328697133126, "grad_norm": 1.0927499532699585, "learning_rate": 7.175816065852967e-05, "loss": 0.007988478243350982, "step": 99540 }, { "epoch": 28.257167187056485, "grad_norm": 0.55823814868927, "learning_rate": 7.17553221686063e-05, "loss": 0.022663332521915436, "step": 99550 }, { "epoch": 28.260005676979848, "grad_norm": 0.4943069815635681, "learning_rate": 7.175248367868295e-05, "loss": 0.0180228590965271, "step": 99560 }, { "epoch": 28.262844166903207, "grad_norm": 0.5438591241836548, "learning_rate": 7.174964518875959e-05, "loss": 0.016398058831691743, "step": 99570 }, { "epoch": 28.26568265682657, "grad_norm": 0.8588427305221558, "learning_rate": 7.174680669883622e-05, "loss": 0.013099882006645202, "step": 99580 }, { "epoch": 28.26852114674993, "grad_norm": 0.2103145867586136, "learning_rate": 7.174396820891286e-05, "loss": 0.0022102640941739082, "step": 99590 }, { "epoch": 28.27135963667329, "grad_norm": 1.1357195377349854, "learning_rate": 7.17411297189895e-05, "loss": 0.014051295816898346, "step": 99600 }, { "epoch": 28.27419812659665, "grad_norm": 0.8924902677536011, "learning_rate": 7.173829122906614e-05, "loss": 0.011151428520679473, "step": 99610 }, { "epoch": 28.27703661652001, "grad_norm": 0.5075741410255432, "learning_rate": 7.173545273914278e-05, "loss": 0.01110089272260666, "step": 99620 }, { "epoch": 28.279875106443374, "grad_norm": 13.592966079711914, "learning_rate": 7.173261424921942e-05, "loss": 0.009353438019752502, "step": 99630 }, { "epoch": 28.282713596366733, "grad_norm": 0.902879536151886, "learning_rate": 7.172977575929605e-05, "loss": 0.013001786172389984, "step": 99640 }, { "epoch": 28.285552086290092, "grad_norm": 0.33300724625587463, "learning_rate": 7.17269372693727e-05, "loss": 0.005142996460199356, "step": 99650 }, { "epoch": 28.288390576213455, "grad_norm": 0.56903076171875, "learning_rate": 7.172409877944933e-05, "loss": 0.006196601688861847, "step": 99660 }, { "epoch": 28.291229066136815, "grad_norm": 1.6884076595306396, "learning_rate": 7.172126028952598e-05, "loss": 0.0052748024463653564, "step": 99670 }, { "epoch": 28.294067556060178, "grad_norm": 1.3580198287963867, "learning_rate": 7.171842179960262e-05, "loss": 0.005803303048014641, "step": 99680 }, { "epoch": 28.296906045983537, "grad_norm": 1.6739133596420288, "learning_rate": 7.171558330967926e-05, "loss": 0.007761512696743011, "step": 99690 }, { "epoch": 28.299744535906896, "grad_norm": 0.12523601949214935, "learning_rate": 7.17127448197559e-05, "loss": 0.004343170672655106, "step": 99700 }, { "epoch": 28.30258302583026, "grad_norm": 10.531148910522461, "learning_rate": 7.170990632983253e-05, "loss": 0.010171770304441451, "step": 99710 }, { "epoch": 28.30542151575362, "grad_norm": 0.6256738901138306, "learning_rate": 7.170706783990917e-05, "loss": 0.006293675303459168, "step": 99720 }, { "epoch": 28.30826000567698, "grad_norm": 10.115216255187988, "learning_rate": 7.170422934998581e-05, "loss": 0.00881604328751564, "step": 99730 }, { "epoch": 28.31109849560034, "grad_norm": 4.375977516174316, "learning_rate": 7.170139086006244e-05, "loss": 0.0037077315151691436, "step": 99740 }, { "epoch": 28.3139369855237, "grad_norm": 0.08654285222291946, "learning_rate": 7.16985523701391e-05, "loss": 0.00287113543599844, "step": 99750 }, { "epoch": 28.316775475447063, "grad_norm": 0.05073104426264763, "learning_rate": 7.169571388021573e-05, "loss": 0.0011185450479388238, "step": 99760 }, { "epoch": 28.319613965370422, "grad_norm": 0.3021485209465027, "learning_rate": 7.169287539029236e-05, "loss": 0.0036738071590662, "step": 99770 }, { "epoch": 28.322452455293785, "grad_norm": 0.6410467624664307, "learning_rate": 7.1690036900369e-05, "loss": 0.004172143712639809, "step": 99780 }, { "epoch": 28.325290945217144, "grad_norm": 0.4152567684650421, "learning_rate": 7.168719841044565e-05, "loss": 0.01196725368499756, "step": 99790 }, { "epoch": 28.328129435140504, "grad_norm": 0.11621975898742676, "learning_rate": 7.168435992052229e-05, "loss": 0.00799572914838791, "step": 99800 }, { "epoch": 28.330967925063867, "grad_norm": 2.2739481925964355, "learning_rate": 7.168152143059893e-05, "loss": 0.007562326639890671, "step": 99810 }, { "epoch": 28.333806414987226, "grad_norm": 0.6035052537918091, "learning_rate": 7.167868294067557e-05, "loss": 0.00640815794467926, "step": 99820 }, { "epoch": 28.33664490491059, "grad_norm": 0.5572497248649597, "learning_rate": 7.167584445075221e-05, "loss": 0.0018093831837177276, "step": 99830 }, { "epoch": 28.339483394833948, "grad_norm": 0.46112316846847534, "learning_rate": 7.167300596082884e-05, "loss": 0.006089796125888824, "step": 99840 }, { "epoch": 28.342321884757308, "grad_norm": 0.11471764743328094, "learning_rate": 7.167016747090548e-05, "loss": 0.016142004728317262, "step": 99850 }, { "epoch": 28.34516037468067, "grad_norm": 1.2781332731246948, "learning_rate": 7.166732898098212e-05, "loss": 0.02902599275112152, "step": 99860 }, { "epoch": 28.34799886460403, "grad_norm": 0.7988767623901367, "learning_rate": 7.166449049105875e-05, "loss": 0.0028774447739124296, "step": 99870 }, { "epoch": 28.350837354527393, "grad_norm": 1.424643635749817, "learning_rate": 7.16616520011354e-05, "loss": 0.00243720356374979, "step": 99880 }, { "epoch": 28.353675844450752, "grad_norm": 3.3160929679870605, "learning_rate": 7.165881351121205e-05, "loss": 0.008519315719604492, "step": 99890 }, { "epoch": 28.35651433437411, "grad_norm": 2.2206480503082275, "learning_rate": 7.165597502128867e-05, "loss": 0.007845304906368256, "step": 99900 }, { "epoch": 28.359352824297474, "grad_norm": 1.5007569789886475, "learning_rate": 7.165313653136531e-05, "loss": 0.019638608396053314, "step": 99910 }, { "epoch": 28.362191314220834, "grad_norm": 2.6874520778656006, "learning_rate": 7.165029804144196e-05, "loss": 0.01536380797624588, "step": 99920 }, { "epoch": 28.365029804144196, "grad_norm": 0.7885878682136536, "learning_rate": 7.16474595515186e-05, "loss": 0.014301693439483643, "step": 99930 }, { "epoch": 28.367868294067556, "grad_norm": 4.780697345733643, "learning_rate": 7.164462106159523e-05, "loss": 0.004816725850105286, "step": 99940 }, { "epoch": 28.37070678399092, "grad_norm": 3.338611602783203, "learning_rate": 7.164178257167188e-05, "loss": 0.012926584482192994, "step": 99950 }, { "epoch": 28.373545273914278, "grad_norm": 0.826906681060791, "learning_rate": 7.163894408174852e-05, "loss": 0.0029450468719005585, "step": 99960 }, { "epoch": 28.376383763837637, "grad_norm": 1.2988272905349731, "learning_rate": 7.163610559182515e-05, "loss": 0.012390592694282531, "step": 99970 }, { "epoch": 28.379222253761, "grad_norm": 12.215862274169922, "learning_rate": 7.163326710190179e-05, "loss": 0.010583670437335968, "step": 99980 }, { "epoch": 28.38206074368436, "grad_norm": 0.13780027627944946, "learning_rate": 7.163042861197843e-05, "loss": 0.006357983499765396, "step": 99990 }, { "epoch": 28.384899233607722, "grad_norm": 2.2430219650268555, "learning_rate": 7.162759012205506e-05, "loss": 0.004751493036746979, "step": 100000 }, { "epoch": 28.384899233607722, "eval_accuracy": 0.9714503719717683, "eval_loss": 0.09463127702474594, "eval_runtime": 31.9562, "eval_samples_per_second": 492.142, "eval_steps_per_second": 7.698, "step": 100000 }, { "epoch": 28.38773772353108, "grad_norm": 14.236083984375, "learning_rate": 7.162475163213172e-05, "loss": 0.008964965492486954, "step": 100010 }, { "epoch": 28.39057621345444, "grad_norm": 0.1571747064590454, "learning_rate": 7.162191314220836e-05, "loss": 0.006165198236703873, "step": 100020 }, { "epoch": 28.393414703377804, "grad_norm": 1.0151523351669312, "learning_rate": 7.161907465228498e-05, "loss": 0.004084819555282592, "step": 100030 }, { "epoch": 28.396253193301163, "grad_norm": 0.3392554521560669, "learning_rate": 7.161623616236163e-05, "loss": 0.0028458144515752792, "step": 100040 }, { "epoch": 28.399091683224526, "grad_norm": 0.07869723439216614, "learning_rate": 7.161339767243827e-05, "loss": 0.019164933264255522, "step": 100050 }, { "epoch": 28.401930173147885, "grad_norm": 0.5327182412147522, "learning_rate": 7.161055918251491e-05, "loss": 0.00611410140991211, "step": 100060 }, { "epoch": 28.404768663071245, "grad_norm": 2.274426221847534, "learning_rate": 7.160772069259154e-05, "loss": 0.006041222810745239, "step": 100070 }, { "epoch": 28.407607152994608, "grad_norm": 1.4661033153533936, "learning_rate": 7.160488220266819e-05, "loss": 0.0033572964370250703, "step": 100080 }, { "epoch": 28.410445642917967, "grad_norm": 0.10061443597078323, "learning_rate": 7.160204371274483e-05, "loss": 0.017109432816505434, "step": 100090 }, { "epoch": 28.41328413284133, "grad_norm": 0.09822703152894974, "learning_rate": 7.159920522282146e-05, "loss": 0.005338317155838013, "step": 100100 }, { "epoch": 28.41612262276469, "grad_norm": 0.4414235055446625, "learning_rate": 7.15963667328981e-05, "loss": 0.008624079823493957, "step": 100110 }, { "epoch": 28.41896111268805, "grad_norm": 0.32682645320892334, "learning_rate": 7.159352824297474e-05, "loss": 0.01851790100336075, "step": 100120 }, { "epoch": 28.42179960261141, "grad_norm": 1.7668160200119019, "learning_rate": 7.159068975305137e-05, "loss": 0.007768867909908295, "step": 100130 }, { "epoch": 28.42463809253477, "grad_norm": 0.9402163028717041, "learning_rate": 7.158785126312801e-05, "loss": 0.007426076382398605, "step": 100140 }, { "epoch": 28.427476582458134, "grad_norm": 6.239004611968994, "learning_rate": 7.158501277320467e-05, "loss": 0.009530752897262573, "step": 100150 }, { "epoch": 28.430315072381493, "grad_norm": 0.8270184993743896, "learning_rate": 7.15821742832813e-05, "loss": 0.003387078642845154, "step": 100160 }, { "epoch": 28.433153562304852, "grad_norm": 0.18614846467971802, "learning_rate": 7.157933579335794e-05, "loss": 0.006466030329465866, "step": 100170 }, { "epoch": 28.435992052228215, "grad_norm": 2.7785985469818115, "learning_rate": 7.157649730343458e-05, "loss": 0.008332522958517075, "step": 100180 }, { "epoch": 28.438830542151575, "grad_norm": 2.2295541763305664, "learning_rate": 7.157365881351122e-05, "loss": 0.009047508984804154, "step": 100190 }, { "epoch": 28.441669032074937, "grad_norm": 1.593537449836731, "learning_rate": 7.157082032358785e-05, "loss": 0.005991016328334808, "step": 100200 }, { "epoch": 28.444507521998297, "grad_norm": 1.1984152793884277, "learning_rate": 7.15679818336645e-05, "loss": 0.005390486493706703, "step": 100210 }, { "epoch": 28.447346011921656, "grad_norm": 6.124396324157715, "learning_rate": 7.156514334374114e-05, "loss": 0.013970617949962617, "step": 100220 }, { "epoch": 28.45018450184502, "grad_norm": 0.9215695858001709, "learning_rate": 7.156230485381777e-05, "loss": 0.008484168350696564, "step": 100230 }, { "epoch": 28.45302299176838, "grad_norm": 1.5122841596603394, "learning_rate": 7.155946636389441e-05, "loss": 0.007346770912408829, "step": 100240 }, { "epoch": 28.45586148169174, "grad_norm": 1.0301827192306519, "learning_rate": 7.155662787397105e-05, "loss": 0.007182473689317704, "step": 100250 }, { "epoch": 28.4586999716151, "grad_norm": 9.288808822631836, "learning_rate": 7.155378938404768e-05, "loss": 0.007451946288347245, "step": 100260 }, { "epoch": 28.46153846153846, "grad_norm": 15.230838775634766, "learning_rate": 7.155095089412432e-05, "loss": 0.0064343571662902836, "step": 100270 }, { "epoch": 28.464376951461823, "grad_norm": 4.403883457183838, "learning_rate": 7.154811240420098e-05, "loss": 0.01571103185415268, "step": 100280 }, { "epoch": 28.467215441385182, "grad_norm": 4.18380069732666, "learning_rate": 7.15452739142776e-05, "loss": 0.010106173902750015, "step": 100290 }, { "epoch": 28.470053931308545, "grad_norm": 0.986223042011261, "learning_rate": 7.154243542435425e-05, "loss": 0.01157679408788681, "step": 100300 }, { "epoch": 28.472892421231904, "grad_norm": 0.552253782749176, "learning_rate": 7.153959693443089e-05, "loss": 0.019702553749084473, "step": 100310 }, { "epoch": 28.475730911155264, "grad_norm": 1.2180366516113281, "learning_rate": 7.153675844450753e-05, "loss": 0.009517708420753479, "step": 100320 }, { "epoch": 28.478569401078627, "grad_norm": 1.1200079917907715, "learning_rate": 7.153391995458416e-05, "loss": 0.00806499570608139, "step": 100330 }, { "epoch": 28.481407891001986, "grad_norm": 0.4424768388271332, "learning_rate": 7.15310814646608e-05, "loss": 0.004847056791186332, "step": 100340 }, { "epoch": 28.48424638092535, "grad_norm": 10.234271049499512, "learning_rate": 7.152824297473744e-05, "loss": 0.017241856455802916, "step": 100350 }, { "epoch": 28.487084870848708, "grad_norm": 17.503644943237305, "learning_rate": 7.152540448481408e-05, "loss": 0.020249982178211213, "step": 100360 }, { "epoch": 28.48992336077207, "grad_norm": 0.2745189666748047, "learning_rate": 7.152256599489072e-05, "loss": 0.008146333694458007, "step": 100370 }, { "epoch": 28.49276185069543, "grad_norm": 0.2709268629550934, "learning_rate": 7.151972750496736e-05, "loss": 0.00816885232925415, "step": 100380 }, { "epoch": 28.49560034061879, "grad_norm": 8.198881149291992, "learning_rate": 7.151688901504399e-05, "loss": 0.006383834779262543, "step": 100390 }, { "epoch": 28.498438830542153, "grad_norm": 0.42209938168525696, "learning_rate": 7.151405052512063e-05, "loss": 0.013410773873329163, "step": 100400 }, { "epoch": 28.501277320465512, "grad_norm": 0.7425680756568909, "learning_rate": 7.151121203519729e-05, "loss": 0.009715896099805832, "step": 100410 }, { "epoch": 28.504115810388875, "grad_norm": 4.963202476501465, "learning_rate": 7.150837354527392e-05, "loss": 0.00965944305062294, "step": 100420 }, { "epoch": 28.506954300312234, "grad_norm": 1.4698773622512817, "learning_rate": 7.150553505535056e-05, "loss": 0.003851177915930748, "step": 100430 }, { "epoch": 28.509792790235593, "grad_norm": 2.54487681388855, "learning_rate": 7.15026965654272e-05, "loss": 0.004846449196338654, "step": 100440 }, { "epoch": 28.512631280158956, "grad_norm": 0.319017618894577, "learning_rate": 7.149985807550383e-05, "loss": 0.005213583633303642, "step": 100450 }, { "epoch": 28.515469770082316, "grad_norm": 0.5422877669334412, "learning_rate": 7.149701958558047e-05, "loss": 0.02071237862110138, "step": 100460 }, { "epoch": 28.51830826000568, "grad_norm": 0.8717231154441833, "learning_rate": 7.149418109565711e-05, "loss": 0.020321384072303772, "step": 100470 }, { "epoch": 28.521146749929038, "grad_norm": 0.016573797911405563, "learning_rate": 7.149134260573375e-05, "loss": 0.014075252413749694, "step": 100480 }, { "epoch": 28.523985239852397, "grad_norm": 0.07129339128732681, "learning_rate": 7.148850411581039e-05, "loss": 0.012800510227680206, "step": 100490 }, { "epoch": 28.52682372977576, "grad_norm": 0.4059860408306122, "learning_rate": 7.148566562588703e-05, "loss": 0.010989009588956832, "step": 100500 }, { "epoch": 28.52682372977576, "eval_accuracy": 0.9732307496661792, "eval_loss": 0.08732210099697113, "eval_runtime": 31.7492, "eval_samples_per_second": 495.351, "eval_steps_per_second": 7.748, "step": 100500 }, { "epoch": 28.52966221969912, "grad_norm": 19.60007667541504, "learning_rate": 7.148282713596368e-05, "loss": 0.019346103072166443, "step": 100510 }, { "epoch": 28.532500709622482, "grad_norm": 1.2773470878601074, "learning_rate": 7.14799886460403e-05, "loss": 0.005837064608931542, "step": 100520 }, { "epoch": 28.53533919954584, "grad_norm": 7.744444847106934, "learning_rate": 7.147715015611694e-05, "loss": 0.005224072188138962, "step": 100530 }, { "epoch": 28.5381776894692, "grad_norm": 12.79071044921875, "learning_rate": 7.147431166619359e-05, "loss": 0.018161511421203612, "step": 100540 }, { "epoch": 28.541016179392564, "grad_norm": 0.3449995517730713, "learning_rate": 7.147147317627023e-05, "loss": 0.0075508542358875275, "step": 100550 }, { "epoch": 28.543854669315923, "grad_norm": 0.908300518989563, "learning_rate": 7.146863468634687e-05, "loss": 0.004600193724036217, "step": 100560 }, { "epoch": 28.546693159239286, "grad_norm": 14.161280632019043, "learning_rate": 7.146579619642351e-05, "loss": 0.01685826778411865, "step": 100570 }, { "epoch": 28.549531649162645, "grad_norm": 0.5036726593971252, "learning_rate": 7.146295770650014e-05, "loss": 0.009659422188997268, "step": 100580 }, { "epoch": 28.552370139086005, "grad_norm": 3.546938419342041, "learning_rate": 7.146011921657678e-05, "loss": 0.016599640250205994, "step": 100590 }, { "epoch": 28.555208629009368, "grad_norm": 4.862947463989258, "learning_rate": 7.145728072665342e-05, "loss": 0.01475844979286194, "step": 100600 }, { "epoch": 28.558047118932727, "grad_norm": 2.6555538177490234, "learning_rate": 7.145444223673006e-05, "loss": 0.01906416714191437, "step": 100610 }, { "epoch": 28.56088560885609, "grad_norm": 0.3337307274341583, "learning_rate": 7.14516037468067e-05, "loss": 0.003660879284143448, "step": 100620 }, { "epoch": 28.56372409877945, "grad_norm": 0.8337148427963257, "learning_rate": 7.144876525688334e-05, "loss": 0.0020680509507656096, "step": 100630 }, { "epoch": 28.56656258870281, "grad_norm": 5.964834690093994, "learning_rate": 7.144592676695999e-05, "loss": 0.00949869304895401, "step": 100640 }, { "epoch": 28.56940107862617, "grad_norm": 1.5884510278701782, "learning_rate": 7.144308827703661e-05, "loss": 0.004462628811597824, "step": 100650 }, { "epoch": 28.57223956854953, "grad_norm": 0.7103903293609619, "learning_rate": 7.144024978711326e-05, "loss": 0.0022290127351880075, "step": 100660 }, { "epoch": 28.575078058472894, "grad_norm": 9.035968780517578, "learning_rate": 7.14374112971899e-05, "loss": 0.00630137026309967, "step": 100670 }, { "epoch": 28.577916548396253, "grad_norm": 7.934122562408447, "learning_rate": 7.143457280726654e-05, "loss": 0.008252602070569992, "step": 100680 }, { "epoch": 28.580755038319612, "grad_norm": 7.2589616775512695, "learning_rate": 7.143173431734318e-05, "loss": 0.012956231832504272, "step": 100690 }, { "epoch": 28.583593528242975, "grad_norm": 5.236428260803223, "learning_rate": 7.142889582741982e-05, "loss": 0.007449489831924438, "step": 100700 }, { "epoch": 28.586432018166335, "grad_norm": 0.2123892903327942, "learning_rate": 7.142605733749645e-05, "loss": 0.01028297021985054, "step": 100710 }, { "epoch": 28.589270508089697, "grad_norm": 9.255849838256836, "learning_rate": 7.142321884757309e-05, "loss": 0.021558667719364166, "step": 100720 }, { "epoch": 28.592108998013057, "grad_norm": 0.5491239428520203, "learning_rate": 7.142038035764973e-05, "loss": 0.007042942941188813, "step": 100730 }, { "epoch": 28.594947487936416, "grad_norm": 5.788693904876709, "learning_rate": 7.141754186772637e-05, "loss": 0.005673841387033462, "step": 100740 }, { "epoch": 28.59778597785978, "grad_norm": 7.073646545410156, "learning_rate": 7.141470337780301e-05, "loss": 0.02442290186882019, "step": 100750 }, { "epoch": 28.60062446778314, "grad_norm": 0.08729052543640137, "learning_rate": 7.141186488787966e-05, "loss": 0.011055664718151092, "step": 100760 }, { "epoch": 28.6034629577065, "grad_norm": 6.922945022583008, "learning_rate": 7.14090263979563e-05, "loss": 0.017723338305950166, "step": 100770 }, { "epoch": 28.60630144762986, "grad_norm": 1.4177234172821045, "learning_rate": 7.140618790803292e-05, "loss": 0.0036864906549453737, "step": 100780 }, { "epoch": 28.609139937553223, "grad_norm": 5.284659385681152, "learning_rate": 7.140334941810957e-05, "loss": 0.007224171608686447, "step": 100790 }, { "epoch": 28.611978427476583, "grad_norm": 1.7650071382522583, "learning_rate": 7.140051092818621e-05, "loss": 0.0037026748061180113, "step": 100800 }, { "epoch": 28.614816917399942, "grad_norm": 2.2681374549865723, "learning_rate": 7.139767243826285e-05, "loss": 0.011683833599090577, "step": 100810 }, { "epoch": 28.617655407323305, "grad_norm": 7.36200475692749, "learning_rate": 7.139483394833949e-05, "loss": 0.015427130460739135, "step": 100820 }, { "epoch": 28.620493897246664, "grad_norm": 4.682276248931885, "learning_rate": 7.139199545841613e-05, "loss": 0.011912651360034943, "step": 100830 }, { "epoch": 28.623332387170027, "grad_norm": 0.2080886960029602, "learning_rate": 7.138915696849276e-05, "loss": 0.011899788677692414, "step": 100840 }, { "epoch": 28.626170877093386, "grad_norm": 1.6055643558502197, "learning_rate": 7.13863184785694e-05, "loss": 0.021594719588756563, "step": 100850 }, { "epoch": 28.629009367016746, "grad_norm": 0.9615247249603271, "learning_rate": 7.138347998864604e-05, "loss": 0.014049062132835388, "step": 100860 }, { "epoch": 28.63184785694011, "grad_norm": 0.36892643570899963, "learning_rate": 7.138064149872268e-05, "loss": 0.005364476144313813, "step": 100870 }, { "epoch": 28.634686346863468, "grad_norm": 8.04882526397705, "learning_rate": 7.137780300879932e-05, "loss": 0.010923314839601517, "step": 100880 }, { "epoch": 28.63752483678683, "grad_norm": 0.1690046489238739, "learning_rate": 7.137496451887597e-05, "loss": 0.01091085970401764, "step": 100890 }, { "epoch": 28.64036332671019, "grad_norm": 0.24973492324352264, "learning_rate": 7.137212602895261e-05, "loss": 0.007949941605329514, "step": 100900 }, { "epoch": 28.64320181663355, "grad_norm": 0.97822105884552, "learning_rate": 7.136928753902924e-05, "loss": 0.01410357654094696, "step": 100910 }, { "epoch": 28.646040306556912, "grad_norm": 3.3417561054229736, "learning_rate": 7.136644904910588e-05, "loss": 0.015219548344612121, "step": 100920 }, { "epoch": 28.648878796480272, "grad_norm": 8.121927261352539, "learning_rate": 7.136361055918252e-05, "loss": 0.016392844915390014, "step": 100930 }, { "epoch": 28.651717286403635, "grad_norm": 8.13321304321289, "learning_rate": 7.136077206925916e-05, "loss": 0.017541772127151488, "step": 100940 }, { "epoch": 28.654555776326994, "grad_norm": 1.1033810377120972, "learning_rate": 7.13579335793358e-05, "loss": 0.015376982092857362, "step": 100950 }, { "epoch": 28.657394266250353, "grad_norm": 0.8146872520446777, "learning_rate": 7.135509508941244e-05, "loss": 0.013777221739292144, "step": 100960 }, { "epoch": 28.660232756173716, "grad_norm": 0.2390243113040924, "learning_rate": 7.135225659948907e-05, "loss": 0.005009932443499565, "step": 100970 }, { "epoch": 28.663071246097076, "grad_norm": 10.41771411895752, "learning_rate": 7.134941810956571e-05, "loss": 0.03552972376346588, "step": 100980 }, { "epoch": 28.66590973602044, "grad_norm": 3.416131019592285, "learning_rate": 7.134657961964235e-05, "loss": 0.00991942435503006, "step": 100990 }, { "epoch": 28.668748225943798, "grad_norm": 0.5241124629974365, "learning_rate": 7.1343741129719e-05, "loss": 0.01166784092783928, "step": 101000 }, { "epoch": 28.668748225943798, "eval_accuracy": 0.9717047116423985, "eval_loss": 0.09747762233018875, "eval_runtime": 31.9656, "eval_samples_per_second": 491.997, "eval_steps_per_second": 7.696, "step": 101000 }, { "epoch": 28.671586715867157, "grad_norm": 0.1518595665693283, "learning_rate": 7.134090263979564e-05, "loss": 0.007452286034822464, "step": 101010 }, { "epoch": 28.67442520579052, "grad_norm": 12.517583847045898, "learning_rate": 7.133806414987228e-05, "loss": 0.01999170035123825, "step": 101020 }, { "epoch": 28.67726369571388, "grad_norm": 4.50225305557251, "learning_rate": 7.133522565994892e-05, "loss": 0.01329212188720703, "step": 101030 }, { "epoch": 28.680102185637242, "grad_norm": 0.20579178631305695, "learning_rate": 7.133238717002555e-05, "loss": 0.010350432991981507, "step": 101040 }, { "epoch": 28.6829406755606, "grad_norm": 2.6723058223724365, "learning_rate": 7.132954868010219e-05, "loss": 0.0059473149478435515, "step": 101050 }, { "epoch": 28.68577916548396, "grad_norm": 2.880376100540161, "learning_rate": 7.132671019017883e-05, "loss": 0.007254064828157425, "step": 101060 }, { "epoch": 28.688617655407324, "grad_norm": 2.1201629638671875, "learning_rate": 7.132387170025546e-05, "loss": 0.004968840256333351, "step": 101070 }, { "epoch": 28.691456145330683, "grad_norm": 4.568824291229248, "learning_rate": 7.132103321033211e-05, "loss": 0.008316630125045776, "step": 101080 }, { "epoch": 28.694294635254046, "grad_norm": 0.46482187509536743, "learning_rate": 7.131819472040875e-05, "loss": 0.009304873645305634, "step": 101090 }, { "epoch": 28.697133125177405, "grad_norm": 1.7081698179244995, "learning_rate": 7.131535623048538e-05, "loss": 0.003351518139243126, "step": 101100 }, { "epoch": 28.699971615100765, "grad_norm": 0.5771971940994263, "learning_rate": 7.131251774056202e-05, "loss": 0.004367371648550033, "step": 101110 }, { "epoch": 28.702810105024128, "grad_norm": 2.7055492401123047, "learning_rate": 7.130967925063866e-05, "loss": 0.003940864652395249, "step": 101120 }, { "epoch": 28.705648594947487, "grad_norm": 0.07461835443973541, "learning_rate": 7.13068407607153e-05, "loss": 0.007275979220867157, "step": 101130 }, { "epoch": 28.70848708487085, "grad_norm": 0.7802384495735168, "learning_rate": 7.130400227079195e-05, "loss": 0.005133271589875222, "step": 101140 }, { "epoch": 28.71132557479421, "grad_norm": 6.766273498535156, "learning_rate": 7.130116378086859e-05, "loss": 0.010099205374717712, "step": 101150 }, { "epoch": 28.714164064717572, "grad_norm": 0.2780701518058777, "learning_rate": 7.129832529094523e-05, "loss": 0.0015162268653512, "step": 101160 }, { "epoch": 28.71700255464093, "grad_norm": 0.09015641361474991, "learning_rate": 7.129548680102186e-05, "loss": 0.00640840083360672, "step": 101170 }, { "epoch": 28.71984104456429, "grad_norm": 0.17100796103477478, "learning_rate": 7.12926483110985e-05, "loss": 0.005519610643386841, "step": 101180 }, { "epoch": 28.722679534487654, "grad_norm": 3.5195822715759277, "learning_rate": 7.128980982117514e-05, "loss": 0.005591012537479401, "step": 101190 }, { "epoch": 28.725518024411013, "grad_norm": 11.963899612426758, "learning_rate": 7.128697133125177e-05, "loss": 0.006942339241504669, "step": 101200 }, { "epoch": 28.728356514334376, "grad_norm": 1.07952082157135, "learning_rate": 7.128413284132842e-05, "loss": 0.005664400011301041, "step": 101210 }, { "epoch": 28.731195004257735, "grad_norm": 6.971432209014893, "learning_rate": 7.128129435140506e-05, "loss": 0.004131811484694481, "step": 101220 }, { "epoch": 28.734033494181094, "grad_norm": 5.83760929107666, "learning_rate": 7.127845586148169e-05, "loss": 0.008218763768672943, "step": 101230 }, { "epoch": 28.736871984104457, "grad_norm": 6.275095462799072, "learning_rate": 7.127561737155833e-05, "loss": 0.008962002396583558, "step": 101240 }, { "epoch": 28.739710474027817, "grad_norm": 11.295071601867676, "learning_rate": 7.127277888163497e-05, "loss": 0.01842151880264282, "step": 101250 }, { "epoch": 28.74254896395118, "grad_norm": 1.6877329349517822, "learning_rate": 7.126994039171162e-05, "loss": 0.006018535047769546, "step": 101260 }, { "epoch": 28.74538745387454, "grad_norm": 7.773751735687256, "learning_rate": 7.126710190178824e-05, "loss": 0.02105031907558441, "step": 101270 }, { "epoch": 28.748225943797898, "grad_norm": 1.135111927986145, "learning_rate": 7.12642634118649e-05, "loss": 0.008589871972799302, "step": 101280 }, { "epoch": 28.75106443372126, "grad_norm": 1.116197109222412, "learning_rate": 7.126142492194153e-05, "loss": 0.0016592826694250106, "step": 101290 }, { "epoch": 28.75390292364462, "grad_norm": 1.7515496015548706, "learning_rate": 7.125858643201817e-05, "loss": 0.006704209744930268, "step": 101300 }, { "epoch": 28.756741413567983, "grad_norm": 0.12796032428741455, "learning_rate": 7.125574794209481e-05, "loss": 0.0037404831498861315, "step": 101310 }, { "epoch": 28.759579903491343, "grad_norm": 1.580637812614441, "learning_rate": 7.125290945217145e-05, "loss": 0.004011278599500656, "step": 101320 }, { "epoch": 28.762418393414702, "grad_norm": 0.2218305468559265, "learning_rate": 7.125007096224808e-05, "loss": 0.015618444979190826, "step": 101330 }, { "epoch": 28.765256883338065, "grad_norm": 0.4241727590560913, "learning_rate": 7.124723247232473e-05, "loss": 0.010917829722166062, "step": 101340 }, { "epoch": 28.768095373261424, "grad_norm": 12.797643661499023, "learning_rate": 7.124439398240137e-05, "loss": 0.017390364408493043, "step": 101350 }, { "epoch": 28.770933863184787, "grad_norm": 0.7979776263237, "learning_rate": 7.1241555492478e-05, "loss": 0.006009407341480255, "step": 101360 }, { "epoch": 28.773772353108146, "grad_norm": 5.209594249725342, "learning_rate": 7.123871700255464e-05, "loss": 0.006633139401674271, "step": 101370 }, { "epoch": 28.776610843031506, "grad_norm": 0.2976085841655731, "learning_rate": 7.123587851263129e-05, "loss": 0.02513929009437561, "step": 101380 }, { "epoch": 28.77944933295487, "grad_norm": 0.3071852922439575, "learning_rate": 7.123304002270791e-05, "loss": 0.011713600158691407, "step": 101390 }, { "epoch": 28.782287822878228, "grad_norm": 0.08558647334575653, "learning_rate": 7.123020153278455e-05, "loss": 0.010322259366512298, "step": 101400 }, { "epoch": 28.78512631280159, "grad_norm": 0.4721450209617615, "learning_rate": 7.122736304286121e-05, "loss": 0.009536734223365784, "step": 101410 }, { "epoch": 28.78796480272495, "grad_norm": 1.1148145198822021, "learning_rate": 7.122452455293784e-05, "loss": 0.013054947555065154, "step": 101420 }, { "epoch": 28.79080329264831, "grad_norm": 7.7433576583862305, "learning_rate": 7.122168606301448e-05, "loss": 0.01272178441286087, "step": 101430 }, { "epoch": 28.793641782571672, "grad_norm": 1.0266673564910889, "learning_rate": 7.121884757309112e-05, "loss": 0.009355069696903228, "step": 101440 }, { "epoch": 28.79648027249503, "grad_norm": 1.1132599115371704, "learning_rate": 7.121600908316776e-05, "loss": 0.010987074673175811, "step": 101450 }, { "epoch": 28.799318762418395, "grad_norm": 1.3414323329925537, "learning_rate": 7.121317059324439e-05, "loss": 0.01369931697845459, "step": 101460 }, { "epoch": 28.802157252341754, "grad_norm": 4.0530266761779785, "learning_rate": 7.121033210332103e-05, "loss": 0.005238620936870575, "step": 101470 }, { "epoch": 28.804995742265113, "grad_norm": 0.662348210811615, "learning_rate": 7.120749361339769e-05, "loss": 0.014664453268051148, "step": 101480 }, { "epoch": 28.807834232188476, "grad_norm": 1.7385921478271484, "learning_rate": 7.120465512347431e-05, "loss": 0.002470959722995758, "step": 101490 }, { "epoch": 28.810672722111835, "grad_norm": 1.5477558374404907, "learning_rate": 7.120181663355095e-05, "loss": 0.022483333945274353, "step": 101500 }, { "epoch": 28.810672722111835, "eval_accuracy": 0.9665543333121384, "eval_loss": 0.11084696650505066, "eval_runtime": 32.0512, "eval_samples_per_second": 490.683, "eval_steps_per_second": 7.675, "step": 101500 }, { "epoch": 28.8135112120352, "grad_norm": 11.907801628112793, "learning_rate": 7.11989781436276e-05, "loss": 0.02277732938528061, "step": 101510 }, { "epoch": 28.816349701958558, "grad_norm": 0.2694256007671356, "learning_rate": 7.119613965370422e-05, "loss": 0.0041971862316131595, "step": 101520 }, { "epoch": 28.81918819188192, "grad_norm": 1.632904291152954, "learning_rate": 7.119330116378087e-05, "loss": 0.0043401304632425305, "step": 101530 }, { "epoch": 28.82202668180528, "grad_norm": 3.6642508506774902, "learning_rate": 7.119046267385752e-05, "loss": 0.007130427658557892, "step": 101540 }, { "epoch": 28.82486517172864, "grad_norm": 1.3708314895629883, "learning_rate": 7.118762418393415e-05, "loss": 0.007728991657495498, "step": 101550 }, { "epoch": 28.827703661652002, "grad_norm": 3.4043214321136475, "learning_rate": 7.118478569401079e-05, "loss": 0.00688321590423584, "step": 101560 }, { "epoch": 28.83054215157536, "grad_norm": 3.7715492248535156, "learning_rate": 7.118194720408743e-05, "loss": 0.014467497169971467, "step": 101570 }, { "epoch": 28.833380641498724, "grad_norm": 1.9979052543640137, "learning_rate": 7.117910871416407e-05, "loss": 0.02484259903430939, "step": 101580 }, { "epoch": 28.836219131422084, "grad_norm": 1.8113259077072144, "learning_rate": 7.11762702242407e-05, "loss": 0.008798358589410782, "step": 101590 }, { "epoch": 28.839057621345443, "grad_norm": 0.44838765263557434, "learning_rate": 7.117343173431734e-05, "loss": 0.00958983451128006, "step": 101600 }, { "epoch": 28.841896111268806, "grad_norm": 9.561324119567871, "learning_rate": 7.1170593244394e-05, "loss": 0.015247395634651184, "step": 101610 }, { "epoch": 28.844734601192165, "grad_norm": 8.192377090454102, "learning_rate": 7.116775475447062e-05, "loss": 0.01535978764295578, "step": 101620 }, { "epoch": 28.847573091115528, "grad_norm": 1.6025258302688599, "learning_rate": 7.116491626454727e-05, "loss": 0.013985240459442138, "step": 101630 }, { "epoch": 28.850411581038887, "grad_norm": 2.9541168212890625, "learning_rate": 7.116207777462391e-05, "loss": 0.004245470836758613, "step": 101640 }, { "epoch": 28.853250070962247, "grad_norm": 4.92551851272583, "learning_rate": 7.115923928470053e-05, "loss": 0.016747429966926575, "step": 101650 }, { "epoch": 28.85608856088561, "grad_norm": 0.7903095483779907, "learning_rate": 7.115640079477718e-05, "loss": 0.010327613353729248, "step": 101660 }, { "epoch": 28.85892705080897, "grad_norm": 6.856456756591797, "learning_rate": 7.115356230485382e-05, "loss": 0.01973172426223755, "step": 101670 }, { "epoch": 28.861765540732332, "grad_norm": 6.61758279800415, "learning_rate": 7.115072381493046e-05, "loss": 0.010151723772287369, "step": 101680 }, { "epoch": 28.86460403065569, "grad_norm": 0.915427029132843, "learning_rate": 7.11478853250071e-05, "loss": 0.001871420256793499, "step": 101690 }, { "epoch": 28.86744252057905, "grad_norm": 2.4556338787078857, "learning_rate": 7.114504683508374e-05, "loss": 0.02142704576253891, "step": 101700 }, { "epoch": 28.870281010502413, "grad_norm": 0.20605270564556122, "learning_rate": 7.114220834516038e-05, "loss": 0.013202217221260071, "step": 101710 }, { "epoch": 28.873119500425773, "grad_norm": 20.19638442993164, "learning_rate": 7.113936985523701e-05, "loss": 0.03857223391532898, "step": 101720 }, { "epoch": 28.875957990349136, "grad_norm": 2.921515941619873, "learning_rate": 7.113653136531365e-05, "loss": 0.0032826699316501617, "step": 101730 }, { "epoch": 28.878796480272495, "grad_norm": 0.9001482129096985, "learning_rate": 7.113369287539031e-05, "loss": 0.003273313492536545, "step": 101740 }, { "epoch": 28.881634970195854, "grad_norm": 4.514135360717773, "learning_rate": 7.113085438546693e-05, "loss": 0.013035798072814941, "step": 101750 }, { "epoch": 28.884473460119217, "grad_norm": 0.3059605062007904, "learning_rate": 7.112801589554358e-05, "loss": 0.01014857441186905, "step": 101760 }, { "epoch": 28.887311950042577, "grad_norm": 0.38479700684547424, "learning_rate": 7.112517740562022e-05, "loss": 0.018496225774288177, "step": 101770 }, { "epoch": 28.89015043996594, "grad_norm": 0.8560890555381775, "learning_rate": 7.112233891569685e-05, "loss": 0.004734523594379425, "step": 101780 }, { "epoch": 28.8929889298893, "grad_norm": 2.4990475177764893, "learning_rate": 7.111950042577349e-05, "loss": 0.01229851096868515, "step": 101790 }, { "epoch": 28.895827419812658, "grad_norm": 6.35207462310791, "learning_rate": 7.111666193585013e-05, "loss": 0.016225507855415343, "step": 101800 }, { "epoch": 28.89866590973602, "grad_norm": 0.111829474568367, "learning_rate": 7.111410729491911e-05, "loss": 0.0226533904671669, "step": 101810 }, { "epoch": 28.90150439965938, "grad_norm": 3.295138120651245, "learning_rate": 7.111126880499575e-05, "loss": 0.011834307760000228, "step": 101820 }, { "epoch": 28.904342889582743, "grad_norm": 0.06354343891143799, "learning_rate": 7.110843031507238e-05, "loss": 0.016262929141521453, "step": 101830 }, { "epoch": 28.907181379506103, "grad_norm": 5.150362014770508, "learning_rate": 7.110559182514902e-05, "loss": 0.0048189818859100345, "step": 101840 }, { "epoch": 28.910019869429462, "grad_norm": 3.729504108428955, "learning_rate": 7.110275333522566e-05, "loss": 0.009907587617635726, "step": 101850 }, { "epoch": 28.912858359352825, "grad_norm": 0.05235784873366356, "learning_rate": 7.10999148453023e-05, "loss": 0.006338720023632049, "step": 101860 }, { "epoch": 28.915696849276184, "grad_norm": 1.9165979623794556, "learning_rate": 7.109707635537894e-05, "loss": 0.0049846600741147995, "step": 101870 }, { "epoch": 28.918535339199547, "grad_norm": 0.6494324803352356, "learning_rate": 7.109423786545558e-05, "loss": 0.008502402901649475, "step": 101880 }, { "epoch": 28.921373829122906, "grad_norm": 2.0666415691375732, "learning_rate": 7.109139937553223e-05, "loss": 0.00657552108168602, "step": 101890 }, { "epoch": 28.92421231904627, "grad_norm": 5.774878025054932, "learning_rate": 7.108856088560885e-05, "loss": 0.015758527815341948, "step": 101900 }, { "epoch": 28.92705080896963, "grad_norm": 11.554808616638184, "learning_rate": 7.10857223956855e-05, "loss": 0.004877807945013047, "step": 101910 }, { "epoch": 28.929889298892988, "grad_norm": 0.165033757686615, "learning_rate": 7.108288390576215e-05, "loss": 0.007636648416519165, "step": 101920 }, { "epoch": 28.93272778881635, "grad_norm": 0.917658805847168, "learning_rate": 7.108004541583878e-05, "loss": 0.004572440683841705, "step": 101930 }, { "epoch": 28.93556627873971, "grad_norm": 0.5313619375228882, "learning_rate": 7.107720692591542e-05, "loss": 0.010018911957740784, "step": 101940 }, { "epoch": 28.93840476866307, "grad_norm": 0.20553848147392273, "learning_rate": 7.107436843599206e-05, "loss": 0.005402557551860809, "step": 101950 }, { "epoch": 28.941243258586432, "grad_norm": 0.5075179934501648, "learning_rate": 7.107152994606869e-05, "loss": 0.015724650025367735, "step": 101960 }, { "epoch": 28.94408174850979, "grad_norm": 1.000664472579956, "learning_rate": 7.106869145614533e-05, "loss": 0.010566221922636032, "step": 101970 }, { "epoch": 28.946920238433155, "grad_norm": 5.444941520690918, "learning_rate": 7.106585296622197e-05, "loss": 0.0061668016016483305, "step": 101980 }, { "epoch": 28.949758728356514, "grad_norm": 3.700824737548828, "learning_rate": 7.106301447629861e-05, "loss": 0.0048742715269327165, "step": 101990 }, { "epoch": 28.952597218279877, "grad_norm": 0.37488502264022827, "learning_rate": 7.106017598637525e-05, "loss": 0.014347702264785767, "step": 102000 }, { "epoch": 28.952597218279877, "eval_accuracy": 0.9671901824887137, "eval_loss": 0.11473508179187775, "eval_runtime": 32.3327, "eval_samples_per_second": 486.411, "eval_steps_per_second": 7.608, "step": 102000 }, { "epoch": 28.955435708203236, "grad_norm": 2.416783094406128, "learning_rate": 7.10573374964519e-05, "loss": 0.010539916902780532, "step": 102010 }, { "epoch": 28.958274198126595, "grad_norm": 1.0425323247909546, "learning_rate": 7.105449900652854e-05, "loss": 0.012587831914424896, "step": 102020 }, { "epoch": 28.96111268804996, "grad_norm": 0.219413623213768, "learning_rate": 7.105166051660516e-05, "loss": 0.005184582248330116, "step": 102030 }, { "epoch": 28.963951177973318, "grad_norm": 0.34706273674964905, "learning_rate": 7.10488220266818e-05, "loss": 0.007255873084068299, "step": 102040 }, { "epoch": 28.96678966789668, "grad_norm": 0.772415280342102, "learning_rate": 7.104598353675845e-05, "loss": 0.023017299175262452, "step": 102050 }, { "epoch": 28.96962815782004, "grad_norm": 0.8125987648963928, "learning_rate": 7.104314504683509e-05, "loss": 0.0028581274673342705, "step": 102060 }, { "epoch": 28.9724666477434, "grad_norm": 0.9142124056816101, "learning_rate": 7.104030655691173e-05, "loss": 0.007784959673881531, "step": 102070 }, { "epoch": 28.975305137666762, "grad_norm": 3.547302484512329, "learning_rate": 7.103746806698837e-05, "loss": 0.0034934565424919128, "step": 102080 }, { "epoch": 28.97814362759012, "grad_norm": 3.9685609340667725, "learning_rate": 7.1034629577065e-05, "loss": 0.007599274069070816, "step": 102090 }, { "epoch": 28.980982117513484, "grad_norm": 1.5260392427444458, "learning_rate": 7.103179108714164e-05, "loss": 0.010606472939252853, "step": 102100 }, { "epoch": 28.983820607436844, "grad_norm": 1.6723148822784424, "learning_rate": 7.102895259721828e-05, "loss": 0.007351754605770111, "step": 102110 }, { "epoch": 28.986659097360203, "grad_norm": 5.7239484786987305, "learning_rate": 7.102611410729492e-05, "loss": 0.005101994425058365, "step": 102120 }, { "epoch": 28.989497587283566, "grad_norm": 0.31368595361709595, "learning_rate": 7.102327561737156e-05, "loss": 0.006621608138084411, "step": 102130 }, { "epoch": 28.992336077206925, "grad_norm": 1.766554355621338, "learning_rate": 7.10204371274482e-05, "loss": 0.014098387956619263, "step": 102140 }, { "epoch": 28.995174567130288, "grad_norm": 0.29521605372428894, "learning_rate": 7.101759863752485e-05, "loss": 0.008170235157012939, "step": 102150 }, { "epoch": 28.998013057053647, "grad_norm": 0.7069188952445984, "learning_rate": 7.101476014760147e-05, "loss": 0.008200228959321976, "step": 102160 }, { "epoch": 29.000851546977007, "grad_norm": 0.9918931126594543, "learning_rate": 7.101192165767812e-05, "loss": 0.0060219407081604, "step": 102170 }, { "epoch": 29.00369003690037, "grad_norm": 0.28363242745399475, "learning_rate": 7.100908316775476e-05, "loss": 0.010178916901350022, "step": 102180 }, { "epoch": 29.00652852682373, "grad_norm": 0.2931966185569763, "learning_rate": 7.10062446778314e-05, "loss": 0.007821349799633026, "step": 102190 }, { "epoch": 29.009367016747092, "grad_norm": 0.5726573467254639, "learning_rate": 7.100340618790804e-05, "loss": 0.0022911567240953444, "step": 102200 }, { "epoch": 29.01220550667045, "grad_norm": 0.4696698784828186, "learning_rate": 7.100056769798468e-05, "loss": 0.0012418501079082488, "step": 102210 }, { "epoch": 29.01504399659381, "grad_norm": 8.253231048583984, "learning_rate": 7.099772920806131e-05, "loss": 0.006936902552843094, "step": 102220 }, { "epoch": 29.017882486517173, "grad_norm": 0.22573336958885193, "learning_rate": 7.099489071813795e-05, "loss": 0.0032431773841381075, "step": 102230 }, { "epoch": 29.020720976440533, "grad_norm": 1.0605664253234863, "learning_rate": 7.099205222821459e-05, "loss": 0.007231764495372772, "step": 102240 }, { "epoch": 29.023559466363896, "grad_norm": 0.14189422130584717, "learning_rate": 7.098921373829123e-05, "loss": 0.0010597817599773407, "step": 102250 }, { "epoch": 29.026397956287255, "grad_norm": 0.19582737982273102, "learning_rate": 7.098637524836788e-05, "loss": 0.0017875434830784797, "step": 102260 }, { "epoch": 29.029236446210614, "grad_norm": 0.40438878536224365, "learning_rate": 7.098353675844452e-05, "loss": 0.0010043013840913772, "step": 102270 }, { "epoch": 29.032074936133977, "grad_norm": 9.799649238586426, "learning_rate": 7.098069826852114e-05, "loss": 0.006715791672468186, "step": 102280 }, { "epoch": 29.034913426057336, "grad_norm": 0.307248055934906, "learning_rate": 7.097785977859779e-05, "loss": 0.0010018324479460715, "step": 102290 }, { "epoch": 29.0377519159807, "grad_norm": 9.453421592712402, "learning_rate": 7.097502128867443e-05, "loss": 0.00893569439649582, "step": 102300 }, { "epoch": 29.04059040590406, "grad_norm": 0.27662575244903564, "learning_rate": 7.097218279875107e-05, "loss": 0.003125188872218132, "step": 102310 }, { "epoch": 29.043428895827418, "grad_norm": 2.2073044776916504, "learning_rate": 7.096934430882771e-05, "loss": 0.009253701567649842, "step": 102320 }, { "epoch": 29.04626738575078, "grad_norm": 0.08398854732513428, "learning_rate": 7.096650581890435e-05, "loss": 0.0016285819932818414, "step": 102330 }, { "epoch": 29.04910587567414, "grad_norm": 0.9051011204719543, "learning_rate": 7.096366732898099e-05, "loss": 0.017110762000083924, "step": 102340 }, { "epoch": 29.051944365597503, "grad_norm": 2.4019415378570557, "learning_rate": 7.096082883905762e-05, "loss": 0.019776442646980287, "step": 102350 }, { "epoch": 29.054782855520862, "grad_norm": 5.2361860275268555, "learning_rate": 7.095799034913426e-05, "loss": 0.007220838963985443, "step": 102360 }, { "epoch": 29.057621345444225, "grad_norm": 1.2093380689620972, "learning_rate": 7.09551518592109e-05, "loss": 0.003241689130663872, "step": 102370 }, { "epoch": 29.060459835367585, "grad_norm": 2.4927449226379395, "learning_rate": 7.095231336928753e-05, "loss": 0.004962768405675888, "step": 102380 }, { "epoch": 29.063298325290944, "grad_norm": 0.18167811632156372, "learning_rate": 7.094947487936419e-05, "loss": 0.016752813756465913, "step": 102390 }, { "epoch": 29.066136815214307, "grad_norm": 1.8891583681106567, "learning_rate": 7.094663638944083e-05, "loss": 0.01641814112663269, "step": 102400 }, { "epoch": 29.068975305137666, "grad_norm": 5.791284084320068, "learning_rate": 7.094379789951746e-05, "loss": 0.00697208121418953, "step": 102410 }, { "epoch": 29.07181379506103, "grad_norm": 13.99333667755127, "learning_rate": 7.09409594095941e-05, "loss": 0.022768333554267883, "step": 102420 }, { "epoch": 29.07465228498439, "grad_norm": 0.8489956259727478, "learning_rate": 7.093812091967074e-05, "loss": 0.024726402759552003, "step": 102430 }, { "epoch": 29.077490774907748, "grad_norm": 7.231731414794922, "learning_rate": 7.093528242974738e-05, "loss": 0.03776673674583435, "step": 102440 }, { "epoch": 29.08032926483111, "grad_norm": 11.298152923583984, "learning_rate": 7.093244393982401e-05, "loss": 0.03368097543716431, "step": 102450 }, { "epoch": 29.08316775475447, "grad_norm": 3.5031540393829346, "learning_rate": 7.092960544990066e-05, "loss": 0.005102306604385376, "step": 102460 }, { "epoch": 29.086006244677833, "grad_norm": 0.5963215827941895, "learning_rate": 7.09267669599773e-05, "loss": 0.004594159126281738, "step": 102470 }, { "epoch": 29.088844734601192, "grad_norm": 9.248827934265137, "learning_rate": 7.092392847005393e-05, "loss": 0.014360295236110687, "step": 102480 }, { "epoch": 29.09168322452455, "grad_norm": 0.13988201320171356, "learning_rate": 7.092108998013057e-05, "loss": 0.0067124530673027035, "step": 102490 }, { "epoch": 29.094521714447914, "grad_norm": 0.7014368176460266, "learning_rate": 7.091825149020721e-05, "loss": 0.003118043951690197, "step": 102500 }, { "epoch": 29.094521714447914, "eval_accuracy": 0.9705601831245628, "eval_loss": 0.09572522342205048, "eval_runtime": 32.2884, "eval_samples_per_second": 487.079, "eval_steps_per_second": 7.619, "step": 102500 }, { "epoch": 29.097360204371274, "grad_norm": 0.26777759194374084, "learning_rate": 7.091541300028384e-05, "loss": 0.00574740469455719, "step": 102510 }, { "epoch": 29.100198694294637, "grad_norm": 0.31693804264068604, "learning_rate": 7.09125745103605e-05, "loss": 0.004652995616197586, "step": 102520 }, { "epoch": 29.103037184217996, "grad_norm": 0.5102849006652832, "learning_rate": 7.090973602043714e-05, "loss": 0.004267404228448868, "step": 102530 }, { "epoch": 29.105875674141355, "grad_norm": 0.8930804133415222, "learning_rate": 7.090689753051377e-05, "loss": 0.013785937428474426, "step": 102540 }, { "epoch": 29.10871416406472, "grad_norm": 0.16777321696281433, "learning_rate": 7.090405904059041e-05, "loss": 0.015562847256660461, "step": 102550 }, { "epoch": 29.111552653988078, "grad_norm": 6.0847086906433105, "learning_rate": 7.090122055066705e-05, "loss": 0.004124090075492859, "step": 102560 }, { "epoch": 29.11439114391144, "grad_norm": 0.2268490344285965, "learning_rate": 7.089838206074369e-05, "loss": 0.005310627818107605, "step": 102570 }, { "epoch": 29.1172296338348, "grad_norm": 3.9104433059692383, "learning_rate": 7.089554357082032e-05, "loss": 0.004322489351034164, "step": 102580 }, { "epoch": 29.12006812375816, "grad_norm": 0.33938488364219666, "learning_rate": 7.089270508089697e-05, "loss": 0.0035197902470827103, "step": 102590 }, { "epoch": 29.122906613681522, "grad_norm": 0.05653811991214752, "learning_rate": 7.088986659097361e-05, "loss": 0.015138925611972808, "step": 102600 }, { "epoch": 29.12574510360488, "grad_norm": 0.1376039683818817, "learning_rate": 7.088702810105024e-05, "loss": 0.006042524427175522, "step": 102610 }, { "epoch": 29.128583593528244, "grad_norm": 0.22564101219177246, "learning_rate": 7.088418961112688e-05, "loss": 0.008459549397230148, "step": 102620 }, { "epoch": 29.131422083451604, "grad_norm": 13.43925666809082, "learning_rate": 7.088135112120352e-05, "loss": 0.012538793683052062, "step": 102630 }, { "epoch": 29.134260573374963, "grad_norm": 8.326543807983398, "learning_rate": 7.087851263128015e-05, "loss": 0.0104373499751091, "step": 102640 }, { "epoch": 29.137099063298326, "grad_norm": 0.09716346114873886, "learning_rate": 7.087567414135681e-05, "loss": 0.009150402992963791, "step": 102650 }, { "epoch": 29.139937553221685, "grad_norm": 4.272698879241943, "learning_rate": 7.087283565143345e-05, "loss": 0.004462076723575592, "step": 102660 }, { "epoch": 29.142776043145048, "grad_norm": 0.21730031073093414, "learning_rate": 7.086999716151008e-05, "loss": 0.007064923644065857, "step": 102670 }, { "epoch": 29.145614533068407, "grad_norm": 9.249872207641602, "learning_rate": 7.086715867158672e-05, "loss": 0.015672376751899718, "step": 102680 }, { "epoch": 29.148453022991767, "grad_norm": 7.5102033615112305, "learning_rate": 7.086432018166336e-05, "loss": 0.004604329168796539, "step": 102690 }, { "epoch": 29.15129151291513, "grad_norm": 0.715934157371521, "learning_rate": 7.086148169174e-05, "loss": 0.01090754047036171, "step": 102700 }, { "epoch": 29.15413000283849, "grad_norm": 2.458418607711792, "learning_rate": 7.085864320181663e-05, "loss": 0.008639335632324219, "step": 102710 }, { "epoch": 29.15696849276185, "grad_norm": 11.255325317382812, "learning_rate": 7.085580471189328e-05, "loss": 0.014961728453636169, "step": 102720 }, { "epoch": 29.15980698268521, "grad_norm": 2.3089215755462646, "learning_rate": 7.085296622196992e-05, "loss": 0.019840715825557707, "step": 102730 }, { "epoch": 29.162645472608574, "grad_norm": 1.013698697090149, "learning_rate": 7.085012773204655e-05, "loss": 0.002451638504862785, "step": 102740 }, { "epoch": 29.165483962531933, "grad_norm": 5.578476905822754, "learning_rate": 7.08472892421232e-05, "loss": 0.0067212596535682675, "step": 102750 }, { "epoch": 29.168322452455293, "grad_norm": 0.8668773174285889, "learning_rate": 7.084445075219984e-05, "loss": 0.015506090223789215, "step": 102760 }, { "epoch": 29.171160942378656, "grad_norm": 0.264741986989975, "learning_rate": 7.084161226227646e-05, "loss": 0.0049165584146976474, "step": 102770 }, { "epoch": 29.173999432302015, "grad_norm": 0.295549213886261, "learning_rate": 7.08387737723531e-05, "loss": 0.006409379839897156, "step": 102780 }, { "epoch": 29.176837922225378, "grad_norm": 0.716139554977417, "learning_rate": 7.083593528242976e-05, "loss": 0.0056939773261547085, "step": 102790 }, { "epoch": 29.179676412148737, "grad_norm": 0.7113966345787048, "learning_rate": 7.083309679250639e-05, "loss": 0.010922443121671677, "step": 102800 }, { "epoch": 29.182514902072096, "grad_norm": 7.742880821228027, "learning_rate": 7.083025830258303e-05, "loss": 0.0054384000599384304, "step": 102810 }, { "epoch": 29.18535339199546, "grad_norm": 0.19212348759174347, "learning_rate": 7.082741981265967e-05, "loss": 0.003705166280269623, "step": 102820 }, { "epoch": 29.18819188191882, "grad_norm": 0.15441913902759552, "learning_rate": 7.082458132273631e-05, "loss": 0.0011392118409276008, "step": 102830 }, { "epoch": 29.19103037184218, "grad_norm": 0.08693257719278336, "learning_rate": 7.082174283281294e-05, "loss": 0.0065846554934978485, "step": 102840 }, { "epoch": 29.19386886176554, "grad_norm": 0.5288121700286865, "learning_rate": 7.08189043428896e-05, "loss": 0.0033392634242773054, "step": 102850 }, { "epoch": 29.1967073516889, "grad_norm": 0.5403031706809998, "learning_rate": 7.081606585296624e-05, "loss": 0.003260399401187897, "step": 102860 }, { "epoch": 29.199545841612263, "grad_norm": 0.14243869483470917, "learning_rate": 7.081322736304286e-05, "loss": 0.012542164325714112, "step": 102870 }, { "epoch": 29.202384331535622, "grad_norm": 0.8105551600456238, "learning_rate": 7.08103888731195e-05, "loss": 0.003414628654718399, "step": 102880 }, { "epoch": 29.205222821458985, "grad_norm": 3.517125368118286, "learning_rate": 7.080755038319615e-05, "loss": 0.0031416021287441255, "step": 102890 }, { "epoch": 29.208061311382345, "grad_norm": 0.46577849984169006, "learning_rate": 7.080471189327277e-05, "loss": 0.014421993494033813, "step": 102900 }, { "epoch": 29.210899801305704, "grad_norm": 14.920608520507812, "learning_rate": 7.080187340334942e-05, "loss": 0.006376342475414276, "step": 102910 }, { "epoch": 29.213738291229067, "grad_norm": 0.6095016598701477, "learning_rate": 7.079903491342607e-05, "loss": 0.004641420394182205, "step": 102920 }, { "epoch": 29.216576781152426, "grad_norm": 0.8011102676391602, "learning_rate": 7.07961964235027e-05, "loss": 0.0020334839820861815, "step": 102930 }, { "epoch": 29.21941527107579, "grad_norm": 1.759316086769104, "learning_rate": 7.079335793357934e-05, "loss": 0.0031461603939533234, "step": 102940 }, { "epoch": 29.22225376099915, "grad_norm": 1.7915518283843994, "learning_rate": 7.079051944365598e-05, "loss": 0.0028503136709332466, "step": 102950 }, { "epoch": 29.225092250922508, "grad_norm": 0.14384134113788605, "learning_rate": 7.078768095373262e-05, "loss": 0.002985171414911747, "step": 102960 }, { "epoch": 29.22793074084587, "grad_norm": 1.150887370109558, "learning_rate": 7.078484246380925e-05, "loss": 0.02281262129545212, "step": 102970 }, { "epoch": 29.23076923076923, "grad_norm": 0.2520335912704468, "learning_rate": 7.078200397388589e-05, "loss": 0.00878232643008232, "step": 102980 }, { "epoch": 29.233607720692593, "grad_norm": 0.08101112395524979, "learning_rate": 7.077916548396253e-05, "loss": 0.006850749254226685, "step": 102990 }, { "epoch": 29.236446210615952, "grad_norm": 0.7112416625022888, "learning_rate": 7.077632699403917e-05, "loss": 0.01065071001648903, "step": 103000 }, { "epoch": 29.236446210615952, "eval_accuracy": 0.9721498060660011, "eval_loss": 0.09248412400484085, "eval_runtime": 31.51, "eval_samples_per_second": 499.112, "eval_steps_per_second": 7.807, "step": 103000 }, { "epoch": 29.23928470053931, "grad_norm": 0.5933818221092224, "learning_rate": 7.077348850411582e-05, "loss": 0.007601629197597504, "step": 103010 }, { "epoch": 29.242123190462674, "grad_norm": 1.4413172006607056, "learning_rate": 7.077065001419246e-05, "loss": 0.0027877679094672203, "step": 103020 }, { "epoch": 29.244961680386034, "grad_norm": 3.930833339691162, "learning_rate": 7.076781152426908e-05, "loss": 0.01581648588180542, "step": 103030 }, { "epoch": 29.247800170309397, "grad_norm": 1.752811312675476, "learning_rate": 7.076497303434573e-05, "loss": 0.008138402551412582, "step": 103040 }, { "epoch": 29.250638660232756, "grad_norm": 4.996903896331787, "learning_rate": 7.076213454442238e-05, "loss": 0.009723015129566193, "step": 103050 }, { "epoch": 29.253477150156115, "grad_norm": 2.8598484992980957, "learning_rate": 7.075929605449901e-05, "loss": 0.0066393807530403135, "step": 103060 }, { "epoch": 29.256315640079478, "grad_norm": 2.6949291229248047, "learning_rate": 7.075645756457565e-05, "loss": 0.006281520426273346, "step": 103070 }, { "epoch": 29.259154130002837, "grad_norm": 0.902625560760498, "learning_rate": 7.075361907465229e-05, "loss": 0.006649890542030334, "step": 103080 }, { "epoch": 29.2619926199262, "grad_norm": 2.1878669261932373, "learning_rate": 7.075078058472892e-05, "loss": 0.01337079256772995, "step": 103090 }, { "epoch": 29.26483110984956, "grad_norm": 5.5928874015808105, "learning_rate": 7.074794209480556e-05, "loss": 0.013811884820461274, "step": 103100 }, { "epoch": 29.267669599772923, "grad_norm": 1.51506507396698, "learning_rate": 7.07451036048822e-05, "loss": 0.004733823239803314, "step": 103110 }, { "epoch": 29.270508089696282, "grad_norm": 0.5004401803016663, "learning_rate": 7.074226511495884e-05, "loss": 0.0018916735425591469, "step": 103120 }, { "epoch": 29.27334657961964, "grad_norm": 0.3076229989528656, "learning_rate": 7.073942662503548e-05, "loss": 0.00203386265784502, "step": 103130 }, { "epoch": 29.276185069543004, "grad_norm": 0.6887105107307434, "learning_rate": 7.073658813511213e-05, "loss": 0.005257030203938484, "step": 103140 }, { "epoch": 29.279023559466363, "grad_norm": 0.15275152027606964, "learning_rate": 7.073374964518877e-05, "loss": 0.006458427757024765, "step": 103150 }, { "epoch": 29.281862049389726, "grad_norm": 1.6810303926467896, "learning_rate": 7.07309111552654e-05, "loss": 0.005936475098133087, "step": 103160 }, { "epoch": 29.284700539313086, "grad_norm": 0.28178727626800537, "learning_rate": 7.072807266534204e-05, "loss": 0.0045090243220329285, "step": 103170 }, { "epoch": 29.287539029236445, "grad_norm": 4.965667724609375, "learning_rate": 7.072523417541868e-05, "loss": 0.007151518762111664, "step": 103180 }, { "epoch": 29.290377519159808, "grad_norm": 2.882051944732666, "learning_rate": 7.072239568549532e-05, "loss": 0.004828662425279617, "step": 103190 }, { "epoch": 29.293216009083167, "grad_norm": 2.061706781387329, "learning_rate": 7.071955719557196e-05, "loss": 0.003097831830382347, "step": 103200 }, { "epoch": 29.29605449900653, "grad_norm": 2.1179275512695312, "learning_rate": 7.07167187056486e-05, "loss": 0.0065132036805152895, "step": 103210 }, { "epoch": 29.29889298892989, "grad_norm": 0.9824199080467224, "learning_rate": 7.071388021572523e-05, "loss": 0.0025570323690772057, "step": 103220 }, { "epoch": 29.30173147885325, "grad_norm": 0.4234238266944885, "learning_rate": 7.071104172580187e-05, "loss": 0.006880425661802292, "step": 103230 }, { "epoch": 29.30456996877661, "grad_norm": 1.14798903465271, "learning_rate": 7.070820323587851e-05, "loss": 0.013638554513454438, "step": 103240 }, { "epoch": 29.30740845869997, "grad_norm": 10.852538108825684, "learning_rate": 7.070536474595515e-05, "loss": 0.00906771793961525, "step": 103250 }, { "epoch": 29.310246948623334, "grad_norm": 3.0900282859802246, "learning_rate": 7.07025262560318e-05, "loss": 0.0029064876958727836, "step": 103260 }, { "epoch": 29.313085438546693, "grad_norm": 0.25750496983528137, "learning_rate": 7.069968776610844e-05, "loss": 0.022493073344230653, "step": 103270 }, { "epoch": 29.315923928470053, "grad_norm": 0.046320654451847076, "learning_rate": 7.069684927618508e-05, "loss": 0.007278059422969818, "step": 103280 }, { "epoch": 29.318762418393415, "grad_norm": 6.1847686767578125, "learning_rate": 7.06940107862617e-05, "loss": 0.015126746892929078, "step": 103290 }, { "epoch": 29.321600908316775, "grad_norm": 0.7371032238006592, "learning_rate": 7.069117229633835e-05, "loss": 0.008068636059761047, "step": 103300 }, { "epoch": 29.324439398240138, "grad_norm": 0.09260711818933487, "learning_rate": 7.068833380641499e-05, "loss": 0.005408328026533127, "step": 103310 }, { "epoch": 29.327277888163497, "grad_norm": 0.9270020127296448, "learning_rate": 7.068549531649163e-05, "loss": 0.007327474653720856, "step": 103320 }, { "epoch": 29.330116378086856, "grad_norm": 0.679845929145813, "learning_rate": 7.068265682656827e-05, "loss": 0.0036801610141992567, "step": 103330 }, { "epoch": 29.33295486801022, "grad_norm": 4.866535663604736, "learning_rate": 7.067981833664491e-05, "loss": 0.004996693879365921, "step": 103340 }, { "epoch": 29.33579335793358, "grad_norm": 0.5973851680755615, "learning_rate": 7.067697984672154e-05, "loss": 0.010375338047742844, "step": 103350 }, { "epoch": 29.33863184785694, "grad_norm": 0.4207924008369446, "learning_rate": 7.067414135679818e-05, "loss": 0.0074117623269557955, "step": 103360 }, { "epoch": 29.3414703377803, "grad_norm": 2.033496141433716, "learning_rate": 7.067130286687482e-05, "loss": 0.0037599004805088042, "step": 103370 }, { "epoch": 29.34430882770366, "grad_norm": 0.33192357420921326, "learning_rate": 7.066846437695147e-05, "loss": 0.004513479769229889, "step": 103380 }, { "epoch": 29.347147317627023, "grad_norm": 10.976330757141113, "learning_rate": 7.06656258870281e-05, "loss": 0.006702899187803269, "step": 103390 }, { "epoch": 29.349985807550382, "grad_norm": 0.4565625488758087, "learning_rate": 7.066278739710475e-05, "loss": 0.0025810420513153074, "step": 103400 }, { "epoch": 29.352824297473745, "grad_norm": 1.2145016193389893, "learning_rate": 7.065994890718139e-05, "loss": 0.003390244394540787, "step": 103410 }, { "epoch": 29.355662787397105, "grad_norm": 1.3472965955734253, "learning_rate": 7.065711041725802e-05, "loss": 0.009109169244766235, "step": 103420 }, { "epoch": 29.358501277320464, "grad_norm": 2.320267677307129, "learning_rate": 7.065427192733466e-05, "loss": 0.012889544665813445, "step": 103430 }, { "epoch": 29.361339767243827, "grad_norm": 1.5763037204742432, "learning_rate": 7.06514334374113e-05, "loss": 0.011335603892803192, "step": 103440 }, { "epoch": 29.364178257167186, "grad_norm": 0.46581053733825684, "learning_rate": 7.064859494748794e-05, "loss": 0.011664146184921264, "step": 103450 }, { "epoch": 29.36701674709055, "grad_norm": 0.3456219732761383, "learning_rate": 7.064575645756458e-05, "loss": 0.0025394342839717863, "step": 103460 }, { "epoch": 29.36985523701391, "grad_norm": 0.09108962118625641, "learning_rate": 7.064291796764122e-05, "loss": 0.009935784339904784, "step": 103470 }, { "epoch": 29.372693726937268, "grad_norm": 3.5361814498901367, "learning_rate": 7.064007947771785e-05, "loss": 0.009024163335561752, "step": 103480 }, { "epoch": 29.37553221686063, "grad_norm": 0.19263404607772827, "learning_rate": 7.063724098779449e-05, "loss": 0.011487459391355514, "step": 103490 }, { "epoch": 29.37837070678399, "grad_norm": 0.8193291425704956, "learning_rate": 7.063440249787113e-05, "loss": 0.0118890643119812, "step": 103500 }, { "epoch": 29.37837070678399, "eval_accuracy": 0.9710052775481656, "eval_loss": 0.10009734332561493, "eval_runtime": 32.1347, "eval_samples_per_second": 489.408, "eval_steps_per_second": 7.655, "step": 103500 }, { "epoch": 29.381209196707353, "grad_norm": 6.362823963165283, "learning_rate": 7.063156400794778e-05, "loss": 0.006814927607774734, "step": 103510 }, { "epoch": 29.384047686630712, "grad_norm": 1.7562648057937622, "learning_rate": 7.062872551802442e-05, "loss": 0.005839845538139344, "step": 103520 }, { "epoch": 29.386886176554075, "grad_norm": 0.15425477921962738, "learning_rate": 7.062588702810106e-05, "loss": 0.004590987414121628, "step": 103530 }, { "epoch": 29.389724666477434, "grad_norm": 0.7263097167015076, "learning_rate": 7.06230485381777e-05, "loss": 0.010863494873046876, "step": 103540 }, { "epoch": 29.392563156400794, "grad_norm": 0.7492151260375977, "learning_rate": 7.062021004825433e-05, "loss": 0.007868213206529617, "step": 103550 }, { "epoch": 29.395401646324157, "grad_norm": 12.324872016906738, "learning_rate": 7.061737155833097e-05, "loss": 0.012037272751331329, "step": 103560 }, { "epoch": 29.398240136247516, "grad_norm": 2.4293723106384277, "learning_rate": 7.061453306840761e-05, "loss": 0.01658853143453598, "step": 103570 }, { "epoch": 29.40107862617088, "grad_norm": 0.14804920554161072, "learning_rate": 7.061169457848424e-05, "loss": 0.009303011000156403, "step": 103580 }, { "epoch": 29.403917116094238, "grad_norm": 0.9840238094329834, "learning_rate": 7.06088560885609e-05, "loss": 0.004961763694882393, "step": 103590 }, { "epoch": 29.406755606017597, "grad_norm": 11.881843566894531, "learning_rate": 7.060601759863753e-05, "loss": 0.0165410652756691, "step": 103600 }, { "epoch": 29.40959409594096, "grad_norm": 0.881999135017395, "learning_rate": 7.060317910871416e-05, "loss": 0.004866021871566773, "step": 103610 }, { "epoch": 29.41243258586432, "grad_norm": 1.2781749963760376, "learning_rate": 7.06003406187908e-05, "loss": 0.005559352785348892, "step": 103620 }, { "epoch": 29.415271075787683, "grad_norm": 0.5381531119346619, "learning_rate": 7.059750212886745e-05, "loss": 0.018800212442874907, "step": 103630 }, { "epoch": 29.418109565711042, "grad_norm": 0.5681717395782471, "learning_rate": 7.059466363894409e-05, "loss": 0.009892986714839935, "step": 103640 }, { "epoch": 29.4209480556344, "grad_norm": 1.7043876647949219, "learning_rate": 7.059182514902073e-05, "loss": 0.004879165068268776, "step": 103650 }, { "epoch": 29.423786545557764, "grad_norm": 0.06515082716941833, "learning_rate": 7.058898665909737e-05, "loss": 0.006353899836540222, "step": 103660 }, { "epoch": 29.426625035481123, "grad_norm": 9.811002731323242, "learning_rate": 7.058614816917401e-05, "loss": 0.008224469423294068, "step": 103670 }, { "epoch": 29.429463525404486, "grad_norm": 0.1130213513970375, "learning_rate": 7.058330967925064e-05, "loss": 0.0040574237704277035, "step": 103680 }, { "epoch": 29.432302015327846, "grad_norm": 0.4912196397781372, "learning_rate": 7.058047118932728e-05, "loss": 0.00849318653345108, "step": 103690 }, { "epoch": 29.435140505251205, "grad_norm": 0.9292280077934265, "learning_rate": 7.057763269940392e-05, "loss": 0.0053595930337905886, "step": 103700 }, { "epoch": 29.437978995174568, "grad_norm": 0.028805045410990715, "learning_rate": 7.057479420948055e-05, "loss": 0.004971916973590851, "step": 103710 }, { "epoch": 29.440817485097927, "grad_norm": 4.949158668518066, "learning_rate": 7.05719557195572e-05, "loss": 0.010285651683807373, "step": 103720 }, { "epoch": 29.44365597502129, "grad_norm": 2.8312160968780518, "learning_rate": 7.056911722963385e-05, "loss": 0.01690831035375595, "step": 103730 }, { "epoch": 29.44649446494465, "grad_norm": 1.025990605354309, "learning_rate": 7.056627873971047e-05, "loss": 0.005609037727117539, "step": 103740 }, { "epoch": 29.44933295486801, "grad_norm": 0.23950712382793427, "learning_rate": 7.056344024978711e-05, "loss": 0.0020457122474908827, "step": 103750 }, { "epoch": 29.45217144479137, "grad_norm": 0.6270509362220764, "learning_rate": 7.056060175986376e-05, "loss": 0.005263065919280052, "step": 103760 }, { "epoch": 29.45500993471473, "grad_norm": 18.475805282592773, "learning_rate": 7.05577632699404e-05, "loss": 0.03969619572162628, "step": 103770 }, { "epoch": 29.457848424638094, "grad_norm": 1.099635362625122, "learning_rate": 7.055492478001704e-05, "loss": 0.03034897744655609, "step": 103780 }, { "epoch": 29.460686914561453, "grad_norm": 12.22894287109375, "learning_rate": 7.055208629009368e-05, "loss": 0.02304292768239975, "step": 103790 }, { "epoch": 29.463525404484812, "grad_norm": 1.35223388671875, "learning_rate": 7.054924780017032e-05, "loss": 0.01624814122915268, "step": 103800 }, { "epoch": 29.466363894408175, "grad_norm": 1.2336528301239014, "learning_rate": 7.054640931024695e-05, "loss": 0.005838661640882492, "step": 103810 }, { "epoch": 29.469202384331535, "grad_norm": 5.567005157470703, "learning_rate": 7.054357082032359e-05, "loss": 0.011862041056156158, "step": 103820 }, { "epoch": 29.472040874254898, "grad_norm": 12.38755989074707, "learning_rate": 7.054073233040023e-05, "loss": 0.005309949815273285, "step": 103830 }, { "epoch": 29.474879364178257, "grad_norm": 0.29447826743125916, "learning_rate": 7.053789384047686e-05, "loss": 0.001903078705072403, "step": 103840 }, { "epoch": 29.477717854101616, "grad_norm": 0.08942021429538727, "learning_rate": 7.053505535055351e-05, "loss": 0.00615214891731739, "step": 103850 }, { "epoch": 29.48055634402498, "grad_norm": 0.8542690277099609, "learning_rate": 7.053221686063016e-05, "loss": 0.012230338156223297, "step": 103860 }, { "epoch": 29.48339483394834, "grad_norm": 15.261199951171875, "learning_rate": 7.052937837070678e-05, "loss": 0.006397219002246856, "step": 103870 }, { "epoch": 29.4862333238717, "grad_norm": 1.796958088874817, "learning_rate": 7.052653988078343e-05, "loss": 0.007944531738758087, "step": 103880 }, { "epoch": 29.48907181379506, "grad_norm": 0.5633137226104736, "learning_rate": 7.052370139086007e-05, "loss": 0.002471035346388817, "step": 103890 }, { "epoch": 29.49191030371842, "grad_norm": 0.9292938113212585, "learning_rate": 7.052086290093671e-05, "loss": 0.013493633270263672, "step": 103900 }, { "epoch": 29.494748793641783, "grad_norm": 1.0036776065826416, "learning_rate": 7.051802441101334e-05, "loss": 0.007279355823993683, "step": 103910 }, { "epoch": 29.497587283565142, "grad_norm": 1.0998727083206177, "learning_rate": 7.051518592108999e-05, "loss": 0.009480812400579453, "step": 103920 }, { "epoch": 29.500425773488505, "grad_norm": 0.9005405306816101, "learning_rate": 7.051234743116662e-05, "loss": 0.009021732211112975, "step": 103930 }, { "epoch": 29.503264263411864, "grad_norm": 0.09481550753116608, "learning_rate": 7.050950894124326e-05, "loss": 0.009876537322998046, "step": 103940 }, { "epoch": 29.506102753335227, "grad_norm": 0.43748241662979126, "learning_rate": 7.05066704513199e-05, "loss": 0.006156052649021149, "step": 103950 }, { "epoch": 29.508941243258587, "grad_norm": 2.789482831954956, "learning_rate": 7.050383196139654e-05, "loss": 0.005762055516242981, "step": 103960 }, { "epoch": 29.511779733181946, "grad_norm": 0.4689555764198303, "learning_rate": 7.050099347147317e-05, "loss": 0.006935521215200424, "step": 103970 }, { "epoch": 29.51461822310531, "grad_norm": 2.5231363773345947, "learning_rate": 7.049815498154983e-05, "loss": 0.0020012497901916505, "step": 103980 }, { "epoch": 29.51745671302867, "grad_norm": 0.8395127058029175, "learning_rate": 7.049531649162647e-05, "loss": 0.004673022404313087, "step": 103990 }, { "epoch": 29.52029520295203, "grad_norm": 0.774398148059845, "learning_rate": 7.04924780017031e-05, "loss": 0.0021633140742778776, "step": 104000 }, { "epoch": 29.52029520295203, "eval_accuracy": 0.9720226362306861, "eval_loss": 0.09336931258440018, "eval_runtime": 32.6696, "eval_samples_per_second": 481.395, "eval_steps_per_second": 7.53, "step": 104000 }, { "epoch": 29.52313369287539, "grad_norm": 0.10275911539793015, "learning_rate": 7.048963951177974e-05, "loss": 0.0031437806785106657, "step": 104010 }, { "epoch": 29.52597218279875, "grad_norm": 0.6999396681785583, "learning_rate": 7.048680102185638e-05, "loss": 0.007735978811979294, "step": 104020 }, { "epoch": 29.528810672722113, "grad_norm": 0.47529640793800354, "learning_rate": 7.0483962531933e-05, "loss": 0.0032308816909790037, "step": 104030 }, { "epoch": 29.531649162645472, "grad_norm": 0.5819925665855408, "learning_rate": 7.048112404200965e-05, "loss": 0.008918210864067078, "step": 104040 }, { "epoch": 29.534487652568835, "grad_norm": 9.107738494873047, "learning_rate": 7.04782855520863e-05, "loss": 0.015941126644611357, "step": 104050 }, { "epoch": 29.537326142492194, "grad_norm": 8.273719787597656, "learning_rate": 7.047544706216293e-05, "loss": 0.011851529031991959, "step": 104060 }, { "epoch": 29.540164632415554, "grad_norm": 0.35645440220832825, "learning_rate": 7.047260857223957e-05, "loss": 0.011782968789339066, "step": 104070 }, { "epoch": 29.543003122338916, "grad_norm": 1.1897735595703125, "learning_rate": 7.046977008231621e-05, "loss": 0.012898813188076019, "step": 104080 }, { "epoch": 29.545841612262276, "grad_norm": 1.441650390625, "learning_rate": 7.046721544138518e-05, "loss": 0.00846182107925415, "step": 104090 }, { "epoch": 29.54868010218564, "grad_norm": 10.010531425476074, "learning_rate": 7.046437695146183e-05, "loss": 0.011055965721607209, "step": 104100 }, { "epoch": 29.551518592108998, "grad_norm": 0.5085257291793823, "learning_rate": 7.046153846153846e-05, "loss": 0.012912529706954955, "step": 104110 }, { "epoch": 29.554357082032357, "grad_norm": 0.1733914464712143, "learning_rate": 7.04586999716151e-05, "loss": 0.006072824448347091, "step": 104120 }, { "epoch": 29.55719557195572, "grad_norm": 8.602968215942383, "learning_rate": 7.045586148169174e-05, "loss": 0.007088814675807953, "step": 104130 }, { "epoch": 29.56003406187908, "grad_norm": 0.20394794642925262, "learning_rate": 7.045302299176839e-05, "loss": 0.007820235937833786, "step": 104140 }, { "epoch": 29.562872551802442, "grad_norm": 0.2930407226085663, "learning_rate": 7.045018450184501e-05, "loss": 0.02364852875471115, "step": 104150 }, { "epoch": 29.565711041725802, "grad_norm": 0.12931327521800995, "learning_rate": 7.044734601192165e-05, "loss": 0.0038503125309944154, "step": 104160 }, { "epoch": 29.56854953164916, "grad_norm": 2.476289987564087, "learning_rate": 7.044450752199831e-05, "loss": 0.0060837332159280775, "step": 104170 }, { "epoch": 29.571388021572524, "grad_norm": 1.8453221321105957, "learning_rate": 7.044166903207494e-05, "loss": 0.009723147749900818, "step": 104180 }, { "epoch": 29.574226511495883, "grad_norm": 0.4030969738960266, "learning_rate": 7.043883054215158e-05, "loss": 0.021486718952655793, "step": 104190 }, { "epoch": 29.577065001419246, "grad_norm": 2.210861921310425, "learning_rate": 7.043599205222822e-05, "loss": 0.0045363172888755795, "step": 104200 }, { "epoch": 29.579903491342606, "grad_norm": 0.9037960171699524, "learning_rate": 7.043315356230485e-05, "loss": 0.01242876723408699, "step": 104210 }, { "epoch": 29.582741981265965, "grad_norm": 0.7394652962684631, "learning_rate": 7.043031507238149e-05, "loss": 0.0073909670114517215, "step": 104220 }, { "epoch": 29.585580471189328, "grad_norm": 0.026567431166768074, "learning_rate": 7.042747658245814e-05, "loss": 0.0064466923475265505, "step": 104230 }, { "epoch": 29.588418961112687, "grad_norm": 3.084294319152832, "learning_rate": 7.042463809253477e-05, "loss": 0.005016718432307244, "step": 104240 }, { "epoch": 29.59125745103605, "grad_norm": 6.4906229972839355, "learning_rate": 7.042179960261141e-05, "loss": 0.015404963493347168, "step": 104250 }, { "epoch": 29.59409594095941, "grad_norm": 12.337773323059082, "learning_rate": 7.041896111268805e-05, "loss": 0.017842201888561247, "step": 104260 }, { "epoch": 29.59693443088277, "grad_norm": 0.3310418725013733, "learning_rate": 7.04161226227647e-05, "loss": 0.004060607403516769, "step": 104270 }, { "epoch": 29.59977292080613, "grad_norm": 0.49323394894599915, "learning_rate": 7.041328413284132e-05, "loss": 0.018381719291210175, "step": 104280 }, { "epoch": 29.60261141072949, "grad_norm": 0.13789761066436768, "learning_rate": 7.041044564291797e-05, "loss": 0.004696869850158691, "step": 104290 }, { "epoch": 29.605449900652854, "grad_norm": 0.1203417032957077, "learning_rate": 7.040760715299462e-05, "loss": 0.0034710045903921126, "step": 104300 }, { "epoch": 29.608288390576213, "grad_norm": 2.6133992671966553, "learning_rate": 7.040476866307125e-05, "loss": 0.005593504756689072, "step": 104310 }, { "epoch": 29.611126880499576, "grad_norm": 0.10324645787477493, "learning_rate": 7.040193017314789e-05, "loss": 0.0038324929773807526, "step": 104320 }, { "epoch": 29.613965370422935, "grad_norm": 5.347020626068115, "learning_rate": 7.039909168322453e-05, "loss": 0.004506857320666313, "step": 104330 }, { "epoch": 29.616803860346295, "grad_norm": 0.5524942278862, "learning_rate": 7.039625319330116e-05, "loss": 0.006664811819791794, "step": 104340 }, { "epoch": 29.619642350269658, "grad_norm": 9.748819351196289, "learning_rate": 7.03934147033778e-05, "loss": 0.010068503767251968, "step": 104350 }, { "epoch": 29.622480840193017, "grad_norm": 1.6839975118637085, "learning_rate": 7.039057621345446e-05, "loss": 0.005738165974617004, "step": 104360 }, { "epoch": 29.62531933011638, "grad_norm": 0.795344889163971, "learning_rate": 7.038773772353108e-05, "loss": 0.006378365308046341, "step": 104370 }, { "epoch": 29.62815782003974, "grad_norm": 0.8398072123527527, "learning_rate": 7.038489923360772e-05, "loss": 0.01780163198709488, "step": 104380 }, { "epoch": 29.6309963099631, "grad_norm": 0.21247723698616028, "learning_rate": 7.038206074368437e-05, "loss": 0.02032453417778015, "step": 104390 }, { "epoch": 29.63383479988646, "grad_norm": 0.08152678608894348, "learning_rate": 7.037922225376101e-05, "loss": 0.0023228060454130173, "step": 104400 }, { "epoch": 29.63667328980982, "grad_norm": 0.2128494381904602, "learning_rate": 7.037638376383763e-05, "loss": 0.004381305724382401, "step": 104410 }, { "epoch": 29.639511779733184, "grad_norm": 1.3016115427017212, "learning_rate": 7.037354527391428e-05, "loss": 0.015048427879810334, "step": 104420 }, { "epoch": 29.642350269656543, "grad_norm": 1.2149841785430908, "learning_rate": 7.037070678399093e-05, "loss": 0.009348060935735703, "step": 104430 }, { "epoch": 29.645188759579902, "grad_norm": 8.566803932189941, "learning_rate": 7.036786829406756e-05, "loss": 0.005944855511188507, "step": 104440 }, { "epoch": 29.648027249503265, "grad_norm": 2.476390838623047, "learning_rate": 7.03650298041442e-05, "loss": 0.002931314334273338, "step": 104450 }, { "epoch": 29.650865739426624, "grad_norm": 19.095195770263672, "learning_rate": 7.036219131422084e-05, "loss": 0.02069008946418762, "step": 104460 }, { "epoch": 29.653704229349987, "grad_norm": 8.009967803955078, "learning_rate": 7.035935282429747e-05, "loss": 0.0062904275953769686, "step": 104470 }, { "epoch": 29.656542719273347, "grad_norm": 0.07890402525663376, "learning_rate": 7.035651433437411e-05, "loss": 0.007290425151586533, "step": 104480 }, { "epoch": 29.659381209196706, "grad_norm": 11.004659652709961, "learning_rate": 7.035367584445075e-05, "loss": 0.005387300997972489, "step": 104490 }, { "epoch": 29.66221969912007, "grad_norm": 3.862241268157959, "learning_rate": 7.03508373545274e-05, "loss": 0.0100385382771492, "step": 104500 }, { "epoch": 29.66221969912007, "eval_accuracy": 0.9682711260888918, "eval_loss": 0.10713754594326019, "eval_runtime": 31.8517, "eval_samples_per_second": 493.756, "eval_steps_per_second": 7.723, "step": 104500 }, { "epoch": 29.665058189043428, "grad_norm": 1.5849838256835938, "learning_rate": 7.034799886460404e-05, "loss": 0.011078003048896789, "step": 104510 }, { "epoch": 29.66789667896679, "grad_norm": 0.14465877413749695, "learning_rate": 7.034516037468068e-05, "loss": 0.00287940688431263, "step": 104520 }, { "epoch": 29.67073516889015, "grad_norm": 15.591758728027344, "learning_rate": 7.034232188475732e-05, "loss": 0.015643219649791717, "step": 104530 }, { "epoch": 29.67357365881351, "grad_norm": 0.7701877951622009, "learning_rate": 7.033948339483395e-05, "loss": 0.011083225905895232, "step": 104540 }, { "epoch": 29.676412148736873, "grad_norm": 1.5144898891448975, "learning_rate": 7.033664490491059e-05, "loss": 0.02409253120422363, "step": 104550 }, { "epoch": 29.679250638660232, "grad_norm": 0.06606651097536087, "learning_rate": 7.033380641498724e-05, "loss": 0.005826719850301742, "step": 104560 }, { "epoch": 29.682089128583595, "grad_norm": 0.249341681599617, "learning_rate": 7.033096792506387e-05, "loss": 0.0038238178938627245, "step": 104570 }, { "epoch": 29.684927618506954, "grad_norm": 10.029897689819336, "learning_rate": 7.032812943514051e-05, "loss": 0.005377654731273651, "step": 104580 }, { "epoch": 29.687766108430313, "grad_norm": 8.690874099731445, "learning_rate": 7.032529094521715e-05, "loss": 0.008487799018621445, "step": 104590 }, { "epoch": 29.690604598353676, "grad_norm": 1.082100510597229, "learning_rate": 7.032245245529378e-05, "loss": 0.00240826103836298, "step": 104600 }, { "epoch": 29.693443088277036, "grad_norm": 4.237621307373047, "learning_rate": 7.031961396537042e-05, "loss": 0.00365162156522274, "step": 104610 }, { "epoch": 29.6962815782004, "grad_norm": 5.967831611633301, "learning_rate": 7.031677547544706e-05, "loss": 0.021129617094993593, "step": 104620 }, { "epoch": 29.699120068123758, "grad_norm": 0.21645548939704895, "learning_rate": 7.03139369855237e-05, "loss": 0.00919828712940216, "step": 104630 }, { "epoch": 29.701958558047117, "grad_norm": 10.868873596191406, "learning_rate": 7.031109849560035e-05, "loss": 0.01018485650420189, "step": 104640 }, { "epoch": 29.70479704797048, "grad_norm": 0.10665369033813477, "learning_rate": 7.030826000567699e-05, "loss": 0.006110059097409248, "step": 104650 }, { "epoch": 29.70763553789384, "grad_norm": 4.778646469116211, "learning_rate": 7.030542151575363e-05, "loss": 0.014721235632896424, "step": 104660 }, { "epoch": 29.710474027817202, "grad_norm": 0.9279530644416809, "learning_rate": 7.030258302583026e-05, "loss": 0.0019113093614578248, "step": 104670 }, { "epoch": 29.71331251774056, "grad_norm": 0.3460279405117035, "learning_rate": 7.02997445359069e-05, "loss": 0.004975598677992821, "step": 104680 }, { "epoch": 29.716151007663925, "grad_norm": 6.227685928344727, "learning_rate": 7.029690604598354e-05, "loss": 0.024590209126472473, "step": 104690 }, { "epoch": 29.718989497587284, "grad_norm": 3.5708725452423096, "learning_rate": 7.029406755606018e-05, "loss": 0.011556456983089446, "step": 104700 }, { "epoch": 29.721827987510643, "grad_norm": 0.42200541496276855, "learning_rate": 7.029122906613682e-05, "loss": 0.008850054442882537, "step": 104710 }, { "epoch": 29.724666477434006, "grad_norm": 1.3981338739395142, "learning_rate": 7.028839057621346e-05, "loss": 0.01377348005771637, "step": 104720 }, { "epoch": 29.727504967357365, "grad_norm": 1.5834133625030518, "learning_rate": 7.028555208629009e-05, "loss": 0.00329880490899086, "step": 104730 }, { "epoch": 29.73034345728073, "grad_norm": 0.38446736335754395, "learning_rate": 7.028271359636673e-05, "loss": 0.012954989075660705, "step": 104740 }, { "epoch": 29.733181947204088, "grad_norm": 1.2588497400283813, "learning_rate": 7.027987510644337e-05, "loss": 0.008833683282136916, "step": 104750 }, { "epoch": 29.736020437127447, "grad_norm": 0.9540572762489319, "learning_rate": 7.027703661652002e-05, "loss": 0.001432366855442524, "step": 104760 }, { "epoch": 29.73885892705081, "grad_norm": 2.3396036624908447, "learning_rate": 7.027419812659666e-05, "loss": 0.003808160871267319, "step": 104770 }, { "epoch": 29.74169741697417, "grad_norm": 0.591968297958374, "learning_rate": 7.02713596366733e-05, "loss": 0.008841280639171601, "step": 104780 }, { "epoch": 29.744535906897532, "grad_norm": 5.853080749511719, "learning_rate": 7.026852114674994e-05, "loss": 0.011106960475444794, "step": 104790 }, { "epoch": 29.74737439682089, "grad_norm": 1.0360151529312134, "learning_rate": 7.026568265682657e-05, "loss": 0.0021541936323046683, "step": 104800 }, { "epoch": 29.75021288674425, "grad_norm": 0.989202618598938, "learning_rate": 7.026284416690321e-05, "loss": 0.0026784462854266165, "step": 104810 }, { "epoch": 29.753051376667614, "grad_norm": 4.244090557098389, "learning_rate": 7.026000567697985e-05, "loss": 0.004958838596940041, "step": 104820 }, { "epoch": 29.755889866590973, "grad_norm": 8.935042381286621, "learning_rate": 7.025716718705649e-05, "loss": 0.007089726626873016, "step": 104830 }, { "epoch": 29.758728356514336, "grad_norm": 0.5610134601593018, "learning_rate": 7.025432869713313e-05, "loss": 0.013608497381210328, "step": 104840 }, { "epoch": 29.761566846437695, "grad_norm": 10.513693809509277, "learning_rate": 7.025149020720977e-05, "loss": 0.0074524469673633575, "step": 104850 }, { "epoch": 29.764405336361055, "grad_norm": 0.41039466857910156, "learning_rate": 7.02486517172864e-05, "loss": 0.00746232271194458, "step": 104860 }, { "epoch": 29.767243826284417, "grad_norm": 0.17791564762592316, "learning_rate": 7.024581322736304e-05, "loss": 0.004781383275985718, "step": 104870 }, { "epoch": 29.770082316207777, "grad_norm": 11.260369300842285, "learning_rate": 7.024297473743968e-05, "loss": 0.009041262418031692, "step": 104880 }, { "epoch": 29.77292080613114, "grad_norm": 1.9183902740478516, "learning_rate": 7.024013624751633e-05, "loss": 0.006306622177362442, "step": 104890 }, { "epoch": 29.7757592960545, "grad_norm": 1.988551378250122, "learning_rate": 7.023729775759297e-05, "loss": 0.009582685679197312, "step": 104900 }, { "epoch": 29.77859778597786, "grad_norm": 1.1689237356185913, "learning_rate": 7.023445926766961e-05, "loss": 0.004791789874434471, "step": 104910 }, { "epoch": 29.78143627590122, "grad_norm": 6.808248519897461, "learning_rate": 7.023162077774624e-05, "loss": 0.006833506375551223, "step": 104920 }, { "epoch": 29.78427476582458, "grad_norm": 5.847177505493164, "learning_rate": 7.022878228782288e-05, "loss": 0.022859284281730653, "step": 104930 }, { "epoch": 29.787113255747943, "grad_norm": 1.2152276039123535, "learning_rate": 7.022594379789952e-05, "loss": 0.004604285210371017, "step": 104940 }, { "epoch": 29.789951745671303, "grad_norm": 12.555948257446289, "learning_rate": 7.022310530797616e-05, "loss": 0.007945778965950012, "step": 104950 }, { "epoch": 29.792790235594662, "grad_norm": 0.548850417137146, "learning_rate": 7.02202668180528e-05, "loss": 0.015255066752433776, "step": 104960 }, { "epoch": 29.795628725518025, "grad_norm": 9.073225021362305, "learning_rate": 7.021742832812944e-05, "loss": 0.02067209780216217, "step": 104970 }, { "epoch": 29.798467215441384, "grad_norm": 0.09555802494287491, "learning_rate": 7.021458983820608e-05, "loss": 0.01641386151313782, "step": 104980 }, { "epoch": 29.801305705364747, "grad_norm": 0.27824223041534424, "learning_rate": 7.021175134828271e-05, "loss": 0.003710051625967026, "step": 104990 }, { "epoch": 29.804144195288107, "grad_norm": 0.20276014506816864, "learning_rate": 7.020891285835935e-05, "loss": 0.002675805240869522, "step": 105000 }, { "epoch": 29.804144195288107, "eval_accuracy": 0.9730399949132066, "eval_loss": 0.09709598869085312, "eval_runtime": 31.8749, "eval_samples_per_second": 493.397, "eval_steps_per_second": 7.718, "step": 105000 }, { "epoch": 29.806982685211466, "grad_norm": 0.12756562232971191, "learning_rate": 7.0206074368436e-05, "loss": 0.013611838221549988, "step": 105010 }, { "epoch": 29.80982117513483, "grad_norm": 2.987877130508423, "learning_rate": 7.020323587851262e-05, "loss": 0.006055672466754913, "step": 105020 }, { "epoch": 29.812659665058188, "grad_norm": 0.5880629420280457, "learning_rate": 7.020039738858928e-05, "loss": 0.00497266948223114, "step": 105030 }, { "epoch": 29.81549815498155, "grad_norm": 0.9611206650733948, "learning_rate": 7.019755889866592e-05, "loss": 0.008511928468942642, "step": 105040 }, { "epoch": 29.81833664490491, "grad_norm": 0.722910463809967, "learning_rate": 7.019472040874255e-05, "loss": 0.008684001863002777, "step": 105050 }, { "epoch": 29.82117513482827, "grad_norm": 3.6950082778930664, "learning_rate": 7.019188191881919e-05, "loss": 0.008162318170070649, "step": 105060 }, { "epoch": 29.824013624751633, "grad_norm": 1.219530463218689, "learning_rate": 7.018904342889583e-05, "loss": 0.009760137647390366, "step": 105070 }, { "epoch": 29.826852114674992, "grad_norm": 12.354999542236328, "learning_rate": 7.018620493897247e-05, "loss": 0.00713539719581604, "step": 105080 }, { "epoch": 29.829690604598355, "grad_norm": 0.24815727770328522, "learning_rate": 7.01833664490491e-05, "loss": 0.008691404759883881, "step": 105090 }, { "epoch": 29.832529094521714, "grad_norm": 1.3925886154174805, "learning_rate": 7.018052795912575e-05, "loss": 0.004267697036266327, "step": 105100 }, { "epoch": 29.835367584445073, "grad_norm": 1.9502575397491455, "learning_rate": 7.01776894692024e-05, "loss": 0.005763031169772148, "step": 105110 }, { "epoch": 29.838206074368436, "grad_norm": 0.5452384948730469, "learning_rate": 7.017485097927902e-05, "loss": 0.014007450640201568, "step": 105120 }, { "epoch": 29.841044564291796, "grad_norm": 1.8186901807785034, "learning_rate": 7.017201248935566e-05, "loss": 0.011604809761047363, "step": 105130 }, { "epoch": 29.84388305421516, "grad_norm": 1.2743494510650635, "learning_rate": 7.01691739994323e-05, "loss": 0.021057575941085815, "step": 105140 }, { "epoch": 29.846721544138518, "grad_norm": 0.2516765892505646, "learning_rate": 7.016633550950893e-05, "loss": 0.013565397262573243, "step": 105150 }, { "epoch": 29.84956003406188, "grad_norm": 0.30280566215515137, "learning_rate": 7.016349701958559e-05, "loss": 0.018691384792327882, "step": 105160 }, { "epoch": 29.85239852398524, "grad_norm": 1.5659123659133911, "learning_rate": 7.016065852966223e-05, "loss": 0.017920032143592834, "step": 105170 }, { "epoch": 29.8552370139086, "grad_norm": 0.8984304070472717, "learning_rate": 7.015782003973886e-05, "loss": 0.010006338357925415, "step": 105180 }, { "epoch": 29.858075503831962, "grad_norm": 5.645437717437744, "learning_rate": 7.01549815498155e-05, "loss": 0.004256200045347214, "step": 105190 }, { "epoch": 29.86091399375532, "grad_norm": 0.836696982383728, "learning_rate": 7.015214305989214e-05, "loss": 0.006336534023284912, "step": 105200 }, { "epoch": 29.863752483678685, "grad_norm": 0.309998095035553, "learning_rate": 7.014930456996878e-05, "loss": 0.006125971674919128, "step": 105210 }, { "epoch": 29.866590973602044, "grad_norm": 1.6069484949111938, "learning_rate": 7.014646608004541e-05, "loss": 0.028770118951797485, "step": 105220 }, { "epoch": 29.869429463525403, "grad_norm": 0.4023089110851288, "learning_rate": 7.014362759012206e-05, "loss": 0.0036197923123836516, "step": 105230 }, { "epoch": 29.872267953448766, "grad_norm": 3.956683874130249, "learning_rate": 7.01407891001987e-05, "loss": 0.007113201916217804, "step": 105240 }, { "epoch": 29.875106443372125, "grad_norm": 0.1666334718465805, "learning_rate": 7.013795061027533e-05, "loss": 0.011134114861488343, "step": 105250 }, { "epoch": 29.87794493329549, "grad_norm": 3.045342206954956, "learning_rate": 7.013511212035198e-05, "loss": 0.014592207968235016, "step": 105260 }, { "epoch": 29.880783423218848, "grad_norm": 4.169530868530273, "learning_rate": 7.013227363042862e-05, "loss": 0.005603998899459839, "step": 105270 }, { "epoch": 29.883621913142207, "grad_norm": 1.0987203121185303, "learning_rate": 7.012943514050524e-05, "loss": 0.011438168585300446, "step": 105280 }, { "epoch": 29.88646040306557, "grad_norm": 8.558287620544434, "learning_rate": 7.012659665058189e-05, "loss": 0.0048771630972623825, "step": 105290 }, { "epoch": 29.88929889298893, "grad_norm": 0.1910807192325592, "learning_rate": 7.012375816065854e-05, "loss": 0.007158765196800232, "step": 105300 }, { "epoch": 29.892137382912292, "grad_norm": 12.604031562805176, "learning_rate": 7.012091967073517e-05, "loss": 0.02035667896270752, "step": 105310 }, { "epoch": 29.89497587283565, "grad_norm": 0.136419415473938, "learning_rate": 7.011808118081181e-05, "loss": 0.006935358792543411, "step": 105320 }, { "epoch": 29.89781436275901, "grad_norm": 3.6841089725494385, "learning_rate": 7.011524269088845e-05, "loss": 0.012233049422502518, "step": 105330 }, { "epoch": 29.900652852682374, "grad_norm": 5.590911865234375, "learning_rate": 7.011240420096509e-05, "loss": 0.007571963220834732, "step": 105340 }, { "epoch": 29.903491342605733, "grad_norm": 1.7996746301651, "learning_rate": 7.010956571104172e-05, "loss": 0.0033944204449653627, "step": 105350 }, { "epoch": 29.906329832529096, "grad_norm": 0.05662470683455467, "learning_rate": 7.010672722111838e-05, "loss": 0.008465706557035445, "step": 105360 }, { "epoch": 29.909168322452455, "grad_norm": 4.030693054199219, "learning_rate": 7.010388873119502e-05, "loss": 0.0069080710411071776, "step": 105370 }, { "epoch": 29.912006812375814, "grad_norm": 16.568635940551758, "learning_rate": 7.010105024127164e-05, "loss": 0.026711615920066833, "step": 105380 }, { "epoch": 29.914845302299177, "grad_norm": 3.9256691932678223, "learning_rate": 7.009821175134829e-05, "loss": 0.013809213042259216, "step": 105390 }, { "epoch": 29.917683792222537, "grad_norm": 0.5532540082931519, "learning_rate": 7.009537326142493e-05, "loss": 0.007177532464265823, "step": 105400 }, { "epoch": 29.9205222821459, "grad_norm": 0.753618597984314, "learning_rate": 7.009253477150156e-05, "loss": 0.0048213407397270204, "step": 105410 }, { "epoch": 29.92336077206926, "grad_norm": 0.06940331310033798, "learning_rate": 7.00896962815782e-05, "loss": 0.0029305074363946914, "step": 105420 }, { "epoch": 29.92619926199262, "grad_norm": 5.825138568878174, "learning_rate": 7.008685779165485e-05, "loss": 0.011952961981296539, "step": 105430 }, { "epoch": 29.92903775191598, "grad_norm": 0.7064264416694641, "learning_rate": 7.008401930173148e-05, "loss": 0.0056862208992242815, "step": 105440 }, { "epoch": 29.93187624183934, "grad_norm": 0.05675726756453514, "learning_rate": 7.008118081180812e-05, "loss": 0.004222647100687027, "step": 105450 }, { "epoch": 29.934714731762703, "grad_norm": 5.888077259063721, "learning_rate": 7.007834232188476e-05, "loss": 0.005757405608892441, "step": 105460 }, { "epoch": 29.937553221686063, "grad_norm": 9.987832069396973, "learning_rate": 7.00755038319614e-05, "loss": 0.010487261414527892, "step": 105470 }, { "epoch": 29.940391711609422, "grad_norm": 0.5974326133728027, "learning_rate": 7.007266534203803e-05, "loss": 0.0030100906267762186, "step": 105480 }, { "epoch": 29.943230201532785, "grad_norm": 6.271804332733154, "learning_rate": 7.006982685211469e-05, "loss": 0.004383539408445358, "step": 105490 }, { "epoch": 29.946068691456144, "grad_norm": 0.05634259805083275, "learning_rate": 7.006698836219133e-05, "loss": 0.003014201112091541, "step": 105500 }, { "epoch": 29.946068691456144, "eval_accuracy": 0.9731671647485216, "eval_loss": 0.086981400847435, "eval_runtime": 32.0241, "eval_samples_per_second": 491.1, "eval_steps_per_second": 7.682, "step": 105500 }, { "epoch": 29.948907181379507, "grad_norm": 6.278529644012451, "learning_rate": 7.006414987226796e-05, "loss": 0.003655734285712242, "step": 105510 }, { "epoch": 29.951745671302866, "grad_norm": 2.368325710296631, "learning_rate": 7.00613113823446e-05, "loss": 0.01915167421102524, "step": 105520 }, { "epoch": 29.95458416122623, "grad_norm": 0.7736899852752686, "learning_rate": 7.005847289242124e-05, "loss": 0.015487337112426757, "step": 105530 }, { "epoch": 29.95742265114959, "grad_norm": 7.630155563354492, "learning_rate": 7.005563440249787e-05, "loss": 0.004491440951824188, "step": 105540 }, { "epoch": 29.960261141072948, "grad_norm": 1.8255661725997925, "learning_rate": 7.005279591257451e-05, "loss": 0.004574146866798401, "step": 105550 }, { "epoch": 29.96309963099631, "grad_norm": 14.693599700927734, "learning_rate": 7.004995742265116e-05, "loss": 0.016626988351345063, "step": 105560 }, { "epoch": 29.96593812091967, "grad_norm": 0.3312087953090668, "learning_rate": 7.004711893272779e-05, "loss": 0.0026948297396302222, "step": 105570 }, { "epoch": 29.968776610843033, "grad_norm": 5.3750319480896, "learning_rate": 7.004428044280443e-05, "loss": 0.00822022631764412, "step": 105580 }, { "epoch": 29.971615100766392, "grad_norm": 1.4358543157577515, "learning_rate": 7.004144195288107e-05, "loss": 0.01368415355682373, "step": 105590 }, { "epoch": 29.974453590689752, "grad_norm": 0.16050013899803162, "learning_rate": 7.003860346295771e-05, "loss": 0.007451897859573365, "step": 105600 }, { "epoch": 29.977292080613115, "grad_norm": 10.538488388061523, "learning_rate": 7.003576497303434e-05, "loss": 0.00491114854812622, "step": 105610 }, { "epoch": 29.980130570536474, "grad_norm": 6.043573379516602, "learning_rate": 7.003292648311098e-05, "loss": 0.004744678735733032, "step": 105620 }, { "epoch": 29.982969060459837, "grad_norm": 0.26893484592437744, "learning_rate": 7.003008799318764e-05, "loss": 0.013608326017856599, "step": 105630 }, { "epoch": 29.985807550383196, "grad_norm": 0.4437483549118042, "learning_rate": 7.002724950326427e-05, "loss": 0.027003341913223268, "step": 105640 }, { "epoch": 29.988646040306556, "grad_norm": 1.630883812904358, "learning_rate": 7.002441101334091e-05, "loss": 0.01748095899820328, "step": 105650 }, { "epoch": 29.99148453022992, "grad_norm": 1.0774277448654175, "learning_rate": 7.002157252341755e-05, "loss": 0.013404801487922668, "step": 105660 }, { "epoch": 29.994323020153278, "grad_norm": 10.68116283416748, "learning_rate": 7.001873403349418e-05, "loss": 0.007102926820516586, "step": 105670 }, { "epoch": 29.99716151007664, "grad_norm": 14.528780937194824, "learning_rate": 7.001589554357082e-05, "loss": 0.009707969427108765, "step": 105680 }, { "epoch": 30.0, "grad_norm": 0.1280197948217392, "learning_rate": 7.001305705364747e-05, "loss": 0.0049740664660930635, "step": 105690 }, { "epoch": 30.00283848992336, "grad_norm": 2.396388292312622, "learning_rate": 7.00102185637241e-05, "loss": 0.003515026718378067, "step": 105700 }, { "epoch": 30.005676979846722, "grad_norm": 0.1878899484872818, "learning_rate": 7.000738007380074e-05, "loss": 0.0049481231719255446, "step": 105710 }, { "epoch": 30.00851546977008, "grad_norm": 1.3411624431610107, "learning_rate": 7.000454158387738e-05, "loss": 0.016996750235557558, "step": 105720 }, { "epoch": 30.011353959693444, "grad_norm": 1.184022068977356, "learning_rate": 7.000170309395403e-05, "loss": 0.008787652850151062, "step": 105730 }, { "epoch": 30.014192449616804, "grad_norm": 1.0792043209075928, "learning_rate": 6.999886460403065e-05, "loss": 0.0060198113322258, "step": 105740 }, { "epoch": 30.017030939540163, "grad_norm": 0.22205938398838043, "learning_rate": 6.99960261141073e-05, "loss": 0.007967585325241089, "step": 105750 }, { "epoch": 30.019869429463526, "grad_norm": 1.0031933784484863, "learning_rate": 6.999318762418394e-05, "loss": 0.0023270612582564354, "step": 105760 }, { "epoch": 30.022707919386885, "grad_norm": 1.9731190204620361, "learning_rate": 6.999034913426058e-05, "loss": 0.006109318137168885, "step": 105770 }, { "epoch": 30.025546409310248, "grad_norm": 0.1088927835226059, "learning_rate": 6.998751064433722e-05, "loss": 0.005002518370747566, "step": 105780 }, { "epoch": 30.028384899233608, "grad_norm": 2.598339319229126, "learning_rate": 6.998467215441386e-05, "loss": 0.008690781891345978, "step": 105790 }, { "epoch": 30.031223389156967, "grad_norm": 0.371311217546463, "learning_rate": 6.998183366449049e-05, "loss": 0.00646035298705101, "step": 105800 }, { "epoch": 30.03406187908033, "grad_norm": 1.5478878021240234, "learning_rate": 6.997899517456713e-05, "loss": 0.0044115521013736725, "step": 105810 }, { "epoch": 30.03690036900369, "grad_norm": 0.035102955996990204, "learning_rate": 6.997615668464377e-05, "loss": 0.004960698634386062, "step": 105820 }, { "epoch": 30.039738858927052, "grad_norm": 11.09414291381836, "learning_rate": 6.997331819472041e-05, "loss": 0.01140763759613037, "step": 105830 }, { "epoch": 30.04257734885041, "grad_norm": 1.037131905555725, "learning_rate": 6.997047970479705e-05, "loss": 0.0031506657600402833, "step": 105840 }, { "epoch": 30.04541583877377, "grad_norm": 0.20512822270393372, "learning_rate": 6.99676412148737e-05, "loss": 0.00498289093375206, "step": 105850 }, { "epoch": 30.048254328697134, "grad_norm": 0.9733025431632996, "learning_rate": 6.996480272495032e-05, "loss": 0.006488683819770813, "step": 105860 }, { "epoch": 30.051092818620493, "grad_norm": 3.6543848514556885, "learning_rate": 6.996196423502696e-05, "loss": 0.011569606512784958, "step": 105870 }, { "epoch": 30.053931308543856, "grad_norm": 1.5402642488479614, "learning_rate": 6.99591257451036e-05, "loss": 0.0052514314651489254, "step": 105880 }, { "epoch": 30.056769798467215, "grad_norm": 0.5969558954238892, "learning_rate": 6.995628725518025e-05, "loss": 0.004439448192715645, "step": 105890 }, { "epoch": 30.059608288390578, "grad_norm": 2.308130979537964, "learning_rate": 6.995344876525689e-05, "loss": 0.009626329690217972, "step": 105900 }, { "epoch": 30.062446778313937, "grad_norm": 4.41294527053833, "learning_rate": 6.995061027533353e-05, "loss": 0.017955288290977478, "step": 105910 }, { "epoch": 30.065285268237297, "grad_norm": 0.8770581483840942, "learning_rate": 6.994777178541017e-05, "loss": 0.010635729134082793, "step": 105920 }, { "epoch": 30.06812375816066, "grad_norm": 0.449901282787323, "learning_rate": 6.99449332954868e-05, "loss": 0.0016405627131462098, "step": 105930 }, { "epoch": 30.07096224808402, "grad_norm": 8.81567096710205, "learning_rate": 6.994209480556344e-05, "loss": 0.002677132375538349, "step": 105940 }, { "epoch": 30.07380073800738, "grad_norm": 0.08543285727500916, "learning_rate": 6.993925631564008e-05, "loss": 0.008950378000736236, "step": 105950 }, { "epoch": 30.07663922793074, "grad_norm": 0.2524518370628357, "learning_rate": 6.993641782571672e-05, "loss": 0.002813626453280449, "step": 105960 }, { "epoch": 30.0794777178541, "grad_norm": 0.47767505049705505, "learning_rate": 6.993357933579336e-05, "loss": 0.0023371132090687754, "step": 105970 }, { "epoch": 30.082316207777463, "grad_norm": 0.7387546300888062, "learning_rate": 6.993074084587e-05, "loss": 0.009980396181344987, "step": 105980 }, { "epoch": 30.085154697700823, "grad_norm": 0.49040329456329346, "learning_rate": 6.992790235594663e-05, "loss": 0.023836809396743774, "step": 105990 }, { "epoch": 30.087993187624186, "grad_norm": 0.2203972041606903, "learning_rate": 6.992506386602327e-05, "loss": 0.0026527363806962967, "step": 106000 }, { "epoch": 30.087993187624186, "eval_accuracy": 0.972722070324919, "eval_loss": 0.0928550735116005, "eval_runtime": 31.2649, "eval_samples_per_second": 503.025, "eval_steps_per_second": 7.868, "step": 106000 }, { "epoch": 30.090831677547545, "grad_norm": 3.8761022090911865, "learning_rate": 6.992222537609992e-05, "loss": 0.006042758002877235, "step": 106010 }, { "epoch": 30.093670167470904, "grad_norm": 0.7080287337303162, "learning_rate": 6.991938688617656e-05, "loss": 0.00552593469619751, "step": 106020 }, { "epoch": 30.096508657394267, "grad_norm": 2.840165376663208, "learning_rate": 6.99165483962532e-05, "loss": 0.019345279037952422, "step": 106030 }, { "epoch": 30.099347147317626, "grad_norm": 1.2198984622955322, "learning_rate": 6.991370990632984e-05, "loss": 0.0035472460091114043, "step": 106040 }, { "epoch": 30.10218563724099, "grad_norm": 2.3342795372009277, "learning_rate": 6.991087141640648e-05, "loss": 0.005064839497208595, "step": 106050 }, { "epoch": 30.10502412716435, "grad_norm": 1.139198660850525, "learning_rate": 6.990803292648311e-05, "loss": 0.003509926795959473, "step": 106060 }, { "epoch": 30.107862617087708, "grad_norm": 4.902596473693848, "learning_rate": 6.990519443655975e-05, "loss": 0.004718238115310669, "step": 106070 }, { "epoch": 30.11070110701107, "grad_norm": 0.13578669726848602, "learning_rate": 6.990235594663639e-05, "loss": 0.004302560538053513, "step": 106080 }, { "epoch": 30.11353959693443, "grad_norm": 1.3112000226974487, "learning_rate": 6.989951745671303e-05, "loss": 0.004853302240371704, "step": 106090 }, { "epoch": 30.116378086857793, "grad_norm": 0.3177511394023895, "learning_rate": 6.989667896678967e-05, "loss": 0.011783809959888458, "step": 106100 }, { "epoch": 30.119216576781152, "grad_norm": 0.5152670741081238, "learning_rate": 6.989384047686632e-05, "loss": 0.010212457180023194, "step": 106110 }, { "epoch": 30.12205506670451, "grad_norm": 0.783198356628418, "learning_rate": 6.989100198694294e-05, "loss": 0.005921649187803269, "step": 106120 }, { "epoch": 30.124893556627875, "grad_norm": 0.011008935049176216, "learning_rate": 6.988816349701959e-05, "loss": 0.007097382843494415, "step": 106130 }, { "epoch": 30.127732046551234, "grad_norm": 1.8573511838912964, "learning_rate": 6.988532500709623e-05, "loss": 0.00810546949505806, "step": 106140 }, { "epoch": 30.130570536474597, "grad_norm": 1.0891584157943726, "learning_rate": 6.988248651717287e-05, "loss": 0.003433910757303238, "step": 106150 }, { "epoch": 30.133409026397956, "grad_norm": 0.09283721446990967, "learning_rate": 6.987964802724951e-05, "loss": 0.008321655541658401, "step": 106160 }, { "epoch": 30.136247516321315, "grad_norm": 3.6078662872314453, "learning_rate": 6.987680953732615e-05, "loss": 0.0042728234082460405, "step": 106170 }, { "epoch": 30.13908600624468, "grad_norm": 0.5276238918304443, "learning_rate": 6.987397104740279e-05, "loss": 0.008118095993995666, "step": 106180 }, { "epoch": 30.141924496168038, "grad_norm": 4.638086795806885, "learning_rate": 6.987113255747942e-05, "loss": 0.009959150850772858, "step": 106190 }, { "epoch": 30.1447629860914, "grad_norm": 0.8666144609451294, "learning_rate": 6.986829406755606e-05, "loss": 0.0019546801224350928, "step": 106200 }, { "epoch": 30.14760147601476, "grad_norm": 1.9906501770019531, "learning_rate": 6.98654555776327e-05, "loss": 0.007720711827278137, "step": 106210 }, { "epoch": 30.15043996593812, "grad_norm": 0.15926460921764374, "learning_rate": 6.986261708770933e-05, "loss": 0.0020163768902421, "step": 106220 }, { "epoch": 30.153278455861482, "grad_norm": 0.05704344063997269, "learning_rate": 6.985977859778599e-05, "loss": 0.0011629033833742142, "step": 106230 }, { "epoch": 30.15611694578484, "grad_norm": 0.4602694511413574, "learning_rate": 6.985694010786263e-05, "loss": 0.004268961399793625, "step": 106240 }, { "epoch": 30.158955435708204, "grad_norm": 15.870415687561035, "learning_rate": 6.985410161793925e-05, "loss": 0.006577381491661071, "step": 106250 }, { "epoch": 30.161793925631564, "grad_norm": 5.653326988220215, "learning_rate": 6.98512631280159e-05, "loss": 0.012542919814586639, "step": 106260 }, { "epoch": 30.164632415554923, "grad_norm": 0.23508703708648682, "learning_rate": 6.984842463809254e-05, "loss": 0.016380329430103303, "step": 106270 }, { "epoch": 30.167470905478286, "grad_norm": 10.603766441345215, "learning_rate": 6.984558614816918e-05, "loss": 0.01786281615495682, "step": 106280 }, { "epoch": 30.170309395401645, "grad_norm": 0.1227574422955513, "learning_rate": 6.984274765824582e-05, "loss": 0.00489969439804554, "step": 106290 }, { "epoch": 30.173147885325008, "grad_norm": 3.113767147064209, "learning_rate": 6.983990916832246e-05, "loss": 0.001922149769961834, "step": 106300 }, { "epoch": 30.175986375248367, "grad_norm": 0.8641319274902344, "learning_rate": 6.98370706783991e-05, "loss": 0.008935289829969406, "step": 106310 }, { "epoch": 30.17882486517173, "grad_norm": 1.948310136795044, "learning_rate": 6.983423218847573e-05, "loss": 0.005337618291378021, "step": 106320 }, { "epoch": 30.18166335509509, "grad_norm": 4.1576247215271, "learning_rate": 6.983139369855237e-05, "loss": 0.005285832658410072, "step": 106330 }, { "epoch": 30.18450184501845, "grad_norm": 0.24573393166065216, "learning_rate": 6.982855520862901e-05, "loss": 0.004684130847454071, "step": 106340 }, { "epoch": 30.187340334941812, "grad_norm": 11.290939331054688, "learning_rate": 6.982571671870564e-05, "loss": 0.017245198786258697, "step": 106350 }, { "epoch": 30.19017882486517, "grad_norm": 0.8905802965164185, "learning_rate": 6.98228782287823e-05, "loss": 0.003093671053647995, "step": 106360 }, { "epoch": 30.193017314788534, "grad_norm": 0.2338159680366516, "learning_rate": 6.982003973885894e-05, "loss": 0.008088739216327667, "step": 106370 }, { "epoch": 30.195855804711893, "grad_norm": 0.6568982005119324, "learning_rate": 6.981720124893557e-05, "loss": 0.003516152501106262, "step": 106380 }, { "epoch": 30.198694294635253, "grad_norm": 0.2358493208885193, "learning_rate": 6.981436275901221e-05, "loss": 0.0026024546474218368, "step": 106390 }, { "epoch": 30.201532784558616, "grad_norm": 2.402280330657959, "learning_rate": 6.981152426908885e-05, "loss": 0.00999762788414955, "step": 106400 }, { "epoch": 30.204371274481975, "grad_norm": 0.05988219380378723, "learning_rate": 6.980868577916549e-05, "loss": 0.009985704720020295, "step": 106410 }, { "epoch": 30.207209764405338, "grad_norm": 6.06828498840332, "learning_rate": 6.980584728924212e-05, "loss": 0.01427840143442154, "step": 106420 }, { "epoch": 30.210048254328697, "grad_norm": 4.138000965118408, "learning_rate": 6.980300879931877e-05, "loss": 0.0058008283376693726, "step": 106430 }, { "epoch": 30.212886744252057, "grad_norm": 0.35986533761024475, "learning_rate": 6.980017030939541e-05, "loss": 0.009734107553958893, "step": 106440 }, { "epoch": 30.21572523417542, "grad_norm": 0.9744632840156555, "learning_rate": 6.979733181947204e-05, "loss": 0.005061643570661545, "step": 106450 }, { "epoch": 30.21856372409878, "grad_norm": 0.09392717480659485, "learning_rate": 6.979449332954868e-05, "loss": 0.004758042097091675, "step": 106460 }, { "epoch": 30.22140221402214, "grad_norm": 3.729412078857422, "learning_rate": 6.979165483962532e-05, "loss": 0.0031662628054618834, "step": 106470 }, { "epoch": 30.2242407039455, "grad_norm": 6.4889302253723145, "learning_rate": 6.978881634970195e-05, "loss": 0.010526859760284423, "step": 106480 }, { "epoch": 30.22707919386886, "grad_norm": 0.206144779920578, "learning_rate": 6.978597785977861e-05, "loss": 0.0030444245785474777, "step": 106490 }, { "epoch": 30.229917683792223, "grad_norm": 0.44263148307800293, "learning_rate": 6.978313936985525e-05, "loss": 0.0020123934373259544, "step": 106500 }, { "epoch": 30.229917683792223, "eval_accuracy": 0.9743116932663572, "eval_loss": 0.09001249074935913, "eval_runtime": 31.7375, "eval_samples_per_second": 495.534, "eval_steps_per_second": 7.751, "step": 106500 }, { "epoch": 30.232756173715583, "grad_norm": 1.2730681896209717, "learning_rate": 6.978030087993188e-05, "loss": 0.007269562780857086, "step": 106510 }, { "epoch": 30.235594663638945, "grad_norm": 1.1230195760726929, "learning_rate": 6.977746239000852e-05, "loss": 0.0027793947607278823, "step": 106520 }, { "epoch": 30.238433153562305, "grad_norm": 9.606831550598145, "learning_rate": 6.977462390008516e-05, "loss": 0.01779661625623703, "step": 106530 }, { "epoch": 30.241271643485664, "grad_norm": 0.4312793016433716, "learning_rate": 6.97717854101618e-05, "loss": 0.003860405087471008, "step": 106540 }, { "epoch": 30.244110133409027, "grad_norm": 0.9846163988113403, "learning_rate": 6.976894692023843e-05, "loss": 0.012633754312992096, "step": 106550 }, { "epoch": 30.246948623332386, "grad_norm": 0.9642807245254517, "learning_rate": 6.976610843031508e-05, "loss": 0.009551146626472473, "step": 106560 }, { "epoch": 30.24978711325575, "grad_norm": 6.0422868728637695, "learning_rate": 6.976326994039171e-05, "loss": 0.010451701283454896, "step": 106570 }, { "epoch": 30.25262560317911, "grad_norm": 1.267256498336792, "learning_rate": 6.976043145046835e-05, "loss": 0.011924322694540024, "step": 106580 }, { "epoch": 30.255464093102468, "grad_norm": 0.03753344714641571, "learning_rate": 6.9757592960545e-05, "loss": 0.004644815623760223, "step": 106590 }, { "epoch": 30.25830258302583, "grad_norm": 0.05368761345744133, "learning_rate": 6.975475447062164e-05, "loss": 0.008215417712926864, "step": 106600 }, { "epoch": 30.26114107294919, "grad_norm": 0.12343554943799973, "learning_rate": 6.975191598069826e-05, "loss": 0.010541865229606628, "step": 106610 }, { "epoch": 30.263979562872553, "grad_norm": 0.11323529481887817, "learning_rate": 6.97490774907749e-05, "loss": 0.009044089913368225, "step": 106620 }, { "epoch": 30.266818052795912, "grad_norm": 0.32372793555259705, "learning_rate": 6.974623900085156e-05, "loss": 0.004682055860757828, "step": 106630 }, { "epoch": 30.26965654271927, "grad_norm": 0.4746091067790985, "learning_rate": 6.974340051092819e-05, "loss": 0.00407271534204483, "step": 106640 }, { "epoch": 30.272495032642635, "grad_norm": 0.26943066716194153, "learning_rate": 6.974056202100483e-05, "loss": 0.0013801522552967071, "step": 106650 }, { "epoch": 30.275333522565994, "grad_norm": 21.439119338989258, "learning_rate": 6.973772353108147e-05, "loss": 0.015241830050945282, "step": 106660 }, { "epoch": 30.278172012489357, "grad_norm": 1.1437351703643799, "learning_rate": 6.973488504115811e-05, "loss": 0.008684326708316804, "step": 106670 }, { "epoch": 30.281010502412716, "grad_norm": 0.7628901600837708, "learning_rate": 6.973204655123474e-05, "loss": 0.005086052417755127, "step": 106680 }, { "epoch": 30.283848992336075, "grad_norm": 0.06304983049631119, "learning_rate": 6.97292080613114e-05, "loss": 0.0066182568669319155, "step": 106690 }, { "epoch": 30.28668748225944, "grad_norm": 0.5394207835197449, "learning_rate": 6.972636957138802e-05, "loss": 0.008395110815763473, "step": 106700 }, { "epoch": 30.289525972182798, "grad_norm": 6.3293328285217285, "learning_rate": 6.972353108146466e-05, "loss": 0.007367582619190216, "step": 106710 }, { "epoch": 30.29236446210616, "grad_norm": 0.8390308618545532, "learning_rate": 6.97206925915413e-05, "loss": 0.01210056096315384, "step": 106720 }, { "epoch": 30.29520295202952, "grad_norm": 13.101326942443848, "learning_rate": 6.971785410161795e-05, "loss": 0.008845705538988113, "step": 106730 }, { "epoch": 30.298041441952883, "grad_norm": 3.6768155097961426, "learning_rate": 6.971501561169457e-05, "loss": 0.008067777752876282, "step": 106740 }, { "epoch": 30.300879931876242, "grad_norm": 0.10059312731027603, "learning_rate": 6.971217712177122e-05, "loss": 0.002944110706448555, "step": 106750 }, { "epoch": 30.3037184217996, "grad_norm": 4.790853023529053, "learning_rate": 6.970933863184787e-05, "loss": 0.0054957568645477295, "step": 106760 }, { "epoch": 30.306556911722964, "grad_norm": 3.790133237838745, "learning_rate": 6.97065001419245e-05, "loss": 0.008353428542613983, "step": 106770 }, { "epoch": 30.309395401646324, "grad_norm": 0.8537731766700745, "learning_rate": 6.970366165200114e-05, "loss": 0.02075006812810898, "step": 106780 }, { "epoch": 30.312233891569687, "grad_norm": 0.6488693356513977, "learning_rate": 6.970082316207778e-05, "loss": 0.00684410035610199, "step": 106790 }, { "epoch": 30.315072381493046, "grad_norm": 14.015950202941895, "learning_rate": 6.969798467215441e-05, "loss": 0.014066335558891297, "step": 106800 }, { "epoch": 30.317910871416405, "grad_norm": 1.2011818885803223, "learning_rate": 6.969514618223105e-05, "loss": 0.0031739674508571626, "step": 106810 }, { "epoch": 30.320749361339768, "grad_norm": 0.09802905470132828, "learning_rate": 6.96923076923077e-05, "loss": 0.009791883826255798, "step": 106820 }, { "epoch": 30.323587851263127, "grad_norm": 0.366552472114563, "learning_rate": 6.968946920238433e-05, "loss": 0.005262854322791099, "step": 106830 }, { "epoch": 30.32642634118649, "grad_norm": 0.14670521020889282, "learning_rate": 6.968663071246097e-05, "loss": 0.0035857915878295898, "step": 106840 }, { "epoch": 30.32926483110985, "grad_norm": 1.46879243850708, "learning_rate": 6.968379222253762e-05, "loss": 0.002786737121641636, "step": 106850 }, { "epoch": 30.33210332103321, "grad_norm": 14.968185424804688, "learning_rate": 6.968095373261426e-05, "loss": 0.014249712228775024, "step": 106860 }, { "epoch": 30.334941810956572, "grad_norm": 15.365283012390137, "learning_rate": 6.967811524269088e-05, "loss": 0.013446538150310517, "step": 106870 }, { "epoch": 30.33778030087993, "grad_norm": 3.4720499515533447, "learning_rate": 6.967527675276753e-05, "loss": 0.0047527171671390535, "step": 106880 }, { "epoch": 30.340618790803294, "grad_norm": 14.858259201049805, "learning_rate": 6.967243826284418e-05, "loss": 0.009643489867448807, "step": 106890 }, { "epoch": 30.343457280726653, "grad_norm": 0.49035152792930603, "learning_rate": 6.966959977292081e-05, "loss": 0.01388126313686371, "step": 106900 }, { "epoch": 30.346295770650013, "grad_norm": 0.1488294154405594, "learning_rate": 6.966676128299745e-05, "loss": 0.003382474184036255, "step": 106910 }, { "epoch": 30.349134260573376, "grad_norm": 0.8586639165878296, "learning_rate": 6.966392279307409e-05, "loss": 0.0041621245443820955, "step": 106920 }, { "epoch": 30.351972750496735, "grad_norm": 0.21107231080532074, "learning_rate": 6.966108430315072e-05, "loss": 0.0048301398754119875, "step": 106930 }, { "epoch": 30.354811240420098, "grad_norm": 0.08656536042690277, "learning_rate": 6.965824581322736e-05, "loss": 0.008052317053079605, "step": 106940 }, { "epoch": 30.357649730343457, "grad_norm": 0.718498706817627, "learning_rate": 6.9655407323304e-05, "loss": 0.006357775628566742, "step": 106950 }, { "epoch": 30.360488220266816, "grad_norm": 1.1139857769012451, "learning_rate": 6.965256883338064e-05, "loss": 0.006194550544023514, "step": 106960 }, { "epoch": 30.36332671019018, "grad_norm": 0.044830795377492905, "learning_rate": 6.964973034345728e-05, "loss": 0.025629836320877075, "step": 106970 }, { "epoch": 30.36616520011354, "grad_norm": 2.819146156311035, "learning_rate": 6.964689185353393e-05, "loss": 0.007767321914434433, "step": 106980 }, { "epoch": 30.3690036900369, "grad_norm": 2.2762677669525146, "learning_rate": 6.964405336361057e-05, "loss": 0.006254389137029648, "step": 106990 }, { "epoch": 30.37184217996026, "grad_norm": 0.17382320761680603, "learning_rate": 6.96412148736872e-05, "loss": 0.0013229331001639367, "step": 107000 }, { "epoch": 30.37184217996026, "eval_accuracy": 0.9684618808418644, "eval_loss": 0.10519951581954956, "eval_runtime": 31.7689, "eval_samples_per_second": 495.043, "eval_steps_per_second": 7.743, "step": 107000 }, { "epoch": 30.37468066988362, "grad_norm": 1.1010828018188477, "learning_rate": 6.963837638376384e-05, "loss": 0.002672521583735943, "step": 107010 }, { "epoch": 30.377519159806983, "grad_norm": 0.24397417902946472, "learning_rate": 6.963553789384049e-05, "loss": 0.011796991527080535, "step": 107020 }, { "epoch": 30.380357649730342, "grad_norm": 2.6973917484283447, "learning_rate": 6.963269940391712e-05, "loss": 0.005751124024391175, "step": 107030 }, { "epoch": 30.383196139653705, "grad_norm": 0.49739399552345276, "learning_rate": 6.962986091399376e-05, "loss": 0.008355898410081863, "step": 107040 }, { "epoch": 30.386034629577065, "grad_norm": 0.17412716150283813, "learning_rate": 6.96270224240704e-05, "loss": 0.006954370439052582, "step": 107050 }, { "epoch": 30.388873119500424, "grad_norm": 0.30756106972694397, "learning_rate": 6.962418393414703e-05, "loss": 0.006160261109471321, "step": 107060 }, { "epoch": 30.391711609423787, "grad_norm": 0.26673129200935364, "learning_rate": 6.962134544422367e-05, "loss": 0.005115195363759995, "step": 107070 }, { "epoch": 30.394550099347146, "grad_norm": 5.067689895629883, "learning_rate": 6.961850695430031e-05, "loss": 0.005962491407990456, "step": 107080 }, { "epoch": 30.39738858927051, "grad_norm": 2.64968204498291, "learning_rate": 6.961566846437695e-05, "loss": 0.003581015020608902, "step": 107090 }, { "epoch": 30.40022707919387, "grad_norm": 3.2901649475097656, "learning_rate": 6.96128299744536e-05, "loss": 0.008079561591148376, "step": 107100 }, { "epoch": 30.40306556911723, "grad_norm": 11.252418518066406, "learning_rate": 6.960999148453024e-05, "loss": 0.007207413762807846, "step": 107110 }, { "epoch": 30.40590405904059, "grad_norm": 0.093897745013237, "learning_rate": 6.960715299460688e-05, "loss": 0.0017349829897284508, "step": 107120 }, { "epoch": 30.40874254896395, "grad_norm": 0.22159616649150848, "learning_rate": 6.96043145046835e-05, "loss": 0.006933601945638657, "step": 107130 }, { "epoch": 30.411581038887313, "grad_norm": 4.26894998550415, "learning_rate": 6.960147601476015e-05, "loss": 0.012655366957187653, "step": 107140 }, { "epoch": 30.414419528810672, "grad_norm": 0.9774757623672485, "learning_rate": 6.959863752483679e-05, "loss": 0.010772609710693359, "step": 107150 }, { "epoch": 30.417258018734035, "grad_norm": 0.05760965496301651, "learning_rate": 6.959579903491343e-05, "loss": 0.00941249281167984, "step": 107160 }, { "epoch": 30.420096508657394, "grad_norm": 0.2402833253145218, "learning_rate": 6.959296054499007e-05, "loss": 0.004643053561449051, "step": 107170 }, { "epoch": 30.422934998580754, "grad_norm": 0.2679772675037384, "learning_rate": 6.959012205506671e-05, "loss": 0.004228454828262329, "step": 107180 }, { "epoch": 30.425773488504117, "grad_norm": 0.06389496475458145, "learning_rate": 6.958728356514334e-05, "loss": 0.009680062532424927, "step": 107190 }, { "epoch": 30.428611978427476, "grad_norm": 0.20385506749153137, "learning_rate": 6.958444507521998e-05, "loss": 0.003939318656921387, "step": 107200 }, { "epoch": 30.43145046835084, "grad_norm": 0.24842417240142822, "learning_rate": 6.958160658529662e-05, "loss": 0.00550590455532074, "step": 107210 }, { "epoch": 30.434288958274198, "grad_norm": 12.713326454162598, "learning_rate": 6.957876809537326e-05, "loss": 0.012340693920850753, "step": 107220 }, { "epoch": 30.437127448197558, "grad_norm": 4.94619083404541, "learning_rate": 6.95759296054499e-05, "loss": 0.005766192823648453, "step": 107230 }, { "epoch": 30.43996593812092, "grad_norm": 0.2717863917350769, "learning_rate": 6.957309111552655e-05, "loss": 0.006558281928300857, "step": 107240 }, { "epoch": 30.44280442804428, "grad_norm": 1.4751020669937134, "learning_rate": 6.957025262560319e-05, "loss": 0.0044717960059642795, "step": 107250 }, { "epoch": 30.445642917967643, "grad_norm": 0.2662867307662964, "learning_rate": 6.956741413567982e-05, "loss": 0.004062313959002495, "step": 107260 }, { "epoch": 30.448481407891002, "grad_norm": 7.888032913208008, "learning_rate": 6.956457564575646e-05, "loss": 0.009430086612701416, "step": 107270 }, { "epoch": 30.45131989781436, "grad_norm": 0.8077722787857056, "learning_rate": 6.95617371558331e-05, "loss": 0.003736356645822525, "step": 107280 }, { "epoch": 30.454158387737724, "grad_norm": 0.28179091215133667, "learning_rate": 6.955889866590974e-05, "loss": 0.004337352514266968, "step": 107290 }, { "epoch": 30.456996877661084, "grad_norm": 0.07351366430521011, "learning_rate": 6.955606017598638e-05, "loss": 0.007281792908906936, "step": 107300 }, { "epoch": 30.459835367584446, "grad_norm": 0.7152825593948364, "learning_rate": 6.955322168606302e-05, "loss": 0.011105398088693619, "step": 107310 }, { "epoch": 30.462673857507806, "grad_norm": 8.032296180725098, "learning_rate": 6.955038319613965e-05, "loss": 0.006558387726545334, "step": 107320 }, { "epoch": 30.465512347431165, "grad_norm": 0.055718906223773956, "learning_rate": 6.954754470621629e-05, "loss": 0.007521789520978928, "step": 107330 }, { "epoch": 30.468350837354528, "grad_norm": 1.203129768371582, "learning_rate": 6.954470621629293e-05, "loss": 0.004936205223202705, "step": 107340 }, { "epoch": 30.471189327277887, "grad_norm": 0.5501689910888672, "learning_rate": 6.954186772636958e-05, "loss": 0.002939385548233986, "step": 107350 }, { "epoch": 30.47402781720125, "grad_norm": 0.2961748242378235, "learning_rate": 6.953902923644622e-05, "loss": 0.006236843764781952, "step": 107360 }, { "epoch": 30.47686630712461, "grad_norm": 1.6446514129638672, "learning_rate": 6.953619074652286e-05, "loss": 0.004973116517066956, "step": 107370 }, { "epoch": 30.47970479704797, "grad_norm": 9.791794776916504, "learning_rate": 6.95333522565995e-05, "loss": 0.009722109884023666, "step": 107380 }, { "epoch": 30.48254328697133, "grad_norm": 9.704020500183105, "learning_rate": 6.953051376667613e-05, "loss": 0.002977362275123596, "step": 107390 }, { "epoch": 30.48538177689469, "grad_norm": 6.575239658355713, "learning_rate": 6.952767527675277e-05, "loss": 0.0028316739946603773, "step": 107400 }, { "epoch": 30.488220266818054, "grad_norm": 0.3652830719947815, "learning_rate": 6.952483678682941e-05, "loss": 0.002178196795284748, "step": 107410 }, { "epoch": 30.491058756741413, "grad_norm": 1.559021234512329, "learning_rate": 6.952199829690605e-05, "loss": 0.004749777540564537, "step": 107420 }, { "epoch": 30.493897246664773, "grad_norm": 0.36734530329704285, "learning_rate": 6.951915980698269e-05, "loss": 0.029553133249282836, "step": 107430 }, { "epoch": 30.496735736588136, "grad_norm": 1.6299757957458496, "learning_rate": 6.951632131705933e-05, "loss": 0.023069895803928375, "step": 107440 }, { "epoch": 30.499574226511495, "grad_norm": 2.6239864826202393, "learning_rate": 6.951348282713596e-05, "loss": 0.014397625625133515, "step": 107450 }, { "epoch": 30.502412716434858, "grad_norm": 3.03676176071167, "learning_rate": 6.95106443372126e-05, "loss": 0.01890326291322708, "step": 107460 }, { "epoch": 30.505251206358217, "grad_norm": 16.67452049255371, "learning_rate": 6.950780584728925e-05, "loss": 0.011852525174617767, "step": 107470 }, { "epoch": 30.50808969628158, "grad_norm": 0.45365962386131287, "learning_rate": 6.950496735736589e-05, "loss": 0.01840576231479645, "step": 107480 }, { "epoch": 30.51092818620494, "grad_norm": 0.7891889810562134, "learning_rate": 6.950241271643487e-05, "loss": 0.014406821131706238, "step": 107490 }, { "epoch": 30.5137666761283, "grad_norm": 0.30091020464897156, "learning_rate": 6.94995742265115e-05, "loss": 0.008165588229894638, "step": 107500 }, { "epoch": 30.5137666761283, "eval_accuracy": 0.972976409995549, "eval_loss": 0.09441757947206497, "eval_runtime": 31.896, "eval_samples_per_second": 493.071, "eval_steps_per_second": 7.713, "step": 107500 }, { "epoch": 30.51660516605166, "grad_norm": 0.29898929595947266, "learning_rate": 6.949673573658814e-05, "loss": 0.006231321394443512, "step": 107510 }, { "epoch": 30.51944365597502, "grad_norm": 13.883907318115234, "learning_rate": 6.949389724666478e-05, "loss": 0.02149525284767151, "step": 107520 }, { "epoch": 30.522282145898384, "grad_norm": 1.4167956113815308, "learning_rate": 6.949105875674142e-05, "loss": 0.010841847956180572, "step": 107530 }, { "epoch": 30.525120635821743, "grad_norm": 0.19293639063835144, "learning_rate": 6.948822026681806e-05, "loss": 0.03395317494869232, "step": 107540 }, { "epoch": 30.527959125745102, "grad_norm": 4.602264881134033, "learning_rate": 6.94853817768947e-05, "loss": 0.027455607056617738, "step": 107550 }, { "epoch": 30.530797615668465, "grad_norm": 2.570601463317871, "learning_rate": 6.948254328697133e-05, "loss": 0.00917329341173172, "step": 107560 }, { "epoch": 30.533636105591825, "grad_norm": 9.291815757751465, "learning_rate": 6.947970479704797e-05, "loss": 0.016403791308403016, "step": 107570 }, { "epoch": 30.536474595515188, "grad_norm": 17.26911163330078, "learning_rate": 6.947686630712461e-05, "loss": 0.019516144692897797, "step": 107580 }, { "epoch": 30.539313085438547, "grad_norm": 0.5098200440406799, "learning_rate": 6.947402781720125e-05, "loss": 0.01340814381837845, "step": 107590 }, { "epoch": 30.542151575361906, "grad_norm": 2.5358641147613525, "learning_rate": 6.94711893272779e-05, "loss": 0.014878882467746735, "step": 107600 }, { "epoch": 30.54499006528527, "grad_norm": 2.153550386428833, "learning_rate": 6.946835083735454e-05, "loss": 0.005201190710067749, "step": 107610 }, { "epoch": 30.54782855520863, "grad_norm": 8.61658000946045, "learning_rate": 6.946551234743118e-05, "loss": 0.008176913857460022, "step": 107620 }, { "epoch": 30.55066704513199, "grad_norm": 0.9009307026863098, "learning_rate": 6.94626738575078e-05, "loss": 0.007943210005760194, "step": 107630 }, { "epoch": 30.55350553505535, "grad_norm": 0.7712855339050293, "learning_rate": 6.945983536758445e-05, "loss": 0.006427846848964691, "step": 107640 }, { "epoch": 30.55634402497871, "grad_norm": 1.9485176801681519, "learning_rate": 6.945699687766109e-05, "loss": 0.00943184643983841, "step": 107650 }, { "epoch": 30.559182514902073, "grad_norm": 0.13528601825237274, "learning_rate": 6.945415838773772e-05, "loss": 0.00398874245584011, "step": 107660 }, { "epoch": 30.562021004825432, "grad_norm": 4.162069797515869, "learning_rate": 6.945131989781437e-05, "loss": 0.0052512742578983305, "step": 107670 }, { "epoch": 30.564859494748795, "grad_norm": 0.6233922839164734, "learning_rate": 6.944848140789101e-05, "loss": 0.0026118399575352667, "step": 107680 }, { "epoch": 30.567697984672154, "grad_norm": 0.3950548470020294, "learning_rate": 6.944564291796764e-05, "loss": 0.01069374978542328, "step": 107690 }, { "epoch": 30.570536474595514, "grad_norm": 2.6571319103240967, "learning_rate": 6.944280442804428e-05, "loss": 0.00518101379275322, "step": 107700 }, { "epoch": 30.573374964518877, "grad_norm": 0.1842406690120697, "learning_rate": 6.943996593812092e-05, "loss": 0.006627582758665085, "step": 107710 }, { "epoch": 30.576213454442236, "grad_norm": 0.6428417563438416, "learning_rate": 6.943712744819756e-05, "loss": 0.004073525220155716, "step": 107720 }, { "epoch": 30.5790519443656, "grad_norm": 1.0376739501953125, "learning_rate": 6.943428895827419e-05, "loss": 0.007203148305416107, "step": 107730 }, { "epoch": 30.581890434288958, "grad_norm": 0.8839228749275208, "learning_rate": 6.943145046835085e-05, "loss": 0.0031544677913188934, "step": 107740 }, { "epoch": 30.584728924212317, "grad_norm": 0.24821701645851135, "learning_rate": 6.942861197842749e-05, "loss": 0.010313296318054199, "step": 107750 }, { "epoch": 30.58756741413568, "grad_norm": 0.30817911028862, "learning_rate": 6.942577348850412e-05, "loss": 0.00305517241358757, "step": 107760 }, { "epoch": 30.59040590405904, "grad_norm": 0.1983669251203537, "learning_rate": 6.942293499858076e-05, "loss": 0.0030417123809456824, "step": 107770 }, { "epoch": 30.593244393982403, "grad_norm": 0.060916002839803696, "learning_rate": 6.94200965086574e-05, "loss": 0.006653342396020889, "step": 107780 }, { "epoch": 30.596082883905762, "grad_norm": 9.371432304382324, "learning_rate": 6.941725801873403e-05, "loss": 0.011831273138523103, "step": 107790 }, { "epoch": 30.59892137382912, "grad_norm": 2.3389103412628174, "learning_rate": 6.941441952881068e-05, "loss": 0.012470263242721557, "step": 107800 }, { "epoch": 30.601759863752484, "grad_norm": 1.4401503801345825, "learning_rate": 6.941158103888732e-05, "loss": 0.006735991686582565, "step": 107810 }, { "epoch": 30.604598353675843, "grad_norm": 5.884941577911377, "learning_rate": 6.940874254896395e-05, "loss": 0.01686566472053528, "step": 107820 }, { "epoch": 30.607436843599206, "grad_norm": 0.4924392104148865, "learning_rate": 6.940590405904059e-05, "loss": 0.013892973959445953, "step": 107830 }, { "epoch": 30.610275333522566, "grad_norm": 4.052254676818848, "learning_rate": 6.940306556911723e-05, "loss": 0.01976458877325058, "step": 107840 }, { "epoch": 30.61311382344593, "grad_norm": 0.05058027431368828, "learning_rate": 6.940022707919387e-05, "loss": 0.003322318196296692, "step": 107850 }, { "epoch": 30.615952313369288, "grad_norm": 11.225318908691406, "learning_rate": 6.93973885892705e-05, "loss": 0.016716179251670838, "step": 107860 }, { "epoch": 30.618790803292647, "grad_norm": 3.4344892501831055, "learning_rate": 6.939455009934716e-05, "loss": 0.028201863169670105, "step": 107870 }, { "epoch": 30.62162929321601, "grad_norm": 0.6744145750999451, "learning_rate": 6.93917116094238e-05, "loss": 0.009844663739204406, "step": 107880 }, { "epoch": 30.62446778313937, "grad_norm": 0.9063137173652649, "learning_rate": 6.938887311950043e-05, "loss": 0.007519689947366714, "step": 107890 }, { "epoch": 30.627306273062732, "grad_norm": 0.0644942969083786, "learning_rate": 6.938603462957707e-05, "loss": 0.0030784472823143006, "step": 107900 }, { "epoch": 30.63014476298609, "grad_norm": 0.17795416712760925, "learning_rate": 6.938319613965371e-05, "loss": 0.00314871184527874, "step": 107910 }, { "epoch": 30.63298325290945, "grad_norm": 2.768763780593872, "learning_rate": 6.938035764973034e-05, "loss": 0.01097598671913147, "step": 107920 }, { "epoch": 30.635821742832814, "grad_norm": 1.2334949970245361, "learning_rate": 6.937751915980698e-05, "loss": 0.006962595880031586, "step": 107930 }, { "epoch": 30.638660232756173, "grad_norm": 2.8110365867614746, "learning_rate": 6.937468066988363e-05, "loss": 0.00794026106595993, "step": 107940 }, { "epoch": 30.641498722679536, "grad_norm": 3.5465445518493652, "learning_rate": 6.937184217996026e-05, "loss": 0.008417610824108124, "step": 107950 }, { "epoch": 30.644337212602895, "grad_norm": 0.07554007321596146, "learning_rate": 6.93690036900369e-05, "loss": 0.009914899617433548, "step": 107960 }, { "epoch": 30.647175702526255, "grad_norm": 0.6295055747032166, "learning_rate": 6.936616520011354e-05, "loss": 0.0012453913688659668, "step": 107970 }, { "epoch": 30.650014192449618, "grad_norm": 8.498252868652344, "learning_rate": 6.936332671019019e-05, "loss": 0.00815141424536705, "step": 107980 }, { "epoch": 30.652852682372977, "grad_norm": 0.23362283408641815, "learning_rate": 6.936048822026681e-05, "loss": 0.0049035467207431795, "step": 107990 }, { "epoch": 30.65569117229634, "grad_norm": 1.8322539329528809, "learning_rate": 6.935764973034347e-05, "loss": 0.002581584453582764, "step": 108000 }, { "epoch": 30.65569117229634, "eval_accuracy": 0.9760284860431105, "eval_loss": 0.08486516773700714, "eval_runtime": 32.7396, "eval_samples_per_second": 480.366, "eval_steps_per_second": 7.514, "step": 108000 }, { "epoch": 30.6585296622197, "grad_norm": 0.3404947519302368, "learning_rate": 6.935481124042011e-05, "loss": 0.010818032175302505, "step": 108010 }, { "epoch": 30.66136815214306, "grad_norm": 0.24520830810070038, "learning_rate": 6.935197275049674e-05, "loss": 0.00346316397190094, "step": 108020 }, { "epoch": 30.66420664206642, "grad_norm": 0.7958977818489075, "learning_rate": 6.934913426057338e-05, "loss": 0.007261732965707779, "step": 108030 }, { "epoch": 30.66704513198978, "grad_norm": 9.968762397766113, "learning_rate": 6.934629577065002e-05, "loss": 0.007016778737306595, "step": 108040 }, { "epoch": 30.669883621913144, "grad_norm": 2.3097550868988037, "learning_rate": 6.934345728072665e-05, "loss": 0.006440217792987824, "step": 108050 }, { "epoch": 30.672722111836503, "grad_norm": 0.1297004222869873, "learning_rate": 6.934061879080329e-05, "loss": 0.005982833728194237, "step": 108060 }, { "epoch": 30.675560601759862, "grad_norm": 1.8034029006958008, "learning_rate": 6.933778030087994e-05, "loss": 0.0028621893376111983, "step": 108070 }, { "epoch": 30.678399091683225, "grad_norm": 7.865789890289307, "learning_rate": 6.933494181095657e-05, "loss": 0.005302838236093521, "step": 108080 }, { "epoch": 30.681237581606585, "grad_norm": 4.873446941375732, "learning_rate": 6.933210332103321e-05, "loss": 0.007929010689258576, "step": 108090 }, { "epoch": 30.684076071529947, "grad_norm": 0.32345637679100037, "learning_rate": 6.932926483110985e-05, "loss": 0.006076682358980179, "step": 108100 }, { "epoch": 30.686914561453307, "grad_norm": 9.707159042358398, "learning_rate": 6.93264263411865e-05, "loss": 0.009933063387870788, "step": 108110 }, { "epoch": 30.689753051376666, "grad_norm": 0.2627275586128235, "learning_rate": 6.932358785126312e-05, "loss": 0.003286503255367279, "step": 108120 }, { "epoch": 30.69259154130003, "grad_norm": 0.389260858297348, "learning_rate": 6.932074936133977e-05, "loss": 0.03746757209300995, "step": 108130 }, { "epoch": 30.69543003122339, "grad_norm": 1.2788050174713135, "learning_rate": 6.931791087141642e-05, "loss": 0.0080003023147583, "step": 108140 }, { "epoch": 30.69826852114675, "grad_norm": 0.8984013199806213, "learning_rate": 6.931507238149305e-05, "loss": 0.010008556395769119, "step": 108150 }, { "epoch": 30.70110701107011, "grad_norm": 5.575248718261719, "learning_rate": 6.931223389156969e-05, "loss": 0.009293365478515624, "step": 108160 }, { "epoch": 30.70394550099347, "grad_norm": 6.0118584632873535, "learning_rate": 6.930939540164633e-05, "loss": 0.005042307451367379, "step": 108170 }, { "epoch": 30.706783990916833, "grad_norm": 0.4735325574874878, "learning_rate": 6.930655691172296e-05, "loss": 0.011386523395776749, "step": 108180 }, { "epoch": 30.709622480840192, "grad_norm": 15.680233001708984, "learning_rate": 6.93037184217996e-05, "loss": 0.017141708731651308, "step": 108190 }, { "epoch": 30.712460970763555, "grad_norm": 1.390152096748352, "learning_rate": 6.930087993187625e-05, "loss": 0.012368039041757584, "step": 108200 }, { "epoch": 30.715299460686914, "grad_norm": 0.25055795907974243, "learning_rate": 6.929804144195288e-05, "loss": 0.007332011312246323, "step": 108210 }, { "epoch": 30.718137950610274, "grad_norm": 0.04950626194477081, "learning_rate": 6.929520295202952e-05, "loss": 0.0061017446219921116, "step": 108220 }, { "epoch": 30.720976440533637, "grad_norm": 7.89745569229126, "learning_rate": 6.929236446210617e-05, "loss": 0.010081503540277481, "step": 108230 }, { "epoch": 30.723814930456996, "grad_norm": 15.37328052520752, "learning_rate": 6.928952597218281e-05, "loss": 0.012599964439868928, "step": 108240 }, { "epoch": 30.72665342038036, "grad_norm": 0.20166586339473724, "learning_rate": 6.928668748225943e-05, "loss": 0.0031342938542366027, "step": 108250 }, { "epoch": 30.729491910303718, "grad_norm": 4.87388277053833, "learning_rate": 6.928384899233608e-05, "loss": 0.005464524775743484, "step": 108260 }, { "epoch": 30.732330400227077, "grad_norm": 0.41176560521125793, "learning_rate": 6.928101050241273e-05, "loss": 0.004394324496388435, "step": 108270 }, { "epoch": 30.73516889015044, "grad_norm": 1.5845898389816284, "learning_rate": 6.927817201248936e-05, "loss": 0.016941487789154053, "step": 108280 }, { "epoch": 30.7380073800738, "grad_norm": 3.9480695724487305, "learning_rate": 6.9275333522566e-05, "loss": 0.004984812065958977, "step": 108290 }, { "epoch": 30.740845869997163, "grad_norm": 0.4992816746234894, "learning_rate": 6.927249503264264e-05, "loss": 0.006943334639072418, "step": 108300 }, { "epoch": 30.743684359920522, "grad_norm": 0.29689881205558777, "learning_rate": 6.926965654271927e-05, "loss": 0.01232173889875412, "step": 108310 }, { "epoch": 30.746522849843885, "grad_norm": 0.48550698161125183, "learning_rate": 6.926681805279591e-05, "loss": 0.0021306611597537994, "step": 108320 }, { "epoch": 30.749361339767244, "grad_norm": 1.0176149606704712, "learning_rate": 6.926397956287255e-05, "loss": 0.005108620971441269, "step": 108330 }, { "epoch": 30.752199829690603, "grad_norm": 0.7577868700027466, "learning_rate": 6.92611410729492e-05, "loss": 0.004457224905490875, "step": 108340 }, { "epoch": 30.755038319613966, "grad_norm": 0.5519736409187317, "learning_rate": 6.925830258302583e-05, "loss": 0.0028335295617580416, "step": 108350 }, { "epoch": 30.757876809537326, "grad_norm": 0.5229926705360413, "learning_rate": 6.925546409310248e-05, "loss": 0.0112568698823452, "step": 108360 }, { "epoch": 30.76071529946069, "grad_norm": 1.1270365715026855, "learning_rate": 6.925262560317912e-05, "loss": 0.0031514562666416167, "step": 108370 }, { "epoch": 30.763553789384048, "grad_norm": 0.4757707715034485, "learning_rate": 6.924978711325575e-05, "loss": 0.006855834275484085, "step": 108380 }, { "epoch": 30.766392279307407, "grad_norm": 4.540040016174316, "learning_rate": 6.924694862333239e-05, "loss": 0.003883615881204605, "step": 108390 }, { "epoch": 30.76923076923077, "grad_norm": 10.909433364868164, "learning_rate": 6.924411013340903e-05, "loss": 0.011317004263401032, "step": 108400 }, { "epoch": 30.77206925915413, "grad_norm": 1.481785535812378, "learning_rate": 6.924127164348567e-05, "loss": 0.007198366522789002, "step": 108410 }, { "epoch": 30.774907749077492, "grad_norm": 7.005059242248535, "learning_rate": 6.923843315356231e-05, "loss": 0.01086823120713234, "step": 108420 }, { "epoch": 30.77774623900085, "grad_norm": 6.113331317901611, "learning_rate": 6.923559466363895e-05, "loss": 0.009817898273468018, "step": 108430 }, { "epoch": 30.78058472892421, "grad_norm": 6.23679780960083, "learning_rate": 6.923275617371558e-05, "loss": 0.0035656318068504333, "step": 108440 }, { "epoch": 30.783423218847574, "grad_norm": 0.08819983154535294, "learning_rate": 6.922991768379222e-05, "loss": 0.013261735439300537, "step": 108450 }, { "epoch": 30.786261708770933, "grad_norm": 0.16571711003780365, "learning_rate": 6.922707919386886e-05, "loss": 0.0022543082013726234, "step": 108460 }, { "epoch": 30.789100198694296, "grad_norm": 0.45750248432159424, "learning_rate": 6.92242407039455e-05, "loss": 0.009078514575958253, "step": 108470 }, { "epoch": 30.791938688617655, "grad_norm": 0.2267298698425293, "learning_rate": 6.922140221402215e-05, "loss": 0.0027978764846920967, "step": 108480 }, { "epoch": 30.794777178541015, "grad_norm": 0.5764914155006409, "learning_rate": 6.921856372409879e-05, "loss": 0.0023072164505720137, "step": 108490 }, { "epoch": 30.797615668464378, "grad_norm": 0.5392889976501465, "learning_rate": 6.921572523417541e-05, "loss": 0.0019822876900434496, "step": 108500 }, { "epoch": 30.797615668464378, "eval_accuracy": 0.9725949004896038, "eval_loss": 0.09143594652414322, "eval_runtime": 31.8455, "eval_samples_per_second": 493.853, "eval_steps_per_second": 7.725, "step": 108500 }, { "epoch": 30.800454158387737, "grad_norm": 0.3037811517715454, "learning_rate": 6.921288674425206e-05, "loss": 0.004638592898845673, "step": 108510 }, { "epoch": 30.8032926483111, "grad_norm": 0.3628709614276886, "learning_rate": 6.92100482543287e-05, "loss": 0.007847178727388382, "step": 108520 }, { "epoch": 30.80613113823446, "grad_norm": 0.5309145450592041, "learning_rate": 6.920720976440534e-05, "loss": 0.007410767674446106, "step": 108530 }, { "epoch": 30.80896962815782, "grad_norm": 12.710382461547852, "learning_rate": 6.920437127448198e-05, "loss": 0.007016655802726745, "step": 108540 }, { "epoch": 30.81180811808118, "grad_norm": 0.19307665526866913, "learning_rate": 6.920153278455862e-05, "loss": 0.003414461761713028, "step": 108550 }, { "epoch": 30.81464660800454, "grad_norm": 6.276671409606934, "learning_rate": 6.919869429463526e-05, "loss": 0.007214243710041046, "step": 108560 }, { "epoch": 30.817485097927904, "grad_norm": 0.0478070005774498, "learning_rate": 6.919585580471189e-05, "loss": 0.006894369423389435, "step": 108570 }, { "epoch": 30.820323587851263, "grad_norm": 0.6948897242546082, "learning_rate": 6.919301731478853e-05, "loss": 0.012487723678350448, "step": 108580 }, { "epoch": 30.823162077774622, "grad_norm": 0.15150880813598633, "learning_rate": 6.919017882486517e-05, "loss": 0.003312503919005394, "step": 108590 }, { "epoch": 30.826000567697985, "grad_norm": 0.2102944403886795, "learning_rate": 6.918734033494182e-05, "loss": 0.009315693378448486, "step": 108600 }, { "epoch": 30.828839057621344, "grad_norm": 5.4872894287109375, "learning_rate": 6.918450184501846e-05, "loss": 0.012835185229778289, "step": 108610 }, { "epoch": 30.831677547544707, "grad_norm": 3.5935654640197754, "learning_rate": 6.91816633550951e-05, "loss": 0.007239504158496857, "step": 108620 }, { "epoch": 30.834516037468067, "grad_norm": 2.220975637435913, "learning_rate": 6.917882486517173e-05, "loss": 0.003723222389817238, "step": 108630 }, { "epoch": 30.837354527391426, "grad_norm": 1.1570467948913574, "learning_rate": 6.917598637524837e-05, "loss": 0.009163733571767807, "step": 108640 }, { "epoch": 30.84019301731479, "grad_norm": 5.570822238922119, "learning_rate": 6.917314788532501e-05, "loss": 0.005310343578457832, "step": 108650 }, { "epoch": 30.84303150723815, "grad_norm": 1.2894381284713745, "learning_rate": 6.917030939540165e-05, "loss": 0.011832579970359802, "step": 108660 }, { "epoch": 30.84586999716151, "grad_norm": 0.827042281627655, "learning_rate": 6.916747090547829e-05, "loss": 0.013782040774822235, "step": 108670 }, { "epoch": 30.84870848708487, "grad_norm": 3.7277839183807373, "learning_rate": 6.916463241555493e-05, "loss": 0.015850119292736053, "step": 108680 }, { "epoch": 30.851546977008233, "grad_norm": 0.08223532885313034, "learning_rate": 6.916179392563157e-05, "loss": 0.015784841775894166, "step": 108690 }, { "epoch": 30.854385466931593, "grad_norm": 6.215513229370117, "learning_rate": 6.91589554357082e-05, "loss": 0.011094256490468978, "step": 108700 }, { "epoch": 30.857223956854952, "grad_norm": 6.886241912841797, "learning_rate": 6.915611694578484e-05, "loss": 0.00689724013209343, "step": 108710 }, { "epoch": 30.860062446778315, "grad_norm": 1.544342279434204, "learning_rate": 6.915327845586148e-05, "loss": 0.003053635358810425, "step": 108720 }, { "epoch": 30.862900936701674, "grad_norm": 1.0471025705337524, "learning_rate": 6.915043996593813e-05, "loss": 0.008847501873970032, "step": 108730 }, { "epoch": 30.865739426625037, "grad_norm": 0.2514213025569916, "learning_rate": 6.914760147601477e-05, "loss": 0.0037189237773418427, "step": 108740 }, { "epoch": 30.868577916548396, "grad_norm": 0.21771681308746338, "learning_rate": 6.914476298609141e-05, "loss": 0.0018671870231628418, "step": 108750 }, { "epoch": 30.871416406471756, "grad_norm": 0.23573359847068787, "learning_rate": 6.914192449616804e-05, "loss": 0.00984133556485176, "step": 108760 }, { "epoch": 30.87425489639512, "grad_norm": 0.32714954018592834, "learning_rate": 6.913908600624468e-05, "loss": 0.005342333018779755, "step": 108770 }, { "epoch": 30.877093386318478, "grad_norm": 1.5743167400360107, "learning_rate": 6.913624751632132e-05, "loss": 0.008735018968582153, "step": 108780 }, { "epoch": 30.87993187624184, "grad_norm": 3.8770406246185303, "learning_rate": 6.913340902639796e-05, "loss": 0.0037653017789125443, "step": 108790 }, { "epoch": 30.8827703661652, "grad_norm": 0.1317831128835678, "learning_rate": 6.91305705364746e-05, "loss": 0.005857710540294647, "step": 108800 }, { "epoch": 30.88560885608856, "grad_norm": 0.2634664475917816, "learning_rate": 6.912773204655124e-05, "loss": 0.0039584323763847355, "step": 108810 }, { "epoch": 30.888447346011922, "grad_norm": 0.5718883275985718, "learning_rate": 6.912489355662788e-05, "loss": 0.004155462980270386, "step": 108820 }, { "epoch": 30.89128583593528, "grad_norm": 3.588263988494873, "learning_rate": 6.912205506670451e-05, "loss": 0.007106095552444458, "step": 108830 }, { "epoch": 30.894124325858645, "grad_norm": 0.23640671372413635, "learning_rate": 6.911921657678115e-05, "loss": 0.010854107141494752, "step": 108840 }, { "epoch": 30.896962815782004, "grad_norm": 0.3888101875782013, "learning_rate": 6.91163780868578e-05, "loss": 0.01080252230167389, "step": 108850 }, { "epoch": 30.899801305705363, "grad_norm": 2.3882203102111816, "learning_rate": 6.911353959693442e-05, "loss": 0.003141210228204727, "step": 108860 }, { "epoch": 30.902639795628726, "grad_norm": 6.844480514526367, "learning_rate": 6.911070110701108e-05, "loss": 0.0035825975239276884, "step": 108870 }, { "epoch": 30.905478285552086, "grad_norm": 0.12960387766361237, "learning_rate": 6.910786261708772e-05, "loss": 0.003118118643760681, "step": 108880 }, { "epoch": 30.90831677547545, "grad_norm": 7.557692527770996, "learning_rate": 6.910502412716435e-05, "loss": 0.009565930813550949, "step": 108890 }, { "epoch": 30.911155265398808, "grad_norm": 0.8376349806785583, "learning_rate": 6.910218563724099e-05, "loss": 0.004120534658432007, "step": 108900 }, { "epoch": 30.913993755322167, "grad_norm": 12.440242767333984, "learning_rate": 6.909934714731763e-05, "loss": 0.006873039156198501, "step": 108910 }, { "epoch": 30.91683224524553, "grad_norm": 0.580194890499115, "learning_rate": 6.909650865739427e-05, "loss": 0.0025897204875946047, "step": 108920 }, { "epoch": 30.91967073516889, "grad_norm": 1.3408043384552002, "learning_rate": 6.909367016747091e-05, "loss": 0.002481282874941826, "step": 108930 }, { "epoch": 30.922509225092252, "grad_norm": 3.358201265335083, "learning_rate": 6.909083167754755e-05, "loss": 0.0063421323895454405, "step": 108940 }, { "epoch": 30.92534771501561, "grad_norm": 4.337606906890869, "learning_rate": 6.90879931876242e-05, "loss": 0.005390232801437378, "step": 108950 }, { "epoch": 30.92818620493897, "grad_norm": 5.306088924407959, "learning_rate": 6.908515469770082e-05, "loss": 0.006689278036355972, "step": 108960 }, { "epoch": 30.931024694862334, "grad_norm": 1.0455058813095093, "learning_rate": 6.908231620777746e-05, "loss": 0.013076059520244598, "step": 108970 }, { "epoch": 30.933863184785693, "grad_norm": 8.647638320922852, "learning_rate": 6.90794777178541e-05, "loss": 0.01877009719610214, "step": 108980 }, { "epoch": 30.936701674709056, "grad_norm": 1.3454164266586304, "learning_rate": 6.907663922793073e-05, "loss": 0.013234955072402955, "step": 108990 }, { "epoch": 30.939540164632415, "grad_norm": 9.878752708435059, "learning_rate": 6.907380073800739e-05, "loss": 0.006241678074002266, "step": 109000 }, { "epoch": 30.939540164632415, "eval_accuracy": 0.9729128250778916, "eval_loss": 0.09103207290172577, "eval_runtime": 31.5111, "eval_samples_per_second": 499.094, "eval_steps_per_second": 7.807, "step": 109000 }, { "epoch": 30.942378654555775, "grad_norm": 2.9619123935699463, "learning_rate": 6.907096224808403e-05, "loss": 0.010336744785308837, "step": 109010 }, { "epoch": 30.945217144479138, "grad_norm": 8.400907516479492, "learning_rate": 6.906812375816066e-05, "loss": 0.018994046747684477, "step": 109020 }, { "epoch": 30.948055634402497, "grad_norm": 1.5803632736206055, "learning_rate": 6.90652852682373e-05, "loss": 0.006797703355550766, "step": 109030 }, { "epoch": 30.95089412432586, "grad_norm": 0.19018879532814026, "learning_rate": 6.906244677831394e-05, "loss": 0.001114131696522236, "step": 109040 }, { "epoch": 30.95373261424922, "grad_norm": 1.1932650804519653, "learning_rate": 6.905960828839058e-05, "loss": 0.005487564206123352, "step": 109050 }, { "epoch": 30.956571104172582, "grad_norm": 0.868994414806366, "learning_rate": 6.905676979846721e-05, "loss": 0.006269472092390061, "step": 109060 }, { "epoch": 30.95940959409594, "grad_norm": 0.10979997366666794, "learning_rate": 6.905393130854386e-05, "loss": 0.002353961765766144, "step": 109070 }, { "epoch": 30.9622480840193, "grad_norm": 1.49248206615448, "learning_rate": 6.90510928186205e-05, "loss": 0.003878706693649292, "step": 109080 }, { "epoch": 30.965086573942664, "grad_norm": 9.79456901550293, "learning_rate": 6.904825432869713e-05, "loss": 0.011083433032035827, "step": 109090 }, { "epoch": 30.967925063866023, "grad_norm": 6.354611396789551, "learning_rate": 6.904541583877378e-05, "loss": 0.012200254946947098, "step": 109100 }, { "epoch": 30.970763553789386, "grad_norm": 5.914480209350586, "learning_rate": 6.904257734885042e-05, "loss": 0.01154486984014511, "step": 109110 }, { "epoch": 30.973602043712745, "grad_norm": 4.286909580230713, "learning_rate": 6.903973885892704e-05, "loss": 0.024362482130527496, "step": 109120 }, { "epoch": 30.976440533636104, "grad_norm": 11.273080825805664, "learning_rate": 6.90369003690037e-05, "loss": 0.007235788553953171, "step": 109130 }, { "epoch": 30.979279023559467, "grad_norm": 0.5949984192848206, "learning_rate": 6.903406187908034e-05, "loss": 0.0036271050572395324, "step": 109140 }, { "epoch": 30.982117513482827, "grad_norm": 1.39650297164917, "learning_rate": 6.903122338915697e-05, "loss": 0.007727381587028503, "step": 109150 }, { "epoch": 30.98495600340619, "grad_norm": 1.249563455581665, "learning_rate": 6.902838489923361e-05, "loss": 0.0018823854625225067, "step": 109160 }, { "epoch": 30.98779449332955, "grad_norm": 0.5204665660858154, "learning_rate": 6.902554640931025e-05, "loss": 0.005594105646014214, "step": 109170 }, { "epoch": 30.990632983252908, "grad_norm": 13.691636085510254, "learning_rate": 6.902270791938689e-05, "loss": 0.006807877123355866, "step": 109180 }, { "epoch": 30.99347147317627, "grad_norm": 1.05091392993927, "learning_rate": 6.901986942946352e-05, "loss": 0.0050909101963043215, "step": 109190 }, { "epoch": 30.99630996309963, "grad_norm": 0.4603828489780426, "learning_rate": 6.901703093954018e-05, "loss": 0.0030404597520828245, "step": 109200 }, { "epoch": 30.999148453022993, "grad_norm": 4.102952480316162, "learning_rate": 6.901419244961682e-05, "loss": 0.012967592477798462, "step": 109210 }, { "epoch": 31.001986942946353, "grad_norm": 0.3826521635055542, "learning_rate": 6.901135395969344e-05, "loss": 0.012312310189008713, "step": 109220 }, { "epoch": 31.004825432869712, "grad_norm": 0.3739717900753021, "learning_rate": 6.900851546977009e-05, "loss": 0.014077697694301606, "step": 109230 }, { "epoch": 31.007663922793075, "grad_norm": 3.453364133834839, "learning_rate": 6.900567697984673e-05, "loss": 0.011640869081020355, "step": 109240 }, { "epoch": 31.010502412716434, "grad_norm": 3.8308191299438477, "learning_rate": 6.900283848992336e-05, "loss": 0.007118683308362961, "step": 109250 }, { "epoch": 31.013340902639797, "grad_norm": 6.8070807456970215, "learning_rate": 6.9e-05, "loss": 0.011011771857738495, "step": 109260 }, { "epoch": 31.016179392563156, "grad_norm": 4.504818916320801, "learning_rate": 6.899716151007665e-05, "loss": 0.005058494210243225, "step": 109270 }, { "epoch": 31.019017882486516, "grad_norm": 10.797273635864258, "learning_rate": 6.899432302015328e-05, "loss": 0.010744699090719224, "step": 109280 }, { "epoch": 31.02185637240988, "grad_norm": 0.6540567278862, "learning_rate": 6.899148453022992e-05, "loss": 0.004901528358459473, "step": 109290 }, { "epoch": 31.024694862333238, "grad_norm": 0.7021021842956543, "learning_rate": 6.898864604030656e-05, "loss": 0.010893645882606506, "step": 109300 }, { "epoch": 31.0275333522566, "grad_norm": 2.056149482727051, "learning_rate": 6.89858075503832e-05, "loss": 0.0032764200121164324, "step": 109310 }, { "epoch": 31.03037184217996, "grad_norm": 1.4120187759399414, "learning_rate": 6.898296906045983e-05, "loss": 0.009088733047246934, "step": 109320 }, { "epoch": 31.03321033210332, "grad_norm": 0.5974423289299011, "learning_rate": 6.898013057053649e-05, "loss": 0.007695899903774261, "step": 109330 }, { "epoch": 31.036048822026682, "grad_norm": 2.229387044906616, "learning_rate": 6.897729208061311e-05, "loss": 0.0016472173854708671, "step": 109340 }, { "epoch": 31.03888731195004, "grad_norm": 0.854745090007782, "learning_rate": 6.897445359068976e-05, "loss": 0.00120619498193264, "step": 109350 }, { "epoch": 31.041725801873405, "grad_norm": 0.1090780571103096, "learning_rate": 6.89716151007664e-05, "loss": 0.004297414794564247, "step": 109360 }, { "epoch": 31.044564291796764, "grad_norm": 1.188860535621643, "learning_rate": 6.896877661084304e-05, "loss": 0.004518118128180504, "step": 109370 }, { "epoch": 31.047402781720123, "grad_norm": 0.9042288064956665, "learning_rate": 6.896593812091967e-05, "loss": 0.002831179276108742, "step": 109380 }, { "epoch": 31.050241271643486, "grad_norm": 1.4692059755325317, "learning_rate": 6.896309963099631e-05, "loss": 0.0025312095880508424, "step": 109390 }, { "epoch": 31.053079761566845, "grad_norm": 0.167169988155365, "learning_rate": 6.896026114107296e-05, "loss": 0.005077884346246719, "step": 109400 }, { "epoch": 31.05591825149021, "grad_norm": 4.306193828582764, "learning_rate": 6.895742265114959e-05, "loss": 0.007326644659042358, "step": 109410 }, { "epoch": 31.058756741413568, "grad_norm": 2.896209716796875, "learning_rate": 6.895458416122623e-05, "loss": 0.00741128996014595, "step": 109420 }, { "epoch": 31.061595231336927, "grad_norm": 0.09312529116868973, "learning_rate": 6.895174567130287e-05, "loss": 0.0058695215731859205, "step": 109430 }, { "epoch": 31.06443372126029, "grad_norm": 0.24507619440555573, "learning_rate": 6.89489071813795e-05, "loss": 0.0023714618757367132, "step": 109440 }, { "epoch": 31.06727221118365, "grad_norm": 1.8298733234405518, "learning_rate": 6.894606869145614e-05, "loss": 0.004635128378868103, "step": 109450 }, { "epoch": 31.070110701107012, "grad_norm": 4.069555282592773, "learning_rate": 6.894323020153278e-05, "loss": 0.00393756628036499, "step": 109460 }, { "epoch": 31.07294919103037, "grad_norm": 4.072540283203125, "learning_rate": 6.894039171160942e-05, "loss": 0.004617356136441231, "step": 109470 }, { "epoch": 31.075787680953734, "grad_norm": 0.8224989175796509, "learning_rate": 6.893755322168607e-05, "loss": 0.003340653330087662, "step": 109480 }, { "epoch": 31.078626170877094, "grad_norm": 0.061782754957675934, "learning_rate": 6.893471473176271e-05, "loss": 0.0017155501991510391, "step": 109490 }, { "epoch": 31.081464660800453, "grad_norm": 1.8758052587509155, "learning_rate": 6.893187624183935e-05, "loss": 0.006179077550768852, "step": 109500 }, { "epoch": 31.081464660800453, "eval_accuracy": 0.9671265975710561, "eval_loss": 0.11372263729572296, "eval_runtime": 31.2977, "eval_samples_per_second": 502.497, "eval_steps_per_second": 7.86, "step": 109500 }, { "epoch": 31.084303150723816, "grad_norm": 0.04453524947166443, "learning_rate": 6.892903775191598e-05, "loss": 0.004898843914270401, "step": 109510 }, { "epoch": 31.087141640647175, "grad_norm": 0.5262728929519653, "learning_rate": 6.892619926199262e-05, "loss": 0.0020381802693009376, "step": 109520 }, { "epoch": 31.089980130570538, "grad_norm": 0.6505150198936462, "learning_rate": 6.892336077206927e-05, "loss": 0.003230230510234833, "step": 109530 }, { "epoch": 31.092818620493897, "grad_norm": 8.726950645446777, "learning_rate": 6.89205222821459e-05, "loss": 0.004477530345320701, "step": 109540 }, { "epoch": 31.095657110417257, "grad_norm": 6.587113857269287, "learning_rate": 6.891768379222254e-05, "loss": 0.0033368490636348723, "step": 109550 }, { "epoch": 31.09849560034062, "grad_norm": 3.4111487865448, "learning_rate": 6.891484530229918e-05, "loss": 0.004448067769408226, "step": 109560 }, { "epoch": 31.10133409026398, "grad_norm": 9.678326606750488, "learning_rate": 6.891200681237581e-05, "loss": 0.0057912968099117276, "step": 109570 }, { "epoch": 31.104172580187342, "grad_norm": 0.32832619547843933, "learning_rate": 6.890916832245245e-05, "loss": 0.015910378098487853, "step": 109580 }, { "epoch": 31.1070110701107, "grad_norm": 0.5947364568710327, "learning_rate": 6.89063298325291e-05, "loss": 0.003208599239587784, "step": 109590 }, { "epoch": 31.10984956003406, "grad_norm": 0.36792829632759094, "learning_rate": 6.890349134260574e-05, "loss": 0.00662335455417633, "step": 109600 }, { "epoch": 31.112688049957423, "grad_norm": 0.19571690261363983, "learning_rate": 6.890065285268238e-05, "loss": 0.0040141865611076355, "step": 109610 }, { "epoch": 31.115526539880783, "grad_norm": 0.11512776464223862, "learning_rate": 6.889781436275902e-05, "loss": 0.014331360161304475, "step": 109620 }, { "epoch": 31.118365029804146, "grad_norm": 10.446988105773926, "learning_rate": 6.889497587283566e-05, "loss": 0.01079077273607254, "step": 109630 }, { "epoch": 31.121203519727505, "grad_norm": 1.0412211418151855, "learning_rate": 6.889213738291229e-05, "loss": 0.001989450864493847, "step": 109640 }, { "epoch": 31.124042009650864, "grad_norm": 0.4010404646396637, "learning_rate": 6.888929889298893e-05, "loss": 0.002150782383978367, "step": 109650 }, { "epoch": 31.126880499574227, "grad_norm": 0.5337924361228943, "learning_rate": 6.888646040306557e-05, "loss": 0.0021382132545113564, "step": 109660 }, { "epoch": 31.129718989497587, "grad_norm": 0.34064602851867676, "learning_rate": 6.888362191314221e-05, "loss": 0.003113337233662605, "step": 109670 }, { "epoch": 31.13255747942095, "grad_norm": 0.26821035146713257, "learning_rate": 6.888078342321885e-05, "loss": 0.0013156134635210037, "step": 109680 }, { "epoch": 31.13539596934431, "grad_norm": 0.3603932559490204, "learning_rate": 6.88779449332955e-05, "loss": 0.013469606637954712, "step": 109690 }, { "epoch": 31.138234459267668, "grad_norm": 0.2603108584880829, "learning_rate": 6.887510644337212e-05, "loss": 0.0018426725640892983, "step": 109700 }, { "epoch": 31.14107294919103, "grad_norm": 0.03381152078509331, "learning_rate": 6.887226795344876e-05, "loss": 0.0016591455787420273, "step": 109710 }, { "epoch": 31.14391143911439, "grad_norm": 1.1416629552841187, "learning_rate": 6.88694294635254e-05, "loss": 0.001323341391980648, "step": 109720 }, { "epoch": 31.146749929037753, "grad_norm": 0.07575463503599167, "learning_rate": 6.886659097360205e-05, "loss": 0.004193737357854843, "step": 109730 }, { "epoch": 31.149588418961113, "grad_norm": 1.0716884136199951, "learning_rate": 6.886375248367869e-05, "loss": 0.00312422476708889, "step": 109740 }, { "epoch": 31.152426908884472, "grad_norm": 13.743085861206055, "learning_rate": 6.886091399375533e-05, "loss": 0.009309540688991546, "step": 109750 }, { "epoch": 31.155265398807835, "grad_norm": 0.9184790253639221, "learning_rate": 6.885807550383197e-05, "loss": 0.007169279456138611, "step": 109760 }, { "epoch": 31.158103888731194, "grad_norm": 0.03757747262716293, "learning_rate": 6.88552370139086e-05, "loss": 0.011610660701990128, "step": 109770 }, { "epoch": 31.160942378654557, "grad_norm": 1.7496674060821533, "learning_rate": 6.885239852398524e-05, "loss": 0.007120468467473984, "step": 109780 }, { "epoch": 31.163780868577916, "grad_norm": 0.11229296773672104, "learning_rate": 6.884956003406188e-05, "loss": 0.003583366423845291, "step": 109790 }, { "epoch": 31.166619358501276, "grad_norm": 1.0766123533248901, "learning_rate": 6.884672154413852e-05, "loss": 0.006882242858409882, "step": 109800 }, { "epoch": 31.16945784842464, "grad_norm": 0.09572646021842957, "learning_rate": 6.884388305421516e-05, "loss": 0.0033465396612882615, "step": 109810 }, { "epoch": 31.172296338347998, "grad_norm": 0.6458649635314941, "learning_rate": 6.88410445642918e-05, "loss": 0.004496787860989571, "step": 109820 }, { "epoch": 31.17513482827136, "grad_norm": 0.2917235791683197, "learning_rate": 6.883820607436843e-05, "loss": 0.0022122312337160112, "step": 109830 }, { "epoch": 31.17797331819472, "grad_norm": 1.432818055152893, "learning_rate": 6.883536758444507e-05, "loss": 0.014394460618495942, "step": 109840 }, { "epoch": 31.18081180811808, "grad_norm": 0.40243399143218994, "learning_rate": 6.883252909452172e-05, "loss": 0.017813853919506073, "step": 109850 }, { "epoch": 31.183650298041442, "grad_norm": 0.10484733432531357, "learning_rate": 6.882969060459836e-05, "loss": 0.009522389620542526, "step": 109860 }, { "epoch": 31.1864887879648, "grad_norm": 1.1457058191299438, "learning_rate": 6.8826852114675e-05, "loss": 0.003856068104505539, "step": 109870 }, { "epoch": 31.189327277888165, "grad_norm": 0.12661395967006683, "learning_rate": 6.882401362475164e-05, "loss": 0.011374379694461822, "step": 109880 }, { "epoch": 31.192165767811524, "grad_norm": 1.457999348640442, "learning_rate": 6.882117513482828e-05, "loss": 0.00343543142080307, "step": 109890 }, { "epoch": 31.195004257734887, "grad_norm": 1.8819317817687988, "learning_rate": 6.881833664490491e-05, "loss": 0.006346265971660614, "step": 109900 }, { "epoch": 31.197842747658246, "grad_norm": 15.678508758544922, "learning_rate": 6.881549815498155e-05, "loss": 0.008815167099237442, "step": 109910 }, { "epoch": 31.200681237581605, "grad_norm": 0.033606600016355515, "learning_rate": 6.881265966505819e-05, "loss": 0.0013654526323080064, "step": 109920 }, { "epoch": 31.20351972750497, "grad_norm": 0.2993066608905792, "learning_rate": 6.880982117513483e-05, "loss": 0.004777495563030243, "step": 109930 }, { "epoch": 31.206358217428328, "grad_norm": 0.47858014702796936, "learning_rate": 6.880698268521147e-05, "loss": 0.01005203053355217, "step": 109940 }, { "epoch": 31.20919670735169, "grad_norm": 6.438141345977783, "learning_rate": 6.880414419528812e-05, "loss": 0.003955466300249099, "step": 109950 }, { "epoch": 31.21203519727505, "grad_norm": 0.46179136633872986, "learning_rate": 6.880130570536474e-05, "loss": 0.003509771078824997, "step": 109960 }, { "epoch": 31.21487368719841, "grad_norm": 1.218802571296692, "learning_rate": 6.879846721544139e-05, "loss": 0.00413697436451912, "step": 109970 }, { "epoch": 31.217712177121772, "grad_norm": 0.936011016368866, "learning_rate": 6.879562872551803e-05, "loss": 0.003762988746166229, "step": 109980 }, { "epoch": 31.22055066704513, "grad_norm": 0.21286432445049286, "learning_rate": 6.879279023559467e-05, "loss": 0.0033331245183944704, "step": 109990 }, { "epoch": 31.223389156968494, "grad_norm": 5.036059856414795, "learning_rate": 6.878995174567131e-05, "loss": 0.004612860083580017, "step": 110000 }, { "epoch": 31.223389156968494, "eval_accuracy": 0.9720862211483436, "eval_loss": 0.09537944197654724, "eval_runtime": 32.1478, "eval_samples_per_second": 489.209, "eval_steps_per_second": 7.652, "step": 110000 }, { "epoch": 31.226227646891854, "grad_norm": 0.325810045003891, "learning_rate": 6.878711325574795e-05, "loss": 0.005835617333650589, "step": 110010 }, { "epoch": 31.229066136815213, "grad_norm": 3.817789316177368, "learning_rate": 6.878427476582459e-05, "loss": 0.006984712183475494, "step": 110020 }, { "epoch": 31.231904626738576, "grad_norm": 0.7725151181221008, "learning_rate": 6.878143627590122e-05, "loss": 0.00346316397190094, "step": 110030 }, { "epoch": 31.234743116661935, "grad_norm": 3.954335927963257, "learning_rate": 6.877859778597786e-05, "loss": 0.0041181530803442, "step": 110040 }, { "epoch": 31.237581606585298, "grad_norm": 2.2062954902648926, "learning_rate": 6.87757592960545e-05, "loss": 0.011963410675525666, "step": 110050 }, { "epoch": 31.240420096508657, "grad_norm": 0.5785346031188965, "learning_rate": 6.877292080613114e-05, "loss": 0.007881715893745422, "step": 110060 }, { "epoch": 31.243258586432017, "grad_norm": 1.0986262559890747, "learning_rate": 6.877008231620779e-05, "loss": 0.004345748573541641, "step": 110070 }, { "epoch": 31.24609707635538, "grad_norm": 0.11184180527925491, "learning_rate": 6.876724382628443e-05, "loss": 0.0021884990856051445, "step": 110080 }, { "epoch": 31.24893556627874, "grad_norm": 0.5900527834892273, "learning_rate": 6.876440533636105e-05, "loss": 0.00928773283958435, "step": 110090 }, { "epoch": 31.251774056202102, "grad_norm": 0.3124460279941559, "learning_rate": 6.87615668464377e-05, "loss": 0.00806880071759224, "step": 110100 }, { "epoch": 31.25461254612546, "grad_norm": 0.2854596674442291, "learning_rate": 6.875872835651434e-05, "loss": 0.021215544641017915, "step": 110110 }, { "epoch": 31.25745103604882, "grad_norm": 13.262415885925293, "learning_rate": 6.875588986659098e-05, "loss": 0.007200486958026886, "step": 110120 }, { "epoch": 31.260289525972183, "grad_norm": 0.34954404830932617, "learning_rate": 6.875305137666762e-05, "loss": 0.01924653798341751, "step": 110130 }, { "epoch": 31.263128015895543, "grad_norm": 0.680980920791626, "learning_rate": 6.875021288674426e-05, "loss": 0.0025742601603269575, "step": 110140 }, { "epoch": 31.265966505818906, "grad_norm": 0.5135800242424011, "learning_rate": 6.87473743968209e-05, "loss": 0.011292635649442672, "step": 110150 }, { "epoch": 31.268804995742265, "grad_norm": 0.3259054720401764, "learning_rate": 6.874453590689753e-05, "loss": 0.0019381167367100716, "step": 110160 }, { "epoch": 31.271643485665624, "grad_norm": 0.1391202211380005, "learning_rate": 6.874169741697417e-05, "loss": 0.0026612916961312296, "step": 110170 }, { "epoch": 31.274481975588987, "grad_norm": 0.08063234388828278, "learning_rate": 6.873885892705081e-05, "loss": 0.00681048184633255, "step": 110180 }, { "epoch": 31.277320465512346, "grad_norm": 4.031476974487305, "learning_rate": 6.873602043712744e-05, "loss": 0.004949242621660232, "step": 110190 }, { "epoch": 31.28015895543571, "grad_norm": 0.2691288888454437, "learning_rate": 6.87331819472041e-05, "loss": 0.010847274214029312, "step": 110200 }, { "epoch": 31.28299744535907, "grad_norm": 1.322691559791565, "learning_rate": 6.873034345728074e-05, "loss": 0.005416613817214966, "step": 110210 }, { "epoch": 31.285835935282428, "grad_norm": 0.07893897593021393, "learning_rate": 6.872750496735737e-05, "loss": 0.0030096346512436868, "step": 110220 }, { "epoch": 31.28867442520579, "grad_norm": 1.8029356002807617, "learning_rate": 6.872495032642635e-05, "loss": 0.01636136770248413, "step": 110230 }, { "epoch": 31.29151291512915, "grad_norm": 0.17369554936885834, "learning_rate": 6.872211183650299e-05, "loss": 0.002129828371107578, "step": 110240 }, { "epoch": 31.294351405052513, "grad_norm": 0.037399448454380035, "learning_rate": 6.871927334657963e-05, "loss": 0.011778134852647781, "step": 110250 }, { "epoch": 31.297189894975872, "grad_norm": 1.994122862815857, "learning_rate": 6.871643485665627e-05, "loss": 0.0034459367394447326, "step": 110260 }, { "epoch": 31.300028384899235, "grad_norm": 0.6011873483657837, "learning_rate": 6.87135963667329e-05, "loss": 0.0038243308663368225, "step": 110270 }, { "epoch": 31.302866874822595, "grad_norm": 3.11600399017334, "learning_rate": 6.871075787680954e-05, "loss": 0.009884706884622573, "step": 110280 }, { "epoch": 31.305705364745954, "grad_norm": 8.012303352355957, "learning_rate": 6.870791938688618e-05, "loss": 0.007906264811754226, "step": 110290 }, { "epoch": 31.308543854669317, "grad_norm": 1.3132743835449219, "learning_rate": 6.870508089696281e-05, "loss": 0.005196478217840195, "step": 110300 }, { "epoch": 31.311382344592676, "grad_norm": 1.0238229036331177, "learning_rate": 6.870224240703946e-05, "loss": 0.015683819353580476, "step": 110310 }, { "epoch": 31.31422083451604, "grad_norm": 0.6103878617286682, "learning_rate": 6.86994039171161e-05, "loss": 0.00277481023222208, "step": 110320 }, { "epoch": 31.3170593244394, "grad_norm": 0.2669052183628082, "learning_rate": 6.869656542719273e-05, "loss": 0.0042536847293376924, "step": 110330 }, { "epoch": 31.319897814362758, "grad_norm": 9.841928482055664, "learning_rate": 6.869372693726937e-05, "loss": 0.008023180067539215, "step": 110340 }, { "epoch": 31.32273630428612, "grad_norm": 8.319967269897461, "learning_rate": 6.869088844734601e-05, "loss": 0.0038462065160274504, "step": 110350 }, { "epoch": 31.32557479420948, "grad_norm": 3.77957820892334, "learning_rate": 6.868804995742266e-05, "loss": 0.01015644371509552, "step": 110360 }, { "epoch": 31.328413284132843, "grad_norm": 0.8644422888755798, "learning_rate": 6.868521146749928e-05, "loss": 0.006607858836650849, "step": 110370 }, { "epoch": 31.331251774056202, "grad_norm": 1.9416636228561401, "learning_rate": 6.868237297757594e-05, "loss": 0.002058306522667408, "step": 110380 }, { "epoch": 31.33409026397956, "grad_norm": 2.39371919631958, "learning_rate": 6.867953448765258e-05, "loss": 0.008337047696113587, "step": 110390 }, { "epoch": 31.336928753902924, "grad_norm": 0.9490315914154053, "learning_rate": 6.867669599772921e-05, "loss": 0.003460328280925751, "step": 110400 }, { "epoch": 31.339767243826284, "grad_norm": 1.3871650695800781, "learning_rate": 6.867385750780585e-05, "loss": 0.008450895547866821, "step": 110410 }, { "epoch": 31.342605733749647, "grad_norm": 7.065817356109619, "learning_rate": 6.867101901788249e-05, "loss": 0.009674198925495148, "step": 110420 }, { "epoch": 31.345444223673006, "grad_norm": 5.321462631225586, "learning_rate": 6.866818052795912e-05, "loss": 0.0072591789066791534, "step": 110430 }, { "epoch": 31.348282713596365, "grad_norm": 0.13979879021644592, "learning_rate": 6.866534203803577e-05, "loss": 0.009426911175251008, "step": 110440 }, { "epoch": 31.351121203519728, "grad_norm": 5.368879795074463, "learning_rate": 6.866250354811241e-05, "loss": 0.005856205895543099, "step": 110450 }, { "epoch": 31.353959693443088, "grad_norm": 2.286088466644287, "learning_rate": 6.865966505818904e-05, "loss": 0.002498668059706688, "step": 110460 }, { "epoch": 31.35679818336645, "grad_norm": 0.03411642089486122, "learning_rate": 6.865682656826568e-05, "loss": 0.009229302406311035, "step": 110470 }, { "epoch": 31.35963667328981, "grad_norm": 0.13410453498363495, "learning_rate": 6.865398807834233e-05, "loss": 0.003095555305480957, "step": 110480 }, { "epoch": 31.36247516321317, "grad_norm": 8.528481483459473, "learning_rate": 6.865114958841897e-05, "loss": 0.00674191266298294, "step": 110490 }, { "epoch": 31.365313653136532, "grad_norm": 1.5840367078781128, "learning_rate": 6.86483110984956e-05, "loss": 0.002378770150244236, "step": 110500 }, { "epoch": 31.365313653136532, "eval_accuracy": 0.9734215044191518, "eval_loss": 0.09068916738033295, "eval_runtime": 31.8541, "eval_samples_per_second": 493.719, "eval_steps_per_second": 7.723, "step": 110500 }, { "epoch": 31.36815214305989, "grad_norm": 0.19217880070209503, "learning_rate": 6.864547260857225e-05, "loss": 0.005883803218603134, "step": 110510 }, { "epoch": 31.370990632983254, "grad_norm": 0.2941473126411438, "learning_rate": 6.864263411864889e-05, "loss": 0.010930477827787399, "step": 110520 }, { "epoch": 31.373829122906614, "grad_norm": 0.07004305720329285, "learning_rate": 6.863979562872552e-05, "loss": 0.005409552156925202, "step": 110530 }, { "epoch": 31.376667612829973, "grad_norm": 2.602637529373169, "learning_rate": 6.863695713880216e-05, "loss": 0.0034328415989875794, "step": 110540 }, { "epoch": 31.379506102753336, "grad_norm": 4.1790571212768555, "learning_rate": 6.86341186488788e-05, "loss": 0.006360481679439545, "step": 110550 }, { "epoch": 31.382344592676695, "grad_norm": 12.8060884475708, "learning_rate": 6.863128015895543e-05, "loss": 0.012725891172885894, "step": 110560 }, { "epoch": 31.385183082600058, "grad_norm": 0.6453924775123596, "learning_rate": 6.862844166903207e-05, "loss": 0.0034523710608482363, "step": 110570 }, { "epoch": 31.388021572523417, "grad_norm": 11.52908706665039, "learning_rate": 6.862560317910873e-05, "loss": 0.01105356365442276, "step": 110580 }, { "epoch": 31.390860062446777, "grad_norm": 3.1964778900146484, "learning_rate": 6.862276468918535e-05, "loss": 0.014997807145118714, "step": 110590 }, { "epoch": 31.39369855237014, "grad_norm": 12.168212890625, "learning_rate": 6.8619926199262e-05, "loss": 0.004727430269122123, "step": 110600 }, { "epoch": 31.3965370422935, "grad_norm": 4.784825801849365, "learning_rate": 6.861708770933864e-05, "loss": 0.011191074550151826, "step": 110610 }, { "epoch": 31.39937553221686, "grad_norm": 0.1228862777352333, "learning_rate": 6.861424921941528e-05, "loss": 0.003466445952653885, "step": 110620 }, { "epoch": 31.40221402214022, "grad_norm": 0.1359306424856186, "learning_rate": 6.86114107294919e-05, "loss": 0.00432380847632885, "step": 110630 }, { "epoch": 31.405052512063584, "grad_norm": 0.5755993723869324, "learning_rate": 6.860857223956856e-05, "loss": 0.0037147924304008484, "step": 110640 }, { "epoch": 31.407891001986943, "grad_norm": 0.23398731648921967, "learning_rate": 6.86057337496452e-05, "loss": 0.006961977481842041, "step": 110650 }, { "epoch": 31.410729491910303, "grad_norm": 3.57582426071167, "learning_rate": 6.860289525972183e-05, "loss": 0.0070944100618362425, "step": 110660 }, { "epoch": 31.413567981833665, "grad_norm": 3.537637948989868, "learning_rate": 6.860005676979847e-05, "loss": 0.0019071191549301148, "step": 110670 }, { "epoch": 31.416406471757025, "grad_norm": 0.09797897934913635, "learning_rate": 6.859721827987511e-05, "loss": 0.011883698403835297, "step": 110680 }, { "epoch": 31.419244961680388, "grad_norm": 0.2095937728881836, "learning_rate": 6.859437978995174e-05, "loss": 0.021143296360969545, "step": 110690 }, { "epoch": 31.422083451603747, "grad_norm": 2.3575212955474854, "learning_rate": 6.859154130002838e-05, "loss": 0.008230999112129211, "step": 110700 }, { "epoch": 31.424921941527106, "grad_norm": 0.20458194613456726, "learning_rate": 6.858870281010504e-05, "loss": 0.00540386289358139, "step": 110710 }, { "epoch": 31.42776043145047, "grad_norm": 1.0072506666183472, "learning_rate": 6.858586432018166e-05, "loss": 0.007096894085407257, "step": 110720 }, { "epoch": 31.43059892137383, "grad_norm": 0.15148283541202545, "learning_rate": 6.85830258302583e-05, "loss": 0.0016185205429792404, "step": 110730 }, { "epoch": 31.43343741129719, "grad_norm": 1.6251716613769531, "learning_rate": 6.858018734033495e-05, "loss": 0.002851945534348488, "step": 110740 }, { "epoch": 31.43627590122055, "grad_norm": 0.309197336435318, "learning_rate": 6.857734885041159e-05, "loss": 0.003495413064956665, "step": 110750 }, { "epoch": 31.43911439114391, "grad_norm": 6.319087505340576, "learning_rate": 6.857451036048822e-05, "loss": 0.005122091993689537, "step": 110760 }, { "epoch": 31.441952881067273, "grad_norm": 0.07136861979961395, "learning_rate": 6.857167187056486e-05, "loss": 0.00905277132987976, "step": 110770 }, { "epoch": 31.444791370990632, "grad_norm": 0.3060901463031769, "learning_rate": 6.856883338064151e-05, "loss": 0.005736605823040008, "step": 110780 }, { "epoch": 31.447629860913995, "grad_norm": 1.0271434783935547, "learning_rate": 6.856599489071814e-05, "loss": 0.005364743247628212, "step": 110790 }, { "epoch": 31.450468350837355, "grad_norm": 0.17331670224666595, "learning_rate": 6.856315640079478e-05, "loss": 0.0030073340982198714, "step": 110800 }, { "epoch": 31.453306840760714, "grad_norm": 0.28742823004722595, "learning_rate": 6.856031791087142e-05, "loss": 0.008888522535562516, "step": 110810 }, { "epoch": 31.456145330684077, "grad_norm": 2.2763500213623047, "learning_rate": 6.855747942094805e-05, "loss": 0.0019772512838244437, "step": 110820 }, { "epoch": 31.458983820607436, "grad_norm": 0.2442139834165573, "learning_rate": 6.855464093102469e-05, "loss": 0.00967795103788376, "step": 110830 }, { "epoch": 31.4618223105308, "grad_norm": 0.14647503197193146, "learning_rate": 6.855180244110135e-05, "loss": 0.0020475735887885093, "step": 110840 }, { "epoch": 31.46466080045416, "grad_norm": 0.0963652953505516, "learning_rate": 6.854896395117798e-05, "loss": 0.004030054435133934, "step": 110850 }, { "epoch": 31.467499290377518, "grad_norm": 0.39934444427490234, "learning_rate": 6.854612546125462e-05, "loss": 0.01303512454032898, "step": 110860 }, { "epoch": 31.47033778030088, "grad_norm": 0.16033871471881866, "learning_rate": 6.854328697133126e-05, "loss": 0.005937670916318893, "step": 110870 }, { "epoch": 31.47317627022424, "grad_norm": 0.41917967796325684, "learning_rate": 6.85404484814079e-05, "loss": 0.007911767065525054, "step": 110880 }, { "epoch": 31.476014760147603, "grad_norm": 0.2379283457994461, "learning_rate": 6.853760999148453e-05, "loss": 0.01058848351240158, "step": 110890 }, { "epoch": 31.478853250070962, "grad_norm": 0.4411291778087616, "learning_rate": 6.853477150156117e-05, "loss": 0.015578216314315796, "step": 110900 }, { "epoch": 31.48169173999432, "grad_norm": 0.8294801712036133, "learning_rate": 6.853193301163782e-05, "loss": 0.003521782532334328, "step": 110910 }, { "epoch": 31.484530229917684, "grad_norm": 2.874532461166382, "learning_rate": 6.852909452171445e-05, "loss": 0.002393849566578865, "step": 110920 }, { "epoch": 31.487368719841044, "grad_norm": 1.7254900932312012, "learning_rate": 6.852625603179109e-05, "loss": 0.008758755028247833, "step": 110930 }, { "epoch": 31.490207209764407, "grad_norm": 0.591719925403595, "learning_rate": 6.852341754186773e-05, "loss": 0.004954873025417328, "step": 110940 }, { "epoch": 31.493045699687766, "grad_norm": 0.4086301028728485, "learning_rate": 6.852057905194436e-05, "loss": 0.01292019784450531, "step": 110950 }, { "epoch": 31.495884189611125, "grad_norm": 0.6835862398147583, "learning_rate": 6.8517740562021e-05, "loss": 0.0055124431848526, "step": 110960 }, { "epoch": 31.498722679534488, "grad_norm": 12.664715766906738, "learning_rate": 6.851490207209764e-05, "loss": 0.012074268609285354, "step": 110970 }, { "epoch": 31.501561169457847, "grad_norm": 0.9274953007698059, "learning_rate": 6.851206358217429e-05, "loss": 0.003194229304790497, "step": 110980 }, { "epoch": 31.50439965938121, "grad_norm": 1.6582834720611572, "learning_rate": 6.850922509225093e-05, "loss": 0.005818652361631394, "step": 110990 }, { "epoch": 31.50723814930457, "grad_norm": 1.101373314857483, "learning_rate": 6.850638660232757e-05, "loss": 0.006683534383773804, "step": 111000 }, { "epoch": 31.50723814930457, "eval_accuracy": 0.9708781077128505, "eval_loss": 0.09602735191583633, "eval_runtime": 32.6859, "eval_samples_per_second": 481.156, "eval_steps_per_second": 7.526, "step": 111000 }, { "epoch": 31.510076639227933, "grad_norm": 11.351529121398926, "learning_rate": 6.850354811240421e-05, "loss": 0.008687594532966613, "step": 111010 }, { "epoch": 31.512915129151292, "grad_norm": 4.659715175628662, "learning_rate": 6.850070962248084e-05, "loss": 0.009848679602146148, "step": 111020 }, { "epoch": 31.51575361907465, "grad_norm": 3.9261043071746826, "learning_rate": 6.849787113255748e-05, "loss": 0.01351223886013031, "step": 111030 }, { "epoch": 31.518592108998014, "grad_norm": 0.31158801913261414, "learning_rate": 6.849503264263412e-05, "loss": 0.0028676217421889304, "step": 111040 }, { "epoch": 31.521430598921373, "grad_norm": 0.8351271152496338, "learning_rate": 6.849219415271076e-05, "loss": 0.005151634663343429, "step": 111050 }, { "epoch": 31.524269088844733, "grad_norm": 0.262214720249176, "learning_rate": 6.84893556627874e-05, "loss": 0.004819277673959732, "step": 111060 }, { "epoch": 31.527107578768096, "grad_norm": 0.8365206122398376, "learning_rate": 6.848651717286404e-05, "loss": 0.0012492701411247254, "step": 111070 }, { "epoch": 31.529946068691455, "grad_norm": 1.2457735538482666, "learning_rate": 6.848367868294067e-05, "loss": 0.016850826144218446, "step": 111080 }, { "epoch": 31.532784558614818, "grad_norm": 3.509990930557251, "learning_rate": 6.848084019301731e-05, "loss": 0.02714107632637024, "step": 111090 }, { "epoch": 31.535623048538177, "grad_norm": 1.167598843574524, "learning_rate": 6.847800170309396e-05, "loss": 0.008013924956321717, "step": 111100 }, { "epoch": 31.53846153846154, "grad_norm": 0.9673077464103699, "learning_rate": 6.84751632131706e-05, "loss": 0.019352704286575317, "step": 111110 }, { "epoch": 31.5413000283849, "grad_norm": 23.924570083618164, "learning_rate": 6.847232472324724e-05, "loss": 0.04818193912506104, "step": 111120 }, { "epoch": 31.54413851830826, "grad_norm": 0.1178336888551712, "learning_rate": 6.846948623332388e-05, "loss": 0.032082986831665036, "step": 111130 }, { "epoch": 31.54697700823162, "grad_norm": 6.005351543426514, "learning_rate": 6.846664774340051e-05, "loss": 0.01379796713590622, "step": 111140 }, { "epoch": 31.54981549815498, "grad_norm": 1.2460291385650635, "learning_rate": 6.846380925347715e-05, "loss": 0.0038434986025094988, "step": 111150 }, { "epoch": 31.552653988078344, "grad_norm": 0.07101533561944962, "learning_rate": 6.846097076355379e-05, "loss": 0.005717720836400986, "step": 111160 }, { "epoch": 31.555492478001703, "grad_norm": 8.381739616394043, "learning_rate": 6.845813227363043e-05, "loss": 0.005305105075240135, "step": 111170 }, { "epoch": 31.558330967925063, "grad_norm": 0.8707379102706909, "learning_rate": 6.845529378370707e-05, "loss": 0.002006872557103634, "step": 111180 }, { "epoch": 31.561169457848425, "grad_norm": 1.032707929611206, "learning_rate": 6.845245529378371e-05, "loss": 0.003454416245222092, "step": 111190 }, { "epoch": 31.564007947771785, "grad_norm": 0.5950360894203186, "learning_rate": 6.844961680386036e-05, "loss": 0.005815703794360161, "step": 111200 }, { "epoch": 31.566846437695148, "grad_norm": 1.1969664096832275, "learning_rate": 6.844677831393698e-05, "loss": 0.011549285054206848, "step": 111210 }, { "epoch": 31.569684927618507, "grad_norm": 0.086768239736557, "learning_rate": 6.844393982401362e-05, "loss": 0.0018267950043082237, "step": 111220 }, { "epoch": 31.572523417541866, "grad_norm": 0.7640368938446045, "learning_rate": 6.844110133409027e-05, "loss": 0.0064444899559021, "step": 111230 }, { "epoch": 31.57536190746523, "grad_norm": 1.149147629737854, "learning_rate": 6.843826284416691e-05, "loss": 0.003337462246417999, "step": 111240 }, { "epoch": 31.57820039738859, "grad_norm": 0.5072917342185974, "learning_rate": 6.843542435424355e-05, "loss": 0.009381873905658722, "step": 111250 }, { "epoch": 31.58103888731195, "grad_norm": 0.4425654113292694, "learning_rate": 6.843258586432019e-05, "loss": 0.004287391155958176, "step": 111260 }, { "epoch": 31.58387737723531, "grad_norm": 1.9969398975372314, "learning_rate": 6.842974737439682e-05, "loss": 0.00957765281200409, "step": 111270 }, { "epoch": 31.58671586715867, "grad_norm": 3.201249599456787, "learning_rate": 6.842690888447346e-05, "loss": 0.012544181942939759, "step": 111280 }, { "epoch": 31.589554357082033, "grad_norm": 8.698814392089844, "learning_rate": 6.84240703945501e-05, "loss": 0.004697917774319649, "step": 111290 }, { "epoch": 31.592392847005392, "grad_norm": 0.2324749082326889, "learning_rate": 6.842123190462674e-05, "loss": 0.0021651582792401314, "step": 111300 }, { "epoch": 31.595231336928755, "grad_norm": 0.34886401891708374, "learning_rate": 6.841839341470338e-05, "loss": 0.006894345581531525, "step": 111310 }, { "epoch": 31.598069826852115, "grad_norm": 0.1326005756855011, "learning_rate": 6.841555492478002e-05, "loss": 0.01654348522424698, "step": 111320 }, { "epoch": 31.600908316775474, "grad_norm": 0.32292553782463074, "learning_rate": 6.841271643485667e-05, "loss": 0.004765563830733299, "step": 111330 }, { "epoch": 31.603746806698837, "grad_norm": 9.727119445800781, "learning_rate": 6.84098779449333e-05, "loss": 0.023321005702018737, "step": 111340 }, { "epoch": 31.606585296622196, "grad_norm": 0.8241628408432007, "learning_rate": 6.840703945500994e-05, "loss": 0.005406643822789192, "step": 111350 }, { "epoch": 31.60942378654556, "grad_norm": 0.11491557955741882, "learning_rate": 6.840420096508658e-05, "loss": 0.014735694229602813, "step": 111360 }, { "epoch": 31.61226227646892, "grad_norm": 0.5627094507217407, "learning_rate": 6.84013624751632e-05, "loss": 0.006907147169113159, "step": 111370 }, { "epoch": 31.615100766392278, "grad_norm": 0.4521954357624054, "learning_rate": 6.839852398523986e-05, "loss": 0.014480642974376678, "step": 111380 }, { "epoch": 31.61793925631564, "grad_norm": 2.4065756797790527, "learning_rate": 6.83956854953165e-05, "loss": 0.0038803480565547943, "step": 111390 }, { "epoch": 31.620777746239, "grad_norm": 0.04546709358692169, "learning_rate": 6.839284700539313e-05, "loss": 0.0016823584213852883, "step": 111400 }, { "epoch": 31.623616236162363, "grad_norm": 5.215712547302246, "learning_rate": 6.839000851546977e-05, "loss": 0.004511076956987381, "step": 111410 }, { "epoch": 31.626454726085722, "grad_norm": 1.4145536422729492, "learning_rate": 6.838717002554641e-05, "loss": 0.014722645282745361, "step": 111420 }, { "epoch": 31.62929321600908, "grad_norm": 0.3794224262237549, "learning_rate": 6.838433153562305e-05, "loss": 0.025443363189697265, "step": 111430 }, { "epoch": 31.632131705932444, "grad_norm": 4.338743686676025, "learning_rate": 6.83814930456997e-05, "loss": 0.006875064969062805, "step": 111440 }, { "epoch": 31.634970195855804, "grad_norm": 1.1352838277816772, "learning_rate": 6.837865455577634e-05, "loss": 0.00943584144115448, "step": 111450 }, { "epoch": 31.637808685779166, "grad_norm": 4.876721382141113, "learning_rate": 6.837581606585298e-05, "loss": 0.001181977242231369, "step": 111460 }, { "epoch": 31.640647175702526, "grad_norm": 0.3526777923107147, "learning_rate": 6.83729775759296e-05, "loss": 0.0032115235924720766, "step": 111470 }, { "epoch": 31.64348566562589, "grad_norm": 14.518491744995117, "learning_rate": 6.837013908600625e-05, "loss": 0.008310648798942565, "step": 111480 }, { "epoch": 31.646324155549248, "grad_norm": 0.31408146023750305, "learning_rate": 6.836730059608289e-05, "loss": 0.014019936323165894, "step": 111490 }, { "epoch": 31.649162645472607, "grad_norm": 2.747941732406616, "learning_rate": 6.836446210615952e-05, "loss": 0.004994688928127289, "step": 111500 }, { "epoch": 31.649162645472607, "eval_accuracy": 0.972849240160234, "eval_loss": 0.09299984574317932, "eval_runtime": 31.7372, "eval_samples_per_second": 495.539, "eval_steps_per_second": 7.751, "step": 111500 }, { "epoch": 31.65200113539597, "grad_norm": 0.1669388860464096, "learning_rate": 6.836162361623617e-05, "loss": 0.0029633624479174616, "step": 111510 }, { "epoch": 31.65483962531933, "grad_norm": 2.906898260116577, "learning_rate": 6.835878512631281e-05, "loss": 0.00947681963443756, "step": 111520 }, { "epoch": 31.657678115242692, "grad_norm": 0.23977884650230408, "learning_rate": 6.835594663638944e-05, "loss": 0.002341122180223465, "step": 111530 }, { "epoch": 31.660516605166052, "grad_norm": 0.05543351173400879, "learning_rate": 6.835310814646608e-05, "loss": 0.005055475980043411, "step": 111540 }, { "epoch": 31.66335509508941, "grad_norm": 6.8705878257751465, "learning_rate": 6.835026965654272e-05, "loss": 0.006296662986278534, "step": 111550 }, { "epoch": 31.666193585012774, "grad_norm": 0.07508984953165054, "learning_rate": 6.834743116661936e-05, "loss": 0.009136031568050384, "step": 111560 }, { "epoch": 31.669032074936133, "grad_norm": 0.4821889102458954, "learning_rate": 6.8344592676696e-05, "loss": 0.00289219431579113, "step": 111570 }, { "epoch": 31.671870564859496, "grad_norm": 0.5724067091941833, "learning_rate": 6.834175418677265e-05, "loss": 0.010458598285913468, "step": 111580 }, { "epoch": 31.674709054782856, "grad_norm": 1.17878258228302, "learning_rate": 6.833891569684929e-05, "loss": 0.007089054584503174, "step": 111590 }, { "epoch": 31.677547544706215, "grad_norm": 6.668631553649902, "learning_rate": 6.833607720692592e-05, "loss": 0.004987912625074387, "step": 111600 }, { "epoch": 31.680386034629578, "grad_norm": 0.12468308955430984, "learning_rate": 6.833323871700256e-05, "loss": 0.0031174197793006897, "step": 111610 }, { "epoch": 31.683224524552937, "grad_norm": 5.912275314331055, "learning_rate": 6.83304002270792e-05, "loss": 0.007123376429080963, "step": 111620 }, { "epoch": 31.6860630144763, "grad_norm": 0.08171559125185013, "learning_rate": 6.832756173715583e-05, "loss": 0.014561636745929718, "step": 111630 }, { "epoch": 31.68890150439966, "grad_norm": 0.5651805400848389, "learning_rate": 6.832472324723248e-05, "loss": 0.009940367192029953, "step": 111640 }, { "epoch": 31.69173999432302, "grad_norm": 0.20194415748119354, "learning_rate": 6.832188475730912e-05, "loss": 0.0039100654423236845, "step": 111650 }, { "epoch": 31.69457848424638, "grad_norm": 0.21964016556739807, "learning_rate": 6.831904626738575e-05, "loss": 0.015068444609642028, "step": 111660 }, { "epoch": 31.69741697416974, "grad_norm": 0.029672732576727867, "learning_rate": 6.831620777746239e-05, "loss": 0.00910898819565773, "step": 111670 }, { "epoch": 31.700255464093104, "grad_norm": 0.8006603717803955, "learning_rate": 6.831336928753903e-05, "loss": 0.004151589050889015, "step": 111680 }, { "epoch": 31.703093954016463, "grad_norm": 0.4289233982563019, "learning_rate": 6.831053079761567e-05, "loss": 0.007662437856197357, "step": 111690 }, { "epoch": 31.705932443939822, "grad_norm": 0.19920805096626282, "learning_rate": 6.83076923076923e-05, "loss": 0.011945778876543045, "step": 111700 }, { "epoch": 31.708770933863185, "grad_norm": 0.16350744664669037, "learning_rate": 6.830485381776896e-05, "loss": 0.0032811239361763, "step": 111710 }, { "epoch": 31.711609423786545, "grad_norm": 0.7621529698371887, "learning_rate": 6.83020153278456e-05, "loss": 0.0025921674445271493, "step": 111720 }, { "epoch": 31.714447913709908, "grad_norm": 11.359150886535645, "learning_rate": 6.829917683792223e-05, "loss": 0.006132229417562485, "step": 111730 }, { "epoch": 31.717286403633267, "grad_norm": 11.966462135314941, "learning_rate": 6.829633834799887e-05, "loss": 0.011749979853630067, "step": 111740 }, { "epoch": 31.720124893556626, "grad_norm": 0.20635463297367096, "learning_rate": 6.829349985807551e-05, "loss": 0.011074859648942947, "step": 111750 }, { "epoch": 31.72296338347999, "grad_norm": 2.355894088745117, "learning_rate": 6.829066136815214e-05, "loss": 0.003608880192041397, "step": 111760 }, { "epoch": 31.72580187340335, "grad_norm": 0.19789737462997437, "learning_rate": 6.828782287822879e-05, "loss": 0.006971706449985504, "step": 111770 }, { "epoch": 31.72864036332671, "grad_norm": 22.51043128967285, "learning_rate": 6.828498438830543e-05, "loss": 0.023025889694690705, "step": 111780 }, { "epoch": 31.73147885325007, "grad_norm": 0.44341930747032166, "learning_rate": 6.828214589838206e-05, "loss": 0.010177663713693618, "step": 111790 }, { "epoch": 31.73431734317343, "grad_norm": 0.13608318567276, "learning_rate": 6.82793074084587e-05, "loss": 0.008347529172897338, "step": 111800 }, { "epoch": 31.737155833096793, "grad_norm": 4.800083160400391, "learning_rate": 6.827646891853534e-05, "loss": 0.010078371316194535, "step": 111810 }, { "epoch": 31.739994323020152, "grad_norm": 0.4727610647678375, "learning_rate": 6.827363042861199e-05, "loss": 0.009345553815364838, "step": 111820 }, { "epoch": 31.742832812943515, "grad_norm": 1.2807178497314453, "learning_rate": 6.827079193868861e-05, "loss": 0.002239334024488926, "step": 111830 }, { "epoch": 31.745671302866874, "grad_norm": 0.31881776452064514, "learning_rate": 6.826795344876527e-05, "loss": 0.005826468765735626, "step": 111840 }, { "epoch": 31.748509792790237, "grad_norm": 0.3899812400341034, "learning_rate": 6.826511495884191e-05, "loss": 0.00906587541103363, "step": 111850 }, { "epoch": 31.751348282713597, "grad_norm": 1.1504793167114258, "learning_rate": 6.826227646891854e-05, "loss": 0.004249905422329903, "step": 111860 }, { "epoch": 31.754186772636956, "grad_norm": 6.78041410446167, "learning_rate": 6.825943797899518e-05, "loss": 0.0038106441497802734, "step": 111870 }, { "epoch": 31.75702526256032, "grad_norm": 1.5143126249313354, "learning_rate": 6.825659948907182e-05, "loss": 0.010604381561279297, "step": 111880 }, { "epoch": 31.759863752483678, "grad_norm": 7.184882640838623, "learning_rate": 6.825376099914845e-05, "loss": 0.0044593319296836855, "step": 111890 }, { "epoch": 31.76270224240704, "grad_norm": 4.225615978240967, "learning_rate": 6.825092250922509e-05, "loss": 0.0053154781460762026, "step": 111900 }, { "epoch": 31.7655407323304, "grad_norm": 6.64150857925415, "learning_rate": 6.824808401930174e-05, "loss": 0.004802253097295761, "step": 111910 }, { "epoch": 31.76837922225376, "grad_norm": 9.94938850402832, "learning_rate": 6.824524552937837e-05, "loss": 0.012674580514430999, "step": 111920 }, { "epoch": 31.771217712177123, "grad_norm": 14.90323543548584, "learning_rate": 6.824240703945501e-05, "loss": 0.02048947513103485, "step": 111930 }, { "epoch": 31.774056202100482, "grad_norm": 0.5821764469146729, "learning_rate": 6.823956854953165e-05, "loss": 0.013197378814220428, "step": 111940 }, { "epoch": 31.776894692023845, "grad_norm": 0.5471833944320679, "learning_rate": 6.82367300596083e-05, "loss": 0.018742209672927855, "step": 111950 }, { "epoch": 31.779733181947204, "grad_norm": 18.066133499145508, "learning_rate": 6.823389156968492e-05, "loss": 0.016490170359611513, "step": 111960 }, { "epoch": 31.782571671870564, "grad_norm": 0.054250482469797134, "learning_rate": 6.823105307976158e-05, "loss": 0.005137863382697105, "step": 111970 }, { "epoch": 31.785410161793926, "grad_norm": 0.026516495272517204, "learning_rate": 6.82282145898382e-05, "loss": 0.01019509881734848, "step": 111980 }, { "epoch": 31.788248651717286, "grad_norm": 0.22025428712368011, "learning_rate": 6.822537609991485e-05, "loss": 0.0022360729053616524, "step": 111990 }, { "epoch": 31.79108714164065, "grad_norm": 0.33896440267562866, "learning_rate": 6.822253760999149e-05, "loss": 0.008224617689847946, "step": 112000 }, { "epoch": 31.79108714164065, "eval_accuracy": 0.9695428244420423, "eval_loss": 0.10078894346952438, "eval_runtime": 31.5115, "eval_samples_per_second": 499.088, "eval_steps_per_second": 7.807, "step": 112000 }, { "epoch": 31.793925631564008, "grad_norm": 0.2273392528295517, "learning_rate": 6.821969912006813e-05, "loss": 0.005847939848899841, "step": 112010 }, { "epoch": 31.796764121487367, "grad_norm": 0.12986287474632263, "learning_rate": 6.821686063014476e-05, "loss": 0.008841508626937866, "step": 112020 }, { "epoch": 31.79960261141073, "grad_norm": 11.993029594421387, "learning_rate": 6.82140221402214e-05, "loss": 0.007772567868232727, "step": 112030 }, { "epoch": 31.80244110133409, "grad_norm": 0.20143119990825653, "learning_rate": 6.821118365029805e-05, "loss": 0.003965846076607704, "step": 112040 }, { "epoch": 31.805279591257452, "grad_norm": 0.16670043766498566, "learning_rate": 6.820834516037468e-05, "loss": 0.0041697613894939424, "step": 112050 }, { "epoch": 31.80811808118081, "grad_norm": 0.1777687817811966, "learning_rate": 6.820550667045132e-05, "loss": 0.0049547705799341205, "step": 112060 }, { "epoch": 31.81095657110417, "grad_norm": 0.5711122751235962, "learning_rate": 6.820266818052797e-05, "loss": 0.00910588949918747, "step": 112070 }, { "epoch": 31.813795061027534, "grad_norm": 4.641970634460449, "learning_rate": 6.819982969060459e-05, "loss": 0.0032428544014692306, "step": 112080 }, { "epoch": 31.816633550950893, "grad_norm": 4.070696830749512, "learning_rate": 6.819699120068123e-05, "loss": 0.0025896903127431868, "step": 112090 }, { "epoch": 31.819472040874256, "grad_norm": 14.867886543273926, "learning_rate": 6.819415271075788e-05, "loss": 0.017729231715202333, "step": 112100 }, { "epoch": 31.822310530797616, "grad_norm": 0.16451802849769592, "learning_rate": 6.819131422083452e-05, "loss": 0.020681332051753997, "step": 112110 }, { "epoch": 31.825149020720975, "grad_norm": 0.20276722311973572, "learning_rate": 6.818847573091116e-05, "loss": 0.0071222282946109775, "step": 112120 }, { "epoch": 31.827987510644338, "grad_norm": 0.12054027616977692, "learning_rate": 6.81856372409878e-05, "loss": 0.0025136858224868775, "step": 112130 }, { "epoch": 31.830826000567697, "grad_norm": 2.381458282470703, "learning_rate": 6.818279875106444e-05, "loss": 0.016549375653266907, "step": 112140 }, { "epoch": 31.83366449049106, "grad_norm": 0.2634775638580322, "learning_rate": 6.817996026114107e-05, "loss": 0.003928965330123902, "step": 112150 }, { "epoch": 31.83650298041442, "grad_norm": 0.09217740595340729, "learning_rate": 6.817712177121771e-05, "loss": 0.0027442745864391326, "step": 112160 }, { "epoch": 31.83934147033778, "grad_norm": 7.691119194030762, "learning_rate": 6.817428328129437e-05, "loss": 0.01693858504295349, "step": 112170 }, { "epoch": 31.84217996026114, "grad_norm": 0.05064951628446579, "learning_rate": 6.8171444791371e-05, "loss": 0.0014888802543282509, "step": 112180 }, { "epoch": 31.8450184501845, "grad_norm": 0.3167444169521332, "learning_rate": 6.816860630144763e-05, "loss": 0.010639182478189468, "step": 112190 }, { "epoch": 31.847856940107864, "grad_norm": 3.1098062992095947, "learning_rate": 6.816576781152428e-05, "loss": 0.008044886589050292, "step": 112200 }, { "epoch": 31.850695430031223, "grad_norm": 3.930600881576538, "learning_rate": 6.81629293216009e-05, "loss": 0.003921622782945633, "step": 112210 }, { "epoch": 31.853533919954586, "grad_norm": 0.3429722785949707, "learning_rate": 6.816009083167755e-05, "loss": 0.010262078046798706, "step": 112220 }, { "epoch": 31.856372409877945, "grad_norm": 1.0070666074752808, "learning_rate": 6.815725234175419e-05, "loss": 0.007831378281116486, "step": 112230 }, { "epoch": 31.859210899801305, "grad_norm": 0.9816868305206299, "learning_rate": 6.815441385183083e-05, "loss": 0.005563776940107346, "step": 112240 }, { "epoch": 31.862049389724667, "grad_norm": 0.7258110642433167, "learning_rate": 6.815157536190747e-05, "loss": 0.005391286313533783, "step": 112250 }, { "epoch": 31.864887879648027, "grad_norm": 0.11904098838567734, "learning_rate": 6.814873687198411e-05, "loss": 0.006604784727096557, "step": 112260 }, { "epoch": 31.86772636957139, "grad_norm": 1.7833142280578613, "learning_rate": 6.814589838206075e-05, "loss": 0.009094425290822983, "step": 112270 }, { "epoch": 31.87056485949475, "grad_norm": 0.11489982157945633, "learning_rate": 6.814305989213738e-05, "loss": 0.005363580584526062, "step": 112280 }, { "epoch": 31.87340334941811, "grad_norm": 3.3595969676971436, "learning_rate": 6.814022140221402e-05, "loss": 0.00427178293466568, "step": 112290 }, { "epoch": 31.87624183934147, "grad_norm": 7.13802433013916, "learning_rate": 6.813738291229066e-05, "loss": 0.008593113720417022, "step": 112300 }, { "epoch": 31.87908032926483, "grad_norm": 1.7315257787704468, "learning_rate": 6.81345444223673e-05, "loss": 0.004069085046648979, "step": 112310 }, { "epoch": 31.881918819188193, "grad_norm": 0.6548401117324829, "learning_rate": 6.813170593244395e-05, "loss": 0.012579810619354249, "step": 112320 }, { "epoch": 31.884757309111553, "grad_norm": 0.7378200888633728, "learning_rate": 6.812886744252059e-05, "loss": 0.014098866283893586, "step": 112330 }, { "epoch": 31.887595799034912, "grad_norm": 1.0156601667404175, "learning_rate": 6.812631280158955e-05, "loss": 0.04399470686912536, "step": 112340 }, { "epoch": 31.890434288958275, "grad_norm": 0.3062853217124939, "learning_rate": 6.812347431166621e-05, "loss": 0.01922585517168045, "step": 112350 }, { "epoch": 31.893272778881634, "grad_norm": 1.0315234661102295, "learning_rate": 6.812063582174284e-05, "loss": 0.010519689321517945, "step": 112360 }, { "epoch": 31.896111268804997, "grad_norm": 11.50934886932373, "learning_rate": 6.811779733181948e-05, "loss": 0.009597477316856385, "step": 112370 }, { "epoch": 31.898949758728357, "grad_norm": 0.14066126942634583, "learning_rate": 6.811495884189612e-05, "loss": 0.0028650419786572455, "step": 112380 }, { "epoch": 31.901788248651716, "grad_norm": 11.789419174194336, "learning_rate": 6.811212035197275e-05, "loss": 0.004871566221117974, "step": 112390 }, { "epoch": 31.90462673857508, "grad_norm": 3.155224561691284, "learning_rate": 6.810928186204939e-05, "loss": 0.006400573253631592, "step": 112400 }, { "epoch": 31.907465228498438, "grad_norm": 3.1055116653442383, "learning_rate": 6.810644337212603e-05, "loss": 0.005016482621431351, "step": 112410 }, { "epoch": 31.9103037184218, "grad_norm": 0.9981109499931335, "learning_rate": 6.810360488220267e-05, "loss": 0.011814363300800323, "step": 112420 }, { "epoch": 31.91314220834516, "grad_norm": 3.2621138095855713, "learning_rate": 6.810076639227931e-05, "loss": 0.003862859308719635, "step": 112430 }, { "epoch": 31.91598069826852, "grad_norm": 0.027387892827391624, "learning_rate": 6.809792790235595e-05, "loss": 0.005342646315693855, "step": 112440 }, { "epoch": 31.918819188191883, "grad_norm": 1.1023670434951782, "learning_rate": 6.80950894124326e-05, "loss": 0.002160440571606159, "step": 112450 }, { "epoch": 31.921657678115242, "grad_norm": 0.1597622036933899, "learning_rate": 6.809225092250922e-05, "loss": 0.0019233804196119308, "step": 112460 }, { "epoch": 31.924496168038605, "grad_norm": 7.0468034744262695, "learning_rate": 6.808941243258586e-05, "loss": 0.008442067354917527, "step": 112470 }, { "epoch": 31.927334657961964, "grad_norm": 0.3132229745388031, "learning_rate": 6.80865739426625e-05, "loss": 0.016583122313022614, "step": 112480 }, { "epoch": 31.930173147885323, "grad_norm": 0.4543894827365875, "learning_rate": 6.808373545273915e-05, "loss": 0.007372811436653137, "step": 112490 }, { "epoch": 31.933011637808686, "grad_norm": 0.06805482506752014, "learning_rate": 6.808089696281579e-05, "loss": 0.0041646726429462435, "step": 112500 }, { "epoch": 31.933011637808686, "eval_accuracy": 0.9738030139250969, "eval_loss": 0.08944963663816452, "eval_runtime": 31.5907, "eval_samples_per_second": 497.836, "eval_steps_per_second": 7.787, "step": 112500 }, { "epoch": 31.935850127732046, "grad_norm": 1.4102498292922974, "learning_rate": 6.807805847289243e-05, "loss": 0.006246492266654968, "step": 112510 }, { "epoch": 31.93868861765541, "grad_norm": 6.589541912078857, "learning_rate": 6.807521998296906e-05, "loss": 0.011974203586578368, "step": 112520 }, { "epoch": 31.941527107578768, "grad_norm": 0.6699870824813843, "learning_rate": 6.80723814930457e-05, "loss": 0.0017713833600282668, "step": 112530 }, { "epoch": 31.944365597502127, "grad_norm": 0.26982733607292175, "learning_rate": 6.806954300312234e-05, "loss": 0.004998511075973511, "step": 112540 }, { "epoch": 31.94720408742549, "grad_norm": 0.07027362287044525, "learning_rate": 6.806670451319898e-05, "loss": 0.019851690530776976, "step": 112550 }, { "epoch": 31.95004257734885, "grad_norm": 0.21667777001857758, "learning_rate": 6.806386602327562e-05, "loss": 0.01027294248342514, "step": 112560 }, { "epoch": 31.952881067272212, "grad_norm": 0.40778666734695435, "learning_rate": 6.806102753335226e-05, "loss": 0.006953151524066925, "step": 112570 }, { "epoch": 31.95571955719557, "grad_norm": 0.1079963818192482, "learning_rate": 6.80581890434289e-05, "loss": 0.013990335166454315, "step": 112580 }, { "epoch": 31.958558047118935, "grad_norm": 6.210877418518066, "learning_rate": 6.805535055350553e-05, "loss": 0.008871030807495118, "step": 112590 }, { "epoch": 31.961396537042294, "grad_norm": 0.3015681505203247, "learning_rate": 6.805251206358217e-05, "loss": 0.004505900293588638, "step": 112600 }, { "epoch": 31.964235026965653, "grad_norm": 6.5876007080078125, "learning_rate": 6.804967357365882e-05, "loss": 0.013760749995708466, "step": 112610 }, { "epoch": 31.967073516889016, "grad_norm": 22.009775161743164, "learning_rate": 6.804683508373546e-05, "loss": 0.029219388961791992, "step": 112620 }, { "epoch": 31.969912006812375, "grad_norm": 0.18849162757396698, "learning_rate": 6.80439965938121e-05, "loss": 0.0029139140620827674, "step": 112630 }, { "epoch": 31.972750496735735, "grad_norm": 0.8211190104484558, "learning_rate": 6.804115810388874e-05, "loss": 0.011607404053211211, "step": 112640 }, { "epoch": 31.975588986659098, "grad_norm": 1.1625237464904785, "learning_rate": 6.803831961396537e-05, "loss": 0.019713284075260164, "step": 112650 }, { "epoch": 31.978427476582457, "grad_norm": 8.339205741882324, "learning_rate": 6.803548112404201e-05, "loss": 0.006902317702770233, "step": 112660 }, { "epoch": 31.98126596650582, "grad_norm": 1.7514382600784302, "learning_rate": 6.803264263411865e-05, "loss": 0.002683527022600174, "step": 112670 }, { "epoch": 31.98410445642918, "grad_norm": 0.6259246468544006, "learning_rate": 6.802980414419529e-05, "loss": 0.005297352373600006, "step": 112680 }, { "epoch": 31.986942946352542, "grad_norm": 0.25685709714889526, "learning_rate": 6.802696565427193e-05, "loss": 0.0030623583123087885, "step": 112690 }, { "epoch": 31.9897814362759, "grad_norm": 0.7202678918838501, "learning_rate": 6.802412716434857e-05, "loss": 0.0024009345099329947, "step": 112700 }, { "epoch": 31.99261992619926, "grad_norm": 0.1389661431312561, "learning_rate": 6.802128867442522e-05, "loss": 0.007260927557945251, "step": 112710 }, { "epoch": 31.995458416122624, "grad_norm": 0.7495160102844238, "learning_rate": 6.801845018450184e-05, "loss": 0.003939095884561539, "step": 112720 }, { "epoch": 31.998296906045983, "grad_norm": 1.1752415895462036, "learning_rate": 6.801561169457849e-05, "loss": 0.004041685163974762, "step": 112730 }, { "epoch": 32.00113539596934, "grad_norm": 0.341753751039505, "learning_rate": 6.801277320465513e-05, "loss": 0.004040905460715294, "step": 112740 }, { "epoch": 32.003973885892705, "grad_norm": 0.38411256670951843, "learning_rate": 6.800993471473177e-05, "loss": 0.011023762077093125, "step": 112750 }, { "epoch": 32.00681237581607, "grad_norm": 0.9074758887290955, "learning_rate": 6.800709622480841e-05, "loss": 0.004296112060546875, "step": 112760 }, { "epoch": 32.009650865739424, "grad_norm": 5.970540523529053, "learning_rate": 6.800425773488505e-05, "loss": 0.0031841062009334564, "step": 112770 }, { "epoch": 32.01248935566279, "grad_norm": 0.2918536067008972, "learning_rate": 6.800141924496168e-05, "loss": 0.009583040326833724, "step": 112780 }, { "epoch": 32.01532784558615, "grad_norm": 2.315399646759033, "learning_rate": 6.799858075503832e-05, "loss": 0.0017800677567720413, "step": 112790 }, { "epoch": 32.018166335509505, "grad_norm": 0.33088240027427673, "learning_rate": 6.799574226511496e-05, "loss": 0.0007648751139640808, "step": 112800 }, { "epoch": 32.02100482543287, "grad_norm": 0.8620145916938782, "learning_rate": 6.79929037751916e-05, "loss": 0.004749047011137009, "step": 112810 }, { "epoch": 32.02384331535623, "grad_norm": 0.5967131853103638, "learning_rate": 6.799006528526824e-05, "loss": 0.011634477227926255, "step": 112820 }, { "epoch": 32.026681805279594, "grad_norm": 1.3156980276107788, "learning_rate": 6.798722679534489e-05, "loss": 0.0015296123921871186, "step": 112830 }, { "epoch": 32.02952029520295, "grad_norm": 9.440923690795898, "learning_rate": 6.798438830542153e-05, "loss": 0.010443995893001556, "step": 112840 }, { "epoch": 32.03235878512631, "grad_norm": 1.3170238733291626, "learning_rate": 6.798154981549815e-05, "loss": 0.005109821259975433, "step": 112850 }, { "epoch": 32.035197275049676, "grad_norm": 0.15561915934085846, "learning_rate": 6.79787113255748e-05, "loss": 0.017511712014675142, "step": 112860 }, { "epoch": 32.03803576497303, "grad_norm": 0.0910177007317543, "learning_rate": 6.797587283565144e-05, "loss": 0.016135452687740325, "step": 112870 }, { "epoch": 32.040874254896394, "grad_norm": 0.21093660593032837, "learning_rate": 6.797303434572807e-05, "loss": 0.0010104564949870109, "step": 112880 }, { "epoch": 32.04371274481976, "grad_norm": 0.17900234460830688, "learning_rate": 6.797019585580472e-05, "loss": 0.007267052680253983, "step": 112890 }, { "epoch": 32.04655123474312, "grad_norm": 0.11418800801038742, "learning_rate": 6.796735736588136e-05, "loss": 0.003210850805044174, "step": 112900 }, { "epoch": 32.049389724666476, "grad_norm": 0.5047315359115601, "learning_rate": 6.796451887595799e-05, "loss": 0.0027661284431815146, "step": 112910 }, { "epoch": 32.05222821458984, "grad_norm": 0.1105075255036354, "learning_rate": 6.796168038603463e-05, "loss": 0.0017645357176661492, "step": 112920 }, { "epoch": 32.0550667045132, "grad_norm": 0.5283951759338379, "learning_rate": 6.795884189611127e-05, "loss": 0.008849997818470002, "step": 112930 }, { "epoch": 32.05790519443656, "grad_norm": 0.139421284198761, "learning_rate": 6.795600340618791e-05, "loss": 0.003753601759672165, "step": 112940 }, { "epoch": 32.06074368435992, "grad_norm": 1.4927922487258911, "learning_rate": 6.795316491626456e-05, "loss": 0.005814217031002045, "step": 112950 }, { "epoch": 32.06358217428328, "grad_norm": 0.27753469347953796, "learning_rate": 6.79503264263412e-05, "loss": 0.002770618535578251, "step": 112960 }, { "epoch": 32.06642066420664, "grad_norm": 0.4666532576084137, "learning_rate": 6.794748793641782e-05, "loss": 0.00104095209389925, "step": 112970 }, { "epoch": 32.06925915413, "grad_norm": 0.01714489981532097, "learning_rate": 6.794464944649447e-05, "loss": 0.004307209700345993, "step": 112980 }, { "epoch": 32.072097644053365, "grad_norm": 0.5552462339401245, "learning_rate": 6.794181095657111e-05, "loss": 0.0042713616043329235, "step": 112990 }, { "epoch": 32.07493613397673, "grad_norm": 0.058778196573257446, "learning_rate": 6.793897246664775e-05, "loss": 0.002907343581318855, "step": 113000 }, { "epoch": 32.07493613397673, "eval_accuracy": 0.9753290519488778, "eval_loss": 0.08318768441677094, "eval_runtime": 32.0473, "eval_samples_per_second": 490.744, "eval_steps_per_second": 7.676, "step": 113000 }, { "epoch": 32.07777462390008, "grad_norm": 0.14211684465408325, "learning_rate": 6.793613397672438e-05, "loss": 0.0030759992077946663, "step": 113010 }, { "epoch": 32.080613113823446, "grad_norm": 0.8389070630073547, "learning_rate": 6.793329548680103e-05, "loss": 0.006130092591047287, "step": 113020 }, { "epoch": 32.08345160374681, "grad_norm": 4.882470607757568, "learning_rate": 6.793045699687767e-05, "loss": 0.006557777523994446, "step": 113030 }, { "epoch": 32.086290093670165, "grad_norm": 4.013255596160889, "learning_rate": 6.79276185069543e-05, "loss": 0.001519666612148285, "step": 113040 }, { "epoch": 32.08912858359353, "grad_norm": 12.725340843200684, "learning_rate": 6.792478001703094e-05, "loss": 0.0051808558404445645, "step": 113050 }, { "epoch": 32.09196707351689, "grad_norm": 0.04267686977982521, "learning_rate": 6.792194152710758e-05, "loss": 0.009982483088970184, "step": 113060 }, { "epoch": 32.09480556344025, "grad_norm": 11.574191093444824, "learning_rate": 6.791910303718421e-05, "loss": 0.015403810143470763, "step": 113070 }, { "epoch": 32.09764405336361, "grad_norm": 0.1928398460149765, "learning_rate": 6.791626454726085e-05, "loss": 0.0013574745506048202, "step": 113080 }, { "epoch": 32.10048254328697, "grad_norm": 0.1670929193496704, "learning_rate": 6.791342605733751e-05, "loss": 0.0019810723140835764, "step": 113090 }, { "epoch": 32.103321033210335, "grad_norm": 8.155051231384277, "learning_rate": 6.791058756741414e-05, "loss": 0.0050445936620235445, "step": 113100 }, { "epoch": 32.10615952313369, "grad_norm": 0.05738574638962746, "learning_rate": 6.790774907749078e-05, "loss": 0.002214076742529869, "step": 113110 }, { "epoch": 32.108998013057054, "grad_norm": 3.105058431625366, "learning_rate": 6.790491058756742e-05, "loss": 0.0025078589096665382, "step": 113120 }, { "epoch": 32.11183650298042, "grad_norm": 0.04810801520943642, "learning_rate": 6.790207209764406e-05, "loss": 0.029672807455062865, "step": 113130 }, { "epoch": 32.11467499290377, "grad_norm": 1.1031101942062378, "learning_rate": 6.789923360772069e-05, "loss": 0.008333415538072587, "step": 113140 }, { "epoch": 32.117513482827135, "grad_norm": 0.23200233280658722, "learning_rate": 6.789639511779734e-05, "loss": 0.004605308175086975, "step": 113150 }, { "epoch": 32.1203519727505, "grad_norm": 0.8474830985069275, "learning_rate": 6.789355662787398e-05, "loss": 0.009357067197561264, "step": 113160 }, { "epoch": 32.123190462673854, "grad_norm": 5.159415245056152, "learning_rate": 6.789071813795061e-05, "loss": 0.010340078175067902, "step": 113170 }, { "epoch": 32.12602895259722, "grad_norm": 0.28161850571632385, "learning_rate": 6.788787964802725e-05, "loss": 0.0026564829051494597, "step": 113180 }, { "epoch": 32.12886744252058, "grad_norm": 0.056018196046352386, "learning_rate": 6.78850411581039e-05, "loss": 0.009786094725131988, "step": 113190 }, { "epoch": 32.13170593244394, "grad_norm": 0.08952254056930542, "learning_rate": 6.788220266818052e-05, "loss": 0.0024977266788482667, "step": 113200 }, { "epoch": 32.1345444223673, "grad_norm": 0.09973842650651932, "learning_rate": 6.787936417825716e-05, "loss": 0.0027458056807518005, "step": 113210 }, { "epoch": 32.13738291229066, "grad_norm": 0.24031929671764374, "learning_rate": 6.787652568833382e-05, "loss": 0.009127199649810791, "step": 113220 }, { "epoch": 32.140221402214024, "grad_norm": 1.9957661628723145, "learning_rate": 6.787368719841045e-05, "loss": 0.004055127501487732, "step": 113230 }, { "epoch": 32.14305989213738, "grad_norm": 0.3266041576862335, "learning_rate": 6.787084870848709e-05, "loss": 0.0052783235907554625, "step": 113240 }, { "epoch": 32.14589838206074, "grad_norm": 0.3612093925476074, "learning_rate": 6.786801021856373e-05, "loss": 0.007082802057266235, "step": 113250 }, { "epoch": 32.148736871984106, "grad_norm": 0.1516474038362503, "learning_rate": 6.786517172864037e-05, "loss": 0.005345091223716736, "step": 113260 }, { "epoch": 32.15157536190747, "grad_norm": 0.09937619417905807, "learning_rate": 6.7862333238717e-05, "loss": 0.00119515098631382, "step": 113270 }, { "epoch": 32.154413851830824, "grad_norm": 13.256715774536133, "learning_rate": 6.785949474879365e-05, "loss": 0.0075756900012493135, "step": 113280 }, { "epoch": 32.15725234175419, "grad_norm": 0.06190642714500427, "learning_rate": 6.78566562588703e-05, "loss": 0.0021891776472330094, "step": 113290 }, { "epoch": 32.16009083167755, "grad_norm": 0.2855495512485504, "learning_rate": 6.785381776894692e-05, "loss": 0.003480631858110428, "step": 113300 }, { "epoch": 32.162929321600906, "grad_norm": 1.2464630603790283, "learning_rate": 6.785097927902356e-05, "loss": 0.0023585380986332893, "step": 113310 }, { "epoch": 32.16576781152427, "grad_norm": 0.13999396562576294, "learning_rate": 6.78481407891002e-05, "loss": 0.003312782943248749, "step": 113320 }, { "epoch": 32.16860630144763, "grad_norm": 4.94569206237793, "learning_rate": 6.784530229917683e-05, "loss": 0.0034679710865020753, "step": 113330 }, { "epoch": 32.17144479137099, "grad_norm": 0.38071539998054504, "learning_rate": 6.784246380925347e-05, "loss": 0.0010952912271022797, "step": 113340 }, { "epoch": 32.17428328129435, "grad_norm": 1.7886611223220825, "learning_rate": 6.783962531933013e-05, "loss": 0.003755585104227066, "step": 113350 }, { "epoch": 32.17712177121771, "grad_norm": 0.37936854362487793, "learning_rate": 6.783678682940676e-05, "loss": 0.0019212637096643447, "step": 113360 }, { "epoch": 32.179960261141076, "grad_norm": 12.635538101196289, "learning_rate": 6.78339483394834e-05, "loss": 0.014818759262561798, "step": 113370 }, { "epoch": 32.18279875106443, "grad_norm": 6.771005153656006, "learning_rate": 6.783110984956004e-05, "loss": 0.004397585242986679, "step": 113380 }, { "epoch": 32.185637240987795, "grad_norm": 2.8157174587249756, "learning_rate": 6.782827135963668e-05, "loss": 0.011247241497039795, "step": 113390 }, { "epoch": 32.18847573091116, "grad_norm": 0.02333211526274681, "learning_rate": 6.782543286971331e-05, "loss": 0.002285885252058506, "step": 113400 }, { "epoch": 32.19131422083451, "grad_norm": 0.7998051047325134, "learning_rate": 6.782259437978995e-05, "loss": 0.004526317864656448, "step": 113410 }, { "epoch": 32.194152710757876, "grad_norm": 0.11939138919115067, "learning_rate": 6.78197558898666e-05, "loss": 0.003458837419748306, "step": 113420 }, { "epoch": 32.19699120068124, "grad_norm": 0.254188597202301, "learning_rate": 6.781691739994323e-05, "loss": 0.0029236553236842155, "step": 113430 }, { "epoch": 32.199829690604595, "grad_norm": 0.388068825006485, "learning_rate": 6.781407891001987e-05, "loss": 0.00623173788189888, "step": 113440 }, { "epoch": 32.20266818052796, "grad_norm": 1.9414633512496948, "learning_rate": 6.781124042009652e-05, "loss": 0.020403265953063965, "step": 113450 }, { "epoch": 32.20550667045132, "grad_norm": 0.13548274338245392, "learning_rate": 6.780840193017314e-05, "loss": 0.003528503328561783, "step": 113460 }, { "epoch": 32.208345160374684, "grad_norm": 0.645809531211853, "learning_rate": 6.780556344024978e-05, "loss": 0.016390702128410338, "step": 113470 }, { "epoch": 32.21118365029804, "grad_norm": 1.702184796333313, "learning_rate": 6.780272495032644e-05, "loss": 0.014328551292419434, "step": 113480 }, { "epoch": 32.2140221402214, "grad_norm": 1.6160175800323486, "learning_rate": 6.779988646040307e-05, "loss": 0.020063447952270507, "step": 113490 }, { "epoch": 32.216860630144765, "grad_norm": 7.184590816497803, "learning_rate": 6.779704797047971e-05, "loss": 0.00843912735581398, "step": 113500 }, { "epoch": 32.216860630144765, "eval_accuracy": 0.9726584854072614, "eval_loss": 0.09012199938297272, "eval_runtime": 32.3481, "eval_samples_per_second": 486.18, "eval_steps_per_second": 7.605, "step": 113500 }, { "epoch": 32.21969912006812, "grad_norm": 0.3943791687488556, "learning_rate": 6.779420948055635e-05, "loss": 0.006679734587669373, "step": 113510 }, { "epoch": 32.222537609991484, "grad_norm": 3.206735372543335, "learning_rate": 6.779137099063299e-05, "loss": 0.0063702456653118135, "step": 113520 }, { "epoch": 32.22537609991485, "grad_norm": 0.4102219045162201, "learning_rate": 6.778853250070962e-05, "loss": 0.005461166054010392, "step": 113530 }, { "epoch": 32.2282145898382, "grad_norm": 11.555947303771973, "learning_rate": 6.778569401078626e-05, "loss": 0.021569731831550597, "step": 113540 }, { "epoch": 32.231053079761566, "grad_norm": 1.0789655447006226, "learning_rate": 6.778285552086292e-05, "loss": 0.0069807559251785275, "step": 113550 }, { "epoch": 32.23389156968493, "grad_norm": 2.709413528442383, "learning_rate": 6.778001703093954e-05, "loss": 0.012616276741027832, "step": 113560 }, { "epoch": 32.23673005960829, "grad_norm": 0.2750871479511261, "learning_rate": 6.777717854101618e-05, "loss": 0.00706607848405838, "step": 113570 }, { "epoch": 32.23956854953165, "grad_norm": 0.15584616363048553, "learning_rate": 6.777434005109283e-05, "loss": 0.00997084304690361, "step": 113580 }, { "epoch": 32.24240703945501, "grad_norm": 0.5570263266563416, "learning_rate": 6.777150156116945e-05, "loss": 0.0024737365543842315, "step": 113590 }, { "epoch": 32.24524552937837, "grad_norm": 0.10775743424892426, "learning_rate": 6.77686630712461e-05, "loss": 0.0042981825768947605, "step": 113600 }, { "epoch": 32.24808401930173, "grad_norm": 0.38575270771980286, "learning_rate": 6.776582458132274e-05, "loss": 0.0026128383353352545, "step": 113610 }, { "epoch": 32.25092250922509, "grad_norm": 0.18029536306858063, "learning_rate": 6.776298609139938e-05, "loss": 0.003873720020055771, "step": 113620 }, { "epoch": 32.253760999148454, "grad_norm": 0.3262554705142975, "learning_rate": 6.776014760147602e-05, "loss": 0.006739795207977295, "step": 113630 }, { "epoch": 32.25659948907182, "grad_norm": 0.26409804821014404, "learning_rate": 6.775730911155266e-05, "loss": 0.004262235015630722, "step": 113640 }, { "epoch": 32.25943797899517, "grad_norm": 0.3425769805908203, "learning_rate": 6.77544706216293e-05, "loss": 0.002111529745161533, "step": 113650 }, { "epoch": 32.262276468918536, "grad_norm": 0.72054123878479, "learning_rate": 6.775163213170593e-05, "loss": 0.00101242084056139, "step": 113660 }, { "epoch": 32.2651149588419, "grad_norm": 7.504349231719971, "learning_rate": 6.774879364178257e-05, "loss": 0.0037106871604919434, "step": 113670 }, { "epoch": 32.267953448765255, "grad_norm": 9.559056282043457, "learning_rate": 6.774595515185921e-05, "loss": 0.010646243393421174, "step": 113680 }, { "epoch": 32.27079193868862, "grad_norm": 0.05254020914435387, "learning_rate": 6.774311666193585e-05, "loss": 0.0009725546464323997, "step": 113690 }, { "epoch": 32.27363042861198, "grad_norm": 1.4245951175689697, "learning_rate": 6.77402781720125e-05, "loss": 0.004171088710427285, "step": 113700 }, { "epoch": 32.276468918535336, "grad_norm": 0.15658673644065857, "learning_rate": 6.773743968208914e-05, "loss": 0.00855831429362297, "step": 113710 }, { "epoch": 32.2793074084587, "grad_norm": 1.3264540433883667, "learning_rate": 6.773460119216576e-05, "loss": 0.006788210570812225, "step": 113720 }, { "epoch": 32.28214589838206, "grad_norm": 0.03787371888756752, "learning_rate": 6.77317627022424e-05, "loss": 0.0025403501465916635, "step": 113730 }, { "epoch": 32.284984388305425, "grad_norm": 0.7597050070762634, "learning_rate": 6.772892421231905e-05, "loss": 0.007415363192558288, "step": 113740 }, { "epoch": 32.28782287822878, "grad_norm": 1.1544818878173828, "learning_rate": 6.772608572239569e-05, "loss": 0.0034408807754516602, "step": 113750 }, { "epoch": 32.29066136815214, "grad_norm": 0.3979548215866089, "learning_rate": 6.772324723247233e-05, "loss": 0.002588382549583912, "step": 113760 }, { "epoch": 32.293499858075506, "grad_norm": 0.025236234068870544, "learning_rate": 6.772040874254897e-05, "loss": 0.00349077433347702, "step": 113770 }, { "epoch": 32.29633834799886, "grad_norm": 0.3230131268501282, "learning_rate": 6.77175702526256e-05, "loss": 0.0201138973236084, "step": 113780 }, { "epoch": 32.299176837922225, "grad_norm": 2.1914327144622803, "learning_rate": 6.771473176270224e-05, "loss": 0.006201978772878647, "step": 113790 }, { "epoch": 32.30201532784559, "grad_norm": 0.03451480716466904, "learning_rate": 6.771189327277888e-05, "loss": 0.005251342803239823, "step": 113800 }, { "epoch": 32.304853817768944, "grad_norm": 1.743640422821045, "learning_rate": 6.770905478285552e-05, "loss": 0.002060825377702713, "step": 113810 }, { "epoch": 32.30769230769231, "grad_norm": 0.22964555025100708, "learning_rate": 6.770621629293216e-05, "loss": 0.0022746950387954714, "step": 113820 }, { "epoch": 32.31053079761567, "grad_norm": 2.10322904586792, "learning_rate": 6.77033778030088e-05, "loss": 0.007051681727170944, "step": 113830 }, { "epoch": 32.31336928753903, "grad_norm": 0.04390684515237808, "learning_rate": 6.770053931308545e-05, "loss": 0.008618693053722381, "step": 113840 }, { "epoch": 32.31620777746239, "grad_norm": 16.890708923339844, "learning_rate": 6.769770082316208e-05, "loss": 0.006460443139076233, "step": 113850 }, { "epoch": 32.31904626738575, "grad_norm": 1.251128077507019, "learning_rate": 6.769486233323872e-05, "loss": 0.012202472239732743, "step": 113860 }, { "epoch": 32.321884757309114, "grad_norm": 1.1209325790405273, "learning_rate": 6.769202384331536e-05, "loss": 0.0037514366209506988, "step": 113870 }, { "epoch": 32.32472324723247, "grad_norm": 2.015658378601074, "learning_rate": 6.7689185353392e-05, "loss": 0.011603143811225892, "step": 113880 }, { "epoch": 32.32756173715583, "grad_norm": 0.68536776304245, "learning_rate": 6.768634686346864e-05, "loss": 0.0025602078065276148, "step": 113890 }, { "epoch": 32.330400227079195, "grad_norm": 0.606306254863739, "learning_rate": 6.768350837354528e-05, "loss": 0.0037806253880262376, "step": 113900 }, { "epoch": 32.33323871700255, "grad_norm": 0.476772665977478, "learning_rate": 6.768066988362191e-05, "loss": 0.0032348908483982087, "step": 113910 }, { "epoch": 32.336077206925914, "grad_norm": 0.708996057510376, "learning_rate": 6.767783139369855e-05, "loss": 0.010649625211954117, "step": 113920 }, { "epoch": 32.33891569684928, "grad_norm": 2.5708374977111816, "learning_rate": 6.767499290377519e-05, "loss": 0.0012338876724243164, "step": 113930 }, { "epoch": 32.34175418677264, "grad_norm": 6.922987461090088, "learning_rate": 6.767215441385183e-05, "loss": 0.0027254000306129456, "step": 113940 }, { "epoch": 32.344592676695996, "grad_norm": 0.010990005917847157, "learning_rate": 6.766931592392848e-05, "loss": 0.004330401495099068, "step": 113950 }, { "epoch": 32.34743116661936, "grad_norm": 3.2982280254364014, "learning_rate": 6.766647743400512e-05, "loss": 0.009295592457056046, "step": 113960 }, { "epoch": 32.35026965654272, "grad_norm": 2.494295358657837, "learning_rate": 6.766363894408176e-05, "loss": 0.006447308510541916, "step": 113970 }, { "epoch": 32.35310814646608, "grad_norm": 4.8559746742248535, "learning_rate": 6.766080045415839e-05, "loss": 0.008917318284511566, "step": 113980 }, { "epoch": 32.35594663638944, "grad_norm": 2.0082144737243652, "learning_rate": 6.765796196423503e-05, "loss": 0.002145201340317726, "step": 113990 }, { "epoch": 32.3587851263128, "grad_norm": 6.820044040679932, "learning_rate": 6.765512347431167e-05, "loss": 0.011418785899877548, "step": 114000 }, { "epoch": 32.3587851263128, "eval_accuracy": 0.9685890506771794, "eval_loss": 0.10716046392917633, "eval_runtime": 32.3516, "eval_samples_per_second": 486.127, "eval_steps_per_second": 7.604, "step": 114000 }, { "epoch": 32.36162361623616, "grad_norm": 0.42937737703323364, "learning_rate": 6.76522849843883e-05, "loss": 0.007951658964157105, "step": 114010 }, { "epoch": 32.36446210615952, "grad_norm": 0.15281030535697937, "learning_rate": 6.764944649446495e-05, "loss": 0.0021665066480636595, "step": 114020 }, { "epoch": 32.367300596082885, "grad_norm": 0.5121237635612488, "learning_rate": 6.764660800454159e-05, "loss": 0.003668125718832016, "step": 114030 }, { "epoch": 32.37013908600625, "grad_norm": 0.6893860697746277, "learning_rate": 6.764376951461822e-05, "loss": 0.005641918629407883, "step": 114040 }, { "epoch": 32.3729775759296, "grad_norm": 0.22708742320537567, "learning_rate": 6.764093102469486e-05, "loss": 0.003956592082977295, "step": 114050 }, { "epoch": 32.375816065852966, "grad_norm": 0.43383705615997314, "learning_rate": 6.76380925347715e-05, "loss": 0.004662372171878815, "step": 114060 }, { "epoch": 32.37865455577633, "grad_norm": 3.3631432056427, "learning_rate": 6.763525404484815e-05, "loss": 0.004196884483098984, "step": 114070 }, { "epoch": 32.381493045699685, "grad_norm": 6.487922668457031, "learning_rate": 6.763241555492479e-05, "loss": 0.008331590890884399, "step": 114080 }, { "epoch": 32.38433153562305, "grad_norm": 0.16430975496768951, "learning_rate": 6.762957706500143e-05, "loss": 0.0068503163754940035, "step": 114090 }, { "epoch": 32.38717002554641, "grad_norm": 0.17624656856060028, "learning_rate": 6.762673857507807e-05, "loss": 0.0027605874463915827, "step": 114100 }, { "epoch": 32.39000851546977, "grad_norm": 1.004976511001587, "learning_rate": 6.76239000851547e-05, "loss": 0.003381132334470749, "step": 114110 }, { "epoch": 32.39284700539313, "grad_norm": 5.970203399658203, "learning_rate": 6.762106159523134e-05, "loss": 0.007661319524049759, "step": 114120 }, { "epoch": 32.39568549531649, "grad_norm": 5.56792688369751, "learning_rate": 6.761822310530798e-05, "loss": 0.00980997532606125, "step": 114130 }, { "epoch": 32.398523985239855, "grad_norm": 2.284970283508301, "learning_rate": 6.761538461538461e-05, "loss": 0.0037452824413776397, "step": 114140 }, { "epoch": 32.40136247516321, "grad_norm": 0.7185450792312622, "learning_rate": 6.761254612546126e-05, "loss": 0.0057276800274848935, "step": 114150 }, { "epoch": 32.404200965086574, "grad_norm": 0.7018328309059143, "learning_rate": 6.76097076355379e-05, "loss": 0.0018034325912594795, "step": 114160 }, { "epoch": 32.40703945500994, "grad_norm": 1.0469950437545776, "learning_rate": 6.760686914561453e-05, "loss": 0.003090457245707512, "step": 114170 }, { "epoch": 32.40987794493329, "grad_norm": 7.388956546783447, "learning_rate": 6.760403065569117e-05, "loss": 0.01225554421544075, "step": 114180 }, { "epoch": 32.412716434856655, "grad_norm": 3.4771299362182617, "learning_rate": 6.760119216576781e-05, "loss": 0.009931269288063049, "step": 114190 }, { "epoch": 32.41555492478002, "grad_norm": 5.119248867034912, "learning_rate": 6.759835367584446e-05, "loss": 0.012873387336730957, "step": 114200 }, { "epoch": 32.41839341470338, "grad_norm": 3.1303441524505615, "learning_rate": 6.759551518592108e-05, "loss": 0.00588199570775032, "step": 114210 }, { "epoch": 32.42123190462674, "grad_norm": 3.0082404613494873, "learning_rate": 6.759267669599774e-05, "loss": 0.010008987784385682, "step": 114220 }, { "epoch": 32.4240703945501, "grad_norm": 6.552779197692871, "learning_rate": 6.758983820607438e-05, "loss": 0.011698020249605178, "step": 114230 }, { "epoch": 32.42690888447346, "grad_norm": 0.15180553495883942, "learning_rate": 6.758699971615101e-05, "loss": 0.009099248051643371, "step": 114240 }, { "epoch": 32.42974737439682, "grad_norm": 2.1065170764923096, "learning_rate": 6.758416122622765e-05, "loss": 0.02688334584236145, "step": 114250 }, { "epoch": 32.43258586432018, "grad_norm": 0.3048502504825592, "learning_rate": 6.758132273630429e-05, "loss": 0.010374457389116288, "step": 114260 }, { "epoch": 32.435424354243544, "grad_norm": 0.6098058819770813, "learning_rate": 6.757848424638092e-05, "loss": 0.00654008835554123, "step": 114270 }, { "epoch": 32.4382628441669, "grad_norm": 0.9462328553199768, "learning_rate": 6.757564575645757e-05, "loss": 0.009632043540477753, "step": 114280 }, { "epoch": 32.44110133409026, "grad_norm": 0.11408546566963196, "learning_rate": 6.757280726653421e-05, "loss": 0.007204797863960266, "step": 114290 }, { "epoch": 32.443939824013626, "grad_norm": 1.948219895362854, "learning_rate": 6.756996877661084e-05, "loss": 0.00823022872209549, "step": 114300 }, { "epoch": 32.44677831393699, "grad_norm": 0.8632134199142456, "learning_rate": 6.756713028668748e-05, "loss": 0.0020861439406871796, "step": 114310 }, { "epoch": 32.449616803860344, "grad_norm": 4.6393561363220215, "learning_rate": 6.756429179676413e-05, "loss": 0.009569059312343597, "step": 114320 }, { "epoch": 32.45245529378371, "grad_norm": 0.14281955361366272, "learning_rate": 6.756145330684077e-05, "loss": 0.011338746547698975, "step": 114330 }, { "epoch": 32.45529378370707, "grad_norm": 0.2974180579185486, "learning_rate": 6.75586148169174e-05, "loss": 0.01003139391541481, "step": 114340 }, { "epoch": 32.458132273630426, "grad_norm": 0.28766438364982605, "learning_rate": 6.755577632699405e-05, "loss": 0.007242113351821899, "step": 114350 }, { "epoch": 32.46097076355379, "grad_norm": 9.838591575622559, "learning_rate": 6.755293783707069e-05, "loss": 0.016401273012161256, "step": 114360 }, { "epoch": 32.46380925347715, "grad_norm": 0.8696520328521729, "learning_rate": 6.755009934714732e-05, "loss": 0.006739495694637299, "step": 114370 }, { "epoch": 32.46664774340051, "grad_norm": 11.485713005065918, "learning_rate": 6.754726085722396e-05, "loss": 0.010914240032434464, "step": 114380 }, { "epoch": 32.46948623332387, "grad_norm": 0.9926250576972961, "learning_rate": 6.75444223673006e-05, "loss": 0.004315894842147827, "step": 114390 }, { "epoch": 32.47232472324723, "grad_norm": 6.109653472900391, "learning_rate": 6.754158387737723e-05, "loss": 0.006377974152565002, "step": 114400 }, { "epoch": 32.475163213170596, "grad_norm": 1.0385361909866333, "learning_rate": 6.753874538745387e-05, "loss": 0.001586764119565487, "step": 114410 }, { "epoch": 32.47800170309395, "grad_norm": 0.43349531292915344, "learning_rate": 6.753590689753053e-05, "loss": 0.002714262530207634, "step": 114420 }, { "epoch": 32.480840193017315, "grad_norm": 6.788949966430664, "learning_rate": 6.753306840760715e-05, "loss": 0.007755088806152344, "step": 114430 }, { "epoch": 32.48367868294068, "grad_norm": 0.26468878984451294, "learning_rate": 6.75302299176838e-05, "loss": 0.01067410334944725, "step": 114440 }, { "epoch": 32.48651717286403, "grad_norm": 1.767830491065979, "learning_rate": 6.752739142776044e-05, "loss": 0.011746762692928315, "step": 114450 }, { "epoch": 32.489355662787396, "grad_norm": 6.436022758483887, "learning_rate": 6.752483678682942e-05, "loss": 0.016062068939208984, "step": 114460 }, { "epoch": 32.49219415271076, "grad_norm": 0.5355373024940491, "learning_rate": 6.752199829690606e-05, "loss": 0.005138899385929108, "step": 114470 }, { "epoch": 32.49503264263412, "grad_norm": 14.387542724609375, "learning_rate": 6.751915980698269e-05, "loss": 0.006621388345956802, "step": 114480 }, { "epoch": 32.49787113255748, "grad_norm": 5.61583137512207, "learning_rate": 6.751632131705933e-05, "loss": 0.007416114956140518, "step": 114490 }, { "epoch": 32.50070962248084, "grad_norm": 12.762537956237793, "learning_rate": 6.751348282713597e-05, "loss": 0.009079305082559585, "step": 114500 }, { "epoch": 32.50070962248084, "eval_accuracy": 0.9683347110065492, "eval_loss": 0.11194168031215668, "eval_runtime": 31.9034, "eval_samples_per_second": 492.956, "eval_steps_per_second": 7.711, "step": 114500 }, { "epoch": 32.503548112404204, "grad_norm": 0.8387620449066162, "learning_rate": 6.751064433721261e-05, "loss": 0.010013447701931, "step": 114510 }, { "epoch": 32.50638660232756, "grad_norm": 1.4259146451950073, "learning_rate": 6.750780584728924e-05, "loss": 0.003548775985836983, "step": 114520 }, { "epoch": 32.50922509225092, "grad_norm": 1.381527304649353, "learning_rate": 6.750496735736589e-05, "loss": 0.0032875917851924895, "step": 114530 }, { "epoch": 32.512063582174285, "grad_norm": 0.05731292441487312, "learning_rate": 6.750212886744253e-05, "loss": 0.0030773110687732697, "step": 114540 }, { "epoch": 32.51490207209764, "grad_norm": 1.166197657585144, "learning_rate": 6.749929037751916e-05, "loss": 0.003911464288830757, "step": 114550 }, { "epoch": 32.517740562021004, "grad_norm": 0.865229070186615, "learning_rate": 6.74964518875958e-05, "loss": 0.009720176458358765, "step": 114560 }, { "epoch": 32.52057905194437, "grad_norm": 1.2768666744232178, "learning_rate": 6.749361339767244e-05, "loss": 0.005226878821849823, "step": 114570 }, { "epoch": 32.52341754186773, "grad_norm": 10.134637832641602, "learning_rate": 6.749077490774907e-05, "loss": 0.004003226011991501, "step": 114580 }, { "epoch": 32.526256031791085, "grad_norm": 0.021010393276810646, "learning_rate": 6.748793641782571e-05, "loss": 0.004367262870073318, "step": 114590 }, { "epoch": 32.52909452171445, "grad_norm": 1.3082411289215088, "learning_rate": 6.748509792790237e-05, "loss": 0.0022121984511613846, "step": 114600 }, { "epoch": 32.53193301163781, "grad_norm": 0.13424289226531982, "learning_rate": 6.7482259437979e-05, "loss": 0.004100373387336731, "step": 114610 }, { "epoch": 32.53477150156117, "grad_norm": 0.02352616749703884, "learning_rate": 6.747942094805564e-05, "loss": 0.0024976471439003946, "step": 114620 }, { "epoch": 32.53760999148453, "grad_norm": 0.36953461170196533, "learning_rate": 6.747658245813228e-05, "loss": 0.0067359328269958494, "step": 114630 }, { "epoch": 32.54044848140789, "grad_norm": 1.9611560106277466, "learning_rate": 6.747374396820892e-05, "loss": 0.002916085720062256, "step": 114640 }, { "epoch": 32.54328697133125, "grad_norm": 0.06334231048822403, "learning_rate": 6.747090547828555e-05, "loss": 0.004664639756083488, "step": 114650 }, { "epoch": 32.54612546125461, "grad_norm": 0.27794936299324036, "learning_rate": 6.74680669883622e-05, "loss": 0.004776120185852051, "step": 114660 }, { "epoch": 32.548963951177974, "grad_norm": 0.05011090636253357, "learning_rate": 6.746522849843883e-05, "loss": 0.017741817235946655, "step": 114670 }, { "epoch": 32.55180244110134, "grad_norm": 9.946207046508789, "learning_rate": 6.746239000851547e-05, "loss": 0.0071158245205879215, "step": 114680 }, { "epoch": 32.55464093102469, "grad_norm": 11.498275756835938, "learning_rate": 6.745955151859211e-05, "loss": 0.011961480975151062, "step": 114690 }, { "epoch": 32.557479420948056, "grad_norm": 0.5114032030105591, "learning_rate": 6.745671302866875e-05, "loss": 0.007185105234384537, "step": 114700 }, { "epoch": 32.56031791087142, "grad_norm": 3.362053871154785, "learning_rate": 6.745387453874538e-05, "loss": 0.0054019328206777574, "step": 114710 }, { "epoch": 32.563156400794774, "grad_norm": 0.14989155530929565, "learning_rate": 6.745103604882202e-05, "loss": 0.002203916572034359, "step": 114720 }, { "epoch": 32.56599489071814, "grad_norm": 2.6054012775421143, "learning_rate": 6.744819755889868e-05, "loss": 0.003519074618816376, "step": 114730 }, { "epoch": 32.5688333806415, "grad_norm": 0.688409149646759, "learning_rate": 6.74453590689753e-05, "loss": 0.0169049933552742, "step": 114740 }, { "epoch": 32.571671870564856, "grad_norm": 1.8891355991363525, "learning_rate": 6.744252057905195e-05, "loss": 0.008291397243738174, "step": 114750 }, { "epoch": 32.57451036048822, "grad_norm": 0.9401333332061768, "learning_rate": 6.743968208912859e-05, "loss": 0.010648560523986817, "step": 114760 }, { "epoch": 32.57734885041158, "grad_norm": 0.44594964385032654, "learning_rate": 6.743684359920522e-05, "loss": 0.006029950082302093, "step": 114770 }, { "epoch": 32.580187340334945, "grad_norm": 0.15582551062107086, "learning_rate": 6.743400510928186e-05, "loss": 0.0011174697428941727, "step": 114780 }, { "epoch": 32.5830258302583, "grad_norm": 14.366189002990723, "learning_rate": 6.74311666193585e-05, "loss": 0.017259415984153748, "step": 114790 }, { "epoch": 32.58586432018166, "grad_norm": 6.311891555786133, "learning_rate": 6.742832812943514e-05, "loss": 0.0034424431622028353, "step": 114800 }, { "epoch": 32.588702810105026, "grad_norm": 0.5163989663124084, "learning_rate": 6.742548963951178e-05, "loss": 0.0036771148443222046, "step": 114810 }, { "epoch": 32.59154130002838, "grad_norm": 0.13861115276813507, "learning_rate": 6.742265114958842e-05, "loss": 0.0036024361848831176, "step": 114820 }, { "epoch": 32.594379789951745, "grad_norm": 0.10339943319559097, "learning_rate": 6.741981265966507e-05, "loss": 0.0018397554755210876, "step": 114830 }, { "epoch": 32.59721827987511, "grad_norm": 0.09476244449615479, "learning_rate": 6.74169741697417e-05, "loss": 0.006398230791091919, "step": 114840 }, { "epoch": 32.60005676979847, "grad_norm": 5.224700450897217, "learning_rate": 6.741413567981833e-05, "loss": 0.0051482245326042175, "step": 114850 }, { "epoch": 32.60289525972183, "grad_norm": 0.10674237459897995, "learning_rate": 6.741129718989499e-05, "loss": 0.007873215526342393, "step": 114860 }, { "epoch": 32.60573374964519, "grad_norm": 0.29810312390327454, "learning_rate": 6.740845869997162e-05, "loss": 0.00433870404958725, "step": 114870 }, { "epoch": 32.60857223956855, "grad_norm": 0.07812470197677612, "learning_rate": 6.740562021004826e-05, "loss": 0.0114394411444664, "step": 114880 }, { "epoch": 32.61141072949191, "grad_norm": 0.3613995611667633, "learning_rate": 6.74027817201249e-05, "loss": 0.006175015866756439, "step": 114890 }, { "epoch": 32.61424921941527, "grad_norm": 2.0568666458129883, "learning_rate": 6.739994323020153e-05, "loss": 0.0044817298650741575, "step": 114900 }, { "epoch": 32.617087709338634, "grad_norm": 10.966506958007812, "learning_rate": 6.739710474027817e-05, "loss": 0.010058432817459106, "step": 114910 }, { "epoch": 32.61992619926199, "grad_norm": 0.2765500247478485, "learning_rate": 6.739426625035481e-05, "loss": 0.006062079221010208, "step": 114920 }, { "epoch": 32.62276468918535, "grad_norm": 0.9741296172142029, "learning_rate": 6.739142776043145e-05, "loss": 0.013068430125713348, "step": 114930 }, { "epoch": 32.625603179108715, "grad_norm": 0.11648418009281158, "learning_rate": 6.73885892705081e-05, "loss": 0.003900745138525963, "step": 114940 }, { "epoch": 32.62844166903208, "grad_norm": 0.2820674479007721, "learning_rate": 6.738575078058473e-05, "loss": 0.020217898488044738, "step": 114950 }, { "epoch": 32.631280158955434, "grad_norm": 1.2293736934661865, "learning_rate": 6.738291229066138e-05, "loss": 0.008869744092226028, "step": 114960 }, { "epoch": 32.6341186488788, "grad_norm": 1.0221351385116577, "learning_rate": 6.7380073800738e-05, "loss": 0.002987317927181721, "step": 114970 }, { "epoch": 32.63695713880216, "grad_norm": 0.21123717725276947, "learning_rate": 6.737723531081465e-05, "loss": 0.006773201376199722, "step": 114980 }, { "epoch": 32.639795628725516, "grad_norm": 0.2600741386413574, "learning_rate": 6.73743968208913e-05, "loss": 0.013052636384963989, "step": 114990 }, { "epoch": 32.64263411864888, "grad_norm": 5.860227584838867, "learning_rate": 6.737155833096793e-05, "loss": 0.007457506656646728, "step": 115000 }, { "epoch": 32.64263411864888, "eval_accuracy": 0.9722133909836587, "eval_loss": 0.09420241415500641, "eval_runtime": 31.7041, "eval_samples_per_second": 496.056, "eval_steps_per_second": 7.759, "step": 115000 }, { "epoch": 32.64547260857224, "grad_norm": 0.8755062222480774, "learning_rate": 6.736871984104457e-05, "loss": 0.0052278783172369, "step": 115010 }, { "epoch": 32.6483110984956, "grad_norm": 3.6057851314544678, "learning_rate": 6.736588135112121e-05, "loss": 0.0036074697971343996, "step": 115020 }, { "epoch": 32.65114958841896, "grad_norm": 1.4727593660354614, "learning_rate": 6.736304286119784e-05, "loss": 0.0014422724023461341, "step": 115030 }, { "epoch": 32.65398807834232, "grad_norm": 0.27728572487831116, "learning_rate": 6.736020437127448e-05, "loss": 0.003662591427564621, "step": 115040 }, { "epoch": 32.656826568265686, "grad_norm": 7.0233540534973145, "learning_rate": 6.735736588135112e-05, "loss": 0.00503544993698597, "step": 115050 }, { "epoch": 32.65966505818904, "grad_norm": 0.052828531712293625, "learning_rate": 6.735452739142776e-05, "loss": 0.005121733248233795, "step": 115060 }, { "epoch": 32.662503548112404, "grad_norm": 1.258283019065857, "learning_rate": 6.73516889015044e-05, "loss": 0.0035679638385772705, "step": 115070 }, { "epoch": 32.66534203803577, "grad_norm": 1.0674129724502563, "learning_rate": 6.734885041158105e-05, "loss": 0.0040055826306343075, "step": 115080 }, { "epoch": 32.66818052795912, "grad_norm": 5.228094577789307, "learning_rate": 6.734601192165769e-05, "loss": 0.0068355418741703035, "step": 115090 }, { "epoch": 32.671019017882486, "grad_norm": 0.4123722314834595, "learning_rate": 6.734317343173431e-05, "loss": 0.0017310673370957374, "step": 115100 }, { "epoch": 32.67385750780585, "grad_norm": 2.8289482593536377, "learning_rate": 6.734033494181096e-05, "loss": 0.00764349028468132, "step": 115110 }, { "epoch": 32.676695997729205, "grad_norm": 5.455272197723389, "learning_rate": 6.73374964518876e-05, "loss": 0.005287186056375503, "step": 115120 }, { "epoch": 32.67953448765257, "grad_norm": 1.1194788217544556, "learning_rate": 6.733465796196424e-05, "loss": 0.003764507174491882, "step": 115130 }, { "epoch": 32.68237297757593, "grad_norm": 0.3061029016971588, "learning_rate": 6.733181947204088e-05, "loss": 0.002386040985584259, "step": 115140 }, { "epoch": 32.68521146749929, "grad_norm": 16.222301483154297, "learning_rate": 6.732898098211752e-05, "loss": 0.013604512810707093, "step": 115150 }, { "epoch": 32.68804995742265, "grad_norm": 0.13470429182052612, "learning_rate": 6.732614249219415e-05, "loss": 0.004993880167603493, "step": 115160 }, { "epoch": 32.69088844734601, "grad_norm": 0.17951203882694244, "learning_rate": 6.732330400227079e-05, "loss": 0.0012356458231806756, "step": 115170 }, { "epoch": 32.693726937269375, "grad_norm": 15.069907188415527, "learning_rate": 6.732046551234743e-05, "loss": 0.0108691968023777, "step": 115180 }, { "epoch": 32.69656542719273, "grad_norm": 1.161422610282898, "learning_rate": 6.731762702242407e-05, "loss": 0.0027802960947155954, "step": 115190 }, { "epoch": 32.69940391711609, "grad_norm": 10.603035926818848, "learning_rate": 6.731478853250072e-05, "loss": 0.01007988303899765, "step": 115200 }, { "epoch": 32.702242407039456, "grad_norm": 0.06016096845269203, "learning_rate": 6.731195004257736e-05, "loss": 0.006822508573532104, "step": 115210 }, { "epoch": 32.70508089696281, "grad_norm": 0.155114084482193, "learning_rate": 6.7309111552654e-05, "loss": 0.0016975319012999535, "step": 115220 }, { "epoch": 32.707919386886175, "grad_norm": 0.5317871570587158, "learning_rate": 6.730627306273063e-05, "loss": 0.010537860542535782, "step": 115230 }, { "epoch": 32.71075787680954, "grad_norm": 0.2896820306777954, "learning_rate": 6.730343457280727e-05, "loss": 0.004714943468570709, "step": 115240 }, { "epoch": 32.7135963667329, "grad_norm": 1.5274219512939453, "learning_rate": 6.730059608288391e-05, "loss": 0.004906470328569413, "step": 115250 }, { "epoch": 32.71643485665626, "grad_norm": 3.5981786251068115, "learning_rate": 6.729775759296055e-05, "loss": 0.006560913473367691, "step": 115260 }, { "epoch": 32.71927334657962, "grad_norm": 0.1862967163324356, "learning_rate": 6.729491910303719e-05, "loss": 0.015175075829029083, "step": 115270 }, { "epoch": 32.72211183650298, "grad_norm": 0.7134433388710022, "learning_rate": 6.729208061311383e-05, "loss": 0.007485075294971466, "step": 115280 }, { "epoch": 32.72495032642634, "grad_norm": 6.883807182312012, "learning_rate": 6.728924212319046e-05, "loss": 0.009930762648582458, "step": 115290 }, { "epoch": 32.7277888163497, "grad_norm": 0.060246542096138, "learning_rate": 6.72864036332671e-05, "loss": 0.01544618308544159, "step": 115300 }, { "epoch": 32.730627306273064, "grad_norm": 0.44245436787605286, "learning_rate": 6.728356514334374e-05, "loss": 0.011322715133428574, "step": 115310 }, { "epoch": 32.73346579619643, "grad_norm": 9.175596237182617, "learning_rate": 6.728072665342038e-05, "loss": 0.003295111656188965, "step": 115320 }, { "epoch": 32.73630428611978, "grad_norm": 0.7547280788421631, "learning_rate": 6.727788816349703e-05, "loss": 0.008293887227773666, "step": 115330 }, { "epoch": 32.739142776043145, "grad_norm": 1.4769798517227173, "learning_rate": 6.727504967357367e-05, "loss": 0.0032275639474391936, "step": 115340 }, { "epoch": 32.74198126596651, "grad_norm": 2.8032193183898926, "learning_rate": 6.727221118365031e-05, "loss": 0.006897037476301193, "step": 115350 }, { "epoch": 32.744819755889864, "grad_norm": 3.883713960647583, "learning_rate": 6.726937269372694e-05, "loss": 0.01650850772857666, "step": 115360 }, { "epoch": 32.74765824581323, "grad_norm": 1.6761583089828491, "learning_rate": 6.726653420380358e-05, "loss": 0.0033865243196487425, "step": 115370 }, { "epoch": 32.75049673573659, "grad_norm": 0.7773210406303406, "learning_rate": 6.726369571388022e-05, "loss": 0.007043173909187317, "step": 115380 }, { "epoch": 32.753335225659946, "grad_norm": 4.391389846801758, "learning_rate": 6.726085722395686e-05, "loss": 0.005350909382104874, "step": 115390 }, { "epoch": 32.75617371558331, "grad_norm": 0.4465000331401825, "learning_rate": 6.72580187340335e-05, "loss": 0.0065145701169967655, "step": 115400 }, { "epoch": 32.75901220550667, "grad_norm": 7.537452697753906, "learning_rate": 6.725518024411014e-05, "loss": 0.0343333750963211, "step": 115410 }, { "epoch": 32.761850695430034, "grad_norm": 0.2141539305448532, "learning_rate": 6.725234175418677e-05, "loss": 0.010860981047153473, "step": 115420 }, { "epoch": 32.76468918535339, "grad_norm": 3.0888445377349854, "learning_rate": 6.724950326426341e-05, "loss": 0.006370636820793152, "step": 115430 }, { "epoch": 32.76752767527675, "grad_norm": 0.05383579060435295, "learning_rate": 6.724666477434005e-05, "loss": 0.008997134864330292, "step": 115440 }, { "epoch": 32.770366165200116, "grad_norm": 0.975656270980835, "learning_rate": 6.72438262844167e-05, "loss": 0.004427803680300713, "step": 115450 }, { "epoch": 32.77320465512347, "grad_norm": 0.31610989570617676, "learning_rate": 6.724098779449334e-05, "loss": 0.004494031518697738, "step": 115460 }, { "epoch": 32.776043145046835, "grad_norm": 0.03594844043254852, "learning_rate": 6.723814930456998e-05, "loss": 0.007528479397296906, "step": 115470 }, { "epoch": 32.7788816349702, "grad_norm": 0.27360719442367554, "learning_rate": 6.723531081464662e-05, "loss": 0.008582120388746261, "step": 115480 }, { "epoch": 32.78172012489355, "grad_norm": 2.762314796447754, "learning_rate": 6.723247232472325e-05, "loss": 0.02116602659225464, "step": 115490 }, { "epoch": 32.784558614816916, "grad_norm": 0.6268345713615417, "learning_rate": 6.722963383479989e-05, "loss": 0.0033383250236511232, "step": 115500 }, { "epoch": 32.784558614816916, "eval_accuracy": 0.9647103707000699, "eval_loss": 0.12168284505605698, "eval_runtime": 31.7737, "eval_samples_per_second": 494.969, "eval_steps_per_second": 7.742, "step": 115500 }, { "epoch": 32.78739710474028, "grad_norm": 1.470258116722107, "learning_rate": 6.722679534487653e-05, "loss": 0.009023544937372207, "step": 115510 }, { "epoch": 32.79023559466364, "grad_norm": 0.1403089314699173, "learning_rate": 6.722395685495316e-05, "loss": 0.005251587182283401, "step": 115520 }, { "epoch": 32.793074084587, "grad_norm": 0.5549132227897644, "learning_rate": 6.722111836502981e-05, "loss": 0.0010290227830410004, "step": 115530 }, { "epoch": 32.79591257451036, "grad_norm": 0.252884179353714, "learning_rate": 6.721827987510645e-05, "loss": 0.005143879354000092, "step": 115540 }, { "epoch": 32.79875106443372, "grad_norm": 2.968977451324463, "learning_rate": 6.721544138518308e-05, "loss": 0.0060463428497314455, "step": 115550 }, { "epoch": 32.80158955435708, "grad_norm": 1.8864237070083618, "learning_rate": 6.721260289525972e-05, "loss": 0.005672759190201759, "step": 115560 }, { "epoch": 32.80442804428044, "grad_norm": 2.8491411209106445, "learning_rate": 6.720976440533636e-05, "loss": 0.0023312605917453766, "step": 115570 }, { "epoch": 32.807266534203805, "grad_norm": 0.12990666925907135, "learning_rate": 6.7206925915413e-05, "loss": 0.00189936812967062, "step": 115580 }, { "epoch": 32.81010502412717, "grad_norm": 0.3765394985675812, "learning_rate": 6.720408742548965e-05, "loss": 0.005508154630661011, "step": 115590 }, { "epoch": 32.812943514050524, "grad_norm": 0.41737282276153564, "learning_rate": 6.720124893556629e-05, "loss": 0.00699404776096344, "step": 115600 }, { "epoch": 32.81578200397389, "grad_norm": 0.5483384132385254, "learning_rate": 6.719841044564292e-05, "loss": 0.009424223750829696, "step": 115610 }, { "epoch": 32.81862049389725, "grad_norm": 0.21697883307933807, "learning_rate": 6.719557195571956e-05, "loss": 0.003758159279823303, "step": 115620 }, { "epoch": 32.821458983820605, "grad_norm": 0.18853676319122314, "learning_rate": 6.71927334657962e-05, "loss": 0.0030297825112938883, "step": 115630 }, { "epoch": 32.82429747374397, "grad_norm": 0.42127612233161926, "learning_rate": 6.718989497587284e-05, "loss": 0.009587431699037552, "step": 115640 }, { "epoch": 32.82713596366733, "grad_norm": 1.9128867387771606, "learning_rate": 6.718705648594947e-05, "loss": 0.007943371683359146, "step": 115650 }, { "epoch": 32.82997445359069, "grad_norm": 1.639733076095581, "learning_rate": 6.718421799602612e-05, "loss": 0.014045782387256622, "step": 115660 }, { "epoch": 32.83281294351405, "grad_norm": 0.5386577844619751, "learning_rate": 6.718137950610276e-05, "loss": 0.003357705473899841, "step": 115670 }, { "epoch": 32.83565143343741, "grad_norm": 1.4894598722457886, "learning_rate": 6.717854101617939e-05, "loss": 0.005198423564434051, "step": 115680 }, { "epoch": 32.838489923360775, "grad_norm": 0.9534806609153748, "learning_rate": 6.717570252625603e-05, "loss": 0.013977539539337159, "step": 115690 }, { "epoch": 32.84132841328413, "grad_norm": 0.915469765663147, "learning_rate": 6.717286403633268e-05, "loss": 0.008945496380329132, "step": 115700 }, { "epoch": 32.844166903207494, "grad_norm": 0.45460212230682373, "learning_rate": 6.71700255464093e-05, "loss": 0.010270661860704421, "step": 115710 }, { "epoch": 32.84700539313086, "grad_norm": 11.189266204833984, "learning_rate": 6.716718705648594e-05, "loss": 0.02140667140483856, "step": 115720 }, { "epoch": 32.84984388305421, "grad_norm": 0.3372981548309326, "learning_rate": 6.71643485665626e-05, "loss": 0.02159825712442398, "step": 115730 }, { "epoch": 32.852682372977576, "grad_norm": 0.22836704552173615, "learning_rate": 6.716151007663923e-05, "loss": 0.009089440852403641, "step": 115740 }, { "epoch": 32.85552086290094, "grad_norm": 0.3199300169944763, "learning_rate": 6.715867158671587e-05, "loss": 0.013751201331615448, "step": 115750 }, { "epoch": 32.858359352824294, "grad_norm": 0.32413825392723083, "learning_rate": 6.715583309679251e-05, "loss": 0.012880058586597442, "step": 115760 }, { "epoch": 32.86119784274766, "grad_norm": 10.195226669311523, "learning_rate": 6.715299460686915e-05, "loss": 0.0071233630180358885, "step": 115770 }, { "epoch": 32.86403633267102, "grad_norm": 15.308408737182617, "learning_rate": 6.715015611694578e-05, "loss": 0.009597312659025192, "step": 115780 }, { "epoch": 32.86687482259438, "grad_norm": 1.6286441087722778, "learning_rate": 6.714731762702243e-05, "loss": 0.004611574113368988, "step": 115790 }, { "epoch": 32.86971331251774, "grad_norm": 1.9988278150558472, "learning_rate": 6.714447913709908e-05, "loss": 0.02878168523311615, "step": 115800 }, { "epoch": 32.8725518024411, "grad_norm": 0.016223937273025513, "learning_rate": 6.71416406471757e-05, "loss": 0.017253577709197998, "step": 115810 }, { "epoch": 32.875390292364465, "grad_norm": 0.15361864864826202, "learning_rate": 6.713880215725234e-05, "loss": 0.005740371346473694, "step": 115820 }, { "epoch": 32.87822878228782, "grad_norm": 0.810506284236908, "learning_rate": 6.713596366732899e-05, "loss": 0.00804566890001297, "step": 115830 }, { "epoch": 32.88106727221118, "grad_norm": 12.61858081817627, "learning_rate": 6.713312517740561e-05, "loss": 0.030058106780052184, "step": 115840 }, { "epoch": 32.883905762134546, "grad_norm": 11.883477210998535, "learning_rate": 6.713028668748226e-05, "loss": 0.0060219340026378635, "step": 115850 }, { "epoch": 32.8867442520579, "grad_norm": 9.442068099975586, "learning_rate": 6.712744819755891e-05, "loss": 0.007366572320461273, "step": 115860 }, { "epoch": 32.889582741981265, "grad_norm": 3.7921175956726074, "learning_rate": 6.712460970763554e-05, "loss": 0.0037923097610473634, "step": 115870 }, { "epoch": 32.89242123190463, "grad_norm": 2.9303460121154785, "learning_rate": 6.712177121771218e-05, "loss": 0.005532452464103698, "step": 115880 }, { "epoch": 32.89525972182799, "grad_norm": 0.18757256865501404, "learning_rate": 6.711893272778882e-05, "loss": 0.004501210898160935, "step": 115890 }, { "epoch": 32.898098211751346, "grad_norm": 5.959294319152832, "learning_rate": 6.711609423786546e-05, "loss": 0.011784429103136063, "step": 115900 }, { "epoch": 32.90093670167471, "grad_norm": 0.16381728649139404, "learning_rate": 6.711325574794209e-05, "loss": 0.004614159464836121, "step": 115910 }, { "epoch": 32.90377519159807, "grad_norm": 14.170340538024902, "learning_rate": 6.711041725801873e-05, "loss": 0.010023683309555054, "step": 115920 }, { "epoch": 32.90661368152143, "grad_norm": 0.35233521461486816, "learning_rate": 6.710757876809539e-05, "loss": 0.00986594781279564, "step": 115930 }, { "epoch": 32.90945217144479, "grad_norm": 9.273916244506836, "learning_rate": 6.710474027817201e-05, "loss": 0.010228903591632843, "step": 115940 }, { "epoch": 32.912290661368154, "grad_norm": 0.0840216651558876, "learning_rate": 6.710190178824866e-05, "loss": 0.01614183038473129, "step": 115950 }, { "epoch": 32.91512915129151, "grad_norm": 0.9385865330696106, "learning_rate": 6.70990632983253e-05, "loss": 0.0143500417470932, "step": 115960 }, { "epoch": 32.91796764121487, "grad_norm": 0.17552220821380615, "learning_rate": 6.709622480840192e-05, "loss": 0.0020470311865210533, "step": 115970 }, { "epoch": 32.920806131138235, "grad_norm": 0.12615561485290527, "learning_rate": 6.709338631847857e-05, "loss": 0.01490943431854248, "step": 115980 }, { "epoch": 32.9236446210616, "grad_norm": 3.789140224456787, "learning_rate": 6.709054782855522e-05, "loss": 0.005563593283295631, "step": 115990 }, { "epoch": 32.926483110984954, "grad_norm": 3.951472520828247, "learning_rate": 6.708770933863185e-05, "loss": 0.01912008970975876, "step": 116000 }, { "epoch": 32.926483110984954, "eval_accuracy": 0.9711324473834806, "eval_loss": 0.10250042378902435, "eval_runtime": 31.8767, "eval_samples_per_second": 493.369, "eval_steps_per_second": 7.717, "step": 116000 }, { "epoch": 32.92932160090832, "grad_norm": 10.958779335021973, "learning_rate": 6.708487084870849e-05, "loss": 0.03415845632553101, "step": 116010 }, { "epoch": 32.93216009083168, "grad_norm": 8.922451972961426, "learning_rate": 6.708203235878513e-05, "loss": 0.017027565836906434, "step": 116020 }, { "epoch": 32.934998580755035, "grad_norm": 1.252122402191162, "learning_rate": 6.707919386886177e-05, "loss": 0.013306140899658203, "step": 116030 }, { "epoch": 32.9378370706784, "grad_norm": 0.48863062262535095, "learning_rate": 6.70763553789384e-05, "loss": 0.006216781213879585, "step": 116040 }, { "epoch": 32.94067556060176, "grad_norm": 1.121009111404419, "learning_rate": 6.707351688901504e-05, "loss": 0.010345569252967835, "step": 116050 }, { "epoch": 32.943514050525124, "grad_norm": 7.219755172729492, "learning_rate": 6.70706783990917e-05, "loss": 0.011841758340597152, "step": 116060 }, { "epoch": 32.94635254044848, "grad_norm": 0.11633042246103287, "learning_rate": 6.706783990916832e-05, "loss": 0.0032011404633522033, "step": 116070 }, { "epoch": 32.94919103037184, "grad_norm": 0.7642565965652466, "learning_rate": 6.706500141924497e-05, "loss": 0.007887057960033417, "step": 116080 }, { "epoch": 32.952029520295206, "grad_norm": 0.4195108711719513, "learning_rate": 6.706216292932161e-05, "loss": 0.003325618803501129, "step": 116090 }, { "epoch": 32.95486801021856, "grad_norm": 0.1415892243385315, "learning_rate": 6.705932443939824e-05, "loss": 0.010518977791070938, "step": 116100 }, { "epoch": 32.957706500141924, "grad_norm": 0.05278736725449562, "learning_rate": 6.705648594947488e-05, "loss": 0.0027638593688607215, "step": 116110 }, { "epoch": 32.96054499006529, "grad_norm": 0.5976791381835938, "learning_rate": 6.705364745955152e-05, "loss": 0.0037149250507354735, "step": 116120 }, { "epoch": 32.96338347998864, "grad_norm": 0.13159441947937012, "learning_rate": 6.705080896962816e-05, "loss": 0.01016145497560501, "step": 116130 }, { "epoch": 32.966221969912006, "grad_norm": 0.9723941683769226, "learning_rate": 6.70479704797048e-05, "loss": 0.0015779921784996986, "step": 116140 }, { "epoch": 32.96906045983537, "grad_norm": 9.23215103149414, "learning_rate": 6.704513198978144e-05, "loss": 0.00601348951458931, "step": 116150 }, { "epoch": 32.97189894975873, "grad_norm": 0.2786255478858948, "learning_rate": 6.704229349985808e-05, "loss": 0.0034077394753694533, "step": 116160 }, { "epoch": 32.97473743968209, "grad_norm": 1.6322020292282104, "learning_rate": 6.703945500993471e-05, "loss": 0.004880034178495407, "step": 116170 }, { "epoch": 32.97757592960545, "grad_norm": 0.08954855054616928, "learning_rate": 6.703661652001135e-05, "loss": 0.003108261153101921, "step": 116180 }, { "epoch": 32.98041441952881, "grad_norm": 1.7511281967163086, "learning_rate": 6.703377803008801e-05, "loss": 0.0126246839761734, "step": 116190 }, { "epoch": 32.98325290945217, "grad_norm": 0.8042444586753845, "learning_rate": 6.703093954016464e-05, "loss": 0.0025573816150426865, "step": 116200 }, { "epoch": 32.98609139937553, "grad_norm": 0.40912291407585144, "learning_rate": 6.702810105024128e-05, "loss": 0.007548514008522034, "step": 116210 }, { "epoch": 32.988929889298895, "grad_norm": 1.2805516719818115, "learning_rate": 6.702526256031792e-05, "loss": 0.024196968972682954, "step": 116220 }, { "epoch": 32.99176837922225, "grad_norm": 0.6784161329269409, "learning_rate": 6.702242407039455e-05, "loss": 0.009925782680511475, "step": 116230 }, { "epoch": 32.99460686914561, "grad_norm": 6.0436625480651855, "learning_rate": 6.701958558047119e-05, "loss": 0.009577143192291259, "step": 116240 }, { "epoch": 32.997445359068976, "grad_norm": 9.224055290222168, "learning_rate": 6.701674709054783e-05, "loss": 0.006043463200330734, "step": 116250 }, { "epoch": 33.00028384899234, "grad_norm": 12.260200500488281, "learning_rate": 6.701390860062447e-05, "loss": 0.009040240198373795, "step": 116260 }, { "epoch": 33.003122338915695, "grad_norm": 0.10167325288057327, "learning_rate": 6.701107011070111e-05, "loss": 0.0020274695008993147, "step": 116270 }, { "epoch": 33.00596082883906, "grad_norm": 0.9623202085494995, "learning_rate": 6.700823162077775e-05, "loss": 0.008140146732330322, "step": 116280 }, { "epoch": 33.00879931876242, "grad_norm": 0.6246654987335205, "learning_rate": 6.70053931308544e-05, "loss": 0.010078358650207519, "step": 116290 }, { "epoch": 33.01163780868578, "grad_norm": 0.2700178027153015, "learning_rate": 6.700255464093102e-05, "loss": 0.010890664160251617, "step": 116300 }, { "epoch": 33.01447629860914, "grad_norm": 2.104504346847534, "learning_rate": 6.699971615100766e-05, "loss": 0.002321568131446838, "step": 116310 }, { "epoch": 33.0173147885325, "grad_norm": 7.427857398986816, "learning_rate": 6.699687766108432e-05, "loss": 0.0026299754157662393, "step": 116320 }, { "epoch": 33.02015327845586, "grad_norm": 0.5689986348152161, "learning_rate": 6.699403917116095e-05, "loss": 0.001200598292052746, "step": 116330 }, { "epoch": 33.02299176837922, "grad_norm": 0.6372933983802795, "learning_rate": 6.699120068123759e-05, "loss": 0.0012028954923152923, "step": 116340 }, { "epoch": 33.025830258302584, "grad_norm": 1.522961974143982, "learning_rate": 6.698836219131423e-05, "loss": 0.005192311853170395, "step": 116350 }, { "epoch": 33.02866874822595, "grad_norm": 0.04061300307512283, "learning_rate": 6.698552370139086e-05, "loss": 0.0055526383221149445, "step": 116360 }, { "epoch": 33.0315072381493, "grad_norm": 4.503653526306152, "learning_rate": 6.69826852114675e-05, "loss": 0.006962700188159943, "step": 116370 }, { "epoch": 33.034345728072665, "grad_norm": 0.3460148870944977, "learning_rate": 6.697984672154414e-05, "loss": 0.006532601267099381, "step": 116380 }, { "epoch": 33.03718421799603, "grad_norm": 0.06929676234722137, "learning_rate": 6.697700823162078e-05, "loss": 0.0016426945105195046, "step": 116390 }, { "epoch": 33.040022707919384, "grad_norm": 9.749085426330566, "learning_rate": 6.697416974169742e-05, "loss": 0.00736527219414711, "step": 116400 }, { "epoch": 33.04286119784275, "grad_norm": 2.1877694129943848, "learning_rate": 6.697133125177406e-05, "loss": 0.009657806903123855, "step": 116410 }, { "epoch": 33.04569968776611, "grad_norm": 1.5757001638412476, "learning_rate": 6.69684927618507e-05, "loss": 0.01277148723602295, "step": 116420 }, { "epoch": 33.04853817768947, "grad_norm": 0.596792459487915, "learning_rate": 6.696565427192733e-05, "loss": 0.011281078308820724, "step": 116430 }, { "epoch": 33.05137666761283, "grad_norm": 0.9761044979095459, "learning_rate": 6.696281578200397e-05, "loss": 0.002975606918334961, "step": 116440 }, { "epoch": 33.05421515753619, "grad_norm": 1.9367868900299072, "learning_rate": 6.695997729208062e-05, "loss": 0.004025884717702865, "step": 116450 }, { "epoch": 33.057053647459554, "grad_norm": 1.3975588083267212, "learning_rate": 6.695713880215726e-05, "loss": 0.005501432716846466, "step": 116460 }, { "epoch": 33.05989213738291, "grad_norm": 3.3480329513549805, "learning_rate": 6.69543003122339e-05, "loss": 0.01589973121881485, "step": 116470 }, { "epoch": 33.06273062730627, "grad_norm": 0.6771692633628845, "learning_rate": 6.695146182231054e-05, "loss": 0.007020026445388794, "step": 116480 }, { "epoch": 33.065569117229636, "grad_norm": 0.5640496015548706, "learning_rate": 6.694862333238717e-05, "loss": 0.01740611791610718, "step": 116490 }, { "epoch": 33.06840760715299, "grad_norm": 0.7360278367996216, "learning_rate": 6.694578484246381e-05, "loss": 0.002274811826646328, "step": 116500 }, { "epoch": 33.06840760715299, "eval_accuracy": 0.9696699942773574, "eval_loss": 0.10040533542633057, "eval_runtime": 32.0197, "eval_samples_per_second": 491.166, "eval_steps_per_second": 7.683, "step": 116500 }, { "epoch": 33.071246097076354, "grad_norm": 0.07331196218729019, "learning_rate": 6.694294635254045e-05, "loss": 0.0054709076881408695, "step": 116510 }, { "epoch": 33.07408458699972, "grad_norm": 0.5650719404220581, "learning_rate": 6.694010786261709e-05, "loss": 0.002010716684162617, "step": 116520 }, { "epoch": 33.07692307692308, "grad_norm": 0.06461764127016068, "learning_rate": 6.693726937269373e-05, "loss": 0.0028999261558055878, "step": 116530 }, { "epoch": 33.079761566846436, "grad_norm": 4.413096904754639, "learning_rate": 6.693443088277037e-05, "loss": 0.029185721278190614, "step": 116540 }, { "epoch": 33.0826000567698, "grad_norm": 2.3148601055145264, "learning_rate": 6.6931592392847e-05, "loss": 0.008036541938781738, "step": 116550 }, { "epoch": 33.08543854669316, "grad_norm": 0.23697713017463684, "learning_rate": 6.692875390292364e-05, "loss": 0.0027852648869156837, "step": 116560 }, { "epoch": 33.08827703661652, "grad_norm": 10.562323570251465, "learning_rate": 6.692591541300029e-05, "loss": 0.0027911074459552763, "step": 116570 }, { "epoch": 33.09111552653988, "grad_norm": 0.9765073657035828, "learning_rate": 6.692307692307693e-05, "loss": 0.009827610105276108, "step": 116580 }, { "epoch": 33.09395401646324, "grad_norm": 0.493289053440094, "learning_rate": 6.692023843315357e-05, "loss": 0.029821884632110596, "step": 116590 }, { "epoch": 33.0967925063866, "grad_norm": 3.4773964881896973, "learning_rate": 6.691739994323021e-05, "loss": 0.0027792293578386305, "step": 116600 }, { "epoch": 33.09963099630996, "grad_norm": 3.2410078048706055, "learning_rate": 6.691456145330685e-05, "loss": 0.005583294481039047, "step": 116610 }, { "epoch": 33.102469486233325, "grad_norm": 0.11262920498847961, "learning_rate": 6.691172296338348e-05, "loss": 0.006002588570117951, "step": 116620 }, { "epoch": 33.10530797615669, "grad_norm": 0.17572687566280365, "learning_rate": 6.690888447346012e-05, "loss": 0.008459733426570892, "step": 116630 }, { "epoch": 33.10814646608004, "grad_norm": 0.7166721224784851, "learning_rate": 6.690604598353676e-05, "loss": 0.011703053861856461, "step": 116640 }, { "epoch": 33.110984956003406, "grad_norm": 2.6370303630828857, "learning_rate": 6.690320749361339e-05, "loss": 0.007249914109706879, "step": 116650 }, { "epoch": 33.11382344592677, "grad_norm": 12.838744163513184, "learning_rate": 6.690065285268238e-05, "loss": 0.023855648934841156, "step": 116660 }, { "epoch": 33.116661935850125, "grad_norm": 2.6808087825775146, "learning_rate": 6.689781436275901e-05, "loss": 0.002728269062936306, "step": 116670 }, { "epoch": 33.11950042577349, "grad_norm": 1.0343464612960815, "learning_rate": 6.689497587283565e-05, "loss": 0.008923836797475816, "step": 116680 }, { "epoch": 33.12233891569685, "grad_norm": 6.45535135269165, "learning_rate": 6.689213738291229e-05, "loss": 0.008817353844642639, "step": 116690 }, { "epoch": 33.12517740562021, "grad_norm": 0.08838874846696854, "learning_rate": 6.688929889298893e-05, "loss": 0.008095046877861023, "step": 116700 }, { "epoch": 33.12801589554357, "grad_norm": 0.2174573540687561, "learning_rate": 6.688646040306558e-05, "loss": 0.0015282340347766877, "step": 116710 }, { "epoch": 33.13085438546693, "grad_norm": 7.259507179260254, "learning_rate": 6.688362191314222e-05, "loss": 0.003509655222296715, "step": 116720 }, { "epoch": 33.133692875390295, "grad_norm": 0.03537416085600853, "learning_rate": 6.688078342321885e-05, "loss": 0.0032778024673461915, "step": 116730 }, { "epoch": 33.13653136531365, "grad_norm": 0.6632115840911865, "learning_rate": 6.687794493329549e-05, "loss": 0.0017957322299480439, "step": 116740 }, { "epoch": 33.139369855237014, "grad_norm": 0.3281362056732178, "learning_rate": 6.687510644337213e-05, "loss": 0.005974175781011582, "step": 116750 }, { "epoch": 33.14220834516038, "grad_norm": 0.11487170308828354, "learning_rate": 6.687226795344877e-05, "loss": 0.0028112603351473807, "step": 116760 }, { "epoch": 33.14504683508373, "grad_norm": 2.022303581237793, "learning_rate": 6.686942946352541e-05, "loss": 0.0074214845895767215, "step": 116770 }, { "epoch": 33.147885325007096, "grad_norm": 0.12305603176355362, "learning_rate": 6.686659097360205e-05, "loss": 0.005791975557804108, "step": 116780 }, { "epoch": 33.15072381493046, "grad_norm": 0.21567302942276, "learning_rate": 6.68637524836787e-05, "loss": 0.0017539359629154204, "step": 116790 }, { "epoch": 33.15356230485382, "grad_norm": 0.10450593382120132, "learning_rate": 6.686091399375532e-05, "loss": 0.0034534059464931487, "step": 116800 }, { "epoch": 33.15640079477718, "grad_norm": 0.20200063288211823, "learning_rate": 6.685807550383196e-05, "loss": 0.003671243041753769, "step": 116810 }, { "epoch": 33.15923928470054, "grad_norm": 0.20154725015163422, "learning_rate": 6.68552370139086e-05, "loss": 0.005336741358041764, "step": 116820 }, { "epoch": 33.1620777746239, "grad_norm": 3.94284987449646, "learning_rate": 6.685239852398523e-05, "loss": 0.007192402333021164, "step": 116830 }, { "epoch": 33.16491626454726, "grad_norm": 0.7578408718109131, "learning_rate": 6.684956003406189e-05, "loss": 0.0037848081439733504, "step": 116840 }, { "epoch": 33.16775475447062, "grad_norm": 1.526747226715088, "learning_rate": 6.684672154413853e-05, "loss": 0.0022581979632377625, "step": 116850 }, { "epoch": 33.170593244393984, "grad_norm": 0.3071438670158386, "learning_rate": 6.684388305421516e-05, "loss": 0.0032968707382678986, "step": 116860 }, { "epoch": 33.17343173431734, "grad_norm": 12.856816291809082, "learning_rate": 6.68410445642918e-05, "loss": 0.009627282619476318, "step": 116870 }, { "epoch": 33.1762702242407, "grad_norm": 0.07208612561225891, "learning_rate": 6.683820607436844e-05, "loss": 0.0008730346336960793, "step": 116880 }, { "epoch": 33.179108714164066, "grad_norm": 0.8568087816238403, "learning_rate": 6.683536758444508e-05, "loss": 0.005911839753389358, "step": 116890 }, { "epoch": 33.18194720408743, "grad_norm": 0.3375151753425598, "learning_rate": 6.683252909452172e-05, "loss": 0.0071736820042133335, "step": 116900 }, { "epoch": 33.184785694010785, "grad_norm": 0.8520231246948242, "learning_rate": 6.682969060459836e-05, "loss": 0.0014691008254885674, "step": 116910 }, { "epoch": 33.18762418393415, "grad_norm": 0.49842408299446106, "learning_rate": 6.6826852114675e-05, "loss": 0.004839428514242172, "step": 116920 }, { "epoch": 33.19046267385751, "grad_norm": 8.875041007995605, "learning_rate": 6.682401362475163e-05, "loss": 0.012935957312583924, "step": 116930 }, { "epoch": 33.193301163780866, "grad_norm": 0.18533369898796082, "learning_rate": 6.682117513482827e-05, "loss": 0.002854626253247261, "step": 116940 }, { "epoch": 33.19613965370423, "grad_norm": 0.6381564140319824, "learning_rate": 6.681833664490491e-05, "loss": 0.005965876579284668, "step": 116950 }, { "epoch": 33.19897814362759, "grad_norm": 0.7147642374038696, "learning_rate": 6.681549815498154e-05, "loss": 0.004762191697955132, "step": 116960 }, { "epoch": 33.20181663355095, "grad_norm": 3.5744431018829346, "learning_rate": 6.68126596650582e-05, "loss": 0.0021830499172210693, "step": 116970 }, { "epoch": 33.20465512347431, "grad_norm": 1.0354992151260376, "learning_rate": 6.680982117513484e-05, "loss": 0.003933040797710419, "step": 116980 }, { "epoch": 33.20749361339767, "grad_norm": 5.887685775756836, "learning_rate": 6.680698268521147e-05, "loss": 0.009693623334169389, "step": 116990 }, { "epoch": 33.210332103321036, "grad_norm": 0.5320745706558228, "learning_rate": 6.680414419528811e-05, "loss": 0.006465865671634674, "step": 117000 }, { "epoch": 33.210332103321036, "eval_accuracy": 0.9697335791950149, "eval_loss": 0.10549739748239517, "eval_runtime": 31.8203, "eval_samples_per_second": 494.244, "eval_steps_per_second": 7.731, "step": 117000 }, { "epoch": 33.21317059324439, "grad_norm": 15.010702133178711, "learning_rate": 6.680130570536475e-05, "loss": 0.011933570355176925, "step": 117010 }, { "epoch": 33.216009083167755, "grad_norm": 0.10318238288164139, "learning_rate": 6.679846721544139e-05, "loss": 0.002451440691947937, "step": 117020 }, { "epoch": 33.21884757309112, "grad_norm": 0.4122534394264221, "learning_rate": 6.679562872551802e-05, "loss": 0.005117706209421158, "step": 117030 }, { "epoch": 33.221686063014474, "grad_norm": 0.19807782769203186, "learning_rate": 6.679279023559467e-05, "loss": 0.0018423140048980714, "step": 117040 }, { "epoch": 33.22452455293784, "grad_norm": 3.906538963317871, "learning_rate": 6.678995174567131e-05, "loss": 0.0032202389091253282, "step": 117050 }, { "epoch": 33.2273630428612, "grad_norm": 3.8841419219970703, "learning_rate": 6.678711325574794e-05, "loss": 0.0033941879868507386, "step": 117060 }, { "epoch": 33.230201532784555, "grad_norm": 1.8240785598754883, "learning_rate": 6.678427476582458e-05, "loss": 0.011822700500488281, "step": 117070 }, { "epoch": 33.23304002270792, "grad_norm": 3.9706709384918213, "learning_rate": 6.678143627590123e-05, "loss": 0.007514472305774689, "step": 117080 }, { "epoch": 33.23587851263128, "grad_norm": 3.4607510566711426, "learning_rate": 6.677859778597785e-05, "loss": 0.011683598905801774, "step": 117090 }, { "epoch": 33.238717002554644, "grad_norm": 8.42945671081543, "learning_rate": 6.677575929605451e-05, "loss": 0.004024039581418037, "step": 117100 }, { "epoch": 33.241555492478, "grad_norm": 0.8852787613868713, "learning_rate": 6.677292080613115e-05, "loss": 0.013066606223583221, "step": 117110 }, { "epoch": 33.24439398240136, "grad_norm": 14.635392189025879, "learning_rate": 6.677008231620778e-05, "loss": 0.00991058349609375, "step": 117120 }, { "epoch": 33.247232472324725, "grad_norm": 0.1147436648607254, "learning_rate": 6.676724382628442e-05, "loss": 0.003976458311080932, "step": 117130 }, { "epoch": 33.25007096224808, "grad_norm": 0.5611915588378906, "learning_rate": 6.676440533636106e-05, "loss": 0.007162712514400482, "step": 117140 }, { "epoch": 33.252909452171444, "grad_norm": 2.005566358566284, "learning_rate": 6.67615668464377e-05, "loss": 0.0027954330667853355, "step": 117150 }, { "epoch": 33.25574794209481, "grad_norm": 4.083441734313965, "learning_rate": 6.675872835651433e-05, "loss": 0.0035261183977127073, "step": 117160 }, { "epoch": 33.25858643201816, "grad_norm": 2.79569673538208, "learning_rate": 6.675588986659098e-05, "loss": 0.004823222756385803, "step": 117170 }, { "epoch": 33.261424921941526, "grad_norm": 0.2593329846858978, "learning_rate": 6.675305137666763e-05, "loss": 0.005334918200969696, "step": 117180 }, { "epoch": 33.26426341186489, "grad_norm": 0.19562114775180817, "learning_rate": 6.675021288674425e-05, "loss": 0.002492145448923111, "step": 117190 }, { "epoch": 33.26710190178825, "grad_norm": 0.03985077515244484, "learning_rate": 6.67473743968209e-05, "loss": 0.003584921360015869, "step": 117200 }, { "epoch": 33.26994039171161, "grad_norm": 0.5491010546684265, "learning_rate": 6.674453590689754e-05, "loss": 0.008333267271518707, "step": 117210 }, { "epoch": 33.27277888163497, "grad_norm": 1.1943808794021606, "learning_rate": 6.674169741697416e-05, "loss": 0.004083572328090668, "step": 117220 }, { "epoch": 33.27561737155833, "grad_norm": 0.14809031784534454, "learning_rate": 6.67388589270508e-05, "loss": 0.001823628693819046, "step": 117230 }, { "epoch": 33.27845586148169, "grad_norm": 1.3379476070404053, "learning_rate": 6.673602043712746e-05, "loss": 0.010470181703567505, "step": 117240 }, { "epoch": 33.28129435140505, "grad_norm": 0.4275990128517151, "learning_rate": 6.673318194720409e-05, "loss": 0.0018964186310768127, "step": 117250 }, { "epoch": 33.284132841328415, "grad_norm": 0.5102622509002686, "learning_rate": 6.673034345728073e-05, "loss": 0.009090592712163925, "step": 117260 }, { "epoch": 33.28697133125178, "grad_norm": 1.5648858547210693, "learning_rate": 6.672750496735737e-05, "loss": 0.0017020264640450478, "step": 117270 }, { "epoch": 33.28980982117513, "grad_norm": 1.519100308418274, "learning_rate": 6.672466647743401e-05, "loss": 0.0022423505783081056, "step": 117280 }, { "epoch": 33.292648311098496, "grad_norm": 1.1540566682815552, "learning_rate": 6.672182798751064e-05, "loss": 0.0035326041281223295, "step": 117290 }, { "epoch": 33.29548680102186, "grad_norm": 9.145088195800781, "learning_rate": 6.67189894975873e-05, "loss": 0.007082366943359375, "step": 117300 }, { "epoch": 33.298325290945215, "grad_norm": 2.6998770236968994, "learning_rate": 6.671615100766392e-05, "loss": 0.006271549314260483, "step": 117310 }, { "epoch": 33.30116378086858, "grad_norm": 0.6100093722343445, "learning_rate": 6.671331251774056e-05, "loss": 0.004267510771751404, "step": 117320 }, { "epoch": 33.30400227079194, "grad_norm": 0.9361395835876465, "learning_rate": 6.67104740278172e-05, "loss": 0.003236216306686401, "step": 117330 }, { "epoch": 33.306840760715296, "grad_norm": 0.11130796372890472, "learning_rate": 6.670763553789385e-05, "loss": 0.0078055053949356076, "step": 117340 }, { "epoch": 33.30967925063866, "grad_norm": 0.9566770792007446, "learning_rate": 6.670479704797047e-05, "loss": 0.003107859753072262, "step": 117350 }, { "epoch": 33.31251774056202, "grad_norm": 0.27331191301345825, "learning_rate": 6.670195855804712e-05, "loss": 0.004045741260051727, "step": 117360 }, { "epoch": 33.315356230485385, "grad_norm": 3.211246967315674, "learning_rate": 6.669912006812377e-05, "loss": 0.006707516312599182, "step": 117370 }, { "epoch": 33.31819472040874, "grad_norm": 1.721397876739502, "learning_rate": 6.66962815782004e-05, "loss": 0.004508708417415619, "step": 117380 }, { "epoch": 33.321033210332104, "grad_norm": 5.4185051918029785, "learning_rate": 6.669344308827704e-05, "loss": 0.010296161472797393, "step": 117390 }, { "epoch": 33.32387170025547, "grad_norm": 0.3519374132156372, "learning_rate": 6.669060459835368e-05, "loss": 0.0030946435406804087, "step": 117400 }, { "epoch": 33.32671019017882, "grad_norm": 0.49655184149742126, "learning_rate": 6.668776610843031e-05, "loss": 0.003404499590396881, "step": 117410 }, { "epoch": 33.329548680102185, "grad_norm": 0.4788326323032379, "learning_rate": 6.668492761850695e-05, "loss": 0.004749372228980064, "step": 117420 }, { "epoch": 33.33238717002555, "grad_norm": 0.4188593626022339, "learning_rate": 6.668208912858359e-05, "loss": 0.003109121322631836, "step": 117430 }, { "epoch": 33.335225659948904, "grad_norm": 0.3335094153881073, "learning_rate": 6.667925063866023e-05, "loss": 0.0029158854857087137, "step": 117440 }, { "epoch": 33.33806414987227, "grad_norm": 0.9115496873855591, "learning_rate": 6.667641214873688e-05, "loss": 0.006710460782051087, "step": 117450 }, { "epoch": 33.34090263979563, "grad_norm": 0.8134279251098633, "learning_rate": 6.667357365881352e-05, "loss": 0.010479222983121872, "step": 117460 }, { "epoch": 33.34374112971899, "grad_norm": 0.3062744140625, "learning_rate": 6.667073516889016e-05, "loss": 0.003768964856863022, "step": 117470 }, { "epoch": 33.34657961964235, "grad_norm": 0.04725195840001106, "learning_rate": 6.666789667896679e-05, "loss": 0.005931961536407471, "step": 117480 }, { "epoch": 33.34941810956571, "grad_norm": 0.6409868001937866, "learning_rate": 6.666505818904343e-05, "loss": 0.007691122591495514, "step": 117490 }, { "epoch": 33.352256599489074, "grad_norm": 0.13280260562896729, "learning_rate": 6.666221969912008e-05, "loss": 0.004103472828865052, "step": 117500 }, { "epoch": 33.352256599489074, "eval_accuracy": 0.9710052775481656, "eval_loss": 0.10176429152488708, "eval_runtime": 32.0871, "eval_samples_per_second": 490.134, "eval_steps_per_second": 7.667, "step": 117500 }, { "epoch": 33.35509508941243, "grad_norm": 6.041482448577881, "learning_rate": 6.665938120919671e-05, "loss": 0.004424911737442016, "step": 117510 }, { "epoch": 33.35793357933579, "grad_norm": 1.8242093324661255, "learning_rate": 6.665654271927335e-05, "loss": 0.0030678030103445052, "step": 117520 }, { "epoch": 33.360772069259156, "grad_norm": 0.25828006863594055, "learning_rate": 6.665370422934999e-05, "loss": 0.002407431975007057, "step": 117530 }, { "epoch": 33.36361055918251, "grad_norm": 2.303197145462036, "learning_rate": 6.665086573942662e-05, "loss": 0.0020633935928344727, "step": 117540 }, { "epoch": 33.366449049105874, "grad_norm": 0.10785436630249023, "learning_rate": 6.664802724950326e-05, "loss": 0.005741871893405914, "step": 117550 }, { "epoch": 33.36928753902924, "grad_norm": 0.14172351360321045, "learning_rate": 6.66451887595799e-05, "loss": 0.01993151903152466, "step": 117560 }, { "epoch": 33.3721260289526, "grad_norm": 6.424083709716797, "learning_rate": 6.664235026965654e-05, "loss": 0.005172615498304367, "step": 117570 }, { "epoch": 33.374964518875956, "grad_norm": 0.3155151903629303, "learning_rate": 6.663951177973319e-05, "loss": 0.002070758119225502, "step": 117580 }, { "epoch": 33.37780300879932, "grad_norm": 0.1375153660774231, "learning_rate": 6.663667328980983e-05, "loss": 0.0025551144033670427, "step": 117590 }, { "epoch": 33.38064149872268, "grad_norm": 0.4031994938850403, "learning_rate": 6.663383479988647e-05, "loss": 0.0011833008378744126, "step": 117600 }, { "epoch": 33.38347998864604, "grad_norm": 4.744685649871826, "learning_rate": 6.66309963099631e-05, "loss": 0.012042104452848434, "step": 117610 }, { "epoch": 33.3863184785694, "grad_norm": 2.998222827911377, "learning_rate": 6.662815782003974e-05, "loss": 0.0020521258935332297, "step": 117620 }, { "epoch": 33.38915696849276, "grad_norm": 0.7382822036743164, "learning_rate": 6.662531933011638e-05, "loss": 0.0011817242950201035, "step": 117630 }, { "epoch": 33.391995458416126, "grad_norm": 9.048192024230957, "learning_rate": 6.662248084019302e-05, "loss": 0.004943189769983291, "step": 117640 }, { "epoch": 33.39483394833948, "grad_norm": 0.1631862223148346, "learning_rate": 6.661964235026966e-05, "loss": 0.0077832967042922975, "step": 117650 }, { "epoch": 33.397672438262845, "grad_norm": 0.20605161786079407, "learning_rate": 6.66168038603463e-05, "loss": 0.0020784838125109673, "step": 117660 }, { "epoch": 33.40051092818621, "grad_norm": 0.12529434263706207, "learning_rate": 6.661396537042293e-05, "loss": 0.002091728150844574, "step": 117670 }, { "epoch": 33.40334941810956, "grad_norm": 0.054162587970495224, "learning_rate": 6.661112688049957e-05, "loss": 0.0035916931927204134, "step": 117680 }, { "epoch": 33.406187908032926, "grad_norm": 1.2742531299591064, "learning_rate": 6.660828839057621e-05, "loss": 0.009231005609035493, "step": 117690 }, { "epoch": 33.40902639795629, "grad_norm": 7.961148262023926, "learning_rate": 6.660544990065286e-05, "loss": 0.0023948408663272856, "step": 117700 }, { "epoch": 33.411864887879645, "grad_norm": 0.05892060697078705, "learning_rate": 6.66026114107295e-05, "loss": 0.006335830688476563, "step": 117710 }, { "epoch": 33.41470337780301, "grad_norm": 0.3514561057090759, "learning_rate": 6.659977292080614e-05, "loss": 0.007222630828619003, "step": 117720 }, { "epoch": 33.41754186772637, "grad_norm": 17.219820022583008, "learning_rate": 6.659693443088278e-05, "loss": 0.0086212158203125, "step": 117730 }, { "epoch": 33.420380357649734, "grad_norm": 0.617728590965271, "learning_rate": 6.659409594095941e-05, "loss": 0.01009007915854454, "step": 117740 }, { "epoch": 33.42321884757309, "grad_norm": 1.9427403211593628, "learning_rate": 6.659125745103605e-05, "loss": 0.0019489798694849014, "step": 117750 }, { "epoch": 33.42605733749645, "grad_norm": 1.0313529968261719, "learning_rate": 6.658841896111269e-05, "loss": 0.005363673716783523, "step": 117760 }, { "epoch": 33.428895827419815, "grad_norm": 0.3089061677455902, "learning_rate": 6.658558047118933e-05, "loss": 0.0030334291979670523, "step": 117770 }, { "epoch": 33.43173431734317, "grad_norm": 0.05528602749109268, "learning_rate": 6.658274198126597e-05, "loss": 0.0025103989988565444, "step": 117780 }, { "epoch": 33.434572807266534, "grad_norm": 0.3590381145477295, "learning_rate": 6.657990349134261e-05, "loss": 0.001472625322639942, "step": 117790 }, { "epoch": 33.4374112971899, "grad_norm": 0.6082022786140442, "learning_rate": 6.657706500141924e-05, "loss": 0.008881958574056626, "step": 117800 }, { "epoch": 33.44024978711325, "grad_norm": 11.770957946777344, "learning_rate": 6.657422651149588e-05, "loss": 0.008058828115463258, "step": 117810 }, { "epoch": 33.443088277036615, "grad_norm": 0.6515213847160339, "learning_rate": 6.657138802157252e-05, "loss": 0.0040625520050525665, "step": 117820 }, { "epoch": 33.44592676695998, "grad_norm": 0.10093910992145538, "learning_rate": 6.656854953164917e-05, "loss": 0.0039804231375455855, "step": 117830 }, { "epoch": 33.44876525688334, "grad_norm": 0.03526574373245239, "learning_rate": 6.656571104172581e-05, "loss": 0.0024762894958257676, "step": 117840 }, { "epoch": 33.4516037468067, "grad_norm": 0.11977417021989822, "learning_rate": 6.656287255180245e-05, "loss": 0.0009741321206092835, "step": 117850 }, { "epoch": 33.45444223673006, "grad_norm": 0.03153037652373314, "learning_rate": 6.656003406187909e-05, "loss": 0.0016812663525342941, "step": 117860 }, { "epoch": 33.45728072665342, "grad_norm": 0.21912778913974762, "learning_rate": 6.655719557195572e-05, "loss": 0.0011515764519572258, "step": 117870 }, { "epoch": 33.46011921657678, "grad_norm": 0.1310998797416687, "learning_rate": 6.655435708203236e-05, "loss": 0.001064511202275753, "step": 117880 }, { "epoch": 33.46295770650014, "grad_norm": 0.9500657320022583, "learning_rate": 6.6551518592109e-05, "loss": 0.0011550221592187882, "step": 117890 }, { "epoch": 33.465796196423504, "grad_norm": 1.2433335781097412, "learning_rate": 6.654868010218564e-05, "loss": 0.0013618255034089088, "step": 117900 }, { "epoch": 33.46863468634686, "grad_norm": 0.625828206539154, "learning_rate": 6.654584161226228e-05, "loss": 0.0035163335502147674, "step": 117910 }, { "epoch": 33.47147317627022, "grad_norm": 0.40807151794433594, "learning_rate": 6.654300312233892e-05, "loss": 0.0018518872559070588, "step": 117920 }, { "epoch": 33.474311666193586, "grad_norm": 0.37297049164772034, "learning_rate": 6.654016463241555e-05, "loss": 0.0020586153492331504, "step": 117930 }, { "epoch": 33.47715015611695, "grad_norm": 0.11709805577993393, "learning_rate": 6.65373261424922e-05, "loss": 0.0014977343380451202, "step": 117940 }, { "epoch": 33.479988646040304, "grad_norm": 0.23101064562797546, "learning_rate": 6.653448765256884e-05, "loss": 0.0036140423268079757, "step": 117950 }, { "epoch": 33.48282713596367, "grad_norm": 0.026506168767809868, "learning_rate": 6.653164916264548e-05, "loss": 0.001249462366104126, "step": 117960 }, { "epoch": 33.48566562588703, "grad_norm": 5.453563213348389, "learning_rate": 6.652881067272212e-05, "loss": 0.0033817552030086517, "step": 117970 }, { "epoch": 33.488504115810386, "grad_norm": 0.9004629850387573, "learning_rate": 6.652597218279876e-05, "loss": 0.005653075873851776, "step": 117980 }, { "epoch": 33.49134260573375, "grad_norm": 0.3424185812473297, "learning_rate": 6.65231336928754e-05, "loss": 0.010584371536970139, "step": 117990 }, { "epoch": 33.49418109565711, "grad_norm": 0.1876843422651291, "learning_rate": 6.652029520295203e-05, "loss": 0.0024077557027339935, "step": 118000 }, { "epoch": 33.49418109565711, "eval_accuracy": 0.9739301837604121, "eval_loss": 0.09370450675487518, "eval_runtime": 32.4486, "eval_samples_per_second": 484.674, "eval_steps_per_second": 7.581, "step": 118000 }, { "epoch": 33.497019585580475, "grad_norm": 16.735023498535156, "learning_rate": 6.651745671302867e-05, "loss": 0.008442622423171998, "step": 118010 }, { "epoch": 33.49985807550383, "grad_norm": 0.6759421825408936, "learning_rate": 6.651461822310531e-05, "loss": 0.0022421222180128097, "step": 118020 }, { "epoch": 33.50269656542719, "grad_norm": 16.317548751831055, "learning_rate": 6.651177973318195e-05, "loss": 0.011362408101558686, "step": 118030 }, { "epoch": 33.505535055350556, "grad_norm": 0.19789162278175354, "learning_rate": 6.65089412432586e-05, "loss": 0.0050257094204425815, "step": 118040 }, { "epoch": 33.50837354527391, "grad_norm": 8.306166648864746, "learning_rate": 6.650610275333524e-05, "loss": 0.007889412343502045, "step": 118050 }, { "epoch": 33.511212035197275, "grad_norm": 1.6560711860656738, "learning_rate": 6.650326426341186e-05, "loss": 0.008737242221832276, "step": 118060 }, { "epoch": 33.51405052512064, "grad_norm": 0.582359254360199, "learning_rate": 6.65004257734885e-05, "loss": 0.0024565406143665313, "step": 118070 }, { "epoch": 33.51688901504399, "grad_norm": 0.4189947545528412, "learning_rate": 6.649758728356515e-05, "loss": 0.002820005640387535, "step": 118080 }, { "epoch": 33.519727504967356, "grad_norm": 0.03143097832798958, "learning_rate": 6.649474879364179e-05, "loss": 0.004976770654320717, "step": 118090 }, { "epoch": 33.52256599489072, "grad_norm": 1.8940445184707642, "learning_rate": 6.649191030371843e-05, "loss": 0.005611953884363174, "step": 118100 }, { "epoch": 33.52540448481408, "grad_norm": 2.091912269592285, "learning_rate": 6.648907181379507e-05, "loss": 0.012508454918861388, "step": 118110 }, { "epoch": 33.52824297473744, "grad_norm": 0.15885968506336212, "learning_rate": 6.648623332387171e-05, "loss": 0.002666310220956802, "step": 118120 }, { "epoch": 33.5310814646608, "grad_norm": 3.1097934246063232, "learning_rate": 6.648339483394834e-05, "loss": 0.006251376867294311, "step": 118130 }, { "epoch": 33.533919954584164, "grad_norm": 1.7463575601577759, "learning_rate": 6.648055634402498e-05, "loss": 0.007226001471281052, "step": 118140 }, { "epoch": 33.53675844450752, "grad_norm": 0.28277215361595154, "learning_rate": 6.647771785410162e-05, "loss": 0.006041520088911056, "step": 118150 }, { "epoch": 33.53959693443088, "grad_norm": 1.0614129304885864, "learning_rate": 6.647487936417825e-05, "loss": 0.010642955452203751, "step": 118160 }, { "epoch": 33.542435424354245, "grad_norm": 0.38257908821105957, "learning_rate": 6.64720408742549e-05, "loss": 0.002994239330291748, "step": 118170 }, { "epoch": 33.5452739142776, "grad_norm": 0.17365016043186188, "learning_rate": 6.646920238433155e-05, "loss": 0.006804358959197998, "step": 118180 }, { "epoch": 33.548112404200964, "grad_norm": 0.09420165419578552, "learning_rate": 6.646636389440817e-05, "loss": 0.014821115136146545, "step": 118190 }, { "epoch": 33.55095089412433, "grad_norm": 3.804135322570801, "learning_rate": 6.646352540448482e-05, "loss": 0.007811338454484939, "step": 118200 }, { "epoch": 33.55378938404769, "grad_norm": 0.16372185945510864, "learning_rate": 6.646068691456146e-05, "loss": 0.0031401701271533966, "step": 118210 }, { "epoch": 33.556627873971046, "grad_norm": 1.3102390766143799, "learning_rate": 6.64578484246381e-05, "loss": 0.001990114897489548, "step": 118220 }, { "epoch": 33.55946636389441, "grad_norm": 0.27745521068573, "learning_rate": 6.645500993471474e-05, "loss": 0.006152790039777756, "step": 118230 }, { "epoch": 33.56230485381777, "grad_norm": 0.13625353574752808, "learning_rate": 6.645217144479138e-05, "loss": 0.00917484685778618, "step": 118240 }, { "epoch": 33.56514334374113, "grad_norm": 0.15715394914150238, "learning_rate": 6.644933295486801e-05, "loss": 0.007474235445261002, "step": 118250 }, { "epoch": 33.56798183366449, "grad_norm": 4.922328948974609, "learning_rate": 6.644649446494465e-05, "loss": 0.004115850478410721, "step": 118260 }, { "epoch": 33.57082032358785, "grad_norm": 0.2671124339103699, "learning_rate": 6.644365597502129e-05, "loss": 0.004395883902907372, "step": 118270 }, { "epoch": 33.57365881351121, "grad_norm": 1.0523269176483154, "learning_rate": 6.644081748509793e-05, "loss": 0.009388954937458038, "step": 118280 }, { "epoch": 33.57649730343457, "grad_norm": 0.6893566250801086, "learning_rate": 6.643797899517456e-05, "loss": 0.018699949979782103, "step": 118290 }, { "epoch": 33.579335793357934, "grad_norm": 0.3954017162322998, "learning_rate": 6.643514050525122e-05, "loss": 0.005369166284799576, "step": 118300 }, { "epoch": 33.5821742832813, "grad_norm": 1.7790608406066895, "learning_rate": 6.643230201532786e-05, "loss": 0.0035857439041137697, "step": 118310 }, { "epoch": 33.58501277320465, "grad_norm": 0.8237112760543823, "learning_rate": 6.642946352540448e-05, "loss": 0.00260560717433691, "step": 118320 }, { "epoch": 33.587851263128016, "grad_norm": 1.2745623588562012, "learning_rate": 6.642662503548113e-05, "loss": 0.009308111667633057, "step": 118330 }, { "epoch": 33.59068975305138, "grad_norm": 2.8505353927612305, "learning_rate": 6.642378654555777e-05, "loss": 0.0034400660544633864, "step": 118340 }, { "epoch": 33.593528242974735, "grad_norm": 0.832823634147644, "learning_rate": 6.64209480556344e-05, "loss": 0.0026300268247723578, "step": 118350 }, { "epoch": 33.5963667328981, "grad_norm": 0.11357098072767258, "learning_rate": 6.641810956571104e-05, "loss": 0.004665753990411759, "step": 118360 }, { "epoch": 33.59920522282146, "grad_norm": 0.12819187343120575, "learning_rate": 6.641527107578769e-05, "loss": 0.004100415855646133, "step": 118370 }, { "epoch": 33.602043712744816, "grad_norm": 1.0581845045089722, "learning_rate": 6.641243258586432e-05, "loss": 0.013720394670963287, "step": 118380 }, { "epoch": 33.60488220266818, "grad_norm": 9.020030975341797, "learning_rate": 6.640959409594096e-05, "loss": 0.009853778034448623, "step": 118390 }, { "epoch": 33.60772069259154, "grad_norm": 2.7118237018585205, "learning_rate": 6.64067556060176e-05, "loss": 0.00771016925573349, "step": 118400 }, { "epoch": 33.610559182514905, "grad_norm": 0.40723446011543274, "learning_rate": 6.640391711609424e-05, "loss": 0.006423964351415634, "step": 118410 }, { "epoch": 33.61339767243826, "grad_norm": 12.010626792907715, "learning_rate": 6.640107862617087e-05, "loss": 0.013506373763084412, "step": 118420 }, { "epoch": 33.61623616236162, "grad_norm": 18.09345817565918, "learning_rate": 6.639824013624753e-05, "loss": 0.017544372379779814, "step": 118430 }, { "epoch": 33.619074652284986, "grad_norm": 2.6834728717803955, "learning_rate": 6.639540164632417e-05, "loss": 0.005507006123661995, "step": 118440 }, { "epoch": 33.62191314220834, "grad_norm": 0.1859956681728363, "learning_rate": 6.63925631564008e-05, "loss": 0.004598772898316383, "step": 118450 }, { "epoch": 33.624751632131705, "grad_norm": 0.677299439907074, "learning_rate": 6.638972466647744e-05, "loss": 0.002621745876967907, "step": 118460 }, { "epoch": 33.62759012205507, "grad_norm": 0.10251724720001221, "learning_rate": 6.638688617655408e-05, "loss": 0.003544922173023224, "step": 118470 }, { "epoch": 33.63042861197843, "grad_norm": 2.2537879943847656, "learning_rate": 6.63840476866307e-05, "loss": 0.0033119872212409975, "step": 118480 }, { "epoch": 33.63326710190179, "grad_norm": 14.366512298583984, "learning_rate": 6.638120919670735e-05, "loss": 0.0173773393034935, "step": 118490 }, { "epoch": 33.63610559182515, "grad_norm": 0.27313777804374695, "learning_rate": 6.6378370706784e-05, "loss": 0.004499942064285278, "step": 118500 }, { "epoch": 33.63610559182515, "eval_accuracy": 0.9694156546067273, "eval_loss": 0.10657570511102676, "eval_runtime": 31.9221, "eval_samples_per_second": 492.667, "eval_steps_per_second": 7.706, "step": 118500 }, { "epoch": 33.63894408174851, "grad_norm": 12.45316219329834, "learning_rate": 6.637553221686063e-05, "loss": 0.010004299879074096, "step": 118510 }, { "epoch": 33.64178257167187, "grad_norm": 4.8703694343566895, "learning_rate": 6.637269372693727e-05, "loss": 0.01895466595888138, "step": 118520 }, { "epoch": 33.64462106159523, "grad_norm": 0.43803897500038147, "learning_rate": 6.636985523701391e-05, "loss": 0.008851355314254761, "step": 118530 }, { "epoch": 33.647459551518594, "grad_norm": 0.45062896609306335, "learning_rate": 6.636701674709055e-05, "loss": 0.005634177848696709, "step": 118540 }, { "epoch": 33.65029804144195, "grad_norm": 7.604182720184326, "learning_rate": 6.636417825716718e-05, "loss": 0.006014469265937805, "step": 118550 }, { "epoch": 33.65313653136531, "grad_norm": 1.0684056282043457, "learning_rate": 6.636133976724382e-05, "loss": 0.005597289651632309, "step": 118560 }, { "epoch": 33.655975021288675, "grad_norm": 0.15650734305381775, "learning_rate": 6.635850127732048e-05, "loss": 0.003872975707054138, "step": 118570 }, { "epoch": 33.65881351121204, "grad_norm": 0.13986270129680634, "learning_rate": 6.63556627873971e-05, "loss": 0.01006293147802353, "step": 118580 }, { "epoch": 33.661652001135394, "grad_norm": 0.5802822709083557, "learning_rate": 6.635282429747375e-05, "loss": 0.005593271553516388, "step": 118590 }, { "epoch": 33.66449049105876, "grad_norm": 0.9265126585960388, "learning_rate": 6.634998580755039e-05, "loss": 0.0027913374826312063, "step": 118600 }, { "epoch": 33.66732898098212, "grad_norm": 0.06432979553937912, "learning_rate": 6.634714731762702e-05, "loss": 0.001954894885420799, "step": 118610 }, { "epoch": 33.670167470905476, "grad_norm": 0.18897230923175812, "learning_rate": 6.634430882770366e-05, "loss": 0.0052628666162490845, "step": 118620 }, { "epoch": 33.67300596082884, "grad_norm": 10.748712539672852, "learning_rate": 6.634147033778031e-05, "loss": 0.014139921963214874, "step": 118630 }, { "epoch": 33.6758444507522, "grad_norm": 1.3574153184890747, "learning_rate": 6.633863184785694e-05, "loss": 0.0062481328845024105, "step": 118640 }, { "epoch": 33.67868294067556, "grad_norm": 0.28493428230285645, "learning_rate": 6.633579335793358e-05, "loss": 0.002008277177810669, "step": 118650 }, { "epoch": 33.68152143059892, "grad_norm": 1.6867533922195435, "learning_rate": 6.633295486801022e-05, "loss": 0.01278042197227478, "step": 118660 }, { "epoch": 33.68435992052228, "grad_norm": 0.05934680253267288, "learning_rate": 6.633011637808687e-05, "loss": 0.011395807564258575, "step": 118670 }, { "epoch": 33.687198410445646, "grad_norm": 4.227171421051025, "learning_rate": 6.632727788816349e-05, "loss": 0.0017517570406198502, "step": 118680 }, { "epoch": 33.690036900369, "grad_norm": 0.5052345395088196, "learning_rate": 6.632443939824013e-05, "loss": 0.003521227091550827, "step": 118690 }, { "epoch": 33.692875390292365, "grad_norm": 1.6888450384140015, "learning_rate": 6.632160090831679e-05, "loss": 0.005626156181097031, "step": 118700 }, { "epoch": 33.69571388021573, "grad_norm": 0.39632701873779297, "learning_rate": 6.631876241839342e-05, "loss": 0.005465247482061386, "step": 118710 }, { "epoch": 33.69855237013908, "grad_norm": 9.108137130737305, "learning_rate": 6.631592392847006e-05, "loss": 0.0037058554589748383, "step": 118720 }, { "epoch": 33.701390860062446, "grad_norm": 1.7768782377243042, "learning_rate": 6.63130854385467e-05, "loss": 0.00430828109383583, "step": 118730 }, { "epoch": 33.70422934998581, "grad_norm": 1.021230936050415, "learning_rate": 6.631024694862333e-05, "loss": 0.00667378231883049, "step": 118740 }, { "epoch": 33.70706783990917, "grad_norm": 1.0527336597442627, "learning_rate": 6.630740845869997e-05, "loss": 0.004770369455218315, "step": 118750 }, { "epoch": 33.70990632983253, "grad_norm": 2.049328565597534, "learning_rate": 6.630456996877661e-05, "loss": 0.0014734882861375809, "step": 118760 }, { "epoch": 33.71274481975589, "grad_norm": 0.03193238750100136, "learning_rate": 6.630173147885325e-05, "loss": 0.0069622650742530824, "step": 118770 }, { "epoch": 33.71558330967925, "grad_norm": 0.3328356146812439, "learning_rate": 6.62988929889299e-05, "loss": 0.0036469902843236923, "step": 118780 }, { "epoch": 33.71842179960261, "grad_norm": 0.5575516819953918, "learning_rate": 6.629605449900653e-05, "loss": 0.009485018998384475, "step": 118790 }, { "epoch": 33.72126028952597, "grad_norm": 0.4404263198375702, "learning_rate": 6.629321600908318e-05, "loss": 0.006475710123777389, "step": 118800 }, { "epoch": 33.724098779449335, "grad_norm": 0.07204845547676086, "learning_rate": 6.62903775191598e-05, "loss": 0.0013757141306996346, "step": 118810 }, { "epoch": 33.72693726937269, "grad_norm": 0.10146921873092651, "learning_rate": 6.628753902923645e-05, "loss": 0.0020020967349410057, "step": 118820 }, { "epoch": 33.729775759296054, "grad_norm": 0.33432987332344055, "learning_rate": 6.62847005393131e-05, "loss": 0.012701784074306489, "step": 118830 }, { "epoch": 33.73261424921942, "grad_norm": 0.6578109264373779, "learning_rate": 6.628186204938973e-05, "loss": 0.004331035912036896, "step": 118840 }, { "epoch": 33.73545273914278, "grad_norm": 1.9578584432601929, "learning_rate": 6.627902355946637e-05, "loss": 0.002381997928023338, "step": 118850 }, { "epoch": 33.738291229066135, "grad_norm": 0.22290807962417603, "learning_rate": 6.627618506954301e-05, "loss": 0.013844422996044159, "step": 118860 }, { "epoch": 33.7411297189895, "grad_norm": 11.913777351379395, "learning_rate": 6.627334657961964e-05, "loss": 0.010574301332235336, "step": 118870 }, { "epoch": 33.74396820891286, "grad_norm": 0.3002444803714752, "learning_rate": 6.627050808969628e-05, "loss": 0.014105919003486633, "step": 118880 }, { "epoch": 33.74680669883622, "grad_norm": 0.7923224568367004, "learning_rate": 6.626766959977292e-05, "loss": 0.002935725264251232, "step": 118890 }, { "epoch": 33.74964518875958, "grad_norm": 0.2138952612876892, "learning_rate": 6.626483110984956e-05, "loss": 0.007966580986976623, "step": 118900 }, { "epoch": 33.75248367868294, "grad_norm": 0.1423509120941162, "learning_rate": 6.62619926199262e-05, "loss": 0.00687478557229042, "step": 118910 }, { "epoch": 33.7553221686063, "grad_norm": 0.32538729906082153, "learning_rate": 6.625915413000285e-05, "loss": 0.007221842557191849, "step": 118920 }, { "epoch": 33.75816065852966, "grad_norm": 2.1803457736968994, "learning_rate": 6.625631564007949e-05, "loss": 0.002268337085843086, "step": 118930 }, { "epoch": 33.760999148453024, "grad_norm": 11.882611274719238, "learning_rate": 6.625347715015611e-05, "loss": 0.008549454063177109, "step": 118940 }, { "epoch": 33.76383763837639, "grad_norm": 0.8159015774726868, "learning_rate": 6.625063866023276e-05, "loss": 0.012360304594039917, "step": 118950 }, { "epoch": 33.76667612829974, "grad_norm": 4.055695056915283, "learning_rate": 6.62478001703094e-05, "loss": 0.0035634774714708327, "step": 118960 }, { "epoch": 33.769514618223106, "grad_norm": 0.03216114640235901, "learning_rate": 6.624496168038604e-05, "loss": 0.005543794855475426, "step": 118970 }, { "epoch": 33.77235310814647, "grad_norm": 0.5488452911376953, "learning_rate": 6.624212319046268e-05, "loss": 0.0016342954710125922, "step": 118980 }, { "epoch": 33.775191598069824, "grad_norm": 1.044049620628357, "learning_rate": 6.623928470053932e-05, "loss": 0.007719455659389496, "step": 118990 }, { "epoch": 33.77803008799319, "grad_norm": 0.8266379833221436, "learning_rate": 6.623644621061595e-05, "loss": 0.005917473509907722, "step": 119000 }, { "epoch": 33.77803008799319, "eval_accuracy": 0.972722070324919, "eval_loss": 0.09347350895404816, "eval_runtime": 31.7361, "eval_samples_per_second": 495.555, "eval_steps_per_second": 7.751, "step": 119000 }, { "epoch": 33.78086857791655, "grad_norm": 0.18504337966442108, "learning_rate": 6.623360772069259e-05, "loss": 0.0025294085964560507, "step": 119010 }, { "epoch": 33.783707067839906, "grad_norm": 1.9773956537246704, "learning_rate": 6.623076923076923e-05, "loss": 0.0055483274161815645, "step": 119020 }, { "epoch": 33.78654555776327, "grad_norm": 19.135757446289062, "learning_rate": 6.622793074084587e-05, "loss": 0.021419247984886168, "step": 119030 }, { "epoch": 33.78938404768663, "grad_norm": 0.40320274233818054, "learning_rate": 6.622509225092251e-05, "loss": 0.005116710811853409, "step": 119040 }, { "epoch": 33.792222537609995, "grad_norm": 14.738720893859863, "learning_rate": 6.622253760999148e-05, "loss": 0.0250186026096344, "step": 119050 }, { "epoch": 33.79506102753335, "grad_norm": 0.658054769039154, "learning_rate": 6.621969912006812e-05, "loss": 0.005745350569486618, "step": 119060 }, { "epoch": 33.79789951745671, "grad_norm": 7.9984025955200195, "learning_rate": 6.621686063014476e-05, "loss": 0.008312442153692246, "step": 119070 }, { "epoch": 33.800738007380076, "grad_norm": 4.527327537536621, "learning_rate": 6.62140221402214e-05, "loss": 0.004733985289931297, "step": 119080 }, { "epoch": 33.80357649730343, "grad_norm": 0.683903694152832, "learning_rate": 6.621118365029805e-05, "loss": 0.008793150633573532, "step": 119090 }, { "epoch": 33.806414987226795, "grad_norm": 0.10046201944351196, "learning_rate": 6.620834516037469e-05, "loss": 0.005145049095153809, "step": 119100 }, { "epoch": 33.80925347715016, "grad_norm": 0.527502179145813, "learning_rate": 6.620550667045133e-05, "loss": 0.003981425613164902, "step": 119110 }, { "epoch": 33.81209196707351, "grad_norm": 1.7586145401000977, "learning_rate": 6.620266818052796e-05, "loss": 0.03190743923187256, "step": 119120 }, { "epoch": 33.814930456996876, "grad_norm": 0.8558820486068726, "learning_rate": 6.61998296906046e-05, "loss": 0.0036613203585147856, "step": 119130 }, { "epoch": 33.81776894692024, "grad_norm": 5.289999961853027, "learning_rate": 6.619699120068124e-05, "loss": 0.004878237470984459, "step": 119140 }, { "epoch": 33.8206074368436, "grad_norm": 2.6638681888580322, "learning_rate": 6.619415271075788e-05, "loss": 0.002939035929739475, "step": 119150 }, { "epoch": 33.82344592676696, "grad_norm": 0.48380157351493835, "learning_rate": 6.619131422083452e-05, "loss": 0.013601365685462951, "step": 119160 }, { "epoch": 33.82628441669032, "grad_norm": 0.13613036274909973, "learning_rate": 6.618847573091116e-05, "loss": 0.006524347513914108, "step": 119170 }, { "epoch": 33.829122906613684, "grad_norm": 0.08632665872573853, "learning_rate": 6.618563724098779e-05, "loss": 0.0023039234802126884, "step": 119180 }, { "epoch": 33.83196139653704, "grad_norm": 3.8785178661346436, "learning_rate": 6.618279875106443e-05, "loss": 0.006764629483222961, "step": 119190 }, { "epoch": 33.8347998864604, "grad_norm": 7.321354389190674, "learning_rate": 6.617996026114107e-05, "loss": 0.005146923661231995, "step": 119200 }, { "epoch": 33.837638376383765, "grad_norm": 1.948907732963562, "learning_rate": 6.617712177121772e-05, "loss": 0.0028041077777743338, "step": 119210 }, { "epoch": 33.84047686630713, "grad_norm": 0.5973936915397644, "learning_rate": 6.617428328129436e-05, "loss": 0.011538489907979965, "step": 119220 }, { "epoch": 33.843315356230484, "grad_norm": 1.4393985271453857, "learning_rate": 6.6171444791371e-05, "loss": 0.0024961814284324645, "step": 119230 }, { "epoch": 33.84615384615385, "grad_norm": 0.06161246448755264, "learning_rate": 6.616860630144763e-05, "loss": 0.006900164484977722, "step": 119240 }, { "epoch": 33.84899233607721, "grad_norm": 0.19868294894695282, "learning_rate": 6.616576781152427e-05, "loss": 0.0014829928055405616, "step": 119250 }, { "epoch": 33.851830826000565, "grad_norm": 1.5332965850830078, "learning_rate": 6.616292932160091e-05, "loss": 0.005622328072786331, "step": 119260 }, { "epoch": 33.85466931592393, "grad_norm": 0.18568873405456543, "learning_rate": 6.616009083167755e-05, "loss": 0.0013919040560722352, "step": 119270 }, { "epoch": 33.85750780584729, "grad_norm": 2.5446200370788574, "learning_rate": 6.615725234175419e-05, "loss": 0.0068504571914672855, "step": 119280 }, { "epoch": 33.86034629577065, "grad_norm": 16.650548934936523, "learning_rate": 6.615441385183083e-05, "loss": 0.009473662078380584, "step": 119290 }, { "epoch": 33.86318478569401, "grad_norm": 0.08913446217775345, "learning_rate": 6.615157536190747e-05, "loss": 0.0028274305164813994, "step": 119300 }, { "epoch": 33.86602327561737, "grad_norm": 0.8502799868583679, "learning_rate": 6.61487368719841e-05, "loss": 0.002146740071475506, "step": 119310 }, { "epoch": 33.868861765540736, "grad_norm": 0.9019044041633606, "learning_rate": 6.614589838206074e-05, "loss": 0.004552269354462624, "step": 119320 }, { "epoch": 33.87170025546409, "grad_norm": 0.37760862708091736, "learning_rate": 6.614305989213739e-05, "loss": 0.005337106063961983, "step": 119330 }, { "epoch": 33.874538745387454, "grad_norm": 9.183419227600098, "learning_rate": 6.614022140221401e-05, "loss": 0.008210556954145432, "step": 119340 }, { "epoch": 33.87737723531082, "grad_norm": 6.854710102081299, "learning_rate": 6.613738291229067e-05, "loss": 0.005192628130316734, "step": 119350 }, { "epoch": 33.88021572523417, "grad_norm": 0.9912996888160706, "learning_rate": 6.613454442236731e-05, "loss": 0.008374271541833877, "step": 119360 }, { "epoch": 33.883054215157536, "grad_norm": 0.19534367322921753, "learning_rate": 6.613170593244394e-05, "loss": 0.005766856670379639, "step": 119370 }, { "epoch": 33.8858927050809, "grad_norm": 4.808982849121094, "learning_rate": 6.612886744252058e-05, "loss": 0.005156569182872772, "step": 119380 }, { "epoch": 33.888731195004254, "grad_norm": 1.2250205278396606, "learning_rate": 6.612602895259722e-05, "loss": 0.016035692393779756, "step": 119390 }, { "epoch": 33.89156968492762, "grad_norm": 1.4834147691726685, "learning_rate": 6.612319046267386e-05, "loss": 0.004677487164735794, "step": 119400 }, { "epoch": 33.89440817485098, "grad_norm": 0.7850981950759888, "learning_rate": 6.61203519727505e-05, "loss": 0.0022174160927534103, "step": 119410 }, { "epoch": 33.89724666477434, "grad_norm": 0.38531237840652466, "learning_rate": 6.611751348282714e-05, "loss": 0.004461555182933808, "step": 119420 }, { "epoch": 33.9000851546977, "grad_norm": 0.4589572548866272, "learning_rate": 6.611467499290379e-05, "loss": 0.005551864206790924, "step": 119430 }, { "epoch": 33.90292364462106, "grad_norm": 0.008059073239564896, "learning_rate": 6.611183650298041e-05, "loss": 0.006022947281599045, "step": 119440 }, { "epoch": 33.905762134544425, "grad_norm": 0.009944310411810875, "learning_rate": 6.610899801305705e-05, "loss": 0.006749437004327774, "step": 119450 }, { "epoch": 33.90860062446778, "grad_norm": 0.11665124446153641, "learning_rate": 6.61061595231337e-05, "loss": 0.00559355691075325, "step": 119460 }, { "epoch": 33.91143911439114, "grad_norm": 1.4448776245117188, "learning_rate": 6.610332103321032e-05, "loss": 0.002122136205434799, "step": 119470 }, { "epoch": 33.914277604314506, "grad_norm": 0.32620781660079956, "learning_rate": 6.610048254328698e-05, "loss": 0.006646633148193359, "step": 119480 }, { "epoch": 33.91711609423786, "grad_norm": 0.5517118573188782, "learning_rate": 6.609764405336362e-05, "loss": 0.003561890125274658, "step": 119490 }, { "epoch": 33.919954584161225, "grad_norm": 0.19459865987300873, "learning_rate": 6.609480556344025e-05, "loss": 0.002037677727639675, "step": 119500 }, { "epoch": 33.919954584161225, "eval_accuracy": 0.9712596172187957, "eval_loss": 0.0952950045466423, "eval_runtime": 32.0591, "eval_samples_per_second": 490.562, "eval_steps_per_second": 7.673, "step": 119500 }, { "epoch": 33.92279307408459, "grad_norm": 0.42899176478385925, "learning_rate": 6.609196707351689e-05, "loss": 0.002759654074907303, "step": 119510 }, { "epoch": 33.92563156400795, "grad_norm": 0.42460140585899353, "learning_rate": 6.608912858359353e-05, "loss": 0.007476992160081864, "step": 119520 }, { "epoch": 33.928470053931306, "grad_norm": 1.0898528099060059, "learning_rate": 6.608629009367017e-05, "loss": 0.002463330328464508, "step": 119530 }, { "epoch": 33.93130854385467, "grad_norm": 0.07702265679836273, "learning_rate": 6.60834516037468e-05, "loss": 0.017772521078586578, "step": 119540 }, { "epoch": 33.93414703377803, "grad_norm": 8.78197956085205, "learning_rate": 6.608061311382346e-05, "loss": 0.01873738467693329, "step": 119550 }, { "epoch": 33.93698552370139, "grad_norm": 0.35545623302459717, "learning_rate": 6.60777746239001e-05, "loss": 0.0028546275570988655, "step": 119560 }, { "epoch": 33.93982401362475, "grad_norm": 0.6335259675979614, "learning_rate": 6.607493613397672e-05, "loss": 0.006526433676481247, "step": 119570 }, { "epoch": 33.942662503548114, "grad_norm": 2.91133713722229, "learning_rate": 6.607209764405337e-05, "loss": 0.004704262316226959, "step": 119580 }, { "epoch": 33.94550099347148, "grad_norm": 10.96039867401123, "learning_rate": 6.606925915413001e-05, "loss": 0.012880182266235352, "step": 119590 }, { "epoch": 33.94833948339483, "grad_norm": 12.452750205993652, "learning_rate": 6.606642066420663e-05, "loss": 0.006967640668153763, "step": 119600 }, { "epoch": 33.951177973318195, "grad_norm": 3.2905380725860596, "learning_rate": 6.606358217428329e-05, "loss": 0.0017825430259108543, "step": 119610 }, { "epoch": 33.95401646324156, "grad_norm": 0.3956787884235382, "learning_rate": 6.606074368435993e-05, "loss": 0.0058490529656410216, "step": 119620 }, { "epoch": 33.956854953164914, "grad_norm": 3.344552516937256, "learning_rate": 6.605790519443656e-05, "loss": 0.003191586583852768, "step": 119630 }, { "epoch": 33.95969344308828, "grad_norm": 0.6010175943374634, "learning_rate": 6.60550667045132e-05, "loss": 0.002622613124549389, "step": 119640 }, { "epoch": 33.96253193301164, "grad_norm": 0.24956364929676056, "learning_rate": 6.605222821458984e-05, "loss": 0.0021810749545693397, "step": 119650 }, { "epoch": 33.965370422934996, "grad_norm": 0.5087161660194397, "learning_rate": 6.604938972466648e-05, "loss": 0.0039704151451587675, "step": 119660 }, { "epoch": 33.96820891285836, "grad_norm": 0.9862024784088135, "learning_rate": 6.604655123474311e-05, "loss": 0.004416496679186821, "step": 119670 }, { "epoch": 33.97104740278172, "grad_norm": 4.125974178314209, "learning_rate": 6.604371274481977e-05, "loss": 0.004498625174164772, "step": 119680 }, { "epoch": 33.973885892705084, "grad_norm": 0.3017573654651642, "learning_rate": 6.604087425489641e-05, "loss": 0.003621348738670349, "step": 119690 }, { "epoch": 33.97672438262844, "grad_norm": 0.2410620003938675, "learning_rate": 6.603803576497304e-05, "loss": 0.004757682606577873, "step": 119700 }, { "epoch": 33.9795628725518, "grad_norm": 0.6310582756996155, "learning_rate": 6.603519727504968e-05, "loss": 0.004366743564605713, "step": 119710 }, { "epoch": 33.982401362475166, "grad_norm": 0.9471608400344849, "learning_rate": 6.603235878512632e-05, "loss": 0.006595972925424576, "step": 119720 }, { "epoch": 33.98523985239852, "grad_norm": 14.939306259155273, "learning_rate": 6.602952029520295e-05, "loss": 0.01201133131980896, "step": 119730 }, { "epoch": 33.988078342321884, "grad_norm": 2.4232983589172363, "learning_rate": 6.60266818052796e-05, "loss": 0.0035892192274332045, "step": 119740 }, { "epoch": 33.99091683224525, "grad_norm": 4.69612979888916, "learning_rate": 6.602384331535624e-05, "loss": 0.007551954686641693, "step": 119750 }, { "epoch": 33.9937553221686, "grad_norm": 0.8327898979187012, "learning_rate": 6.602100482543287e-05, "loss": 0.003397147357463837, "step": 119760 }, { "epoch": 33.996593812091966, "grad_norm": 0.8515961170196533, "learning_rate": 6.601816633550951e-05, "loss": 0.002370903082191944, "step": 119770 }, { "epoch": 33.99943230201533, "grad_norm": 0.24727791547775269, "learning_rate": 6.601532784558615e-05, "loss": 0.004083870723843574, "step": 119780 }, { "epoch": 34.00227079193869, "grad_norm": 1.560390830039978, "learning_rate": 6.60124893556628e-05, "loss": 0.0021931326016783716, "step": 119790 }, { "epoch": 34.00510928186205, "grad_norm": 0.23149053752422333, "learning_rate": 6.600965086573942e-05, "loss": 0.002705189771950245, "step": 119800 }, { "epoch": 34.00794777178541, "grad_norm": 0.4312277138233185, "learning_rate": 6.600681237581608e-05, "loss": 0.001137382909655571, "step": 119810 }, { "epoch": 34.01078626170877, "grad_norm": 8.387903213500977, "learning_rate": 6.600397388589272e-05, "loss": 0.0077542029321193695, "step": 119820 }, { "epoch": 34.01362475163213, "grad_norm": 0.804916501045227, "learning_rate": 6.600113539596935e-05, "loss": 0.004466918483376503, "step": 119830 }, { "epoch": 34.01646324155549, "grad_norm": 0.31707876920700073, "learning_rate": 6.599829690604599e-05, "loss": 0.002220351994037628, "step": 119840 }, { "epoch": 34.019301731478855, "grad_norm": 0.2828819453716278, "learning_rate": 6.599545841612263e-05, "loss": 0.0023949654772877694, "step": 119850 }, { "epoch": 34.02214022140221, "grad_norm": 0.3264423906803131, "learning_rate": 6.599261992619926e-05, "loss": 0.0023763000965118406, "step": 119860 }, { "epoch": 34.02497871132557, "grad_norm": 3.518228530883789, "learning_rate": 6.59897814362759e-05, "loss": 0.008187740296125411, "step": 119870 }, { "epoch": 34.027817201248936, "grad_norm": 0.411467045545578, "learning_rate": 6.598694294635255e-05, "loss": 0.011641748249530792, "step": 119880 }, { "epoch": 34.0306556911723, "grad_norm": 0.6357895135879517, "learning_rate": 6.598410445642918e-05, "loss": 0.014119769632816314, "step": 119890 }, { "epoch": 34.033494181095655, "grad_norm": 1.389652967453003, "learning_rate": 6.598126596650582e-05, "loss": 0.003645886480808258, "step": 119900 }, { "epoch": 34.03633267101902, "grad_norm": 0.0479893833398819, "learning_rate": 6.597842747658246e-05, "loss": 0.0020871927961707117, "step": 119910 }, { "epoch": 34.03917116094238, "grad_norm": 3.1221442222595215, "learning_rate": 6.59755889866591e-05, "loss": 0.003246936947107315, "step": 119920 }, { "epoch": 34.04200965086574, "grad_norm": 1.9789921045303345, "learning_rate": 6.597275049673573e-05, "loss": 0.011044969409704208, "step": 119930 }, { "epoch": 34.0448481407891, "grad_norm": 0.4218142032623291, "learning_rate": 6.596991200681239e-05, "loss": 0.00381898432970047, "step": 119940 }, { "epoch": 34.04768663071246, "grad_norm": 0.8461046814918518, "learning_rate": 6.596707351688902e-05, "loss": 0.0024044355377554893, "step": 119950 }, { "epoch": 34.050525120635825, "grad_norm": 0.15074428915977478, "learning_rate": 6.596423502696566e-05, "loss": 0.0025313768535852434, "step": 119960 }, { "epoch": 34.05336361055918, "grad_norm": 0.37584373354911804, "learning_rate": 6.59613965370423e-05, "loss": 0.003507961332798004, "step": 119970 }, { "epoch": 34.056202100482544, "grad_norm": 0.22898602485656738, "learning_rate": 6.595855804711894e-05, "loss": 0.00391046330332756, "step": 119980 }, { "epoch": 34.05904059040591, "grad_norm": 0.2502862513065338, "learning_rate": 6.595571955719557e-05, "loss": 0.008375391364097595, "step": 119990 }, { "epoch": 34.06187908032926, "grad_norm": 0.04171254485845566, "learning_rate": 6.595288106727221e-05, "loss": 0.002264373190701008, "step": 120000 }, { "epoch": 34.06187908032926, "eval_accuracy": 0.97501112736059, "eval_loss": 0.09171520918607712, "eval_runtime": 31.7329, "eval_samples_per_second": 495.606, "eval_steps_per_second": 7.752, "step": 120000 }, { "epoch": 34.064717570252625, "grad_norm": 0.12566813826560974, "learning_rate": 6.595004257734886e-05, "loss": 0.008542881906032562, "step": 120010 }, { "epoch": 34.06755606017599, "grad_norm": 3.2479872703552246, "learning_rate": 6.594720408742549e-05, "loss": 0.00406017079949379, "step": 120020 }, { "epoch": 34.070394550099344, "grad_norm": 0.08720032125711441, "learning_rate": 6.594436559750213e-05, "loss": 0.001068960689008236, "step": 120030 }, { "epoch": 34.07323304002271, "grad_norm": 0.14192959666252136, "learning_rate": 6.594152710757877e-05, "loss": 0.005984793603420258, "step": 120040 }, { "epoch": 34.07607152994607, "grad_norm": 1.612888216972351, "learning_rate": 6.593868861765542e-05, "loss": 0.0025998856872320175, "step": 120050 }, { "epoch": 34.07891001986943, "grad_norm": 0.8385291695594788, "learning_rate": 6.593585012773204e-05, "loss": 0.001395496353507042, "step": 120060 }, { "epoch": 34.08174850979279, "grad_norm": 0.4187453091144562, "learning_rate": 6.593301163780868e-05, "loss": 0.0017596635967493056, "step": 120070 }, { "epoch": 34.08458699971615, "grad_norm": 0.7616415023803711, "learning_rate": 6.593017314788533e-05, "loss": 0.0006042171269655227, "step": 120080 }, { "epoch": 34.087425489639514, "grad_norm": 0.3409002125263214, "learning_rate": 6.592733465796197e-05, "loss": 0.0041004780679941176, "step": 120090 }, { "epoch": 34.09026397956287, "grad_norm": 2.6917452812194824, "learning_rate": 6.592449616803861e-05, "loss": 0.002159907855093479, "step": 120100 }, { "epoch": 34.09310246948623, "grad_norm": 1.0723531246185303, "learning_rate": 6.592165767811525e-05, "loss": 0.007290668785572052, "step": 120110 }, { "epoch": 34.095940959409596, "grad_norm": 17.104623794555664, "learning_rate": 6.591881918819188e-05, "loss": 0.008852966874837876, "step": 120120 }, { "epoch": 34.09877944933295, "grad_norm": 2.0907278060913086, "learning_rate": 6.591598069826852e-05, "loss": 0.0036791734397411346, "step": 120130 }, { "epoch": 34.101617939256315, "grad_norm": 0.28257763385772705, "learning_rate": 6.591314220834517e-05, "loss": 0.0014428889378905296, "step": 120140 }, { "epoch": 34.10445642917968, "grad_norm": 11.50788402557373, "learning_rate": 6.59103037184218e-05, "loss": 0.021514533460140227, "step": 120150 }, { "epoch": 34.10729491910304, "grad_norm": 0.06060031056404114, "learning_rate": 6.590746522849844e-05, "loss": 0.004983774572610855, "step": 120160 }, { "epoch": 34.110133409026396, "grad_norm": 0.09006636589765549, "learning_rate": 6.590462673857508e-05, "loss": 0.00188368558883667, "step": 120170 }, { "epoch": 34.11297189894976, "grad_norm": 0.08525079488754272, "learning_rate": 6.590178824865171e-05, "loss": 0.014009492099285125, "step": 120180 }, { "epoch": 34.11581038887312, "grad_norm": 1.4263910055160522, "learning_rate": 6.589894975872835e-05, "loss": 0.0017824728041887284, "step": 120190 }, { "epoch": 34.11864887879648, "grad_norm": 13.674447059631348, "learning_rate": 6.5896111268805e-05, "loss": 0.015447154641151428, "step": 120200 }, { "epoch": 34.12148736871984, "grad_norm": 0.20295418798923492, "learning_rate": 6.589327277888164e-05, "loss": 0.0009308716282248497, "step": 120210 }, { "epoch": 34.1243258586432, "grad_norm": 0.3147740066051483, "learning_rate": 6.589043428895828e-05, "loss": 0.006511497497558594, "step": 120220 }, { "epoch": 34.12716434856656, "grad_norm": 0.0220764372497797, "learning_rate": 6.588759579903492e-05, "loss": 0.0010855557397007943, "step": 120230 }, { "epoch": 34.13000283848992, "grad_norm": 1.274138331413269, "learning_rate": 6.588475730911156e-05, "loss": 0.0011312618851661682, "step": 120240 }, { "epoch": 34.132841328413285, "grad_norm": 6.901713848114014, "learning_rate": 6.588191881918819e-05, "loss": 0.008790388703346252, "step": 120250 }, { "epoch": 34.13567981833665, "grad_norm": 0.783248245716095, "learning_rate": 6.587908032926483e-05, "loss": 0.001980581507086754, "step": 120260 }, { "epoch": 34.138518308260004, "grad_norm": 5.808366298675537, "learning_rate": 6.587624183934147e-05, "loss": 0.009575360268354417, "step": 120270 }, { "epoch": 34.14135679818337, "grad_norm": 0.7367664575576782, "learning_rate": 6.587340334941811e-05, "loss": 0.003168274462223053, "step": 120280 }, { "epoch": 34.14419528810673, "grad_norm": 0.3028112053871155, "learning_rate": 6.587056485949475e-05, "loss": 0.004181566834449768, "step": 120290 }, { "epoch": 34.147033778030085, "grad_norm": 2.263658285140991, "learning_rate": 6.58677263695714e-05, "loss": 0.0023328032344579696, "step": 120300 }, { "epoch": 34.14987226795345, "grad_norm": 0.3305845856666565, "learning_rate": 6.586488787964802e-05, "loss": 0.00887860804796219, "step": 120310 }, { "epoch": 34.15271075787681, "grad_norm": 0.46553438901901245, "learning_rate": 6.586204938972466e-05, "loss": 0.0010540813207626342, "step": 120320 }, { "epoch": 34.15554924780017, "grad_norm": 2.120094060897827, "learning_rate": 6.58592108998013e-05, "loss": 0.007890991866588593, "step": 120330 }, { "epoch": 34.15838773772353, "grad_norm": 0.057511698454618454, "learning_rate": 6.585637240987795e-05, "loss": 0.0018104396760463715, "step": 120340 }, { "epoch": 34.16122622764689, "grad_norm": 0.4922485947608948, "learning_rate": 6.585353391995459e-05, "loss": 0.0028358491137623785, "step": 120350 }, { "epoch": 34.164064717570255, "grad_norm": 0.14623531699180603, "learning_rate": 6.585069543003123e-05, "loss": 0.0010311707854270935, "step": 120360 }, { "epoch": 34.16690320749361, "grad_norm": 1.3655275106430054, "learning_rate": 6.584785694010787e-05, "loss": 0.007381750643253327, "step": 120370 }, { "epoch": 34.169741697416974, "grad_norm": 3.024090051651001, "learning_rate": 6.58450184501845e-05, "loss": 0.005179420113563538, "step": 120380 }, { "epoch": 34.17258018734034, "grad_norm": 0.28700101375579834, "learning_rate": 6.584217996026114e-05, "loss": 0.0009599108248949051, "step": 120390 }, { "epoch": 34.17541867726369, "grad_norm": 9.125275611877441, "learning_rate": 6.583934147033778e-05, "loss": 0.0028434379026293756, "step": 120400 }, { "epoch": 34.178257167187056, "grad_norm": 0.12669123709201813, "learning_rate": 6.583650298041442e-05, "loss": 0.006512732803821563, "step": 120410 }, { "epoch": 34.18109565711042, "grad_norm": 0.018964238464832306, "learning_rate": 6.583366449049106e-05, "loss": 0.013804316520690918, "step": 120420 }, { "epoch": 34.18393414703378, "grad_norm": 5.092179298400879, "learning_rate": 6.58308260005677e-05, "loss": 0.006323505192995071, "step": 120430 }, { "epoch": 34.18677263695714, "grad_norm": 0.09260304272174835, "learning_rate": 6.582798751064433e-05, "loss": 0.004588256776332855, "step": 120440 }, { "epoch": 34.1896111268805, "grad_norm": 0.4579649269580841, "learning_rate": 6.582514902072098e-05, "loss": 0.0031266190111637116, "step": 120450 }, { "epoch": 34.19244961680386, "grad_norm": 1.9222780466079712, "learning_rate": 6.582231053079762e-05, "loss": 0.007798251509666443, "step": 120460 }, { "epoch": 34.19528810672722, "grad_norm": 0.7869184613227844, "learning_rate": 6.581947204087426e-05, "loss": 0.005959853902459145, "step": 120470 }, { "epoch": 34.19812659665058, "grad_norm": 2.2712228298187256, "learning_rate": 6.58166335509509e-05, "loss": 0.0022471331059932707, "step": 120480 }, { "epoch": 34.200965086573945, "grad_norm": 0.1493186205625534, "learning_rate": 6.581379506102754e-05, "loss": 0.015672385692596436, "step": 120490 }, { "epoch": 34.2038035764973, "grad_norm": 0.18912315368652344, "learning_rate": 6.581095657110418e-05, "loss": 0.0024687185883522033, "step": 120500 }, { "epoch": 34.2038035764973, "eval_accuracy": 0.9753926368665352, "eval_loss": 0.08686452358961105, "eval_runtime": 32.1024, "eval_samples_per_second": 489.9, "eval_steps_per_second": 7.663, "step": 120500 }, { "epoch": 34.20664206642066, "grad_norm": 2.927769660949707, "learning_rate": 6.580811808118081e-05, "loss": 0.0013673042878508568, "step": 120510 }, { "epoch": 34.209480556344026, "grad_norm": 0.27282968163490295, "learning_rate": 6.580527959125745e-05, "loss": 0.007888178527355193, "step": 120520 }, { "epoch": 34.21231904626739, "grad_norm": 0.11811738461256027, "learning_rate": 6.580244110133409e-05, "loss": 0.0040873602032661435, "step": 120530 }, { "epoch": 34.215157536190745, "grad_norm": 1.1339547634124756, "learning_rate": 6.579960261141073e-05, "loss": 0.014372356235980988, "step": 120540 }, { "epoch": 34.21799602611411, "grad_norm": 0.15421226620674133, "learning_rate": 6.579676412148738e-05, "loss": 0.00357535220682621, "step": 120550 }, { "epoch": 34.22083451603747, "grad_norm": 0.8814896941184998, "learning_rate": 6.579392563156402e-05, "loss": 0.01395888477563858, "step": 120560 }, { "epoch": 34.223673005960826, "grad_norm": 0.6410775184631348, "learning_rate": 6.579108714164064e-05, "loss": 0.002811104618012905, "step": 120570 }, { "epoch": 34.22651149588419, "grad_norm": 0.21123194694519043, "learning_rate": 6.578824865171729e-05, "loss": 0.0010377543047070503, "step": 120580 }, { "epoch": 34.22934998580755, "grad_norm": 0.23363442718982697, "learning_rate": 6.578541016179393e-05, "loss": 0.004100901260972023, "step": 120590 }, { "epoch": 34.23218847573091, "grad_norm": 0.1753082275390625, "learning_rate": 6.578257167187057e-05, "loss": 0.004530787467956543, "step": 120600 }, { "epoch": 34.23502696565427, "grad_norm": 9.359720230102539, "learning_rate": 6.577973318194721e-05, "loss": 0.01224035769701004, "step": 120610 }, { "epoch": 34.237865455577634, "grad_norm": 0.8083338141441345, "learning_rate": 6.577689469202385e-05, "loss": 0.009027906507253648, "step": 120620 }, { "epoch": 34.240703945501, "grad_norm": 9.791426658630371, "learning_rate": 6.577405620210049e-05, "loss": 0.0037582565099000933, "step": 120630 }, { "epoch": 34.24354243542435, "grad_norm": 0.20598530769348145, "learning_rate": 6.577121771217712e-05, "loss": 0.01041233241558075, "step": 120640 }, { "epoch": 34.246380925347715, "grad_norm": 0.8702098727226257, "learning_rate": 6.576837922225376e-05, "loss": 0.003046915866434574, "step": 120650 }, { "epoch": 34.24921941527108, "grad_norm": 0.41778644919395447, "learning_rate": 6.57655407323304e-05, "loss": 0.00889195129275322, "step": 120660 }, { "epoch": 34.252057905194434, "grad_norm": 0.037582848221063614, "learning_rate": 6.576270224240703e-05, "loss": 0.006374266743659973, "step": 120670 }, { "epoch": 34.2548963951178, "grad_norm": 1.0132782459259033, "learning_rate": 6.575986375248369e-05, "loss": 0.003450435400009155, "step": 120680 }, { "epoch": 34.25773488504116, "grad_norm": 0.14376351237297058, "learning_rate": 6.575702526256033e-05, "loss": 0.0028980176895856856, "step": 120690 }, { "epoch": 34.260573374964515, "grad_norm": 0.8079062700271606, "learning_rate": 6.575418677263696e-05, "loss": 0.015229898691177367, "step": 120700 }, { "epoch": 34.26341186488788, "grad_norm": 0.8749398589134216, "learning_rate": 6.57513482827136e-05, "loss": 0.007276272773742676, "step": 120710 }, { "epoch": 34.26625035481124, "grad_norm": 1.4335858821868896, "learning_rate": 6.574850979279024e-05, "loss": 0.0052808687090873715, "step": 120720 }, { "epoch": 34.269088844734604, "grad_norm": 1.8931385278701782, "learning_rate": 6.574567130286688e-05, "loss": 0.019981157779693604, "step": 120730 }, { "epoch": 34.27192733465796, "grad_norm": 0.4695318341255188, "learning_rate": 6.574283281294352e-05, "loss": 0.008774477988481522, "step": 120740 }, { "epoch": 34.27476582458132, "grad_norm": 0.08131548017263412, "learning_rate": 6.573999432302016e-05, "loss": 0.0059345878660678865, "step": 120750 }, { "epoch": 34.277604314504686, "grad_norm": 3.9901278018951416, "learning_rate": 6.57371558330968e-05, "loss": 0.003952443972229958, "step": 120760 }, { "epoch": 34.28044280442804, "grad_norm": 17.154191970825195, "learning_rate": 6.573431734317343e-05, "loss": 0.007435213774442673, "step": 120770 }, { "epoch": 34.283281294351404, "grad_norm": 0.6555381417274475, "learning_rate": 6.573147885325007e-05, "loss": 0.002556019090116024, "step": 120780 }, { "epoch": 34.28611978427477, "grad_norm": 0.37856563925743103, "learning_rate": 6.572864036332671e-05, "loss": 0.005672322213649749, "step": 120790 }, { "epoch": 34.28895827419813, "grad_norm": 0.2495548576116562, "learning_rate": 6.572580187340334e-05, "loss": 0.0071455128490924835, "step": 120800 }, { "epoch": 34.291796764121486, "grad_norm": 0.08386978507041931, "learning_rate": 6.572296338348e-05, "loss": 0.000952913612127304, "step": 120810 }, { "epoch": 34.29463525404485, "grad_norm": 0.1970139592885971, "learning_rate": 6.572012489355664e-05, "loss": 0.0030345387756824495, "step": 120820 }, { "epoch": 34.29747374396821, "grad_norm": 0.6047740578651428, "learning_rate": 6.571728640363327e-05, "loss": 0.0037004269659519197, "step": 120830 }, { "epoch": 34.30031223389157, "grad_norm": 0.25278300046920776, "learning_rate": 6.571444791370991e-05, "loss": 0.0041191868484020235, "step": 120840 }, { "epoch": 34.30315072381493, "grad_norm": 2.0211734771728516, "learning_rate": 6.571160942378655e-05, "loss": 0.004446744173765183, "step": 120850 }, { "epoch": 34.30598921373829, "grad_norm": 0.31450673937797546, "learning_rate": 6.570877093386319e-05, "loss": 0.0034073799848556518, "step": 120860 }, { "epoch": 34.30882770366165, "grad_norm": 0.10980676114559174, "learning_rate": 6.570593244393982e-05, "loss": 0.0017848830670118333, "step": 120870 }, { "epoch": 34.31166619358501, "grad_norm": 0.32644227147102356, "learning_rate": 6.570309395401647e-05, "loss": 0.004441918432712555, "step": 120880 }, { "epoch": 34.314504683508375, "grad_norm": 17.252668380737305, "learning_rate": 6.57002554640931e-05, "loss": 0.02058267742395401, "step": 120890 }, { "epoch": 34.31734317343174, "grad_norm": 0.3855568766593933, "learning_rate": 6.569741697416974e-05, "loss": 0.009337116777896882, "step": 120900 }, { "epoch": 34.32018166335509, "grad_norm": 5.581953525543213, "learning_rate": 6.569457848424638e-05, "loss": 0.007589308917522431, "step": 120910 }, { "epoch": 34.323020153278456, "grad_norm": 0.056428126990795135, "learning_rate": 6.569173999432303e-05, "loss": 0.004325633868575096, "step": 120920 }, { "epoch": 34.32585864320182, "grad_norm": 0.21463581919670105, "learning_rate": 6.568890150439965e-05, "loss": 0.005055137723684311, "step": 120930 }, { "epoch": 34.328697133125175, "grad_norm": 0.41009339690208435, "learning_rate": 6.568606301447631e-05, "loss": 0.004133815318346024, "step": 120940 }, { "epoch": 34.33153562304854, "grad_norm": 0.7506110668182373, "learning_rate": 6.568322452455295e-05, "loss": 0.006041218340396881, "step": 120950 }, { "epoch": 34.3343741129719, "grad_norm": 0.02781783416867256, "learning_rate": 6.568038603462958e-05, "loss": 0.0034258291125297546, "step": 120960 }, { "epoch": 34.33721260289526, "grad_norm": 1.2344199419021606, "learning_rate": 6.567754754470622e-05, "loss": 0.017416100203990936, "step": 120970 }, { "epoch": 34.34005109281862, "grad_norm": 0.4481377601623535, "learning_rate": 6.567470905478286e-05, "loss": 0.005416756868362427, "step": 120980 }, { "epoch": 34.34288958274198, "grad_norm": 8.962422370910645, "learning_rate": 6.567187056485949e-05, "loss": 0.0033637385815382, "step": 120990 }, { "epoch": 34.345728072665345, "grad_norm": 1.0761725902557373, "learning_rate": 6.566903207493613e-05, "loss": 0.010275451838970185, "step": 121000 }, { "epoch": 34.345728072665345, "eval_accuracy": 0.9714503719717683, "eval_loss": 0.09643729776144028, "eval_runtime": 32.301, "eval_samples_per_second": 486.888, "eval_steps_per_second": 7.616, "step": 121000 }, { "epoch": 34.3485665625887, "grad_norm": 3.3196048736572266, "learning_rate": 6.566619358501278e-05, "loss": 0.014252856373786926, "step": 121010 }, { "epoch": 34.351405052512064, "grad_norm": 0.10674888640642166, "learning_rate": 6.566335509508941e-05, "loss": 0.009489893168210983, "step": 121020 }, { "epoch": 34.35424354243543, "grad_norm": 12.795866966247559, "learning_rate": 6.566051660516605e-05, "loss": 0.01366342306137085, "step": 121030 }, { "epoch": 34.35708203235878, "grad_norm": 0.35392606258392334, "learning_rate": 6.56576781152427e-05, "loss": 0.002171451784670353, "step": 121040 }, { "epoch": 34.359920522282145, "grad_norm": 0.11029020696878433, "learning_rate": 6.565483962531934e-05, "loss": 0.010325114428997039, "step": 121050 }, { "epoch": 34.36275901220551, "grad_norm": 0.17874668538570404, "learning_rate": 6.565200113539596e-05, "loss": 0.010382829606533051, "step": 121060 }, { "epoch": 34.365597502128864, "grad_norm": 0.06169803813099861, "learning_rate": 6.564916264547262e-05, "loss": 0.002348296530544758, "step": 121070 }, { "epoch": 34.36843599205223, "grad_norm": 0.09570597112178802, "learning_rate": 6.564632415554926e-05, "loss": 0.007990719377994537, "step": 121080 }, { "epoch": 34.37127448197559, "grad_norm": 4.681507587432861, "learning_rate": 6.564348566562589e-05, "loss": 0.009579497575759887, "step": 121090 }, { "epoch": 34.37411297189895, "grad_norm": 5.019866943359375, "learning_rate": 6.564064717570253e-05, "loss": 0.005193861573934555, "step": 121100 }, { "epoch": 34.37695146182231, "grad_norm": 0.050225336104631424, "learning_rate": 6.563780868577917e-05, "loss": 0.008365646004676819, "step": 121110 }, { "epoch": 34.37978995174567, "grad_norm": 0.5290006399154663, "learning_rate": 6.56349701958558e-05, "loss": 0.005773923546075821, "step": 121120 }, { "epoch": 34.382628441669034, "grad_norm": 9.007638931274414, "learning_rate": 6.563213170593244e-05, "loss": 0.0076716005802154544, "step": 121130 }, { "epoch": 34.38546693159239, "grad_norm": 10.064278602600098, "learning_rate": 6.56292932160091e-05, "loss": 0.006720879673957824, "step": 121140 }, { "epoch": 34.38830542151575, "grad_norm": 5.260037899017334, "learning_rate": 6.562645472608572e-05, "loss": 0.002526211738586426, "step": 121150 }, { "epoch": 34.391143911439116, "grad_norm": 0.14219024777412415, "learning_rate": 6.562361623616236e-05, "loss": 0.00200135949999094, "step": 121160 }, { "epoch": 34.39398240136248, "grad_norm": 0.059102702885866165, "learning_rate": 6.5620777746239e-05, "loss": 0.001701488345861435, "step": 121170 }, { "epoch": 34.396820891285834, "grad_norm": 2.133500099182129, "learning_rate": 6.561793925631565e-05, "loss": 0.001616235449910164, "step": 121180 }, { "epoch": 34.3996593812092, "grad_norm": 3.6466965675354004, "learning_rate": 6.561510076639227e-05, "loss": 0.0028897901996970178, "step": 121190 }, { "epoch": 34.40249787113256, "grad_norm": 0.9116111993789673, "learning_rate": 6.561226227646892e-05, "loss": 0.010547797381877898, "step": 121200 }, { "epoch": 34.405336361055916, "grad_norm": 0.3015027642250061, "learning_rate": 6.560942378654557e-05, "loss": 0.00779331848025322, "step": 121210 }, { "epoch": 34.40817485097928, "grad_norm": 0.6132565140724182, "learning_rate": 6.56065852966222e-05, "loss": 0.0037642888724803924, "step": 121220 }, { "epoch": 34.41101334090264, "grad_norm": 8.30250072479248, "learning_rate": 6.560374680669884e-05, "loss": 0.01046159565448761, "step": 121230 }, { "epoch": 34.413851830826, "grad_norm": 0.05436651036143303, "learning_rate": 6.560090831677548e-05, "loss": 0.008311940729618073, "step": 121240 }, { "epoch": 34.41669032074936, "grad_norm": 0.21833334863185883, "learning_rate": 6.559806982685211e-05, "loss": 0.002599102072417736, "step": 121250 }, { "epoch": 34.41952881067272, "grad_norm": 3.46553373336792, "learning_rate": 6.559523133692875e-05, "loss": 0.0048821233212947845, "step": 121260 }, { "epoch": 34.422367300596086, "grad_norm": 0.17556701600551605, "learning_rate": 6.55923928470054e-05, "loss": 0.0015192406252026557, "step": 121270 }, { "epoch": 34.42520579051944, "grad_norm": 1.5152485370635986, "learning_rate": 6.558955435708203e-05, "loss": 0.0024162139743566513, "step": 121280 }, { "epoch": 34.428044280442805, "grad_norm": 0.16614964604377747, "learning_rate": 6.558671586715867e-05, "loss": 0.003900546580553055, "step": 121290 }, { "epoch": 34.43088277036617, "grad_norm": 0.1593964695930481, "learning_rate": 6.558387737723532e-05, "loss": 0.009399491548538207, "step": 121300 }, { "epoch": 34.43372126028952, "grad_norm": 0.5776618719100952, "learning_rate": 6.558103888731196e-05, "loss": 0.0017070671543478965, "step": 121310 }, { "epoch": 34.436559750212886, "grad_norm": 0.10168550163507462, "learning_rate": 6.557820039738859e-05, "loss": 0.014530080556869506, "step": 121320 }, { "epoch": 34.43939824013625, "grad_norm": 0.027144042775034904, "learning_rate": 6.557536190746523e-05, "loss": 0.002441391907632351, "step": 121330 }, { "epoch": 34.442236730059605, "grad_norm": 3.639847755432129, "learning_rate": 6.557252341754188e-05, "loss": 0.009512382745742797, "step": 121340 }, { "epoch": 34.44507521998297, "grad_norm": 1.7980903387069702, "learning_rate": 6.556968492761851e-05, "loss": 0.014887143671512604, "step": 121350 }, { "epoch": 34.44791370990633, "grad_norm": 0.637545108795166, "learning_rate": 6.556684643769515e-05, "loss": 0.0011794516816735269, "step": 121360 }, { "epoch": 34.450752199829694, "grad_norm": 0.6996497511863708, "learning_rate": 6.556400794777179e-05, "loss": 0.004240016639232636, "step": 121370 }, { "epoch": 34.45359068975305, "grad_norm": 0.428958922624588, "learning_rate": 6.556116945784842e-05, "loss": 0.002185925841331482, "step": 121380 }, { "epoch": 34.45642917967641, "grad_norm": 2.982541561126709, "learning_rate": 6.555833096792506e-05, "loss": 0.0016149669885635377, "step": 121390 }, { "epoch": 34.459267669599775, "grad_norm": 15.298772811889648, "learning_rate": 6.55554924780017e-05, "loss": 0.007458152621984482, "step": 121400 }, { "epoch": 34.46210615952313, "grad_norm": 0.24099060893058777, "learning_rate": 6.555265398807834e-05, "loss": 0.001996976509690285, "step": 121410 }, { "epoch": 34.464944649446494, "grad_norm": 0.44171348214149475, "learning_rate": 6.554981549815499e-05, "loss": 0.003290959447622299, "step": 121420 }, { "epoch": 34.46778313936986, "grad_norm": 0.19654907286167145, "learning_rate": 6.554697700823163e-05, "loss": 0.01628538519144058, "step": 121430 }, { "epoch": 34.47062162929321, "grad_norm": 0.10233055055141449, "learning_rate": 6.554413851830827e-05, "loss": 0.004568058997392654, "step": 121440 }, { "epoch": 34.473460119216575, "grad_norm": 0.11782721430063248, "learning_rate": 6.55413000283849e-05, "loss": 0.0028743093833327295, "step": 121450 }, { "epoch": 34.47629860913994, "grad_norm": 0.24863453209400177, "learning_rate": 6.553846153846154e-05, "loss": 0.0036957345902919768, "step": 121460 }, { "epoch": 34.4791370990633, "grad_norm": 8.27115249633789, "learning_rate": 6.553562304853819e-05, "loss": 0.003077494539320469, "step": 121470 }, { "epoch": 34.48197558898666, "grad_norm": 0.6603391766548157, "learning_rate": 6.553278455861482e-05, "loss": 0.010180327296257018, "step": 121480 }, { "epoch": 34.48481407891002, "grad_norm": 1.912031888961792, "learning_rate": 6.552994606869146e-05, "loss": 0.003331070765852928, "step": 121490 }, { "epoch": 34.48765256883338, "grad_norm": 0.8125287294387817, "learning_rate": 6.55271075787681e-05, "loss": 0.005793993920087814, "step": 121500 }, { "epoch": 34.48765256883338, "eval_accuracy": 0.9713232021364532, "eval_loss": 0.09893113374710083, "eval_runtime": 32.1117, "eval_samples_per_second": 489.759, "eval_steps_per_second": 7.661, "step": 121500 }, { "epoch": 34.49049105875674, "grad_norm": 0.4688956141471863, "learning_rate": 6.552426908884473e-05, "loss": 0.005483905225992203, "step": 121510 }, { "epoch": 34.4933295486801, "grad_norm": 4.4383087158203125, "learning_rate": 6.552143059892137e-05, "loss": 0.005467564612627029, "step": 121520 }, { "epoch": 34.496168038603464, "grad_norm": 0.3712189197540283, "learning_rate": 6.551859210899801e-05, "loss": 0.008706380426883698, "step": 121530 }, { "epoch": 34.49900652852683, "grad_norm": 0.15487441420555115, "learning_rate": 6.551575361907465e-05, "loss": 0.007328922301530838, "step": 121540 }, { "epoch": 34.50184501845018, "grad_norm": 0.30117157101631165, "learning_rate": 6.55129151291513e-05, "loss": 0.006180235743522644, "step": 121550 }, { "epoch": 34.504683508373546, "grad_norm": 0.16331055760383606, "learning_rate": 6.551007663922794e-05, "loss": 0.004076334461569786, "step": 121560 }, { "epoch": 34.50752199829691, "grad_norm": 9.480448722839355, "learning_rate": 6.550723814930458e-05, "loss": 0.010015219449996948, "step": 121570 }, { "epoch": 34.510360488220265, "grad_norm": 0.07059287279844284, "learning_rate": 6.550439965938121e-05, "loss": 0.004824448376893997, "step": 121580 }, { "epoch": 34.51319897814363, "grad_norm": 1.363734483718872, "learning_rate": 6.550156116945785e-05, "loss": 0.0052822701632976535, "step": 121590 }, { "epoch": 34.51603746806699, "grad_norm": 26.711833953857422, "learning_rate": 6.549872267953449e-05, "loss": 0.012368667870759964, "step": 121600 }, { "epoch": 34.518875957990346, "grad_norm": 15.351613998413086, "learning_rate": 6.549588418961113e-05, "loss": 0.012082774937152863, "step": 121610 }, { "epoch": 34.52171444791371, "grad_norm": 1.0990090370178223, "learning_rate": 6.549304569968777e-05, "loss": 0.0031744129955768585, "step": 121620 }, { "epoch": 34.52455293783707, "grad_norm": 2.9962265491485596, "learning_rate": 6.549020720976441e-05, "loss": 0.003898720443248749, "step": 121630 }, { "epoch": 34.527391427760435, "grad_norm": 0.008796045556664467, "learning_rate": 6.548736871984104e-05, "loss": 0.012318804860115051, "step": 121640 }, { "epoch": 34.53022991768379, "grad_norm": 2.5744450092315674, "learning_rate": 6.548453022991768e-05, "loss": 0.007464762777090073, "step": 121650 }, { "epoch": 34.53306840760715, "grad_norm": 4.2871904373168945, "learning_rate": 6.548169173999432e-05, "loss": 0.0058799199759960175, "step": 121660 }, { "epoch": 34.535906897530516, "grad_norm": 1.9031122922897339, "learning_rate": 6.547885325007097e-05, "loss": 0.010009279847145081, "step": 121670 }, { "epoch": 34.53874538745387, "grad_norm": 2.049429416656494, "learning_rate": 6.547601476014761e-05, "loss": 0.00995267480611801, "step": 121680 }, { "epoch": 34.541583877377235, "grad_norm": 2.6767995357513428, "learning_rate": 6.547317627022425e-05, "loss": 0.003879908472299576, "step": 121690 }, { "epoch": 34.5444223673006, "grad_norm": 3.3350377082824707, "learning_rate": 6.547033778030089e-05, "loss": 0.004507974535226822, "step": 121700 }, { "epoch": 34.547260857223954, "grad_norm": 0.21623603999614716, "learning_rate": 6.546749929037752e-05, "loss": 0.003957278281450272, "step": 121710 }, { "epoch": 34.55009934714732, "grad_norm": 0.571140706539154, "learning_rate": 6.546466080045416e-05, "loss": 0.00429561510682106, "step": 121720 }, { "epoch": 34.55293783707068, "grad_norm": 1.5757650136947632, "learning_rate": 6.54618223105308e-05, "loss": 0.004278173297643661, "step": 121730 }, { "epoch": 34.55577632699404, "grad_norm": 0.3858022391796112, "learning_rate": 6.545898382060744e-05, "loss": 0.003992342948913574, "step": 121740 }, { "epoch": 34.5586148169174, "grad_norm": 0.06836457550525665, "learning_rate": 6.545614533068408e-05, "loss": 0.004588346928358078, "step": 121750 }, { "epoch": 34.56145330684076, "grad_norm": 2.7228176593780518, "learning_rate": 6.545330684076072e-05, "loss": 0.00953756421804428, "step": 121760 }, { "epoch": 34.564291796764124, "grad_norm": 12.314213752746582, "learning_rate": 6.545046835083735e-05, "loss": 0.006161943078041077, "step": 121770 }, { "epoch": 34.56713028668748, "grad_norm": 2.7650938034057617, "learning_rate": 6.5447629860914e-05, "loss": 0.01035730168223381, "step": 121780 }, { "epoch": 34.56996877661084, "grad_norm": 0.4451490342617035, "learning_rate": 6.544479137099064e-05, "loss": 0.009587416052818298, "step": 121790 }, { "epoch": 34.572807266534205, "grad_norm": 9.189227104187012, "learning_rate": 6.544195288106728e-05, "loss": 0.006797210872173309, "step": 121800 }, { "epoch": 34.57564575645756, "grad_norm": 5.982112884521484, "learning_rate": 6.543911439114392e-05, "loss": 0.005052819848060608, "step": 121810 }, { "epoch": 34.578484246380924, "grad_norm": 4.866293430328369, "learning_rate": 6.543627590122056e-05, "loss": 0.004836326092481613, "step": 121820 }, { "epoch": 34.58132273630429, "grad_norm": 1.0301543474197388, "learning_rate": 6.543343741129719e-05, "loss": 0.0048585444688797, "step": 121830 }, { "epoch": 34.58416122622765, "grad_norm": 8.754075050354004, "learning_rate": 6.543059892137383e-05, "loss": 0.008437854051589967, "step": 121840 }, { "epoch": 34.586999716151006, "grad_norm": 0.020057469606399536, "learning_rate": 6.542776043145047e-05, "loss": 0.007250663638114929, "step": 121850 }, { "epoch": 34.58983820607437, "grad_norm": 2.0008208751678467, "learning_rate": 6.542492194152711e-05, "loss": 0.018709914386272432, "step": 121860 }, { "epoch": 34.59267669599773, "grad_norm": 0.41609352827072144, "learning_rate": 6.542208345160375e-05, "loss": 0.002455700933933258, "step": 121870 }, { "epoch": 34.59551518592109, "grad_norm": 0.6780348420143127, "learning_rate": 6.54192449616804e-05, "loss": 0.002100434899330139, "step": 121880 }, { "epoch": 34.59835367584445, "grad_norm": 0.28256723284721375, "learning_rate": 6.541640647175704e-05, "loss": 0.008593764156103134, "step": 121890 }, { "epoch": 34.60119216576781, "grad_norm": 12.566946029663086, "learning_rate": 6.541356798183366e-05, "loss": 0.011824636161327362, "step": 121900 }, { "epoch": 34.604030655691176, "grad_norm": 0.7104384899139404, "learning_rate": 6.54107294919103e-05, "loss": 0.006410237401723862, "step": 121910 }, { "epoch": 34.60686914561453, "grad_norm": 0.7847989201545715, "learning_rate": 6.540789100198695e-05, "loss": 0.002907312475144863, "step": 121920 }, { "epoch": 34.609707635537895, "grad_norm": 0.4069633185863495, "learning_rate": 6.540505251206357e-05, "loss": 0.005336733162403106, "step": 121930 }, { "epoch": 34.61254612546126, "grad_norm": 0.8873326182365417, "learning_rate": 6.540221402214023e-05, "loss": 0.003549569100141525, "step": 121940 }, { "epoch": 34.61538461538461, "grad_norm": 0.24483853578567505, "learning_rate": 6.539937553221687e-05, "loss": 0.0016845526173710824, "step": 121950 }, { "epoch": 34.618223105307976, "grad_norm": 1.4285119771957397, "learning_rate": 6.53965370422935e-05, "loss": 0.002040030062198639, "step": 121960 }, { "epoch": 34.62106159523134, "grad_norm": 0.49573495984077454, "learning_rate": 6.539369855237014e-05, "loss": 0.005358568951487541, "step": 121970 }, { "epoch": 34.623900085154695, "grad_norm": 0.16889546811580658, "learning_rate": 6.539086006244678e-05, "loss": 0.001979339122772217, "step": 121980 }, { "epoch": 34.62673857507806, "grad_norm": 0.19205884635448456, "learning_rate": 6.538802157252342e-05, "loss": 0.00336933434009552, "step": 121990 }, { "epoch": 34.62957706500142, "grad_norm": 1.6046061515808105, "learning_rate": 6.538518308260005e-05, "loss": 0.004387809336185456, "step": 122000 }, { "epoch": 34.62957706500142, "eval_accuracy": 0.9740573535957271, "eval_loss": 0.08764299005270004, "eval_runtime": 32.2426, "eval_samples_per_second": 487.771, "eval_steps_per_second": 7.63, "step": 122000 }, { "epoch": 34.63241555492478, "grad_norm": 0.08497347682714462, "learning_rate": 6.53823445926767e-05, "loss": 0.002357286959886551, "step": 122010 }, { "epoch": 34.63525404484814, "grad_norm": 0.41470205783843994, "learning_rate": 6.537950610275335e-05, "loss": 0.0036878764629364015, "step": 122020 }, { "epoch": 34.6380925347715, "grad_norm": 0.4962767958641052, "learning_rate": 6.537666761282997e-05, "loss": 0.008641448616981507, "step": 122030 }, { "epoch": 34.640931024694865, "grad_norm": 2.749258518218994, "learning_rate": 6.537382912290662e-05, "loss": 0.00379687137901783, "step": 122040 }, { "epoch": 34.64376951461822, "grad_norm": 1.0091066360473633, "learning_rate": 6.537099063298326e-05, "loss": 0.002864320017397404, "step": 122050 }, { "epoch": 34.646608004541584, "grad_norm": 0.385221928358078, "learning_rate": 6.536815214305988e-05, "loss": 0.004537220299243927, "step": 122060 }, { "epoch": 34.64944649446495, "grad_norm": 3.0260488986968994, "learning_rate": 6.536531365313654e-05, "loss": 0.006437567621469497, "step": 122070 }, { "epoch": 34.6522849843883, "grad_norm": 4.358546257019043, "learning_rate": 6.536247516321318e-05, "loss": 0.004372327029705048, "step": 122080 }, { "epoch": 34.655123474311665, "grad_norm": 1.408868432044983, "learning_rate": 6.535963667328981e-05, "loss": 0.010747304558753968, "step": 122090 }, { "epoch": 34.65796196423503, "grad_norm": 0.19077467918395996, "learning_rate": 6.535679818336645e-05, "loss": 0.011761073768138886, "step": 122100 }, { "epoch": 34.66080045415839, "grad_norm": 0.2147844433784485, "learning_rate": 6.535395969344309e-05, "loss": 0.0072051376104354855, "step": 122110 }, { "epoch": 34.66363894408175, "grad_norm": 0.6149144172668457, "learning_rate": 6.535112120351973e-05, "loss": 0.0058714568614959715, "step": 122120 }, { "epoch": 34.66647743400511, "grad_norm": 0.11503326892852783, "learning_rate": 6.534828271359636e-05, "loss": 0.0031664468348026275, "step": 122130 }, { "epoch": 34.66931592392847, "grad_norm": 0.7054546475410461, "learning_rate": 6.534544422367302e-05, "loss": 0.002845547907054424, "step": 122140 }, { "epoch": 34.67215441385183, "grad_norm": 0.1965297907590866, "learning_rate": 6.534260573374966e-05, "loss": 0.0036231093108654022, "step": 122150 }, { "epoch": 34.67499290377519, "grad_norm": 11.49445629119873, "learning_rate": 6.533976724382628e-05, "loss": 0.003418346494436264, "step": 122160 }, { "epoch": 34.677831393698554, "grad_norm": 9.815134048461914, "learning_rate": 6.533692875390293e-05, "loss": 0.014207223057746887, "step": 122170 }, { "epoch": 34.68066988362191, "grad_norm": 1.6379940509796143, "learning_rate": 6.533409026397957e-05, "loss": 0.004850973933935165, "step": 122180 }, { "epoch": 34.68350837354527, "grad_norm": 0.38227379322052, "learning_rate": 6.53312517740562e-05, "loss": 0.003502725064754486, "step": 122190 }, { "epoch": 34.686346863468636, "grad_norm": 0.032944899052381516, "learning_rate": 6.532841328413285e-05, "loss": 0.001232980191707611, "step": 122200 }, { "epoch": 34.689185353392, "grad_norm": 1.1862001419067383, "learning_rate": 6.532557479420949e-05, "loss": 0.006770524382591248, "step": 122210 }, { "epoch": 34.692023843315354, "grad_norm": 1.5211012363433838, "learning_rate": 6.532273630428612e-05, "loss": 0.003845862299203873, "step": 122220 }, { "epoch": 34.69486233323872, "grad_norm": 0.07238578796386719, "learning_rate": 6.531989781436276e-05, "loss": 0.003885899484157562, "step": 122230 }, { "epoch": 34.69770082316208, "grad_norm": 0.0045026689767837524, "learning_rate": 6.53170593244394e-05, "loss": 0.0027515754103660584, "step": 122240 }, { "epoch": 34.700539313085436, "grad_norm": 0.07014446705579758, "learning_rate": 6.531422083451604e-05, "loss": 0.005787523835897446, "step": 122250 }, { "epoch": 34.7033778030088, "grad_norm": 9.372686386108398, "learning_rate": 6.531138234459267e-05, "loss": 0.008054859936237335, "step": 122260 }, { "epoch": 34.70621629293216, "grad_norm": 9.588811874389648, "learning_rate": 6.530854385466933e-05, "loss": 0.004153379052877426, "step": 122270 }, { "epoch": 34.70905478285552, "grad_norm": 0.18317072093486786, "learning_rate": 6.530570536474597e-05, "loss": 0.007642856240272522, "step": 122280 }, { "epoch": 34.71189327277888, "grad_norm": 1.1680307388305664, "learning_rate": 6.53028668748226e-05, "loss": 0.005600081384181976, "step": 122290 }, { "epoch": 34.71473176270224, "grad_norm": 0.47032883763313293, "learning_rate": 6.530002838489924e-05, "loss": 0.0019518153741955758, "step": 122300 }, { "epoch": 34.717570252625606, "grad_norm": 0.17898622155189514, "learning_rate": 6.529718989497588e-05, "loss": 0.004440861940383911, "step": 122310 }, { "epoch": 34.72040874254896, "grad_norm": 0.1917894184589386, "learning_rate": 6.52943514050525e-05, "loss": 0.006739561259746551, "step": 122320 }, { "epoch": 34.723247232472325, "grad_norm": 0.13850022852420807, "learning_rate": 6.529151291512915e-05, "loss": 0.010078959912061692, "step": 122330 }, { "epoch": 34.72608572239569, "grad_norm": 0.04218889772891998, "learning_rate": 6.52886744252058e-05, "loss": 0.010856794565916062, "step": 122340 }, { "epoch": 34.72892421231904, "grad_norm": 3.785734176635742, "learning_rate": 6.528583593528243e-05, "loss": 0.004300510510802269, "step": 122350 }, { "epoch": 34.731762702242406, "grad_norm": 1.9344936609268188, "learning_rate": 6.528299744535907e-05, "loss": 0.004588557034730911, "step": 122360 }, { "epoch": 34.73460119216577, "grad_norm": 0.8153248429298401, "learning_rate": 6.528015895543571e-05, "loss": 0.003909099847078323, "step": 122370 }, { "epoch": 34.73743968208913, "grad_norm": 2.579099178314209, "learning_rate": 6.527732046551235e-05, "loss": 0.00475742518901825, "step": 122380 }, { "epoch": 34.74027817201249, "grad_norm": 0.02742755599319935, "learning_rate": 6.527448197558898e-05, "loss": 0.0064716808497905735, "step": 122390 }, { "epoch": 34.74311666193585, "grad_norm": 5.555974960327148, "learning_rate": 6.527164348566564e-05, "loss": 0.008911204338073731, "step": 122400 }, { "epoch": 34.745955151859214, "grad_norm": 0.40832000970840454, "learning_rate": 6.526880499574228e-05, "loss": 0.006559894233942032, "step": 122410 }, { "epoch": 34.74879364178257, "grad_norm": 0.07574240863323212, "learning_rate": 6.52659665058189e-05, "loss": 0.007863300293684006, "step": 122420 }, { "epoch": 34.75163213170593, "grad_norm": 28.71633529663086, "learning_rate": 6.526312801589555e-05, "loss": 0.02056107670068741, "step": 122430 }, { "epoch": 34.754470621629295, "grad_norm": 0.052262160927057266, "learning_rate": 6.526028952597219e-05, "loss": 0.00662291944026947, "step": 122440 }, { "epoch": 34.75730911155265, "grad_norm": 0.5298376679420471, "learning_rate": 6.525745103604882e-05, "loss": 0.010420435667037964, "step": 122450 }, { "epoch": 34.760147601476014, "grad_norm": 0.2313220500946045, "learning_rate": 6.525461254612546e-05, "loss": 0.008924816548824311, "step": 122460 }, { "epoch": 34.76298609139938, "grad_norm": 0.2534920871257782, "learning_rate": 6.525177405620211e-05, "loss": 0.0008092386648058892, "step": 122470 }, { "epoch": 34.76582458132274, "grad_norm": 0.7692059278488159, "learning_rate": 6.524893556627874e-05, "loss": 0.004016993939876557, "step": 122480 }, { "epoch": 34.768663071246095, "grad_norm": 1.4794586896896362, "learning_rate": 6.524609707635538e-05, "loss": 0.012088869512081147, "step": 122490 }, { "epoch": 34.77150156116946, "grad_norm": 6.549243927001953, "learning_rate": 6.524325858643202e-05, "loss": 0.0027276264503598214, "step": 122500 }, { "epoch": 34.77150156116946, "eval_accuracy": 0.9693520696890697, "eval_loss": 0.10795033723115921, "eval_runtime": 31.7387, "eval_samples_per_second": 495.515, "eval_steps_per_second": 7.751, "step": 122500 }, { "epoch": 34.77434005109282, "grad_norm": 0.8010633587837219, "learning_rate": 6.524042009650867e-05, "loss": 0.006794467568397522, "step": 122510 }, { "epoch": 34.77717854101618, "grad_norm": 0.5210892558097839, "learning_rate": 6.523758160658529e-05, "loss": 0.002064274065196514, "step": 122520 }, { "epoch": 34.78001703093954, "grad_norm": 0.38127318024635315, "learning_rate": 6.523474311666193e-05, "loss": 0.006865138560533524, "step": 122530 }, { "epoch": 34.7828555208629, "grad_norm": 0.23486526310443878, "learning_rate": 6.523190462673859e-05, "loss": 0.005589088797569275, "step": 122540 }, { "epoch": 34.78569401078626, "grad_norm": 0.14956527948379517, "learning_rate": 6.522906613681522e-05, "loss": 0.008493191003799439, "step": 122550 }, { "epoch": 34.78853250070962, "grad_norm": 8.225197792053223, "learning_rate": 6.522622764689186e-05, "loss": 0.009365185350179672, "step": 122560 }, { "epoch": 34.791370990632984, "grad_norm": 0.6843498349189758, "learning_rate": 6.52233891569685e-05, "loss": 0.003148317337036133, "step": 122570 }, { "epoch": 34.79420948055635, "grad_norm": 0.058947645127773285, "learning_rate": 6.522055066704513e-05, "loss": 0.006049583852291107, "step": 122580 }, { "epoch": 34.7970479704797, "grad_norm": 0.07899049669504166, "learning_rate": 6.521771217712177e-05, "loss": 0.00302484966814518, "step": 122590 }, { "epoch": 34.799886460403066, "grad_norm": 0.4935496151447296, "learning_rate": 6.521487368719842e-05, "loss": 0.006386324763298035, "step": 122600 }, { "epoch": 34.80272495032643, "grad_norm": 3.368997097015381, "learning_rate": 6.521203519727505e-05, "loss": 0.0053942278027534485, "step": 122610 }, { "epoch": 34.805563440249784, "grad_norm": 0.04686836898326874, "learning_rate": 6.520919670735169e-05, "loss": 0.013360641896724701, "step": 122620 }, { "epoch": 34.80840193017315, "grad_norm": 0.2947852909564972, "learning_rate": 6.520635821742833e-05, "loss": 0.006451833248138428, "step": 122630 }, { "epoch": 34.81124042009651, "grad_norm": 4.3377861976623535, "learning_rate": 6.520351972750498e-05, "loss": 0.012702731788158417, "step": 122640 }, { "epoch": 34.814078910019866, "grad_norm": 0.4195486605167389, "learning_rate": 6.52006812375816e-05, "loss": 0.0025812866166234016, "step": 122650 }, { "epoch": 34.81691739994323, "grad_norm": 0.5187996625900269, "learning_rate": 6.519784274765824e-05, "loss": 0.0030008174479007723, "step": 122660 }, { "epoch": 34.81975588986659, "grad_norm": 0.20222580432891846, "learning_rate": 6.519500425773489e-05, "loss": 0.007525639235973358, "step": 122670 }, { "epoch": 34.822594379789955, "grad_norm": 2.2353641986846924, "learning_rate": 6.519216576781153e-05, "loss": 0.0022400256246328356, "step": 122680 }, { "epoch": 34.82543286971331, "grad_norm": 0.03016316331923008, "learning_rate": 6.518932727788817e-05, "loss": 0.0030402921140193937, "step": 122690 }, { "epoch": 34.82827135963667, "grad_norm": 0.7809737324714661, "learning_rate": 6.518648878796481e-05, "loss": 0.0027246598154306413, "step": 122700 }, { "epoch": 34.831109849560036, "grad_norm": 0.3580397665500641, "learning_rate": 6.518365029804144e-05, "loss": 0.0029597712680697443, "step": 122710 }, { "epoch": 34.83394833948339, "grad_norm": 1.078988790512085, "learning_rate": 6.518081180811808e-05, "loss": 0.008649495244026185, "step": 122720 }, { "epoch": 34.836786829406755, "grad_norm": 2.592554807662964, "learning_rate": 6.517797331819472e-05, "loss": 0.00869375467300415, "step": 122730 }, { "epoch": 34.83962531933012, "grad_norm": 1.322304129600525, "learning_rate": 6.517513482827136e-05, "loss": 0.0024959685280919073, "step": 122740 }, { "epoch": 34.84246380925348, "grad_norm": 13.843086242675781, "learning_rate": 6.5172296338348e-05, "loss": 0.009198613464832306, "step": 122750 }, { "epoch": 34.845302299176836, "grad_norm": 1.5036993026733398, "learning_rate": 6.516945784842465e-05, "loss": 0.006504300981760025, "step": 122760 }, { "epoch": 34.8481407891002, "grad_norm": 1.190007209777832, "learning_rate": 6.516661935850127e-05, "loss": 0.004325412586331368, "step": 122770 }, { "epoch": 34.85097927902356, "grad_norm": 4.4854230880737305, "learning_rate": 6.516378086857791e-05, "loss": 0.007730226218700409, "step": 122780 }, { "epoch": 34.85381776894692, "grad_norm": 0.6568772792816162, "learning_rate": 6.516094237865456e-05, "loss": 0.005169256776571274, "step": 122790 }, { "epoch": 34.85665625887028, "grad_norm": 0.49595290422439575, "learning_rate": 6.51581038887312e-05, "loss": 0.004600518941879272, "step": 122800 }, { "epoch": 34.859494748793644, "grad_norm": 1.6020830869674683, "learning_rate": 6.515526539880784e-05, "loss": 0.005419518053531647, "step": 122810 }, { "epoch": 34.862333238717, "grad_norm": 9.591131210327148, "learning_rate": 6.515242690888448e-05, "loss": 0.008065285533666611, "step": 122820 }, { "epoch": 34.86517172864036, "grad_norm": 5.1198248863220215, "learning_rate": 6.514958841896112e-05, "loss": 0.0038939177989959716, "step": 122830 }, { "epoch": 34.868010218563725, "grad_norm": 0.11730759590864182, "learning_rate": 6.514674992903775e-05, "loss": 0.00441715270280838, "step": 122840 }, { "epoch": 34.87084870848709, "grad_norm": 0.3771185576915741, "learning_rate": 6.514391143911439e-05, "loss": 0.010656940191984177, "step": 122850 }, { "epoch": 34.873687198410444, "grad_norm": 0.5060539245605469, "learning_rate": 6.514107294919103e-05, "loss": 0.007317967712879181, "step": 122860 }, { "epoch": 34.87652568833381, "grad_norm": 1.1179192066192627, "learning_rate": 6.513823445926767e-05, "loss": 0.0019321886822581292, "step": 122870 }, { "epoch": 34.87936417825717, "grad_norm": 0.1301707625389099, "learning_rate": 6.513539596934431e-05, "loss": 0.002627173252403736, "step": 122880 }, { "epoch": 34.882202668180526, "grad_norm": 0.4482869505882263, "learning_rate": 6.513255747942096e-05, "loss": 0.0059134714305400845, "step": 122890 }, { "epoch": 34.88504115810389, "grad_norm": 1.2510905265808105, "learning_rate": 6.512971898949758e-05, "loss": 0.004336058348417282, "step": 122900 }, { "epoch": 34.88787964802725, "grad_norm": 0.43234360218048096, "learning_rate": 6.512688049957423e-05, "loss": 0.0222098246216774, "step": 122910 }, { "epoch": 34.89071813795061, "grad_norm": 6.758641242980957, "learning_rate": 6.512404200965087e-05, "loss": 0.004389127343893051, "step": 122920 }, { "epoch": 34.89355662787397, "grad_norm": 0.5900225043296814, "learning_rate": 6.512120351972751e-05, "loss": 0.014180150628089905, "step": 122930 }, { "epoch": 34.89639511779733, "grad_norm": 2.0587778091430664, "learning_rate": 6.511836502980415e-05, "loss": 0.004373796284198761, "step": 122940 }, { "epoch": 34.899233607720696, "grad_norm": 2.4004077911376953, "learning_rate": 6.511552653988079e-05, "loss": 0.007106940448284149, "step": 122950 }, { "epoch": 34.90207209764405, "grad_norm": 3.9666500091552734, "learning_rate": 6.511268804995743e-05, "loss": 0.00499482974410057, "step": 122960 }, { "epoch": 34.904910587567414, "grad_norm": 5.166602611541748, "learning_rate": 6.510984956003406e-05, "loss": 0.0032051924616098406, "step": 122970 }, { "epoch": 34.90774907749078, "grad_norm": 2.019221544265747, "learning_rate": 6.51070110701107e-05, "loss": 0.014046119153499603, "step": 122980 }, { "epoch": 34.91058756741413, "grad_norm": 0.0988413617014885, "learning_rate": 6.510417258018734e-05, "loss": 0.006494200229644776, "step": 122990 }, { "epoch": 34.913426057337496, "grad_norm": 0.32847774028778076, "learning_rate": 6.510133409026398e-05, "loss": 0.004254234954714775, "step": 123000 }, { "epoch": 34.913426057337496, "eval_accuracy": 0.9715775418070833, "eval_loss": 0.0980750173330307, "eval_runtime": 31.5336, "eval_samples_per_second": 498.739, "eval_steps_per_second": 7.801, "step": 123000 }, { "epoch": 34.91626454726086, "grad_norm": 1.9581429958343506, "learning_rate": 6.509849560034063e-05, "loss": 0.006026730313897133, "step": 123010 }, { "epoch": 34.919103037184215, "grad_norm": 0.0716891884803772, "learning_rate": 6.509565711041727e-05, "loss": 0.004169018566608429, "step": 123020 }, { "epoch": 34.92194152710758, "grad_norm": 7.145716667175293, "learning_rate": 6.50928186204939e-05, "loss": 0.009784446656703949, "step": 123030 }, { "epoch": 34.92478001703094, "grad_norm": 6.575927734375, "learning_rate": 6.508998013057054e-05, "loss": 0.004447620362043381, "step": 123040 }, { "epoch": 34.9276185069543, "grad_norm": 5.477135181427002, "learning_rate": 6.508714164064718e-05, "loss": 0.007135018706321716, "step": 123050 }, { "epoch": 34.93045699687766, "grad_norm": 2.1018199920654297, "learning_rate": 6.508458699971616e-05, "loss": 0.008049090206623078, "step": 123060 }, { "epoch": 34.93329548680102, "grad_norm": 0.4277038872241974, "learning_rate": 6.50817485097928e-05, "loss": 0.014344774186611176, "step": 123070 }, { "epoch": 34.936133976724385, "grad_norm": 13.465224266052246, "learning_rate": 6.507891001986943e-05, "loss": 0.0125269815325737, "step": 123080 }, { "epoch": 34.93897246664774, "grad_norm": 19.660667419433594, "learning_rate": 6.507607152994607e-05, "loss": 0.01384229063987732, "step": 123090 }, { "epoch": 34.9418109565711, "grad_norm": 0.11826115846633911, "learning_rate": 6.507323304002271e-05, "loss": 0.01463172435760498, "step": 123100 }, { "epoch": 34.944649446494466, "grad_norm": 4.548632621765137, "learning_rate": 6.507039455009935e-05, "loss": 0.005859664082527161, "step": 123110 }, { "epoch": 34.94748793641783, "grad_norm": 4.475899696350098, "learning_rate": 6.506755606017599e-05, "loss": 0.028899195790290832, "step": 123120 }, { "epoch": 34.950326426341185, "grad_norm": 0.1496761292219162, "learning_rate": 6.506471757025263e-05, "loss": 0.0035422831773757935, "step": 123130 }, { "epoch": 34.95316491626455, "grad_norm": 0.6479604244232178, "learning_rate": 6.506187908032927e-05, "loss": 0.011058783531188965, "step": 123140 }, { "epoch": 34.95600340618791, "grad_norm": 0.12235426157712936, "learning_rate": 6.50590405904059e-05, "loss": 0.003103846870362759, "step": 123150 }, { "epoch": 34.95884189611127, "grad_norm": 1.573138952255249, "learning_rate": 6.505620210048254e-05, "loss": 0.0033440306782722474, "step": 123160 }, { "epoch": 34.96168038603463, "grad_norm": 0.3734928369522095, "learning_rate": 6.505336361055919e-05, "loss": 0.010607029497623443, "step": 123170 }, { "epoch": 34.96451887595799, "grad_norm": 1.0418274402618408, "learning_rate": 6.505052512063583e-05, "loss": 0.005559181794524193, "step": 123180 }, { "epoch": 34.96735736588135, "grad_norm": 6.6771440505981445, "learning_rate": 6.504768663071247e-05, "loss": 0.005438496544957161, "step": 123190 }, { "epoch": 34.97019585580471, "grad_norm": 0.11661142855882645, "learning_rate": 6.504484814078911e-05, "loss": 0.009551359713077546, "step": 123200 }, { "epoch": 34.973034345728074, "grad_norm": 1.4923397302627563, "learning_rate": 6.504200965086574e-05, "loss": 0.004081368446350098, "step": 123210 }, { "epoch": 34.97587283565144, "grad_norm": 1.7484431266784668, "learning_rate": 6.503917116094238e-05, "loss": 0.0041174527257680895, "step": 123220 }, { "epoch": 34.97871132557479, "grad_norm": 3.9637866020202637, "learning_rate": 6.503633267101902e-05, "loss": 0.0025552863255143164, "step": 123230 }, { "epoch": 34.981549815498155, "grad_norm": 0.6464236378669739, "learning_rate": 6.503349418109566e-05, "loss": 0.0028840348124504088, "step": 123240 }, { "epoch": 34.98438830542152, "grad_norm": 0.12997175753116608, "learning_rate": 6.50306556911723e-05, "loss": 0.0010608045384287834, "step": 123250 }, { "epoch": 34.987226795344874, "grad_norm": 0.026326360180974007, "learning_rate": 6.502781720124894e-05, "loss": 0.008442394435405731, "step": 123260 }, { "epoch": 34.99006528526824, "grad_norm": 0.8273661732673645, "learning_rate": 6.502497871132559e-05, "loss": 0.007401278614997864, "step": 123270 }, { "epoch": 34.9929037751916, "grad_norm": 16.157241821289062, "learning_rate": 6.502214022140221e-05, "loss": 0.01034441515803337, "step": 123280 }, { "epoch": 34.995742265114956, "grad_norm": 0.9919429421424866, "learning_rate": 6.501930173147885e-05, "loss": 0.00947769433259964, "step": 123290 }, { "epoch": 34.99858075503832, "grad_norm": 4.441378116607666, "learning_rate": 6.50164632415555e-05, "loss": 0.0032621417194604874, "step": 123300 }, { "epoch": 35.00141924496168, "grad_norm": 0.28039756417274475, "learning_rate": 6.501362475163212e-05, "loss": 0.00488453134894371, "step": 123310 }, { "epoch": 35.004257734885044, "grad_norm": 1.2004787921905518, "learning_rate": 6.501078626170878e-05, "loss": 0.0033238157629966736, "step": 123320 }, { "epoch": 35.0070962248084, "grad_norm": 10.741399765014648, "learning_rate": 6.500794777178542e-05, "loss": 0.007548578083515167, "step": 123330 }, { "epoch": 35.00993471473176, "grad_norm": 0.3778873383998871, "learning_rate": 6.500510928186205e-05, "loss": 0.0014742210507392884, "step": 123340 }, { "epoch": 35.012773204655126, "grad_norm": 2.465121269226074, "learning_rate": 6.500227079193869e-05, "loss": 0.00376603826880455, "step": 123350 }, { "epoch": 35.01561169457848, "grad_norm": 0.22278501093387604, "learning_rate": 6.499943230201533e-05, "loss": 0.0022728117182850838, "step": 123360 }, { "epoch": 35.018450184501845, "grad_norm": 0.1620955616235733, "learning_rate": 6.499659381209197e-05, "loss": 0.003637935221195221, "step": 123370 }, { "epoch": 35.02128867442521, "grad_norm": 1.1723861694335938, "learning_rate": 6.499375532216861e-05, "loss": 0.00351867750287056, "step": 123380 }, { "epoch": 35.02412716434856, "grad_norm": 0.044418927282094955, "learning_rate": 6.499091683224525e-05, "loss": 0.0013112232089042663, "step": 123390 }, { "epoch": 35.026965654271926, "grad_norm": 0.2748942971229553, "learning_rate": 6.49880783423219e-05, "loss": 0.008447934687137604, "step": 123400 }, { "epoch": 35.02980414419529, "grad_norm": 0.6357675194740295, "learning_rate": 6.498523985239852e-05, "loss": 0.005553607642650604, "step": 123410 }, { "epoch": 35.03264263411865, "grad_norm": 0.07412460446357727, "learning_rate": 6.498240136247517e-05, "loss": 0.004420771449804306, "step": 123420 }, { "epoch": 35.03548112404201, "grad_norm": 0.019539849832654, "learning_rate": 6.497956287255181e-05, "loss": 0.0026613591238856316, "step": 123430 }, { "epoch": 35.03831961396537, "grad_norm": 0.5748937129974365, "learning_rate": 6.497672438262843e-05, "loss": 0.0058439701795578005, "step": 123440 }, { "epoch": 35.04115810388873, "grad_norm": 0.1638011485338211, "learning_rate": 6.497388589270509e-05, "loss": 0.010530485212802887, "step": 123450 }, { "epoch": 35.04399659381209, "grad_norm": 0.041668184101581573, "learning_rate": 6.497104740278173e-05, "loss": 0.001187422126531601, "step": 123460 }, { "epoch": 35.04683508373545, "grad_norm": 0.7090847492218018, "learning_rate": 6.496820891285836e-05, "loss": 0.0016980024054646492, "step": 123470 }, { "epoch": 35.049673573658815, "grad_norm": 0.13345836102962494, "learning_rate": 6.4965370422935e-05, "loss": 0.0009114157408475875, "step": 123480 }, { "epoch": 35.05251206358217, "grad_norm": 1.2167409658432007, "learning_rate": 6.496253193301164e-05, "loss": 0.002050544321537018, "step": 123490 }, { "epoch": 35.055350553505534, "grad_norm": 0.042073171585798264, "learning_rate": 6.495969344308828e-05, "loss": 0.0042908497154712675, "step": 123500 }, { "epoch": 35.055350553505534, "eval_accuracy": 0.9746296178546449, "eval_loss": 0.08595505356788635, "eval_runtime": 31.7227, "eval_samples_per_second": 495.764, "eval_steps_per_second": 7.755, "step": 123500 }, { "epoch": 35.0581890434289, "grad_norm": 1.1798826456069946, "learning_rate": 6.495685495316491e-05, "loss": 0.003264184296131134, "step": 123510 }, { "epoch": 35.06102753335226, "grad_norm": 14.037063598632812, "learning_rate": 6.495401646324157e-05, "loss": 0.0070368461310863495, "step": 123520 }, { "epoch": 35.063866023275615, "grad_norm": 1.8519291877746582, "learning_rate": 6.495117797331821e-05, "loss": 0.0022337231785058975, "step": 123530 }, { "epoch": 35.06670451319898, "grad_norm": 0.13651880621910095, "learning_rate": 6.494833948339483e-05, "loss": 0.004648395627737045, "step": 123540 }, { "epoch": 35.06954300312234, "grad_norm": 0.5002447366714478, "learning_rate": 6.494550099347148e-05, "loss": 0.0021394338458776475, "step": 123550 }, { "epoch": 35.0723814930457, "grad_norm": 0.7359775900840759, "learning_rate": 6.494266250354812e-05, "loss": 0.002437192015349865, "step": 123560 }, { "epoch": 35.07521998296906, "grad_norm": 1.0284568071365356, "learning_rate": 6.493982401362475e-05, "loss": 0.0009309140965342522, "step": 123570 }, { "epoch": 35.07805847289242, "grad_norm": 1.1425306797027588, "learning_rate": 6.49369855237014e-05, "loss": 0.003125985711812973, "step": 123580 }, { "epoch": 35.080896962815785, "grad_norm": 0.47948789596557617, "learning_rate": 6.493414703377804e-05, "loss": 0.0025972478091716767, "step": 123590 }, { "epoch": 35.08373545273914, "grad_norm": 0.11085069924592972, "learning_rate": 6.493130854385467e-05, "loss": 0.004784049838781357, "step": 123600 }, { "epoch": 35.086573942662504, "grad_norm": 0.07507847994565964, "learning_rate": 6.492847005393131e-05, "loss": 0.007839050143957138, "step": 123610 }, { "epoch": 35.08941243258587, "grad_norm": 0.7462412714958191, "learning_rate": 6.492563156400795e-05, "loss": 0.002127721719443798, "step": 123620 }, { "epoch": 35.09225092250922, "grad_norm": 0.07783081382513046, "learning_rate": 6.49227930740846e-05, "loss": 0.0026726726442575454, "step": 123630 }, { "epoch": 35.095089412432586, "grad_norm": 0.38809579610824585, "learning_rate": 6.491995458416122e-05, "loss": 0.006369081139564514, "step": 123640 }, { "epoch": 35.09792790235595, "grad_norm": 0.2409714311361313, "learning_rate": 6.491711609423788e-05, "loss": 0.014126959443092345, "step": 123650 }, { "epoch": 35.100766392279304, "grad_norm": 0.7101395130157471, "learning_rate": 6.49142776043145e-05, "loss": 0.003222520649433136, "step": 123660 }, { "epoch": 35.10360488220267, "grad_norm": 3.327662944793701, "learning_rate": 6.491143911439115e-05, "loss": 0.004929027706384659, "step": 123670 }, { "epoch": 35.10644337212603, "grad_norm": 0.11978889256715775, "learning_rate": 6.490860062446779e-05, "loss": 0.0018755283206701278, "step": 123680 }, { "epoch": 35.10928186204939, "grad_norm": 5.899048805236816, "learning_rate": 6.490576213454443e-05, "loss": 0.006101794540882111, "step": 123690 }, { "epoch": 35.11212035197275, "grad_norm": 0.2056846022605896, "learning_rate": 6.490292364462106e-05, "loss": 0.005246323347091675, "step": 123700 }, { "epoch": 35.11495884189611, "grad_norm": 0.9035454988479614, "learning_rate": 6.49000851546977e-05, "loss": 0.0018155500292778015, "step": 123710 }, { "epoch": 35.117797331819474, "grad_norm": 0.03675409406423569, "learning_rate": 6.489724666477435e-05, "loss": 0.001511766202747822, "step": 123720 }, { "epoch": 35.12063582174283, "grad_norm": 0.6537216901779175, "learning_rate": 6.489440817485098e-05, "loss": 0.0027268130332231523, "step": 123730 }, { "epoch": 35.12347431166619, "grad_norm": 0.016659442335367203, "learning_rate": 6.489156968492762e-05, "loss": 0.002885117568075657, "step": 123740 }, { "epoch": 35.126312801589556, "grad_norm": 5.474729061126709, "learning_rate": 6.488873119500426e-05, "loss": 0.0033930309116840364, "step": 123750 }, { "epoch": 35.12915129151291, "grad_norm": 0.2748189866542816, "learning_rate": 6.488589270508089e-05, "loss": 0.010052946954965591, "step": 123760 }, { "epoch": 35.131989781436275, "grad_norm": 0.18555425107479095, "learning_rate": 6.488305421515753e-05, "loss": 0.0016905482858419417, "step": 123770 }, { "epoch": 35.13482827135964, "grad_norm": 0.11990313977003098, "learning_rate": 6.488021572523419e-05, "loss": 0.0018359489738941192, "step": 123780 }, { "epoch": 35.137666761283, "grad_norm": 0.0719369575381279, "learning_rate": 6.487737723531081e-05, "loss": 0.0013567242771387101, "step": 123790 }, { "epoch": 35.140505251206356, "grad_norm": 0.28665465116500854, "learning_rate": 6.487453874538746e-05, "loss": 0.0009961199015378953, "step": 123800 }, { "epoch": 35.14334374112972, "grad_norm": 0.4773728847503662, "learning_rate": 6.48717002554641e-05, "loss": 0.0012740563601255418, "step": 123810 }, { "epoch": 35.14618223105308, "grad_norm": 0.7105950713157654, "learning_rate": 6.486886176554074e-05, "loss": 0.0010430144146084785, "step": 123820 }, { "epoch": 35.14902072097644, "grad_norm": 0.1763571947813034, "learning_rate": 6.486602327561737e-05, "loss": 0.004257035255432129, "step": 123830 }, { "epoch": 35.1518592108998, "grad_norm": 0.6583411693572998, "learning_rate": 6.486318478569401e-05, "loss": 0.0017475824803113937, "step": 123840 }, { "epoch": 35.154697700823164, "grad_norm": 0.24023713171482086, "learning_rate": 6.486034629577066e-05, "loss": 0.002329159528017044, "step": 123850 }, { "epoch": 35.15753619074652, "grad_norm": 0.03802018240094185, "learning_rate": 6.485750780584729e-05, "loss": 0.003923746570944786, "step": 123860 }, { "epoch": 35.16037468066988, "grad_norm": 0.09055881202220917, "learning_rate": 6.485466931592393e-05, "loss": 0.010653698444366455, "step": 123870 }, { "epoch": 35.163213170593245, "grad_norm": 0.02531651221215725, "learning_rate": 6.485183082600057e-05, "loss": 0.0019262006506323814, "step": 123880 }, { "epoch": 35.16605166051661, "grad_norm": 2.1929142475128174, "learning_rate": 6.48489923360772e-05, "loss": 0.005838973075151443, "step": 123890 }, { "epoch": 35.168890150439964, "grad_norm": 0.10594841092824936, "learning_rate": 6.484615384615384e-05, "loss": 0.0043249309062957765, "step": 123900 }, { "epoch": 35.17172864036333, "grad_norm": 0.11679910868406296, "learning_rate": 6.48433153562305e-05, "loss": 0.0023911599069833755, "step": 123910 }, { "epoch": 35.17456713028669, "grad_norm": 0.20320293307304382, "learning_rate": 6.484047686630713e-05, "loss": 0.015488362312316895, "step": 123920 }, { "epoch": 35.177405620210045, "grad_norm": 0.3133673071861267, "learning_rate": 6.483763837638377e-05, "loss": 0.0070457324385643, "step": 123930 }, { "epoch": 35.18024411013341, "grad_norm": 0.07562874257564545, "learning_rate": 6.483479988646041e-05, "loss": 0.0022812724113464356, "step": 123940 }, { "epoch": 35.18308260005677, "grad_norm": 0.07803227752447128, "learning_rate": 6.483196139653705e-05, "loss": 0.004335168749094009, "step": 123950 }, { "epoch": 35.185921089980134, "grad_norm": 0.5244848728179932, "learning_rate": 6.482912290661368e-05, "loss": 0.001566123776137829, "step": 123960 }, { "epoch": 35.18875957990349, "grad_norm": 1.4270581007003784, "learning_rate": 6.482628441669032e-05, "loss": 0.002507426403462887, "step": 123970 }, { "epoch": 35.19159806982685, "grad_norm": 0.5445083975791931, "learning_rate": 6.482344592676697e-05, "loss": 0.0021049004048109055, "step": 123980 }, { "epoch": 35.194436559750216, "grad_norm": 0.04573143646121025, "learning_rate": 6.48206074368436e-05, "loss": 0.004109510779380798, "step": 123990 }, { "epoch": 35.19727504967357, "grad_norm": 0.3981334865093231, "learning_rate": 6.481776894692024e-05, "loss": 0.0154224693775177, "step": 124000 }, { "epoch": 35.19727504967357, "eval_accuracy": 0.9736122591721244, "eval_loss": 0.09443560987710953, "eval_runtime": 32.6858, "eval_samples_per_second": 481.157, "eval_steps_per_second": 7.526, "step": 124000 }, { "epoch": 35.200113539596934, "grad_norm": 0.7593827843666077, "learning_rate": 6.481493045699688e-05, "loss": 0.005224959924817085, "step": 124010 }, { "epoch": 35.2029520295203, "grad_norm": 3.7218992710113525, "learning_rate": 6.481209196707351e-05, "loss": 0.0020976241677999495, "step": 124020 }, { "epoch": 35.20579051944365, "grad_norm": 0.028922753408551216, "learning_rate": 6.480925347715015e-05, "loss": 0.005799264460802078, "step": 124030 }, { "epoch": 35.208629009367016, "grad_norm": 1.5619020462036133, "learning_rate": 6.48064149872268e-05, "loss": 0.0034693576395511626, "step": 124040 }, { "epoch": 35.21146749929038, "grad_norm": 0.1801603138446808, "learning_rate": 6.480357649730344e-05, "loss": 0.00175578985363245, "step": 124050 }, { "epoch": 35.21430598921374, "grad_norm": 1.5496667623519897, "learning_rate": 6.480073800738008e-05, "loss": 0.0032393354922533035, "step": 124060 }, { "epoch": 35.2171444791371, "grad_norm": 11.993553161621094, "learning_rate": 6.479789951745672e-05, "loss": 0.007441597431898117, "step": 124070 }, { "epoch": 35.21998296906046, "grad_norm": 0.22900712490081787, "learning_rate": 6.479506102753336e-05, "loss": 0.008034928888082504, "step": 124080 }, { "epoch": 35.22282145898382, "grad_norm": 0.8359187245368958, "learning_rate": 6.479222253760999e-05, "loss": 0.012940695881843567, "step": 124090 }, { "epoch": 35.22565994890718, "grad_norm": 0.23055890202522278, "learning_rate": 6.478938404768663e-05, "loss": 0.001070609875023365, "step": 124100 }, { "epoch": 35.22849843883054, "grad_norm": 10.563587188720703, "learning_rate": 6.478654555776328e-05, "loss": 0.011516466736793518, "step": 124110 }, { "epoch": 35.231336928753905, "grad_norm": 0.04512307420372963, "learning_rate": 6.478370706783991e-05, "loss": 0.0006166955456137658, "step": 124120 }, { "epoch": 35.23417541867726, "grad_norm": 0.11999720335006714, "learning_rate": 6.478086857791655e-05, "loss": 0.0038831207901239397, "step": 124130 }, { "epoch": 35.23701390860062, "grad_norm": 0.26278963685035706, "learning_rate": 6.47780300879932e-05, "loss": 0.00224434956908226, "step": 124140 }, { "epoch": 35.239852398523986, "grad_norm": 0.1841161847114563, "learning_rate": 6.477519159806982e-05, "loss": 0.00695534348487854, "step": 124150 }, { "epoch": 35.24269088844735, "grad_norm": 0.3225573003292084, "learning_rate": 6.477235310814646e-05, "loss": 0.0033076439052820207, "step": 124160 }, { "epoch": 35.245529378370705, "grad_norm": 0.49394699931144714, "learning_rate": 6.47695146182231e-05, "loss": 0.0024590885266661646, "step": 124170 }, { "epoch": 35.24836786829407, "grad_norm": 0.3010150194168091, "learning_rate": 6.476667612829975e-05, "loss": 0.005991066619753837, "step": 124180 }, { "epoch": 35.25120635821743, "grad_norm": 3.922555446624756, "learning_rate": 6.476383763837639e-05, "loss": 0.004172184318304062, "step": 124190 }, { "epoch": 35.254044848140786, "grad_norm": 18.703628540039062, "learning_rate": 6.476099914845303e-05, "loss": 0.013839299976825713, "step": 124200 }, { "epoch": 35.25688333806415, "grad_norm": 0.04546620324254036, "learning_rate": 6.475816065852967e-05, "loss": 0.001329273171722889, "step": 124210 }, { "epoch": 35.25972182798751, "grad_norm": 0.16649018228054047, "learning_rate": 6.47553221686063e-05, "loss": 0.0022044172510504723, "step": 124220 }, { "epoch": 35.26256031791087, "grad_norm": 0.22203882038593292, "learning_rate": 6.475248367868294e-05, "loss": 0.003903895989060402, "step": 124230 }, { "epoch": 35.26539880783423, "grad_norm": 1.7335249185562134, "learning_rate": 6.474964518875958e-05, "loss": 0.008229069411754608, "step": 124240 }, { "epoch": 35.268237297757594, "grad_norm": 12.773519515991211, "learning_rate": 6.474680669883622e-05, "loss": 0.009718339145183563, "step": 124250 }, { "epoch": 35.27107578768096, "grad_norm": 0.32537272572517395, "learning_rate": 6.474396820891286e-05, "loss": 0.0024151701480150225, "step": 124260 }, { "epoch": 35.27391427760431, "grad_norm": 0.29036396741867065, "learning_rate": 6.47411297189895e-05, "loss": 0.002510245516896248, "step": 124270 }, { "epoch": 35.276752767527675, "grad_norm": 9.233658790588379, "learning_rate": 6.473829122906613e-05, "loss": 0.008023773133754731, "step": 124280 }, { "epoch": 35.27959125745104, "grad_norm": 0.6115303039550781, "learning_rate": 6.473545273914278e-05, "loss": 0.0014342835173010826, "step": 124290 }, { "epoch": 35.282429747374394, "grad_norm": 0.2653685212135315, "learning_rate": 6.473261424921942e-05, "loss": 0.0017992928624153137, "step": 124300 }, { "epoch": 35.28526823729776, "grad_norm": 0.07249756902456284, "learning_rate": 6.472977575929606e-05, "loss": 0.0012476861476898194, "step": 124310 }, { "epoch": 35.28810672722112, "grad_norm": 0.041601844131946564, "learning_rate": 6.47269372693727e-05, "loss": 0.007989827543497086, "step": 124320 }, { "epoch": 35.29094521714448, "grad_norm": 0.07185827940702438, "learning_rate": 6.472409877944934e-05, "loss": 0.0005686616525053978, "step": 124330 }, { "epoch": 35.29378370706784, "grad_norm": 2.94441556930542, "learning_rate": 6.472126028952598e-05, "loss": 0.001534784398972988, "step": 124340 }, { "epoch": 35.2966221969912, "grad_norm": 1.1852378845214844, "learning_rate": 6.471842179960261e-05, "loss": 0.0017348313704133034, "step": 124350 }, { "epoch": 35.299460686914564, "grad_norm": 4.328054904937744, "learning_rate": 6.471586715867159e-05, "loss": 0.012050793319940568, "step": 124360 }, { "epoch": 35.30229917683792, "grad_norm": 0.7507839798927307, "learning_rate": 6.471302866874823e-05, "loss": 0.00557251125574112, "step": 124370 }, { "epoch": 35.30513766676128, "grad_norm": 1.5571794509887695, "learning_rate": 6.471019017882487e-05, "loss": 0.0037160709500312803, "step": 124380 }, { "epoch": 35.307976156684646, "grad_norm": 0.6430335640907288, "learning_rate": 6.470735168890151e-05, "loss": 0.003933653980493545, "step": 124390 }, { "epoch": 35.310814646608, "grad_norm": 0.3176755905151367, "learning_rate": 6.470451319897814e-05, "loss": 0.007839932292699813, "step": 124400 }, { "epoch": 35.313653136531364, "grad_norm": 0.6366283893585205, "learning_rate": 6.470167470905478e-05, "loss": 0.004763571172952652, "step": 124410 }, { "epoch": 35.31649162645473, "grad_norm": 0.08230401575565338, "learning_rate": 6.469883621913142e-05, "loss": 0.012808883190155029, "step": 124420 }, { "epoch": 35.31933011637809, "grad_norm": 0.08792869001626968, "learning_rate": 6.469599772920807e-05, "loss": 0.00757981464266777, "step": 124430 }, { "epoch": 35.322168606301446, "grad_norm": 0.13389721512794495, "learning_rate": 6.469315923928471e-05, "loss": 0.0026062030345201494, "step": 124440 }, { "epoch": 35.32500709622481, "grad_norm": 0.17631679773330688, "learning_rate": 6.469032074936135e-05, "loss": 0.0029279107227921488, "step": 124450 }, { "epoch": 35.32784558614817, "grad_norm": 0.11230606585741043, "learning_rate": 6.468748225943798e-05, "loss": 0.003203073889017105, "step": 124460 }, { "epoch": 35.33068407607153, "grad_norm": 5.077963352203369, "learning_rate": 6.468464376951462e-05, "loss": 0.0040066268295049666, "step": 124470 }, { "epoch": 35.33352256599489, "grad_norm": 0.2952515184879303, "learning_rate": 6.468180527959126e-05, "loss": 0.006278103590011597, "step": 124480 }, { "epoch": 35.33636105591825, "grad_norm": 0.6553015112876892, "learning_rate": 6.46789667896679e-05, "loss": 0.006164129823446274, "step": 124490 }, { "epoch": 35.33919954584161, "grad_norm": 0.05470352619886398, "learning_rate": 6.467612829974454e-05, "loss": 0.003238702192902565, "step": 124500 }, { "epoch": 35.33919954584161, "eval_accuracy": 0.9672537674063713, "eval_loss": 0.11323834955692291, "eval_runtime": 31.7161, "eval_samples_per_second": 495.867, "eval_steps_per_second": 7.756, "step": 124500 }, { "epoch": 35.34203803576497, "grad_norm": 0.2669536769390106, "learning_rate": 6.467328980982118e-05, "loss": 0.0028995968401432036, "step": 124510 }, { "epoch": 35.344876525688335, "grad_norm": 0.1884731650352478, "learning_rate": 6.467045131989781e-05, "loss": 0.010411982238292695, "step": 124520 }, { "epoch": 35.3477150156117, "grad_norm": 5.714053153991699, "learning_rate": 6.466761282997445e-05, "loss": 0.0029901187866926194, "step": 124530 }, { "epoch": 35.35055350553505, "grad_norm": 0.018948949873447418, "learning_rate": 6.46647743400511e-05, "loss": 0.002273883670568466, "step": 124540 }, { "epoch": 35.353391995458416, "grad_norm": 0.5497522354125977, "learning_rate": 6.466193585012774e-05, "loss": 0.004661240428686142, "step": 124550 }, { "epoch": 35.35623048538178, "grad_norm": 0.2104068547487259, "learning_rate": 6.465909736020438e-05, "loss": 0.002843768149614334, "step": 124560 }, { "epoch": 35.359068975305135, "grad_norm": 0.35174691677093506, "learning_rate": 6.465625887028102e-05, "loss": 0.0036916732788085938, "step": 124570 }, { "epoch": 35.3619074652285, "grad_norm": 1.3930494785308838, "learning_rate": 6.465342038035766e-05, "loss": 0.009654825925827027, "step": 124580 }, { "epoch": 35.36474595515186, "grad_norm": 0.2108253836631775, "learning_rate": 6.465058189043429e-05, "loss": 0.014609508216381073, "step": 124590 }, { "epoch": 35.36758444507522, "grad_norm": 3.552393674850464, "learning_rate": 6.464774340051093e-05, "loss": 0.015583406388759612, "step": 124600 }, { "epoch": 35.37042293499858, "grad_norm": 0.09530099481344223, "learning_rate": 6.464490491058757e-05, "loss": 0.006617793440818786, "step": 124610 }, { "epoch": 35.37326142492194, "grad_norm": 0.19090984761714935, "learning_rate": 6.46420664206642e-05, "loss": 0.0013797318562865258, "step": 124620 }, { "epoch": 35.376099914845305, "grad_norm": 0.30811187624931335, "learning_rate": 6.463922793074085e-05, "loss": 0.007688663899898529, "step": 124630 }, { "epoch": 35.37893840476866, "grad_norm": 1.1452854871749878, "learning_rate": 6.46363894408175e-05, "loss": 0.0016369694843888284, "step": 124640 }, { "epoch": 35.381776894692024, "grad_norm": 0.29348257184028625, "learning_rate": 6.463355095089412e-05, "loss": 0.002300905808806419, "step": 124650 }, { "epoch": 35.38461538461539, "grad_norm": 3.268918991088867, "learning_rate": 6.463071246097076e-05, "loss": 0.007275217771530151, "step": 124660 }, { "epoch": 35.38745387453874, "grad_norm": 0.20252858102321625, "learning_rate": 6.46278739710474e-05, "loss": 0.005437327921390534, "step": 124670 }, { "epoch": 35.390292364462105, "grad_norm": 0.0661584809422493, "learning_rate": 6.462503548112405e-05, "loss": 0.0055758640170097355, "step": 124680 }, { "epoch": 35.39313085438547, "grad_norm": 1.109292984008789, "learning_rate": 6.462219699120069e-05, "loss": 0.012798486649990082, "step": 124690 }, { "epoch": 35.395969344308824, "grad_norm": 0.048671137541532516, "learning_rate": 6.461935850127733e-05, "loss": 0.006836451590061188, "step": 124700 }, { "epoch": 35.39880783423219, "grad_norm": 0.5001567006111145, "learning_rate": 6.461652001135397e-05, "loss": 0.004541724920272827, "step": 124710 }, { "epoch": 35.40164632415555, "grad_norm": 0.02809389866888523, "learning_rate": 6.46136815214306e-05, "loss": 0.004792189970612526, "step": 124720 }, { "epoch": 35.40448481407891, "grad_norm": 0.08299035578966141, "learning_rate": 6.461084303150724e-05, "loss": 0.005103607475757599, "step": 124730 }, { "epoch": 35.40732330400227, "grad_norm": 1.3819055557250977, "learning_rate": 6.460800454158388e-05, "loss": 0.012244104593992233, "step": 124740 }, { "epoch": 35.41016179392563, "grad_norm": 0.6637468338012695, "learning_rate": 6.460516605166051e-05, "loss": 0.012076373398303985, "step": 124750 }, { "epoch": 35.413000283848994, "grad_norm": 0.6656967401504517, "learning_rate": 6.460232756173716e-05, "loss": 0.028901314735412596, "step": 124760 }, { "epoch": 35.41583877377235, "grad_norm": 0.024717943742871284, "learning_rate": 6.45994890718138e-05, "loss": 0.001671173982322216, "step": 124770 }, { "epoch": 35.41867726369571, "grad_norm": 3.7956604957580566, "learning_rate": 6.459665058189043e-05, "loss": 0.0027819059789180754, "step": 124780 }, { "epoch": 35.421515753619076, "grad_norm": 0.9437547922134399, "learning_rate": 6.459381209196707e-05, "loss": 0.003428303450345993, "step": 124790 }, { "epoch": 35.42435424354244, "grad_norm": 0.9660364389419556, "learning_rate": 6.459097360204372e-05, "loss": 0.007007385045289994, "step": 124800 }, { "epoch": 35.427192733465795, "grad_norm": 7.1223955154418945, "learning_rate": 6.458813511212036e-05, "loss": 0.0050876297056674956, "step": 124810 }, { "epoch": 35.43003122338916, "grad_norm": 4.550117015838623, "learning_rate": 6.458529662219698e-05, "loss": 0.006623812019824982, "step": 124820 }, { "epoch": 35.43286971331252, "grad_norm": 0.4335205852985382, "learning_rate": 6.458245813227364e-05, "loss": 0.003278358280658722, "step": 124830 }, { "epoch": 35.435708203235876, "grad_norm": 0.025949157774448395, "learning_rate": 6.457961964235028e-05, "loss": 0.006486078351736068, "step": 124840 }, { "epoch": 35.43854669315924, "grad_norm": 6.395306587219238, "learning_rate": 6.457678115242691e-05, "loss": 0.014768750965595245, "step": 124850 }, { "epoch": 35.4413851830826, "grad_norm": 3.8973159790039062, "learning_rate": 6.457394266250355e-05, "loss": 0.0062584981322288515, "step": 124860 }, { "epoch": 35.44422367300596, "grad_norm": 1.3165786266326904, "learning_rate": 6.457110417258019e-05, "loss": 0.010468196123838425, "step": 124870 }, { "epoch": 35.44706216292932, "grad_norm": 0.1820409744977951, "learning_rate": 6.456826568265682e-05, "loss": 0.022558340430259706, "step": 124880 }, { "epoch": 35.44990065285268, "grad_norm": 0.5113018751144409, "learning_rate": 6.456542719273347e-05, "loss": 0.003203253448009491, "step": 124890 }, { "epoch": 35.452739142776046, "grad_norm": 0.032387614250183105, "learning_rate": 6.456258870281012e-05, "loss": 0.002933865785598755, "step": 124900 }, { "epoch": 35.4555776326994, "grad_norm": 0.34201836585998535, "learning_rate": 6.455975021288674e-05, "loss": 0.006065483391284943, "step": 124910 }, { "epoch": 35.458416122622765, "grad_norm": 2.2335569858551025, "learning_rate": 6.455691172296338e-05, "loss": 0.008176913857460022, "step": 124920 }, { "epoch": 35.46125461254613, "grad_norm": 8.967089653015137, "learning_rate": 6.455407323304003e-05, "loss": 0.005386056005954742, "step": 124930 }, { "epoch": 35.464093102469484, "grad_norm": 13.451428413391113, "learning_rate": 6.455123474311667e-05, "loss": 0.009882912039756775, "step": 124940 }, { "epoch": 35.46693159239285, "grad_norm": 2.963137149810791, "learning_rate": 6.45483962531933e-05, "loss": 0.014495460689067841, "step": 124950 }, { "epoch": 35.46977008231621, "grad_norm": 0.19372975826263428, "learning_rate": 6.454555776326995e-05, "loss": 0.007631847262382507, "step": 124960 }, { "epoch": 35.472608572239565, "grad_norm": 12.685774803161621, "learning_rate": 6.454271927334659e-05, "loss": 0.01924920529127121, "step": 124970 }, { "epoch": 35.47544706216293, "grad_norm": 5.221495628356934, "learning_rate": 6.453988078342322e-05, "loss": 0.009094873815774918, "step": 124980 }, { "epoch": 35.47828555208629, "grad_norm": 0.3292238116264343, "learning_rate": 6.453704229349986e-05, "loss": 0.009680644422769547, "step": 124990 }, { "epoch": 35.481124042009654, "grad_norm": 0.09861037135124207, "learning_rate": 6.45342038035765e-05, "loss": 0.003667459636926651, "step": 125000 }, { "epoch": 35.481124042009654, "eval_accuracy": 0.9736122591721244, "eval_loss": 0.09246641397476196, "eval_runtime": 32.1676, "eval_samples_per_second": 488.908, "eval_steps_per_second": 7.647, "step": 125000 }, { "epoch": 35.48396253193301, "grad_norm": 0.35291433334350586, "learning_rate": 6.453136531365313e-05, "loss": 0.01767518073320389, "step": 125010 }, { "epoch": 35.48680102185637, "grad_norm": 0.6744071245193481, "learning_rate": 6.452852682372977e-05, "loss": 0.003070857934653759, "step": 125020 }, { "epoch": 35.489639511779735, "grad_norm": 3.7759416103363037, "learning_rate": 6.452568833380643e-05, "loss": 0.0032647754997015, "step": 125030 }, { "epoch": 35.49247800170309, "grad_norm": 0.10367639362812042, "learning_rate": 6.452284984388305e-05, "loss": 0.003720211237668991, "step": 125040 }, { "epoch": 35.495316491626454, "grad_norm": 0.506496250629425, "learning_rate": 6.45200113539597e-05, "loss": 0.010861226171255112, "step": 125050 }, { "epoch": 35.49815498154982, "grad_norm": 0.14185407757759094, "learning_rate": 6.451717286403634e-05, "loss": 0.0069375157356262205, "step": 125060 }, { "epoch": 35.50099347147318, "grad_norm": 0.16966380178928375, "learning_rate": 6.451433437411298e-05, "loss": 0.0066543005406856535, "step": 125070 }, { "epoch": 35.503831961396536, "grad_norm": 0.15068873763084412, "learning_rate": 6.45114958841896e-05, "loss": 0.0008128028362989426, "step": 125080 }, { "epoch": 35.5066704513199, "grad_norm": 0.4455798864364624, "learning_rate": 6.450865739426626e-05, "loss": 0.0015239287167787552, "step": 125090 }, { "epoch": 35.50950894124326, "grad_norm": 0.9672191143035889, "learning_rate": 6.45058189043429e-05, "loss": 0.015783116221427917, "step": 125100 }, { "epoch": 35.51234743116662, "grad_norm": 4.735830783843994, "learning_rate": 6.450298041441953e-05, "loss": 0.010145072638988496, "step": 125110 }, { "epoch": 35.51518592108998, "grad_norm": 0.3422497510910034, "learning_rate": 6.450014192449617e-05, "loss": 0.013720910251140594, "step": 125120 }, { "epoch": 35.51802441101334, "grad_norm": 0.12498065084218979, "learning_rate": 6.449730343457281e-05, "loss": 0.002984499931335449, "step": 125130 }, { "epoch": 35.5208629009367, "grad_norm": 0.5811486840248108, "learning_rate": 6.449446494464944e-05, "loss": 0.0034018225967884065, "step": 125140 }, { "epoch": 35.52370139086006, "grad_norm": 0.480955570936203, "learning_rate": 6.449162645472608e-05, "loss": 0.008165840804576874, "step": 125150 }, { "epoch": 35.526539880783425, "grad_norm": 2.1032793521881104, "learning_rate": 6.448878796480274e-05, "loss": 0.005098474770784378, "step": 125160 }, { "epoch": 35.52937837070679, "grad_norm": 0.7500012516975403, "learning_rate": 6.448594947487937e-05, "loss": 0.002728801034390926, "step": 125170 }, { "epoch": 35.53221686063014, "grad_norm": 0.1109260618686676, "learning_rate": 6.4483110984956e-05, "loss": 0.011433902382850646, "step": 125180 }, { "epoch": 35.535055350553506, "grad_norm": 0.8421873450279236, "learning_rate": 6.448027249503265e-05, "loss": 0.010049564391374588, "step": 125190 }, { "epoch": 35.53789384047687, "grad_norm": 5.967371463775635, "learning_rate": 6.447743400510929e-05, "loss": 0.0035402797162532806, "step": 125200 }, { "epoch": 35.540732330400225, "grad_norm": 0.2739831805229187, "learning_rate": 6.447459551518592e-05, "loss": 0.003483092039823532, "step": 125210 }, { "epoch": 35.54357082032359, "grad_norm": 0.308971107006073, "learning_rate": 6.447175702526256e-05, "loss": 0.008164165914058686, "step": 125220 }, { "epoch": 35.54640931024695, "grad_norm": 0.10572206228971481, "learning_rate": 6.446891853533921e-05, "loss": 0.003227214515209198, "step": 125230 }, { "epoch": 35.549247800170306, "grad_norm": 1.0289645195007324, "learning_rate": 6.446608004541584e-05, "loss": 0.0036446630954742433, "step": 125240 }, { "epoch": 35.55208629009367, "grad_norm": 0.1717691421508789, "learning_rate": 6.446324155549248e-05, "loss": 0.002933488413691521, "step": 125250 }, { "epoch": 35.55492478001703, "grad_norm": 0.03167391195893288, "learning_rate": 6.446040306556912e-05, "loss": 0.005520378798246383, "step": 125260 }, { "epoch": 35.557763269940395, "grad_norm": 0.2930864095687866, "learning_rate": 6.445756457564575e-05, "loss": 0.000985959731042385, "step": 125270 }, { "epoch": 35.56060175986375, "grad_norm": 0.1712752878665924, "learning_rate": 6.445472608572239e-05, "loss": 0.0023083217442035676, "step": 125280 }, { "epoch": 35.563440249787114, "grad_norm": 2.1108486652374268, "learning_rate": 6.445188759579905e-05, "loss": 0.01200648695230484, "step": 125290 }, { "epoch": 35.56627873971048, "grad_norm": 0.371765673160553, "learning_rate": 6.444904910587568e-05, "loss": 0.005719281733036041, "step": 125300 }, { "epoch": 35.56911722963383, "grad_norm": 0.29231005907058716, "learning_rate": 6.444621061595232e-05, "loss": 0.0013308390974998475, "step": 125310 }, { "epoch": 35.571955719557195, "grad_norm": 0.05381132289767265, "learning_rate": 6.444337212602896e-05, "loss": 0.011894132941961288, "step": 125320 }, { "epoch": 35.57479420948056, "grad_norm": 0.15411996841430664, "learning_rate": 6.44405336361056e-05, "loss": 0.011256928741931915, "step": 125330 }, { "epoch": 35.577632699403914, "grad_norm": 5.019742012023926, "learning_rate": 6.443769514618223e-05, "loss": 0.009076657891273498, "step": 125340 }, { "epoch": 35.58047118932728, "grad_norm": 0.7339893579483032, "learning_rate": 6.443485665625887e-05, "loss": 0.00530652366578579, "step": 125350 }, { "epoch": 35.58330967925064, "grad_norm": 1.5370879173278809, "learning_rate": 6.443201816633551e-05, "loss": 0.0019224384799599648, "step": 125360 }, { "epoch": 35.586148169174, "grad_norm": 7.998415946960449, "learning_rate": 6.442917967641215e-05, "loss": 0.00514814630150795, "step": 125370 }, { "epoch": 35.58898665909736, "grad_norm": 7.727628707885742, "learning_rate": 6.44263411864888e-05, "loss": 0.005549687147140503, "step": 125380 }, { "epoch": 35.59182514902072, "grad_norm": 2.280776023864746, "learning_rate": 6.442350269656543e-05, "loss": 0.010034140944480897, "step": 125390 }, { "epoch": 35.594663638944084, "grad_norm": 9.786527633666992, "learning_rate": 6.442066420664206e-05, "loss": 0.014894461631774903, "step": 125400 }, { "epoch": 35.59750212886744, "grad_norm": 2.7524898052215576, "learning_rate": 6.44178257167187e-05, "loss": 0.0026578644290566446, "step": 125410 }, { "epoch": 35.6003406187908, "grad_norm": 14.630492210388184, "learning_rate": 6.441498722679535e-05, "loss": 0.007752618938684464, "step": 125420 }, { "epoch": 35.603179108714166, "grad_norm": 1.4213225841522217, "learning_rate": 6.441214873687199e-05, "loss": 0.014157319068908691, "step": 125430 }, { "epoch": 35.60601759863752, "grad_norm": 0.6881102919578552, "learning_rate": 6.440931024694863e-05, "loss": 0.005177746713161469, "step": 125440 }, { "epoch": 35.608856088560884, "grad_norm": 21.151691436767578, "learning_rate": 6.440647175702527e-05, "loss": 0.008037223666906356, "step": 125450 }, { "epoch": 35.61169457848425, "grad_norm": 1.5915447473526, "learning_rate": 6.44036332671019e-05, "loss": 0.007538111507892608, "step": 125460 }, { "epoch": 35.61453306840761, "grad_norm": 1.09759521484375, "learning_rate": 6.440079477717854e-05, "loss": 0.006154146045446396, "step": 125470 }, { "epoch": 35.617371558330966, "grad_norm": 0.8544142246246338, "learning_rate": 6.439795628725518e-05, "loss": 0.006271190196275711, "step": 125480 }, { "epoch": 35.62021004825433, "grad_norm": 0.6008771657943726, "learning_rate": 6.439511779733182e-05, "loss": 0.012101072818040848, "step": 125490 }, { "epoch": 35.62304853817769, "grad_norm": 9.811751365661621, "learning_rate": 6.439227930740846e-05, "loss": 0.006930852681398392, "step": 125500 }, { "epoch": 35.62304853817769, "eval_accuracy": 0.9694156546067273, "eval_loss": 0.11055191606283188, "eval_runtime": 31.695, "eval_samples_per_second": 496.197, "eval_steps_per_second": 7.761, "step": 125500 }, { "epoch": 35.62588702810105, "grad_norm": 0.7095000147819519, "learning_rate": 6.43894408174851e-05, "loss": 0.00657695084810257, "step": 125510 }, { "epoch": 35.62872551802441, "grad_norm": 1.8637582063674927, "learning_rate": 6.438660232756175e-05, "loss": 0.023449543118476867, "step": 125520 }, { "epoch": 35.63156400794777, "grad_norm": 0.3539074957370758, "learning_rate": 6.438376383763837e-05, "loss": 0.009968521445989609, "step": 125530 }, { "epoch": 35.634402497871136, "grad_norm": 0.253376305103302, "learning_rate": 6.438092534771501e-05, "loss": 0.002223910391330719, "step": 125540 }, { "epoch": 35.63724098779449, "grad_norm": 0.6831166744232178, "learning_rate": 6.437808685779166e-05, "loss": 0.0034550823271274566, "step": 125550 }, { "epoch": 35.640079477717855, "grad_norm": 0.0465189591050148, "learning_rate": 6.43752483678683e-05, "loss": 0.012002493441104888, "step": 125560 }, { "epoch": 35.64291796764122, "grad_norm": 0.03205685690045357, "learning_rate": 6.437240987794494e-05, "loss": 0.003970462456345558, "step": 125570 }, { "epoch": 35.64575645756457, "grad_norm": 1.2504947185516357, "learning_rate": 6.436957138802158e-05, "loss": 0.0035271897912025453, "step": 125580 }, { "epoch": 35.648594947487936, "grad_norm": 0.5116320848464966, "learning_rate": 6.436673289809821e-05, "loss": 0.0025082001462578773, "step": 125590 }, { "epoch": 35.6514334374113, "grad_norm": 0.22356906533241272, "learning_rate": 6.436389440817485e-05, "loss": 0.005789599567651749, "step": 125600 }, { "epoch": 35.654271927334655, "grad_norm": 0.161332905292511, "learning_rate": 6.436105591825149e-05, "loss": 0.0014455650001764298, "step": 125610 }, { "epoch": 35.65711041725802, "grad_norm": 0.19064517319202423, "learning_rate": 6.435821742832813e-05, "loss": 0.0017866864800453186, "step": 125620 }, { "epoch": 35.65994890718138, "grad_norm": 0.052568469196558, "learning_rate": 6.435537893840477e-05, "loss": 0.0048654571175575255, "step": 125630 }, { "epoch": 35.66278739710474, "grad_norm": 1.3232976198196411, "learning_rate": 6.435254044848141e-05, "loss": 0.001846633478999138, "step": 125640 }, { "epoch": 35.6656258870281, "grad_norm": 4.506834030151367, "learning_rate": 6.434970195855806e-05, "loss": 0.005079007148742676, "step": 125650 }, { "epoch": 35.66846437695146, "grad_norm": 0.17795799672603607, "learning_rate": 6.434686346863468e-05, "loss": 0.003507944941520691, "step": 125660 }, { "epoch": 35.671302866874825, "grad_norm": 5.064487934112549, "learning_rate": 6.434402497871133e-05, "loss": 0.0053322933614254, "step": 125670 }, { "epoch": 35.67414135679818, "grad_norm": 0.5494018197059631, "learning_rate": 6.434118648878797e-05, "loss": 0.0027816742658615112, "step": 125680 }, { "epoch": 35.676979846721544, "grad_norm": 0.3183806538581848, "learning_rate": 6.433834799886461e-05, "loss": 0.003345290943980217, "step": 125690 }, { "epoch": 35.67981833664491, "grad_norm": 0.848863422870636, "learning_rate": 6.433550950894125e-05, "loss": 0.007506284862756729, "step": 125700 }, { "epoch": 35.68265682656826, "grad_norm": 0.24562405049800873, "learning_rate": 6.433267101901789e-05, "loss": 0.0064976766705513, "step": 125710 }, { "epoch": 35.685495316491625, "grad_norm": 0.11981731653213501, "learning_rate": 6.432983252909452e-05, "loss": 0.0031975656747817993, "step": 125720 }, { "epoch": 35.68833380641499, "grad_norm": 0.7863903045654297, "learning_rate": 6.432699403917116e-05, "loss": 0.0029272908344864846, "step": 125730 }, { "epoch": 35.69117229633835, "grad_norm": 7.400240421295166, "learning_rate": 6.43241555492478e-05, "loss": 0.01401362419128418, "step": 125740 }, { "epoch": 35.69401078626171, "grad_norm": 0.49115607142448425, "learning_rate": 6.432131705932444e-05, "loss": 0.0011231260374188422, "step": 125750 }, { "epoch": 35.69684927618507, "grad_norm": 0.0890878215432167, "learning_rate": 6.431847856940108e-05, "loss": 0.001536744087934494, "step": 125760 }, { "epoch": 35.69968776610843, "grad_norm": 0.2775839567184448, "learning_rate": 6.431564007947773e-05, "loss": 0.005969899520277977, "step": 125770 }, { "epoch": 35.70252625603179, "grad_norm": 4.4697771072387695, "learning_rate": 6.431280158955437e-05, "loss": 0.012081263959407807, "step": 125780 }, { "epoch": 35.70536474595515, "grad_norm": 6.609982490539551, "learning_rate": 6.4309963099631e-05, "loss": 0.0028816327452659608, "step": 125790 }, { "epoch": 35.708203235878514, "grad_norm": 3.862739324569702, "learning_rate": 6.430712460970764e-05, "loss": 0.004419993609189987, "step": 125800 }, { "epoch": 35.71104172580187, "grad_norm": 0.19788038730621338, "learning_rate": 6.430428611978428e-05, "loss": 0.007346046715974807, "step": 125810 }, { "epoch": 35.71388021572523, "grad_norm": 1.3134608268737793, "learning_rate": 6.430144762986092e-05, "loss": 0.017233341932296753, "step": 125820 }, { "epoch": 35.716718705648596, "grad_norm": 0.24054394662380219, "learning_rate": 6.429860913993756e-05, "loss": 0.0023723315447568894, "step": 125830 }, { "epoch": 35.71955719557196, "grad_norm": 1.1830635070800781, "learning_rate": 6.42957706500142e-05, "loss": 0.008591021597385406, "step": 125840 }, { "epoch": 35.722395685495314, "grad_norm": 0.3654405176639557, "learning_rate": 6.429293216009083e-05, "loss": 0.004867617040872574, "step": 125850 }, { "epoch": 35.72523417541868, "grad_norm": 7.728053569793701, "learning_rate": 6.429009367016747e-05, "loss": 0.0101021908223629, "step": 125860 }, { "epoch": 35.72807266534204, "grad_norm": 0.08115047216415405, "learning_rate": 6.428725518024411e-05, "loss": 0.004279579967260361, "step": 125870 }, { "epoch": 35.730911155265396, "grad_norm": 0.019951509311795235, "learning_rate": 6.428441669032075e-05, "loss": 0.007779677212238312, "step": 125880 }, { "epoch": 35.73374964518876, "grad_norm": 0.07166038453578949, "learning_rate": 6.42815782003974e-05, "loss": 0.007013005018234253, "step": 125890 }, { "epoch": 35.73658813511212, "grad_norm": 4.195037841796875, "learning_rate": 6.427873971047404e-05, "loss": 0.014965492486953735, "step": 125900 }, { "epoch": 35.739426625035485, "grad_norm": 13.27398681640625, "learning_rate": 6.427590122055068e-05, "loss": 0.008800321817398071, "step": 125910 }, { "epoch": 35.74226511495884, "grad_norm": 0.03212512657046318, "learning_rate": 6.42730627306273e-05, "loss": 0.021278107166290285, "step": 125920 }, { "epoch": 35.7451036048822, "grad_norm": 0.21651475131511688, "learning_rate": 6.427022424070395e-05, "loss": 0.018256112933158875, "step": 125930 }, { "epoch": 35.747942094805566, "grad_norm": 11.894150733947754, "learning_rate": 6.426738575078059e-05, "loss": 0.0115379199385643, "step": 125940 }, { "epoch": 35.75078058472892, "grad_norm": 1.8476530313491821, "learning_rate": 6.426454726085722e-05, "loss": 0.012787336111068725, "step": 125950 }, { "epoch": 35.753619074652285, "grad_norm": 2.3486788272857666, "learning_rate": 6.426170877093387e-05, "loss": 0.007745042443275452, "step": 125960 }, { "epoch": 35.75645756457565, "grad_norm": 0.5078985095024109, "learning_rate": 6.425887028101051e-05, "loss": 0.003848437964916229, "step": 125970 }, { "epoch": 35.759296054499, "grad_norm": 8.703825950622559, "learning_rate": 6.425603179108714e-05, "loss": 0.0069452449679374695, "step": 125980 }, { "epoch": 35.762134544422366, "grad_norm": 0.6124324202537537, "learning_rate": 6.425319330116378e-05, "loss": 0.003283681720495224, "step": 125990 }, { "epoch": 35.76497303434573, "grad_norm": 11.745828628540039, "learning_rate": 6.425035481124042e-05, "loss": 0.007817354798316956, "step": 126000 }, { "epoch": 35.76497303434573, "eval_accuracy": 0.9734850893368093, "eval_loss": 0.08920043706893921, "eval_runtime": 32.3174, "eval_samples_per_second": 486.642, "eval_steps_per_second": 7.612, "step": 126000 }, { "epoch": 35.76781152426909, "grad_norm": 2.3494677543640137, "learning_rate": 6.424751632131706e-05, "loss": 0.008326833695173263, "step": 126010 }, { "epoch": 35.77065001419245, "grad_norm": 0.1206546351313591, "learning_rate": 6.42446778313937e-05, "loss": 0.002287722937762737, "step": 126020 }, { "epoch": 35.77348850411581, "grad_norm": 0.47574958205223083, "learning_rate": 6.424183934147035e-05, "loss": 0.00491691455245018, "step": 126030 }, { "epoch": 35.776326994039174, "grad_norm": 0.15117399394512177, "learning_rate": 6.423900085154699e-05, "loss": 0.002325243316590786, "step": 126040 }, { "epoch": 35.77916548396253, "grad_norm": 0.5581682920455933, "learning_rate": 6.423616236162362e-05, "loss": 0.004037883877754211, "step": 126050 }, { "epoch": 35.78200397388589, "grad_norm": 0.791415810585022, "learning_rate": 6.423332387170026e-05, "loss": 0.0028008786961436273, "step": 126060 }, { "epoch": 35.784842463809255, "grad_norm": 0.15140633285045624, "learning_rate": 6.42304853817769e-05, "loss": 0.005774249881505966, "step": 126070 }, { "epoch": 35.78768095373261, "grad_norm": 0.44931066036224365, "learning_rate": 6.422764689185353e-05, "loss": 0.00166766457259655, "step": 126080 }, { "epoch": 35.790519443655974, "grad_norm": 0.6648796200752258, "learning_rate": 6.422480840193018e-05, "loss": 0.001513228565454483, "step": 126090 }, { "epoch": 35.79335793357934, "grad_norm": 5.194144248962402, "learning_rate": 6.422196991200682e-05, "loss": 0.003154616430401802, "step": 126100 }, { "epoch": 35.7961964235027, "grad_norm": 2.333834409713745, "learning_rate": 6.421913142208345e-05, "loss": 0.011298684030771255, "step": 126110 }, { "epoch": 35.799034913426055, "grad_norm": 0.6022517085075378, "learning_rate": 6.421629293216009e-05, "loss": 0.006935279071331024, "step": 126120 }, { "epoch": 35.80187340334942, "grad_norm": 4.017784118652344, "learning_rate": 6.421345444223673e-05, "loss": 0.010142439603805542, "step": 126130 }, { "epoch": 35.80471189327278, "grad_norm": 0.036865461617708206, "learning_rate": 6.421061595231338e-05, "loss": 0.007009932398796081, "step": 126140 }, { "epoch": 35.80755038319614, "grad_norm": 0.4198978841304779, "learning_rate": 6.420777746239e-05, "loss": 0.002792798914015293, "step": 126150 }, { "epoch": 35.8103888731195, "grad_norm": 4.522526264190674, "learning_rate": 6.420493897246666e-05, "loss": 0.006036895141005516, "step": 126160 }, { "epoch": 35.81322736304286, "grad_norm": 0.0803339034318924, "learning_rate": 6.42021004825433e-05, "loss": 0.0018586892634630202, "step": 126170 }, { "epoch": 35.81606585296622, "grad_norm": 0.15588295459747314, "learning_rate": 6.419926199261993e-05, "loss": 0.003602844476699829, "step": 126180 }, { "epoch": 35.81890434288958, "grad_norm": 0.5945289134979248, "learning_rate": 6.419642350269657e-05, "loss": 0.005804742127656937, "step": 126190 }, { "epoch": 35.821742832812944, "grad_norm": 0.4530585706233978, "learning_rate": 6.419358501277321e-05, "loss": 0.0030143633484840393, "step": 126200 }, { "epoch": 35.82458132273631, "grad_norm": 0.060854967683553696, "learning_rate": 6.419074652284984e-05, "loss": 0.010567860305309295, "step": 126210 }, { "epoch": 35.82741981265966, "grad_norm": 0.41671624779701233, "learning_rate": 6.418790803292649e-05, "loss": 0.005266121774911881, "step": 126220 }, { "epoch": 35.830258302583026, "grad_norm": 0.2279660999774933, "learning_rate": 6.418506954300313e-05, "loss": 0.007976356893777847, "step": 126230 }, { "epoch": 35.83309679250639, "grad_norm": 0.05304163321852684, "learning_rate": 6.418223105307976e-05, "loss": 0.00886397585272789, "step": 126240 }, { "epoch": 35.835935282429745, "grad_norm": 0.10024485737085342, "learning_rate": 6.41793925631564e-05, "loss": 0.005539890751242638, "step": 126250 }, { "epoch": 35.83877377235311, "grad_norm": 10.355852127075195, "learning_rate": 6.417655407323304e-05, "loss": 0.009502045810222626, "step": 126260 }, { "epoch": 35.84161226227647, "grad_norm": 2.6603891849517822, "learning_rate": 6.417371558330969e-05, "loss": 0.001631772518157959, "step": 126270 }, { "epoch": 35.84445075219983, "grad_norm": 1.4265003204345703, "learning_rate": 6.417087709338631e-05, "loss": 0.013410523533821106, "step": 126280 }, { "epoch": 35.84728924212319, "grad_norm": 0.29218432307243347, "learning_rate": 6.416803860346297e-05, "loss": 0.0052363332360982895, "step": 126290 }, { "epoch": 35.85012773204655, "grad_norm": 0.15043163299560547, "learning_rate": 6.41652001135396e-05, "loss": 0.0010573316365480423, "step": 126300 }, { "epoch": 35.852966221969915, "grad_norm": 19.424463272094727, "learning_rate": 6.416236162361624e-05, "loss": 0.01594058722257614, "step": 126310 }, { "epoch": 35.85580471189327, "grad_norm": 0.3940888047218323, "learning_rate": 6.415952313369288e-05, "loss": 0.011520786583423615, "step": 126320 }, { "epoch": 35.85864320181663, "grad_norm": 0.31801101565361023, "learning_rate": 6.415668464376952e-05, "loss": 0.001605990156531334, "step": 126330 }, { "epoch": 35.861481691739996, "grad_norm": 0.5530164837837219, "learning_rate": 6.415384615384615e-05, "loss": 0.004565146565437317, "step": 126340 }, { "epoch": 35.86432018166335, "grad_norm": 0.14780156314373016, "learning_rate": 6.415100766392279e-05, "loss": 0.0013087816536426545, "step": 126350 }, { "epoch": 35.867158671586715, "grad_norm": 1.1247615814208984, "learning_rate": 6.414816917399944e-05, "loss": 0.006440658867359161, "step": 126360 }, { "epoch": 35.86999716151008, "grad_norm": 6.3111114501953125, "learning_rate": 6.414533068407607e-05, "loss": 0.005968622118234635, "step": 126370 }, { "epoch": 35.87283565143344, "grad_norm": 2.370638608932495, "learning_rate": 6.414249219415271e-05, "loss": 0.01015380620956421, "step": 126380 }, { "epoch": 35.8756741413568, "grad_norm": 0.6105343699455261, "learning_rate": 6.413965370422936e-05, "loss": 0.0028698213398456575, "step": 126390 }, { "epoch": 35.87851263128016, "grad_norm": 11.494352340698242, "learning_rate": 6.413681521430598e-05, "loss": 0.008557707071304321, "step": 126400 }, { "epoch": 35.88135112120352, "grad_norm": 0.4002112150192261, "learning_rate": 6.413397672438262e-05, "loss": 0.002713472954928875, "step": 126410 }, { "epoch": 35.88418961112688, "grad_norm": 6.1648736000061035, "learning_rate": 6.413113823445928e-05, "loss": 0.0023005463182926176, "step": 126420 }, { "epoch": 35.88702810105024, "grad_norm": 0.5191795229911804, "learning_rate": 6.412829974453591e-05, "loss": 0.004546607285737992, "step": 126430 }, { "epoch": 35.889866590973604, "grad_norm": 0.1352374255657196, "learning_rate": 6.412546125461255e-05, "loss": 0.011713664233684539, "step": 126440 }, { "epoch": 35.89270508089696, "grad_norm": 3.385709047317505, "learning_rate": 6.412262276468919e-05, "loss": 0.006901535391807556, "step": 126450 }, { "epoch": 35.89554357082032, "grad_norm": 1.5677236318588257, "learning_rate": 6.411978427476583e-05, "loss": 0.007343468815088272, "step": 126460 }, { "epoch": 35.898382060743685, "grad_norm": 0.2934154272079468, "learning_rate": 6.411694578484246e-05, "loss": 0.005378042161464691, "step": 126470 }, { "epoch": 35.90122055066705, "grad_norm": 0.5809357166290283, "learning_rate": 6.41141072949191e-05, "loss": 0.004280347377061844, "step": 126480 }, { "epoch": 35.904059040590404, "grad_norm": 0.8176398277282715, "learning_rate": 6.411126880499576e-05, "loss": 0.003025081381201744, "step": 126490 }, { "epoch": 35.90689753051377, "grad_norm": 0.032708168029785156, "learning_rate": 6.410843031507238e-05, "loss": 0.0015533359721302987, "step": 126500 }, { "epoch": 35.90689753051377, "eval_accuracy": 0.9737394290074395, "eval_loss": 0.08901378512382507, "eval_runtime": 32.1781, "eval_samples_per_second": 488.749, "eval_steps_per_second": 7.645, "step": 126500 }, { "epoch": 35.90973602043713, "grad_norm": 0.736684262752533, "learning_rate": 6.410559182514902e-05, "loss": 0.0011504879221320152, "step": 126510 }, { "epoch": 35.912574510360486, "grad_norm": 0.07730340212583542, "learning_rate": 6.410275333522567e-05, "loss": 0.002045847661793232, "step": 126520 }, { "epoch": 35.91541300028385, "grad_norm": 0.728770911693573, "learning_rate": 6.40999148453023e-05, "loss": 0.003300675377249718, "step": 126530 }, { "epoch": 35.91825149020721, "grad_norm": 3.564445972442627, "learning_rate": 6.409707635537894e-05, "loss": 0.0019577568396925925, "step": 126540 }, { "epoch": 35.92108998013057, "grad_norm": 3.454472303390503, "learning_rate": 6.409423786545558e-05, "loss": 0.0023244909942150118, "step": 126550 }, { "epoch": 35.92392847005393, "grad_norm": 9.89323616027832, "learning_rate": 6.409139937553222e-05, "loss": 0.004548443108797073, "step": 126560 }, { "epoch": 35.92676695997729, "grad_norm": 1.1458797454833984, "learning_rate": 6.408856088560886e-05, "loss": 0.014385245740413666, "step": 126570 }, { "epoch": 35.929605449900656, "grad_norm": 0.5685465931892395, "learning_rate": 6.40857223956855e-05, "loss": 0.003281041979789734, "step": 126580 }, { "epoch": 35.93244393982401, "grad_norm": 0.2190071940422058, "learning_rate": 6.408288390576214e-05, "loss": 0.0005115475505590439, "step": 126590 }, { "epoch": 35.935282429747375, "grad_norm": 0.2857154905796051, "learning_rate": 6.408004541583877e-05, "loss": 0.0019325120374560356, "step": 126600 }, { "epoch": 35.93812091967074, "grad_norm": 14.508387565612793, "learning_rate": 6.407720692591541e-05, "loss": 0.005317140370607376, "step": 126610 }, { "epoch": 35.94095940959409, "grad_norm": 3.4920389652252197, "learning_rate": 6.407436843599207e-05, "loss": 0.0015887487679719925, "step": 126620 }, { "epoch": 35.943797899517456, "grad_norm": 0.12757577002048492, "learning_rate": 6.40715299460687e-05, "loss": 0.005288310348987579, "step": 126630 }, { "epoch": 35.94663638944082, "grad_norm": 1.0794126987457275, "learning_rate": 6.406869145614534e-05, "loss": 0.0079939104616642, "step": 126640 }, { "epoch": 35.949474879364175, "grad_norm": 0.49123790860176086, "learning_rate": 6.406585296622198e-05, "loss": 0.007430511713027954, "step": 126650 }, { "epoch": 35.95231336928754, "grad_norm": 0.15124057233333588, "learning_rate": 6.40630144762986e-05, "loss": 0.003562916815280914, "step": 126660 }, { "epoch": 35.9551518592109, "grad_norm": 2.425823450088501, "learning_rate": 6.406017598637525e-05, "loss": 0.005152301862835884, "step": 126670 }, { "epoch": 35.95799034913426, "grad_norm": 0.26592516899108887, "learning_rate": 6.405733749645189e-05, "loss": 0.0022968359291553496, "step": 126680 }, { "epoch": 35.96082883905762, "grad_norm": 0.5596168041229248, "learning_rate": 6.405449900652853e-05, "loss": 0.007101870328187943, "step": 126690 }, { "epoch": 35.96366732898098, "grad_norm": 0.7218759059906006, "learning_rate": 6.405166051660517e-05, "loss": 0.0033067218959331513, "step": 126700 }, { "epoch": 35.966505818904345, "grad_norm": 0.9604300856590271, "learning_rate": 6.404882202668181e-05, "loss": 0.0033266060054302216, "step": 126710 }, { "epoch": 35.9693443088277, "grad_norm": 0.6491814851760864, "learning_rate": 6.404598353675845e-05, "loss": 0.006630474328994751, "step": 126720 }, { "epoch": 35.972182798751064, "grad_norm": 0.28935250639915466, "learning_rate": 6.404314504683508e-05, "loss": 0.0036221802234649657, "step": 126730 }, { "epoch": 35.97502128867443, "grad_norm": 5.347591400146484, "learning_rate": 6.404030655691172e-05, "loss": 0.004163079708814621, "step": 126740 }, { "epoch": 35.97785977859779, "grad_norm": 0.634170413017273, "learning_rate": 6.403746806698836e-05, "loss": 0.006770875304937363, "step": 126750 }, { "epoch": 35.980698268521145, "grad_norm": 0.5171111822128296, "learning_rate": 6.4034629577065e-05, "loss": 0.007034525275230408, "step": 126760 }, { "epoch": 35.98353675844451, "grad_norm": 0.5424671173095703, "learning_rate": 6.403179108714165e-05, "loss": 0.01683698445558548, "step": 126770 }, { "epoch": 35.98637524836787, "grad_norm": 12.746862411499023, "learning_rate": 6.402895259721829e-05, "loss": 0.011363303661346436, "step": 126780 }, { "epoch": 35.98921373829123, "grad_norm": 4.254042625427246, "learning_rate": 6.402611410729492e-05, "loss": 0.007727722078561783, "step": 126790 }, { "epoch": 35.99205222821459, "grad_norm": 0.5924074053764343, "learning_rate": 6.402327561737156e-05, "loss": 0.00403762012720108, "step": 126800 }, { "epoch": 35.99489071813795, "grad_norm": 0.04714516922831535, "learning_rate": 6.40204371274482e-05, "loss": 0.003626175969839096, "step": 126810 }, { "epoch": 35.99772920806131, "grad_norm": 1.6308715343475342, "learning_rate": 6.401759863752484e-05, "loss": 0.004816995561122894, "step": 126820 }, { "epoch": 36.00056769798467, "grad_norm": 0.7485119104385376, "learning_rate": 6.401476014760148e-05, "loss": 0.007126141339540482, "step": 126830 }, { "epoch": 36.003406187908034, "grad_norm": 0.2731200158596039, "learning_rate": 6.401192165767812e-05, "loss": 0.005330976843833923, "step": 126840 }, { "epoch": 36.0062446778314, "grad_norm": 7.831894874572754, "learning_rate": 6.400908316775476e-05, "loss": 0.007614757120609284, "step": 126850 }, { "epoch": 36.00908316775475, "grad_norm": 0.6452323794364929, "learning_rate": 6.400624467783139e-05, "loss": 0.014581537246704102, "step": 126860 }, { "epoch": 36.011921657678116, "grad_norm": 9.264128684997559, "learning_rate": 6.400340618790803e-05, "loss": 0.016015347838401795, "step": 126870 }, { "epoch": 36.01476014760148, "grad_norm": 0.07857528328895569, "learning_rate": 6.400056769798467e-05, "loss": 0.0005820931866765023, "step": 126880 }, { "epoch": 36.017598637524834, "grad_norm": 0.2249826341867447, "learning_rate": 6.399772920806132e-05, "loss": 0.005755890905857086, "step": 126890 }, { "epoch": 36.0204371274482, "grad_norm": 0.06895659863948822, "learning_rate": 6.399489071813796e-05, "loss": 0.008543316274881363, "step": 126900 }, { "epoch": 36.02327561737156, "grad_norm": 3.3606367111206055, "learning_rate": 6.39920522282146e-05, "loss": 0.00845354124903679, "step": 126910 }, { "epoch": 36.026114107294916, "grad_norm": 0.03915700316429138, "learning_rate": 6.398921373829123e-05, "loss": 0.003287498652935028, "step": 126920 }, { "epoch": 36.02895259721828, "grad_norm": 12.868856430053711, "learning_rate": 6.398637524836787e-05, "loss": 0.020087313652038575, "step": 126930 }, { "epoch": 36.03179108714164, "grad_norm": 0.17498674988746643, "learning_rate": 6.398353675844451e-05, "loss": 0.00724179744720459, "step": 126940 }, { "epoch": 36.034629577065004, "grad_norm": 0.6085094809532166, "learning_rate": 6.398069826852115e-05, "loss": 0.023564092814922333, "step": 126950 }, { "epoch": 36.03746806698836, "grad_norm": 2.0589208602905273, "learning_rate": 6.397785977859779e-05, "loss": 0.005112854391336441, "step": 126960 }, { "epoch": 36.04030655691172, "grad_norm": 4.473414421081543, "learning_rate": 6.397530513766676e-05, "loss": 0.028100308775901795, "step": 126970 }, { "epoch": 36.043145046835086, "grad_norm": 0.36405524611473083, "learning_rate": 6.39724666477434e-05, "loss": 0.01251579523086548, "step": 126980 }, { "epoch": 36.04598353675844, "grad_norm": 5.049113750457764, "learning_rate": 6.396962815782004e-05, "loss": 0.004196828603744507, "step": 126990 }, { "epoch": 36.048822026681805, "grad_norm": 0.3501921594142914, "learning_rate": 6.396678966789668e-05, "loss": 0.0025804173201322556, "step": 127000 }, { "epoch": 36.048822026681805, "eval_accuracy": 0.9729128250778916, "eval_loss": 0.09345213323831558, "eval_runtime": 31.7659, "eval_samples_per_second": 495.091, "eval_steps_per_second": 7.744, "step": 127000 }, { "epoch": 36.05166051660517, "grad_norm": 0.41528764367103577, "learning_rate": 6.396395117797332e-05, "loss": 0.0034525208175182343, "step": 127010 }, { "epoch": 36.05449900652852, "grad_norm": 1.0354595184326172, "learning_rate": 6.396111268804996e-05, "loss": 0.01257609874010086, "step": 127020 }, { "epoch": 36.057337496451886, "grad_norm": 4.490610599517822, "learning_rate": 6.39582741981266e-05, "loss": 0.00984746590256691, "step": 127030 }, { "epoch": 36.06017598637525, "grad_norm": 3.345027208328247, "learning_rate": 6.395543570820323e-05, "loss": 0.0022512603551149367, "step": 127040 }, { "epoch": 36.06301447629861, "grad_norm": 1.1294705867767334, "learning_rate": 6.395259721827988e-05, "loss": 0.0022397279739379884, "step": 127050 }, { "epoch": 36.06585296622197, "grad_norm": 0.031205175444483757, "learning_rate": 6.394975872835652e-05, "loss": 0.0008827924728393554, "step": 127060 }, { "epoch": 36.06869145614533, "grad_norm": 1.3361119031906128, "learning_rate": 6.394692023843316e-05, "loss": 0.002103321999311447, "step": 127070 }, { "epoch": 36.071529946068694, "grad_norm": 0.0718621164560318, "learning_rate": 6.39440817485098e-05, "loss": 0.003419715166091919, "step": 127080 }, { "epoch": 36.07436843599205, "grad_norm": 0.07301519811153412, "learning_rate": 6.394124325858644e-05, "loss": 0.010206998139619828, "step": 127090 }, { "epoch": 36.07720692591541, "grad_norm": 0.4432388246059418, "learning_rate": 6.393840476866307e-05, "loss": 0.016574613749980927, "step": 127100 }, { "epoch": 36.080045415838775, "grad_norm": 7.471394062042236, "learning_rate": 6.393556627873971e-05, "loss": 0.00944097265601158, "step": 127110 }, { "epoch": 36.08288390576214, "grad_norm": 1.7806057929992676, "learning_rate": 6.393272778881635e-05, "loss": 0.011175611615180969, "step": 127120 }, { "epoch": 36.085722395685494, "grad_norm": 0.045527562499046326, "learning_rate": 6.392988929889299e-05, "loss": 0.0006334753707051277, "step": 127130 }, { "epoch": 36.08856088560886, "grad_norm": 0.26164624094963074, "learning_rate": 6.392705080896963e-05, "loss": 0.0032220132648944854, "step": 127140 }, { "epoch": 36.09139937553222, "grad_norm": 0.5625957250595093, "learning_rate": 6.392421231904628e-05, "loss": 0.0038161464035511017, "step": 127150 }, { "epoch": 36.094237865455575, "grad_norm": 2.360600471496582, "learning_rate": 6.39213738291229e-05, "loss": 0.001096723973751068, "step": 127160 }, { "epoch": 36.09707635537894, "grad_norm": 0.0593549944460392, "learning_rate": 6.391853533919954e-05, "loss": 0.000880332849919796, "step": 127170 }, { "epoch": 36.0999148453023, "grad_norm": 3.7415199279785156, "learning_rate": 6.391569684927619e-05, "loss": 0.0011992288753390312, "step": 127180 }, { "epoch": 36.10275333522566, "grad_norm": 0.5388703346252441, "learning_rate": 6.391285835935283e-05, "loss": 0.0010394833981990815, "step": 127190 }, { "epoch": 36.10559182514902, "grad_norm": 6.951554298400879, "learning_rate": 6.391001986942947e-05, "loss": 0.00624723955988884, "step": 127200 }, { "epoch": 36.10843031507238, "grad_norm": 5.447987079620361, "learning_rate": 6.390718137950611e-05, "loss": 0.001547386683523655, "step": 127210 }, { "epoch": 36.111268804995746, "grad_norm": 1.3525289297103882, "learning_rate": 6.390434288958275e-05, "loss": 0.0023940665647387504, "step": 127220 }, { "epoch": 36.1141072949191, "grad_norm": 4.19992733001709, "learning_rate": 6.390150439965938e-05, "loss": 0.0039614934474229814, "step": 127230 }, { "epoch": 36.116945784842464, "grad_norm": 0.027442842721939087, "learning_rate": 6.389866590973602e-05, "loss": 0.001865350641310215, "step": 127240 }, { "epoch": 36.11978427476583, "grad_norm": 0.061473265290260315, "learning_rate": 6.389582741981266e-05, "loss": 0.01696058511734009, "step": 127250 }, { "epoch": 36.12262276468918, "grad_norm": 1.4688719511032104, "learning_rate": 6.389298892988929e-05, "loss": 0.003261295333504677, "step": 127260 }, { "epoch": 36.125461254612546, "grad_norm": 0.08504585176706314, "learning_rate": 6.389015043996595e-05, "loss": 0.007860887050628661, "step": 127270 }, { "epoch": 36.12829974453591, "grad_norm": 0.36169302463531494, "learning_rate": 6.388731195004259e-05, "loss": 0.009373527765274049, "step": 127280 }, { "epoch": 36.131138234459264, "grad_norm": 0.6977614760398865, "learning_rate": 6.388447346011921e-05, "loss": 0.0014830667525529862, "step": 127290 }, { "epoch": 36.13397672438263, "grad_norm": 17.43887710571289, "learning_rate": 6.388163497019586e-05, "loss": 0.007896265387535096, "step": 127300 }, { "epoch": 36.13681521430599, "grad_norm": 0.4687079191207886, "learning_rate": 6.38787964802725e-05, "loss": 0.009588398039340973, "step": 127310 }, { "epoch": 36.13965370422935, "grad_norm": 0.42098483443260193, "learning_rate": 6.387595799034914e-05, "loss": 0.0017126014456152916, "step": 127320 }, { "epoch": 36.14249219415271, "grad_norm": 0.8760297298431396, "learning_rate": 6.387311950042578e-05, "loss": 0.0017194518819451331, "step": 127330 }, { "epoch": 36.14533068407607, "grad_norm": 0.14376872777938843, "learning_rate": 6.387028101050242e-05, "loss": 0.00064933430403471, "step": 127340 }, { "epoch": 36.148169173999435, "grad_norm": 0.07841896265745163, "learning_rate": 6.386744252057906e-05, "loss": 0.0008113643154501915, "step": 127350 }, { "epoch": 36.15100766392279, "grad_norm": 0.7485700249671936, "learning_rate": 6.386460403065569e-05, "loss": 0.008631116896867751, "step": 127360 }, { "epoch": 36.15384615384615, "grad_norm": 0.13008366525173187, "learning_rate": 6.386176554073233e-05, "loss": 0.0023050157353281973, "step": 127370 }, { "epoch": 36.156684643769516, "grad_norm": 0.1320813149213791, "learning_rate": 6.385892705080897e-05, "loss": 0.0016317794099450112, "step": 127380 }, { "epoch": 36.15952313369287, "grad_norm": 1.6797621250152588, "learning_rate": 6.38560885608856e-05, "loss": 0.002636714465916157, "step": 127390 }, { "epoch": 36.162361623616235, "grad_norm": 0.061095159500837326, "learning_rate": 6.385325007096226e-05, "loss": 0.0011680079624056816, "step": 127400 }, { "epoch": 36.1652001135396, "grad_norm": 1.6298158168792725, "learning_rate": 6.38504115810389e-05, "loss": 0.011523217707872391, "step": 127410 }, { "epoch": 36.16803860346296, "grad_norm": 0.5967862606048584, "learning_rate": 6.384757309111553e-05, "loss": 0.0023915402591228483, "step": 127420 }, { "epoch": 36.170877093386316, "grad_norm": 0.1472238153219223, "learning_rate": 6.384473460119217e-05, "loss": 0.003700447082519531, "step": 127430 }, { "epoch": 36.17371558330968, "grad_norm": 0.8949035406112671, "learning_rate": 6.384189611126881e-05, "loss": 0.0012044599279761314, "step": 127440 }, { "epoch": 36.17655407323304, "grad_norm": 0.5069131851196289, "learning_rate": 6.383905762134545e-05, "loss": 0.002039975859224796, "step": 127450 }, { "epoch": 36.1793925631564, "grad_norm": 0.1094423159956932, "learning_rate": 6.383621913142208e-05, "loss": 0.004139226675033569, "step": 127460 }, { "epoch": 36.18223105307976, "grad_norm": 2.3406026363372803, "learning_rate": 6.383338064149873e-05, "loss": 0.012016795575618744, "step": 127470 }, { "epoch": 36.185069543003124, "grad_norm": 1.3236818313598633, "learning_rate": 6.383054215157537e-05, "loss": 0.00615473985671997, "step": 127480 }, { "epoch": 36.18790803292649, "grad_norm": 0.37161555886268616, "learning_rate": 6.3827703661652e-05, "loss": 0.00452272966504097, "step": 127490 }, { "epoch": 36.19074652284984, "grad_norm": 2.7792532444000244, "learning_rate": 6.382486517172864e-05, "loss": 0.002688353881239891, "step": 127500 }, { "epoch": 36.19074652284984, "eval_accuracy": 0.9731671647485216, "eval_loss": 0.09406692534685135, "eval_runtime": 31.9699, "eval_samples_per_second": 491.932, "eval_steps_per_second": 7.695, "step": 127500 }, { "epoch": 36.193585012773205, "grad_norm": 0.19656357169151306, "learning_rate": 6.382202668180528e-05, "loss": 0.0017799204215407372, "step": 127510 }, { "epoch": 36.19642350269657, "grad_norm": 16.63015365600586, "learning_rate": 6.381918819188191e-05, "loss": 0.006682169437408447, "step": 127520 }, { "epoch": 36.199261992619924, "grad_norm": 0.07477105408906937, "learning_rate": 6.381634970195857e-05, "loss": 0.007875749468803405, "step": 127530 }, { "epoch": 36.20210048254329, "grad_norm": 1.3011082410812378, "learning_rate": 6.381351121203521e-05, "loss": 0.018955110013484953, "step": 127540 }, { "epoch": 36.20493897246665, "grad_norm": 2.0958359241485596, "learning_rate": 6.381067272211184e-05, "loss": 0.004847618192434311, "step": 127550 }, { "epoch": 36.207777462390005, "grad_norm": 0.5526466369628906, "learning_rate": 6.380783423218848e-05, "loss": 0.002208054251968861, "step": 127560 }, { "epoch": 36.21061595231337, "grad_norm": 5.690003871917725, "learning_rate": 6.380499574226512e-05, "loss": 0.0014346564188599587, "step": 127570 }, { "epoch": 36.21345444223673, "grad_norm": 2.031998872756958, "learning_rate": 6.380215725234176e-05, "loss": 0.009736639261245728, "step": 127580 }, { "epoch": 36.216292932160094, "grad_norm": 4.549363136291504, "learning_rate": 6.379931876241839e-05, "loss": 0.007268596440553665, "step": 127590 }, { "epoch": 36.21913142208345, "grad_norm": 0.4121505320072174, "learning_rate": 6.379648027249504e-05, "loss": 0.003952358290553093, "step": 127600 }, { "epoch": 36.22196991200681, "grad_norm": 1.6879030466079712, "learning_rate": 6.379364178257168e-05, "loss": 0.009512005746364594, "step": 127610 }, { "epoch": 36.224808401930176, "grad_norm": 0.20126743614673615, "learning_rate": 6.379080329264831e-05, "loss": 0.0018951848149299621, "step": 127620 }, { "epoch": 36.22764689185353, "grad_norm": 0.6883359551429749, "learning_rate": 6.378796480272495e-05, "loss": 0.001357901468873024, "step": 127630 }, { "epoch": 36.230485381776894, "grad_norm": 2.239004135131836, "learning_rate": 6.37851263128016e-05, "loss": 0.00416656956076622, "step": 127640 }, { "epoch": 36.23332387170026, "grad_norm": 0.1031096801161766, "learning_rate": 6.378228782287822e-05, "loss": 0.003200557455420494, "step": 127650 }, { "epoch": 36.23616236162361, "grad_norm": 0.046665020287036896, "learning_rate": 6.377944933295486e-05, "loss": 0.008613959699869157, "step": 127660 }, { "epoch": 36.239000851546976, "grad_norm": 0.3083481788635254, "learning_rate": 6.377661084303152e-05, "loss": 0.003672643005847931, "step": 127670 }, { "epoch": 36.24183934147034, "grad_norm": 0.15197885036468506, "learning_rate": 6.377377235310815e-05, "loss": 0.014231361448764801, "step": 127680 }, { "epoch": 36.2446778313937, "grad_norm": 1.635524034500122, "learning_rate": 6.377093386318479e-05, "loss": 0.004113158956170082, "step": 127690 }, { "epoch": 36.24751632131706, "grad_norm": 7.497893333435059, "learning_rate": 6.376809537326143e-05, "loss": 0.0060790546238422396, "step": 127700 }, { "epoch": 36.25035481124042, "grad_norm": 0.03468652814626694, "learning_rate": 6.376525688333807e-05, "loss": 0.002390434592962265, "step": 127710 }, { "epoch": 36.25319330116378, "grad_norm": 10.676828384399414, "learning_rate": 6.37624183934147e-05, "loss": 0.004556234925985336, "step": 127720 }, { "epoch": 36.25603179108714, "grad_norm": 0.039073631167411804, "learning_rate": 6.375957990349135e-05, "loss": 0.0015852043405175209, "step": 127730 }, { "epoch": 36.2588702810105, "grad_norm": 0.042142949998378754, "learning_rate": 6.3756741413568e-05, "loss": 0.0017862040549516678, "step": 127740 }, { "epoch": 36.261708770933865, "grad_norm": 0.044744137674570084, "learning_rate": 6.375390292364462e-05, "loss": 0.002254301495850086, "step": 127750 }, { "epoch": 36.26454726085722, "grad_norm": 0.09166830033063889, "learning_rate": 6.375106443372126e-05, "loss": 0.004956484586000442, "step": 127760 }, { "epoch": 36.26738575078058, "grad_norm": 0.048585277050733566, "learning_rate": 6.37482259437979e-05, "loss": 0.0030336380004882812, "step": 127770 }, { "epoch": 36.270224240703946, "grad_norm": 0.986164391040802, "learning_rate": 6.374538745387453e-05, "loss": 0.002197781763970852, "step": 127780 }, { "epoch": 36.27306273062731, "grad_norm": 0.23840592801570892, "learning_rate": 6.374254896395117e-05, "loss": 0.0013004055246710778, "step": 127790 }, { "epoch": 36.275901220550665, "grad_norm": 3.966578483581543, "learning_rate": 6.373971047402783e-05, "loss": 0.0036557894200086594, "step": 127800 }, { "epoch": 36.27873971047403, "grad_norm": 1.0389047861099243, "learning_rate": 6.373687198410446e-05, "loss": 0.003493938222527504, "step": 127810 }, { "epoch": 36.28157820039739, "grad_norm": 0.13905692100524902, "learning_rate": 6.37340334941811e-05, "loss": 0.002410623989999294, "step": 127820 }, { "epoch": 36.28441669032075, "grad_norm": 0.1657152622938156, "learning_rate": 6.373119500425774e-05, "loss": 0.001114712655544281, "step": 127830 }, { "epoch": 36.28725518024411, "grad_norm": 0.09387261420488358, "learning_rate": 6.372835651433438e-05, "loss": 0.007324092090129852, "step": 127840 }, { "epoch": 36.29009367016747, "grad_norm": 0.060805726796388626, "learning_rate": 6.372551802441101e-05, "loss": 0.003158874437212944, "step": 127850 }, { "epoch": 36.29293216009083, "grad_norm": 0.34216102957725525, "learning_rate": 6.372267953448765e-05, "loss": 0.0020167721435427667, "step": 127860 }, { "epoch": 36.29577065001419, "grad_norm": 0.10189714282751083, "learning_rate": 6.37198410445643e-05, "loss": 0.00444999523460865, "step": 127870 }, { "epoch": 36.298609139937554, "grad_norm": 0.19940786063671112, "learning_rate": 6.371700255464093e-05, "loss": 0.0025221148505806925, "step": 127880 }, { "epoch": 36.30144762986092, "grad_norm": 0.19623884558677673, "learning_rate": 6.371416406471757e-05, "loss": 0.007103340327739715, "step": 127890 }, { "epoch": 36.30428611978427, "grad_norm": 0.10340545326471329, "learning_rate": 6.371132557479422e-05, "loss": 0.0008633177727460861, "step": 127900 }, { "epoch": 36.307124609707635, "grad_norm": 1.7378860712051392, "learning_rate": 6.370848708487084e-05, "loss": 0.0050704386085271835, "step": 127910 }, { "epoch": 36.309963099631, "grad_norm": 0.1937931478023529, "learning_rate": 6.370564859494749e-05, "loss": 0.002620670571923256, "step": 127920 }, { "epoch": 36.312801589554354, "grad_norm": 2.300102472305298, "learning_rate": 6.370281010502414e-05, "loss": 0.0028615819290280344, "step": 127930 }, { "epoch": 36.31564007947772, "grad_norm": 0.09869740903377533, "learning_rate": 6.369997161510077e-05, "loss": 0.005170597136020661, "step": 127940 }, { "epoch": 36.31847856940108, "grad_norm": 0.1035519540309906, "learning_rate": 6.369713312517741e-05, "loss": 0.0022327609360218046, "step": 127950 }, { "epoch": 36.32131705932444, "grad_norm": 0.04852621629834175, "learning_rate": 6.369429463525405e-05, "loss": 0.0018946433439850808, "step": 127960 }, { "epoch": 36.3241555492478, "grad_norm": 0.06586398929357529, "learning_rate": 6.369145614533069e-05, "loss": 0.0036411739885807036, "step": 127970 }, { "epoch": 36.32699403917116, "grad_norm": 0.17175254225730896, "learning_rate": 6.368861765540732e-05, "loss": 0.0009551232680678368, "step": 127980 }, { "epoch": 36.329832529094524, "grad_norm": 0.05443062260746956, "learning_rate": 6.368577916548396e-05, "loss": 0.003121565096080303, "step": 127990 }, { "epoch": 36.33267101901788, "grad_norm": 0.24511653184890747, "learning_rate": 6.36829406755606e-05, "loss": 0.00266999751329422, "step": 128000 }, { "epoch": 36.33267101901788, "eval_accuracy": 0.9730399949132066, "eval_loss": 0.08970324695110321, "eval_runtime": 31.8666, "eval_samples_per_second": 493.527, "eval_steps_per_second": 7.72, "step": 128000 }, { "epoch": 36.33550950894124, "grad_norm": 13.086302757263184, "learning_rate": 6.368010218563724e-05, "loss": 0.004339936375617981, "step": 128010 }, { "epoch": 36.338347998864606, "grad_norm": 0.25706109404563904, "learning_rate": 6.367726369571389e-05, "loss": 0.0017901195213198661, "step": 128020 }, { "epoch": 36.34118648878796, "grad_norm": 0.17140182852745056, "learning_rate": 6.367442520579053e-05, "loss": 0.0032442830502986907, "step": 128030 }, { "epoch": 36.344024978711325, "grad_norm": 0.17065833508968353, "learning_rate": 6.367158671586715e-05, "loss": 0.0031328584998846056, "step": 128040 }, { "epoch": 36.34686346863469, "grad_norm": 0.33550211787223816, "learning_rate": 6.36687482259438e-05, "loss": 0.011456573754549027, "step": 128050 }, { "epoch": 36.34970195855805, "grad_norm": 0.5969600677490234, "learning_rate": 6.366590973602044e-05, "loss": 0.004975881427526474, "step": 128060 }, { "epoch": 36.352540448481406, "grad_norm": 2.8020832538604736, "learning_rate": 6.366307124609708e-05, "loss": 0.00581776537001133, "step": 128070 }, { "epoch": 36.35537893840477, "grad_norm": 0.19912339746952057, "learning_rate": 6.366023275617372e-05, "loss": 0.0022149460390210153, "step": 128080 }, { "epoch": 36.35821742832813, "grad_norm": 1.316356897354126, "learning_rate": 6.365739426625036e-05, "loss": 0.0019860155880451202, "step": 128090 }, { "epoch": 36.36105591825149, "grad_norm": 0.10104569047689438, "learning_rate": 6.365455577632699e-05, "loss": 0.0036674156785011292, "step": 128100 }, { "epoch": 36.36389440817485, "grad_norm": 0.07789231091737747, "learning_rate": 6.365171728640363e-05, "loss": 0.0013231365010142326, "step": 128110 }, { "epoch": 36.36673289809821, "grad_norm": 1.383571982383728, "learning_rate": 6.364887879648027e-05, "loss": 0.0022407697513699533, "step": 128120 }, { "epoch": 36.36957138802157, "grad_norm": 7.491725444793701, "learning_rate": 6.364604030655691e-05, "loss": 0.0041404839605093, "step": 128130 }, { "epoch": 36.37240987794493, "grad_norm": 13.068432807922363, "learning_rate": 6.364320181663355e-05, "loss": 0.0044719275087118145, "step": 128140 }, { "epoch": 36.375248367868295, "grad_norm": 6.452056884765625, "learning_rate": 6.36403633267102e-05, "loss": 0.004169797152280807, "step": 128150 }, { "epoch": 36.37808685779166, "grad_norm": 0.021738294512033463, "learning_rate": 6.363752483678684e-05, "loss": 0.0056602489203214645, "step": 128160 }, { "epoch": 36.380925347715014, "grad_norm": 0.13982366025447845, "learning_rate": 6.363468634686347e-05, "loss": 0.008925019204616547, "step": 128170 }, { "epoch": 36.38376383763838, "grad_norm": 2.598951816558838, "learning_rate": 6.363184785694011e-05, "loss": 0.0027162447571754456, "step": 128180 }, { "epoch": 36.38660232756174, "grad_norm": 0.17163889110088348, "learning_rate": 6.362900936701675e-05, "loss": 0.003969043493270874, "step": 128190 }, { "epoch": 36.389440817485095, "grad_norm": 0.03473769500851631, "learning_rate": 6.362617087709339e-05, "loss": 0.00586436279118061, "step": 128200 }, { "epoch": 36.39227930740846, "grad_norm": 0.13865157961845398, "learning_rate": 6.362333238717003e-05, "loss": 0.0024255817756056786, "step": 128210 }, { "epoch": 36.39511779733182, "grad_norm": 0.3917488753795624, "learning_rate": 6.362049389724667e-05, "loss": 0.00288480818271637, "step": 128220 }, { "epoch": 36.39795628725518, "grad_norm": 0.09312880039215088, "learning_rate": 6.36176554073233e-05, "loss": 0.00928426831960678, "step": 128230 }, { "epoch": 36.40079477717854, "grad_norm": 0.21374282240867615, "learning_rate": 6.361481691739994e-05, "loss": 0.007262980937957764, "step": 128240 }, { "epoch": 36.4036332671019, "grad_norm": 0.11260316520929337, "learning_rate": 6.361197842747658e-05, "loss": 0.0019806986674666405, "step": 128250 }, { "epoch": 36.406471757025265, "grad_norm": 10.372431755065918, "learning_rate": 6.360913993755322e-05, "loss": 0.01027105525135994, "step": 128260 }, { "epoch": 36.40931024694862, "grad_norm": 1.049002766609192, "learning_rate": 6.360630144762987e-05, "loss": 0.0022628437727689743, "step": 128270 }, { "epoch": 36.412148736871984, "grad_norm": 1.1305917501449585, "learning_rate": 6.360346295770651e-05, "loss": 0.002162879146635532, "step": 128280 }, { "epoch": 36.41498722679535, "grad_norm": 0.3266650140285492, "learning_rate": 6.360062446778315e-05, "loss": 0.002702903561294079, "step": 128290 }, { "epoch": 36.4178257167187, "grad_norm": 0.6826332807540894, "learning_rate": 6.359778597785978e-05, "loss": 0.006277526170015335, "step": 128300 }, { "epoch": 36.420664206642066, "grad_norm": 1.6387525796890259, "learning_rate": 6.359494748793642e-05, "loss": 0.005201103165745735, "step": 128310 }, { "epoch": 36.42350269656543, "grad_norm": 6.370752334594727, "learning_rate": 6.359210899801306e-05, "loss": 0.007139098644256592, "step": 128320 }, { "epoch": 36.42634118648879, "grad_norm": 0.5603417754173279, "learning_rate": 6.35892705080897e-05, "loss": 0.006142812967300415, "step": 128330 }, { "epoch": 36.42917967641215, "grad_norm": 1.1552526950836182, "learning_rate": 6.358643201816634e-05, "loss": 0.002818172238767147, "step": 128340 }, { "epoch": 36.43201816633551, "grad_norm": 2.741763114929199, "learning_rate": 6.358359352824298e-05, "loss": 0.005219575390219689, "step": 128350 }, { "epoch": 36.43485665625887, "grad_norm": 0.9224965572357178, "learning_rate": 6.358075503831961e-05, "loss": 0.003419680893421173, "step": 128360 }, { "epoch": 36.43769514618223, "grad_norm": 3.190770387649536, "learning_rate": 6.357791654839625e-05, "loss": 0.0038978178054094315, "step": 128370 }, { "epoch": 36.44053363610559, "grad_norm": 0.05639544874429703, "learning_rate": 6.35750780584729e-05, "loss": 0.009301922470331191, "step": 128380 }, { "epoch": 36.443372126028954, "grad_norm": 9.033286094665527, "learning_rate": 6.357223956854954e-05, "loss": 0.010313080996274948, "step": 128390 }, { "epoch": 36.44621061595231, "grad_norm": 0.0202597938477993, "learning_rate": 6.356940107862618e-05, "loss": 0.0026626212522387503, "step": 128400 }, { "epoch": 36.44904910587567, "grad_norm": 0.4026896357536316, "learning_rate": 6.356656258870282e-05, "loss": 0.004579829424619675, "step": 128410 }, { "epoch": 36.451887595799036, "grad_norm": 6.656915187835693, "learning_rate": 6.356372409877946e-05, "loss": 0.018889476358890534, "step": 128420 }, { "epoch": 36.4547260857224, "grad_norm": 0.5472156405448914, "learning_rate": 6.356088560885609e-05, "loss": 0.017475323379039766, "step": 128430 }, { "epoch": 36.457564575645755, "grad_norm": 0.24752402305603027, "learning_rate": 6.355804711893273e-05, "loss": 0.005521754175424576, "step": 128440 }, { "epoch": 36.46040306556912, "grad_norm": 2.5118281841278076, "learning_rate": 6.355520862900937e-05, "loss": 0.005071083456277848, "step": 128450 }, { "epoch": 36.46324155549248, "grad_norm": 0.0630308985710144, "learning_rate": 6.3552370139086e-05, "loss": 0.004758365452289581, "step": 128460 }, { "epoch": 36.466080045415836, "grad_norm": 0.12117838859558105, "learning_rate": 6.354953164916265e-05, "loss": 0.008446036279201508, "step": 128470 }, { "epoch": 36.4689185353392, "grad_norm": 20.681432723999023, "learning_rate": 6.35466931592393e-05, "loss": 0.008392959833145142, "step": 128480 }, { "epoch": 36.47175702526256, "grad_norm": 12.232525825500488, "learning_rate": 6.354385466931592e-05, "loss": 0.009375204145908356, "step": 128490 }, { "epoch": 36.47459551518592, "grad_norm": 0.1576116383075714, "learning_rate": 6.354101617939256e-05, "loss": 0.0052847139537334446, "step": 128500 }, { "epoch": 36.47459551518592, "eval_accuracy": 0.972849240160234, "eval_loss": 0.09215620160102844, "eval_runtime": 32.7131, "eval_samples_per_second": 480.756, "eval_steps_per_second": 7.52, "step": 128500 }, { "epoch": 36.47743400510928, "grad_norm": 0.6093406081199646, "learning_rate": 6.35381776894692e-05, "loss": 0.012586894631385803, "step": 128510 }, { "epoch": 36.480272495032644, "grad_norm": 0.14331737160682678, "learning_rate": 6.353533919954585e-05, "loss": 0.005738149583339691, "step": 128520 }, { "epoch": 36.48311098495601, "grad_norm": 2.3704588413238525, "learning_rate": 6.353250070962249e-05, "loss": 0.01546432077884674, "step": 128530 }, { "epoch": 36.48594947487936, "grad_norm": 0.11676281690597534, "learning_rate": 6.352966221969913e-05, "loss": 0.008744961023330689, "step": 128540 }, { "epoch": 36.488787964802725, "grad_norm": 2.250260353088379, "learning_rate": 6.352682372977577e-05, "loss": 0.004321060329675675, "step": 128550 }, { "epoch": 36.49162645472609, "grad_norm": 0.09385251998901367, "learning_rate": 6.35239852398524e-05, "loss": 0.00914500504732132, "step": 128560 }, { "epoch": 36.494464944649444, "grad_norm": 0.29716208577156067, "learning_rate": 6.352114674992904e-05, "loss": 0.006280108541250229, "step": 128570 }, { "epoch": 36.49730343457281, "grad_norm": 0.21534784138202667, "learning_rate": 6.351830826000568e-05, "loss": 0.005754648894071579, "step": 128580 }, { "epoch": 36.50014192449617, "grad_norm": 0.10736148804426193, "learning_rate": 6.351546977008231e-05, "loss": 0.004745303839445114, "step": 128590 }, { "epoch": 36.502980414419525, "grad_norm": 0.3634936511516571, "learning_rate": 6.351263128015896e-05, "loss": 0.007020667940378189, "step": 128600 }, { "epoch": 36.50581890434289, "grad_norm": 4.621170520782471, "learning_rate": 6.35097927902356e-05, "loss": 0.005616667121648789, "step": 128610 }, { "epoch": 36.50865739426625, "grad_norm": 0.48352116346359253, "learning_rate": 6.350695430031223e-05, "loss": 0.003449523076415062, "step": 128620 }, { "epoch": 36.511495884189614, "grad_norm": 1.4400978088378906, "learning_rate": 6.350411581038887e-05, "loss": 0.015318857133388519, "step": 128630 }, { "epoch": 36.51433437411297, "grad_norm": 0.6999803781509399, "learning_rate": 6.350127732046552e-05, "loss": 0.005331846326589585, "step": 128640 }, { "epoch": 36.51717286403633, "grad_norm": 1.2224411964416504, "learning_rate": 6.349843883054216e-05, "loss": 0.0011890539899468422, "step": 128650 }, { "epoch": 36.520011353959696, "grad_norm": 3.0679917335510254, "learning_rate": 6.34956003406188e-05, "loss": 0.0034409984946250916, "step": 128660 }, { "epoch": 36.52284984388305, "grad_norm": 0.5644010901451111, "learning_rate": 6.349276185069544e-05, "loss": 0.0022447856143116953, "step": 128670 }, { "epoch": 36.525688333806414, "grad_norm": 6.425459384918213, "learning_rate": 6.348992336077208e-05, "loss": 0.0019124269485473633, "step": 128680 }, { "epoch": 36.52852682372978, "grad_norm": 0.07166341692209244, "learning_rate": 6.348708487084871e-05, "loss": 0.0013645041733980178, "step": 128690 }, { "epoch": 36.53136531365314, "grad_norm": 7.907047748565674, "learning_rate": 6.348424638092535e-05, "loss": 0.007405373454093933, "step": 128700 }, { "epoch": 36.534203803576496, "grad_norm": 17.879535675048828, "learning_rate": 6.348140789100199e-05, "loss": 0.017444223165512085, "step": 128710 }, { "epoch": 36.53704229349986, "grad_norm": 0.2733509838581085, "learning_rate": 6.347856940107862e-05, "loss": 0.01838797926902771, "step": 128720 }, { "epoch": 36.53988078342322, "grad_norm": 0.6011088490486145, "learning_rate": 6.347573091115527e-05, "loss": 0.0011929968371987344, "step": 128730 }, { "epoch": 36.54271927334658, "grad_norm": 0.311436265707016, "learning_rate": 6.347289242123192e-05, "loss": 0.0064258076250553135, "step": 128740 }, { "epoch": 36.54555776326994, "grad_norm": 0.18849098682403564, "learning_rate": 6.347005393130854e-05, "loss": 0.0012023285031318664, "step": 128750 }, { "epoch": 36.5483962531933, "grad_norm": 0.28044795989990234, "learning_rate": 6.346721544138518e-05, "loss": 0.001198618859052658, "step": 128760 }, { "epoch": 36.55123474311666, "grad_norm": 0.03143656253814697, "learning_rate": 6.346437695146183e-05, "loss": 0.0015751926228404045, "step": 128770 }, { "epoch": 36.55407323304002, "grad_norm": 2.046753406524658, "learning_rate": 6.346153846153847e-05, "loss": 0.001050523854792118, "step": 128780 }, { "epoch": 36.556911722963385, "grad_norm": 0.2764652967453003, "learning_rate": 6.34586999716151e-05, "loss": 0.010814299434423446, "step": 128790 }, { "epoch": 36.55975021288675, "grad_norm": 0.03935953602194786, "learning_rate": 6.345586148169175e-05, "loss": 0.0011992184445261955, "step": 128800 }, { "epoch": 36.5625887028101, "grad_norm": 0.3302364945411682, "learning_rate": 6.345302299176839e-05, "loss": 0.0026190012693405153, "step": 128810 }, { "epoch": 36.565427192733466, "grad_norm": 0.7445336580276489, "learning_rate": 6.345018450184502e-05, "loss": 0.001105300709605217, "step": 128820 }, { "epoch": 36.56826568265683, "grad_norm": 0.13778544962406158, "learning_rate": 6.344734601192166e-05, "loss": 0.0033204302191734312, "step": 128830 }, { "epoch": 36.571104172580185, "grad_norm": 1.4326865673065186, "learning_rate": 6.34445075219983e-05, "loss": 0.0022096866741776466, "step": 128840 }, { "epoch": 36.57394266250355, "grad_norm": 7.33085823059082, "learning_rate": 6.344166903207493e-05, "loss": 0.0032810993492603303, "step": 128850 }, { "epoch": 36.57678115242691, "grad_norm": 0.8473078012466431, "learning_rate": 6.343883054215158e-05, "loss": 0.002782110869884491, "step": 128860 }, { "epoch": 36.579619642350266, "grad_norm": 0.04309206083416939, "learning_rate": 6.343599205222823e-05, "loss": 0.005980565771460533, "step": 128870 }, { "epoch": 36.58245813227363, "grad_norm": 0.10303124040365219, "learning_rate": 6.343315356230485e-05, "loss": 0.004922766610980034, "step": 128880 }, { "epoch": 36.58529662219699, "grad_norm": 0.613318681716919, "learning_rate": 6.34303150723815e-05, "loss": 0.010484806448221206, "step": 128890 }, { "epoch": 36.588135112120355, "grad_norm": 1.4879721403121948, "learning_rate": 6.342747658245814e-05, "loss": 0.0008864721283316612, "step": 128900 }, { "epoch": 36.59097360204371, "grad_norm": 0.014926394447684288, "learning_rate": 6.342463809253478e-05, "loss": 0.002685549668967724, "step": 128910 }, { "epoch": 36.593812091967074, "grad_norm": 0.08097674697637558, "learning_rate": 6.34217996026114e-05, "loss": 0.009730508178472519, "step": 128920 }, { "epoch": 36.59665058189044, "grad_norm": 6.7847161293029785, "learning_rate": 6.341896111268806e-05, "loss": 0.003635186329483986, "step": 128930 }, { "epoch": 36.59948907181379, "grad_norm": 0.748887836933136, "learning_rate": 6.341612262276469e-05, "loss": 0.020578068494796754, "step": 128940 }, { "epoch": 36.602327561737155, "grad_norm": 6.1447434425354, "learning_rate": 6.341328413284133e-05, "loss": 0.007363961637020111, "step": 128950 }, { "epoch": 36.60516605166052, "grad_norm": 0.1851801574230194, "learning_rate": 6.341044564291797e-05, "loss": 0.008729495853185654, "step": 128960 }, { "epoch": 36.608004541583874, "grad_norm": 0.18269306421279907, "learning_rate": 6.340760715299461e-05, "loss": 0.013525110483169556, "step": 128970 }, { "epoch": 36.61084303150724, "grad_norm": 16.460660934448242, "learning_rate": 6.340476866307124e-05, "loss": 0.01247837245464325, "step": 128980 }, { "epoch": 36.6136815214306, "grad_norm": 2.8644165992736816, "learning_rate": 6.340193017314788e-05, "loss": 0.014566735923290252, "step": 128990 }, { "epoch": 36.61652001135396, "grad_norm": 0.42880287766456604, "learning_rate": 6.339909168322454e-05, "loss": 0.0036171190440654755, "step": 129000 }, { "epoch": 36.61652001135396, "eval_accuracy": 0.9737394290074395, "eval_loss": 0.09346146136522293, "eval_runtime": 32.3784, "eval_samples_per_second": 485.725, "eval_steps_per_second": 7.598, "step": 129000 }, { "epoch": 36.61935850127732, "grad_norm": 2.3125431537628174, "learning_rate": 6.339625319330116e-05, "loss": 0.005539394170045853, "step": 129010 }, { "epoch": 36.62219699120068, "grad_norm": 1.638205885887146, "learning_rate": 6.33934147033778e-05, "loss": 0.0030544910579919813, "step": 129020 }, { "epoch": 36.625035481124044, "grad_norm": 0.20315228402614594, "learning_rate": 6.339057621345445e-05, "loss": 0.006136573851108551, "step": 129030 }, { "epoch": 36.6278739710474, "grad_norm": 4.5584397315979, "learning_rate": 6.338773772353108e-05, "loss": 0.0066944994032382965, "step": 129040 }, { "epoch": 36.63071246097076, "grad_norm": 0.9973592758178711, "learning_rate": 6.338489923360772e-05, "loss": 0.008231399208307266, "step": 129050 }, { "epoch": 36.633550950894126, "grad_norm": 2.0751712322235107, "learning_rate": 6.338206074368437e-05, "loss": 0.007291973382234573, "step": 129060 }, { "epoch": 36.63638944081748, "grad_norm": 1.9103273153305054, "learning_rate": 6.3379222253761e-05, "loss": 0.01072971075773239, "step": 129070 }, { "epoch": 36.639227930740844, "grad_norm": 0.23109903931617737, "learning_rate": 6.337638376383764e-05, "loss": 0.008249661326408387, "step": 129080 }, { "epoch": 36.64206642066421, "grad_norm": 0.6804870963096619, "learning_rate": 6.337354527391428e-05, "loss": 0.004257912933826447, "step": 129090 }, { "epoch": 36.64490491058757, "grad_norm": 1.897863507270813, "learning_rate": 6.337070678399092e-05, "loss": 0.0033970408141613005, "step": 129100 }, { "epoch": 36.647743400510926, "grad_norm": 0.8044957518577576, "learning_rate": 6.336786829406755e-05, "loss": 0.007437290251255035, "step": 129110 }, { "epoch": 36.65058189043429, "grad_norm": 7.828952789306641, "learning_rate": 6.336502980414419e-05, "loss": 0.0060139715671539305, "step": 129120 }, { "epoch": 36.65342038035765, "grad_norm": 4.810834884643555, "learning_rate": 6.336219131422085e-05, "loss": 0.003734876960515976, "step": 129130 }, { "epoch": 36.65625887028101, "grad_norm": 0.24256737530231476, "learning_rate": 6.335935282429748e-05, "loss": 0.003031609021127224, "step": 129140 }, { "epoch": 36.65909736020437, "grad_norm": 1.8751940727233887, "learning_rate": 6.335651433437412e-05, "loss": 0.0058440230786800385, "step": 129150 }, { "epoch": 36.66193585012773, "grad_norm": 0.9842199683189392, "learning_rate": 6.335367584445076e-05, "loss": 0.0043347708880901335, "step": 129160 }, { "epoch": 36.664774340051096, "grad_norm": 4.380272388458252, "learning_rate": 6.335083735452739e-05, "loss": 0.011398304998874665, "step": 129170 }, { "epoch": 36.66761282997445, "grad_norm": 0.07377107441425323, "learning_rate": 6.334799886460403e-05, "loss": 0.004765710234642029, "step": 129180 }, { "epoch": 36.670451319897815, "grad_norm": 0.3610929250717163, "learning_rate": 6.334516037468067e-05, "loss": 0.006116589903831482, "step": 129190 }, { "epoch": 36.67328980982118, "grad_norm": 7.359393119812012, "learning_rate": 6.334232188475731e-05, "loss": 0.002503790892660618, "step": 129200 }, { "epoch": 36.67612829974453, "grad_norm": 0.24853478372097015, "learning_rate": 6.333948339483395e-05, "loss": 0.003330572694540024, "step": 129210 }, { "epoch": 36.678966789667896, "grad_norm": 0.043273068964481354, "learning_rate": 6.333664490491059e-05, "loss": 0.0007842764258384705, "step": 129220 }, { "epoch": 36.68180527959126, "grad_norm": 0.026256702840328217, "learning_rate": 6.333380641498723e-05, "loss": 0.004211011528968811, "step": 129230 }, { "epoch": 36.684643769514615, "grad_norm": 7.325388431549072, "learning_rate": 6.333096792506386e-05, "loss": 0.013927152752876282, "step": 129240 }, { "epoch": 36.68748225943798, "grad_norm": 0.10616528242826462, "learning_rate": 6.33281294351405e-05, "loss": 0.009010542184114456, "step": 129250 }, { "epoch": 36.69032074936134, "grad_norm": 0.27823007106781006, "learning_rate": 6.332529094521716e-05, "loss": 0.004000075161457062, "step": 129260 }, { "epoch": 36.693159239284704, "grad_norm": 0.15269601345062256, "learning_rate": 6.332245245529379e-05, "loss": 0.007821942120790482, "step": 129270 }, { "epoch": 36.69599772920806, "grad_norm": 0.46389326453208923, "learning_rate": 6.331961396537043e-05, "loss": 0.0022553984075784684, "step": 129280 }, { "epoch": 36.69883621913142, "grad_norm": 1.809531331062317, "learning_rate": 6.331677547544707e-05, "loss": 0.0018227126449346543, "step": 129290 }, { "epoch": 36.701674709054785, "grad_norm": 9.893295288085938, "learning_rate": 6.33139369855237e-05, "loss": 0.004665595665574074, "step": 129300 }, { "epoch": 36.70451319897814, "grad_norm": 0.1619918793439865, "learning_rate": 6.331109849560034e-05, "loss": 0.0009306618943810463, "step": 129310 }, { "epoch": 36.707351688901504, "grad_norm": 0.12259563058614731, "learning_rate": 6.330826000567698e-05, "loss": 0.006989867985248565, "step": 129320 }, { "epoch": 36.71019017882487, "grad_norm": 6.396764755249023, "learning_rate": 6.330542151575362e-05, "loss": 0.0029924996197223663, "step": 129330 }, { "epoch": 36.71302866874822, "grad_norm": 0.33126208186149597, "learning_rate": 6.330258302583026e-05, "loss": 0.0009299600496888161, "step": 129340 }, { "epoch": 36.715867158671585, "grad_norm": 0.2216186374425888, "learning_rate": 6.32997445359069e-05, "loss": 0.0019865959882736208, "step": 129350 }, { "epoch": 36.71870564859495, "grad_norm": 0.15415926277637482, "learning_rate": 6.329690604598355e-05, "loss": 0.0022443262860178947, "step": 129360 }, { "epoch": 36.72154413851831, "grad_norm": 1.9715499877929688, "learning_rate": 6.329406755606017e-05, "loss": 0.0027319744229316713, "step": 129370 }, { "epoch": 36.72438262844167, "grad_norm": 2.377908706665039, "learning_rate": 6.329122906613681e-05, "loss": 0.0027299953624606134, "step": 129380 }, { "epoch": 36.72722111836503, "grad_norm": 0.24208156764507294, "learning_rate": 6.328839057621346e-05, "loss": 0.006608296930789947, "step": 129390 }, { "epoch": 36.73005960828839, "grad_norm": 0.010012074373662472, "learning_rate": 6.32855520862901e-05, "loss": 0.00527685359120369, "step": 129400 }, { "epoch": 36.73289809821175, "grad_norm": 0.16252736747264862, "learning_rate": 6.328271359636674e-05, "loss": 0.005410671234130859, "step": 129410 }, { "epoch": 36.73573658813511, "grad_norm": 16.146242141723633, "learning_rate": 6.327987510644338e-05, "loss": 0.01796700358390808, "step": 129420 }, { "epoch": 36.738575078058474, "grad_norm": 14.447863578796387, "learning_rate": 6.327703661652001e-05, "loss": 0.010531289875507355, "step": 129430 }, { "epoch": 36.74141356798184, "grad_norm": 18.59294319152832, "learning_rate": 6.327419812659665e-05, "loss": 0.006996383517980575, "step": 129440 }, { "epoch": 36.74425205790519, "grad_norm": 0.24117887020111084, "learning_rate": 6.327135963667329e-05, "loss": 0.005534819141030311, "step": 129450 }, { "epoch": 36.747090547828556, "grad_norm": 2.1923325061798096, "learning_rate": 6.326852114674993e-05, "loss": 0.0020588096231222153, "step": 129460 }, { "epoch": 36.74992903775192, "grad_norm": 0.23226113617420197, "learning_rate": 6.326568265682657e-05, "loss": 0.0009400393813848495, "step": 129470 }, { "epoch": 36.752767527675275, "grad_norm": 0.8297702074050903, "learning_rate": 6.326284416690321e-05, "loss": 0.006173324584960937, "step": 129480 }, { "epoch": 36.75560601759864, "grad_norm": 2.8535878658294678, "learning_rate": 6.326000567697986e-05, "loss": 0.004022371023893356, "step": 129490 }, { "epoch": 36.758444507522, "grad_norm": 1.3793009519577026, "learning_rate": 6.325716718705648e-05, "loss": 0.0027482248842716217, "step": 129500 }, { "epoch": 36.758444507522, "eval_accuracy": 0.9741209385133847, "eval_loss": 0.08918464928865433, "eval_runtime": 31.6225, "eval_samples_per_second": 497.335, "eval_steps_per_second": 7.779, "step": 129500 }, { "epoch": 36.761282997445356, "grad_norm": 0.8029513955116272, "learning_rate": 6.325432869713313e-05, "loss": 0.00414060652256012, "step": 129510 }, { "epoch": 36.76412148736872, "grad_norm": 0.8600261807441711, "learning_rate": 6.325149020720977e-05, "loss": 0.004316479340195656, "step": 129520 }, { "epoch": 36.76695997729208, "grad_norm": 19.443056106567383, "learning_rate": 6.324865171728641e-05, "loss": 0.025584471225738526, "step": 129530 }, { "epoch": 36.769798467215445, "grad_norm": 0.18524987995624542, "learning_rate": 6.324581322736305e-05, "loss": 0.003167290985584259, "step": 129540 }, { "epoch": 36.7726369571388, "grad_norm": 0.6329717636108398, "learning_rate": 6.324297473743969e-05, "loss": 0.006857694685459137, "step": 129550 }, { "epoch": 36.77547544706216, "grad_norm": 0.5915277004241943, "learning_rate": 6.324013624751632e-05, "loss": 0.004746244102716446, "step": 129560 }, { "epoch": 36.778313936985526, "grad_norm": 3.187633752822876, "learning_rate": 6.323729775759296e-05, "loss": 0.012950533628463745, "step": 129570 }, { "epoch": 36.78115242690888, "grad_norm": 2.5667121410369873, "learning_rate": 6.32344592676696e-05, "loss": 0.00438239574432373, "step": 129580 }, { "epoch": 36.783990916832245, "grad_norm": 4.938164234161377, "learning_rate": 6.323162077774624e-05, "loss": 0.017377015948295594, "step": 129590 }, { "epoch": 36.78682940675561, "grad_norm": 0.24072721600532532, "learning_rate": 6.322878228782288e-05, "loss": 0.011724398285150529, "step": 129600 }, { "epoch": 36.789667896678964, "grad_norm": 0.3767806589603424, "learning_rate": 6.322594379789953e-05, "loss": 0.022611044347286224, "step": 129610 }, { "epoch": 36.79250638660233, "grad_norm": 0.3283828794956207, "learning_rate": 6.322310530797617e-05, "loss": 0.0015596816316246986, "step": 129620 }, { "epoch": 36.79534487652569, "grad_norm": 0.47462981939315796, "learning_rate": 6.32202668180528e-05, "loss": 0.009086674451828003, "step": 129630 }, { "epoch": 36.79818336644905, "grad_norm": 0.18554306030273438, "learning_rate": 6.321742832812944e-05, "loss": 0.008681416511535645, "step": 129640 }, { "epoch": 36.80102185637241, "grad_norm": 1.013665795326233, "learning_rate": 6.321458983820608e-05, "loss": 0.0018669061362743377, "step": 129650 }, { "epoch": 36.80386034629577, "grad_norm": 0.05997001379728317, "learning_rate": 6.321175134828272e-05, "loss": 0.00434509888291359, "step": 129660 }, { "epoch": 36.806698836219134, "grad_norm": 0.37173959612846375, "learning_rate": 6.320891285835936e-05, "loss": 0.005068081989884376, "step": 129670 }, { "epoch": 36.80953732614249, "grad_norm": 0.19517859816551208, "learning_rate": 6.3206074368436e-05, "loss": 0.010366223752498627, "step": 129680 }, { "epoch": 36.81237581606585, "grad_norm": 0.019192226231098175, "learning_rate": 6.320323587851263e-05, "loss": 0.0025984589010477066, "step": 129690 }, { "epoch": 36.815214305989215, "grad_norm": 0.21634961664676666, "learning_rate": 6.320039738858927e-05, "loss": 0.003962265327572823, "step": 129700 }, { "epoch": 36.81805279591257, "grad_norm": 4.878888130187988, "learning_rate": 6.319755889866591e-05, "loss": 0.0021356716752052307, "step": 129710 }, { "epoch": 36.820891285835934, "grad_norm": 1.1203750371932983, "learning_rate": 6.319472040874255e-05, "loss": 0.0032168880105018617, "step": 129720 }, { "epoch": 36.8237297757593, "grad_norm": 11.216403007507324, "learning_rate": 6.31918819188192e-05, "loss": 0.018211647868156433, "step": 129730 }, { "epoch": 36.82656826568266, "grad_norm": 0.4733777344226837, "learning_rate": 6.318904342889584e-05, "loss": 0.0008802158758044243, "step": 129740 }, { "epoch": 36.829406755606016, "grad_norm": 3.07503080368042, "learning_rate": 6.318620493897248e-05, "loss": 0.0013648156076669694, "step": 129750 }, { "epoch": 36.83224524552938, "grad_norm": 0.6883289217948914, "learning_rate": 6.31833664490491e-05, "loss": 0.003224961832165718, "step": 129760 }, { "epoch": 36.83508373545274, "grad_norm": 2.002540111541748, "learning_rate": 6.318081180811809e-05, "loss": 0.0060753501951694485, "step": 129770 }, { "epoch": 36.8379222253761, "grad_norm": 1.736228346824646, "learning_rate": 6.317797331819473e-05, "loss": 0.005475520342588425, "step": 129780 }, { "epoch": 36.84076071529946, "grad_norm": 0.2607590854167938, "learning_rate": 6.317513482827137e-05, "loss": 0.010686306655406952, "step": 129790 }, { "epoch": 36.84359920522282, "grad_norm": 6.973796367645264, "learning_rate": 6.317229633834801e-05, "loss": 0.019275836646556854, "step": 129800 }, { "epoch": 36.84643769514618, "grad_norm": 0.1851673126220703, "learning_rate": 6.316945784842464e-05, "loss": 0.020482569932937622, "step": 129810 }, { "epoch": 36.84927618506954, "grad_norm": 0.29992789030075073, "learning_rate": 6.316661935850128e-05, "loss": 0.010313406586647034, "step": 129820 }, { "epoch": 36.852114674992904, "grad_norm": 0.8173611760139465, "learning_rate": 6.316378086857792e-05, "loss": 0.009486261755228043, "step": 129830 }, { "epoch": 36.85495316491627, "grad_norm": 1.0202186107635498, "learning_rate": 6.316094237865456e-05, "loss": 0.0014545038342475892, "step": 129840 }, { "epoch": 36.85779165483962, "grad_norm": 2.476170063018799, "learning_rate": 6.31581038887312e-05, "loss": 0.01063292995095253, "step": 129850 }, { "epoch": 36.860630144762986, "grad_norm": 0.5094186663627625, "learning_rate": 6.315526539880784e-05, "loss": 0.004208836331963539, "step": 129860 }, { "epoch": 36.86346863468635, "grad_norm": 0.13013063371181488, "learning_rate": 6.315242690888447e-05, "loss": 0.003684304654598236, "step": 129870 }, { "epoch": 36.866307124609705, "grad_norm": 0.17290210723876953, "learning_rate": 6.314958841896111e-05, "loss": 0.00754975825548172, "step": 129880 }, { "epoch": 36.86914561453307, "grad_norm": 1.6510149240493774, "learning_rate": 6.314674992903775e-05, "loss": 0.008528110384941102, "step": 129890 }, { "epoch": 36.87198410445643, "grad_norm": 4.656164169311523, "learning_rate": 6.31439114391144e-05, "loss": 0.00805177241563797, "step": 129900 }, { "epoch": 36.87482259437979, "grad_norm": 0.12451548129320145, "learning_rate": 6.314107294919104e-05, "loss": 0.010972409695386886, "step": 129910 }, { "epoch": 36.87766108430315, "grad_norm": 0.8624584078788757, "learning_rate": 6.313823445926768e-05, "loss": 0.015606716275215149, "step": 129920 }, { "epoch": 36.88049957422651, "grad_norm": 0.5240159034729004, "learning_rate": 6.31353959693443e-05, "loss": 0.0037956714630126955, "step": 129930 }, { "epoch": 36.883338064149875, "grad_norm": 0.13141736388206482, "learning_rate": 6.313255747942095e-05, "loss": 0.009870089590549469, "step": 129940 }, { "epoch": 36.88617655407323, "grad_norm": 0.42530563473701477, "learning_rate": 6.312971898949759e-05, "loss": 0.0013246871531009674, "step": 129950 }, { "epoch": 36.889015043996594, "grad_norm": 0.3775751292705536, "learning_rate": 6.312688049957423e-05, "loss": 0.0010652609169483184, "step": 129960 }, { "epoch": 36.89185353391996, "grad_norm": 0.2522916793823242, "learning_rate": 6.312404200965086e-05, "loss": 0.0014710525050759316, "step": 129970 }, { "epoch": 36.89469202384331, "grad_norm": 0.052453719079494476, "learning_rate": 6.312120351972751e-05, "loss": 0.003587979078292847, "step": 129980 }, { "epoch": 36.897530513766675, "grad_norm": 2.422349214553833, "learning_rate": 6.311836502980415e-05, "loss": 0.0016953453421592712, "step": 129990 }, { "epoch": 36.90036900369004, "grad_norm": 0.5440014004707336, "learning_rate": 6.311552653988078e-05, "loss": 0.0029727375134825707, "step": 130000 }, { "epoch": 36.90036900369004, "eval_accuracy": 0.9686526355948369, "eval_loss": 0.10726909339427948, "eval_runtime": 32.1793, "eval_samples_per_second": 488.731, "eval_steps_per_second": 7.645, "step": 130000 }, { "epoch": 36.9032074936134, "grad_norm": 0.03966816887259483, "learning_rate": 6.311268804995742e-05, "loss": 0.002824271284043789, "step": 130010 }, { "epoch": 36.90604598353676, "grad_norm": 4.140873432159424, "learning_rate": 6.310984956003407e-05, "loss": 0.012478328496217727, "step": 130020 }, { "epoch": 36.90888447346012, "grad_norm": 0.2851111590862274, "learning_rate": 6.31070110701107e-05, "loss": 0.015599347651004791, "step": 130030 }, { "epoch": 36.91172296338348, "grad_norm": 1.598029613494873, "learning_rate": 6.310417258018735e-05, "loss": 0.0022613028064370154, "step": 130040 }, { "epoch": 36.91456145330684, "grad_norm": 3.9007551670074463, "learning_rate": 6.310133409026399e-05, "loss": 0.005545905977487564, "step": 130050 }, { "epoch": 36.9173999432302, "grad_norm": 0.20168644189834595, "learning_rate": 6.309849560034062e-05, "loss": 0.0030916605144739153, "step": 130060 }, { "epoch": 36.920238433153564, "grad_norm": 0.7060447931289673, "learning_rate": 6.309565711041726e-05, "loss": 0.004274709150195122, "step": 130070 }, { "epoch": 36.92307692307692, "grad_norm": 12.111396789550781, "learning_rate": 6.30928186204939e-05, "loss": 0.0052076026797294615, "step": 130080 }, { "epoch": 36.92591541300028, "grad_norm": 14.857709884643555, "learning_rate": 6.308998013057054e-05, "loss": 0.012939468026161194, "step": 130090 }, { "epoch": 36.928753902923646, "grad_norm": 0.8208363056182861, "learning_rate": 6.308714164064717e-05, "loss": 0.0035960406064987184, "step": 130100 }, { "epoch": 36.93159239284701, "grad_norm": 10.019550323486328, "learning_rate": 6.308430315072382e-05, "loss": 0.008321501314640045, "step": 130110 }, { "epoch": 36.934430882770364, "grad_norm": 1.8831768035888672, "learning_rate": 6.308146466080047e-05, "loss": 0.004503098130226135, "step": 130120 }, { "epoch": 36.93726937269373, "grad_norm": 2.117215633392334, "learning_rate": 6.30786261708771e-05, "loss": 0.0022235911339521406, "step": 130130 }, { "epoch": 36.94010786261709, "grad_norm": 14.876653671264648, "learning_rate": 6.307578768095373e-05, "loss": 0.005610499531030655, "step": 130140 }, { "epoch": 36.942946352540446, "grad_norm": 0.5633611083030701, "learning_rate": 6.307294919103038e-05, "loss": 0.004958879947662353, "step": 130150 }, { "epoch": 36.94578484246381, "grad_norm": 0.24144604802131653, "learning_rate": 6.3070110701107e-05, "loss": 0.006815387308597565, "step": 130160 }, { "epoch": 36.94862333238717, "grad_norm": 0.05977444350719452, "learning_rate": 6.306727221118365e-05, "loss": 0.0021437158808112144, "step": 130170 }, { "epoch": 36.95146182231053, "grad_norm": 0.5437458753585815, "learning_rate": 6.30644337212603e-05, "loss": 0.0010036014020442963, "step": 130180 }, { "epoch": 36.95430031223389, "grad_norm": 0.2767156958580017, "learning_rate": 6.306159523133693e-05, "loss": 0.004242165386676789, "step": 130190 }, { "epoch": 36.95713880215725, "grad_norm": 0.9214121103286743, "learning_rate": 6.305875674141357e-05, "loss": 0.011180636286735535, "step": 130200 }, { "epoch": 36.959977292080616, "grad_norm": 1.5011546611785889, "learning_rate": 6.305591825149021e-05, "loss": 0.002765375375747681, "step": 130210 }, { "epoch": 36.96281578200397, "grad_norm": 3.544799327850342, "learning_rate": 6.305307976156685e-05, "loss": 0.0022524427622556686, "step": 130220 }, { "epoch": 36.965654271927335, "grad_norm": 6.53395414352417, "learning_rate": 6.305024127164348e-05, "loss": 0.008183082938194275, "step": 130230 }, { "epoch": 36.9684927618507, "grad_norm": 0.36019039154052734, "learning_rate": 6.304740278172014e-05, "loss": 0.010857483744621277, "step": 130240 }, { "epoch": 36.97133125177405, "grad_norm": 1.58556067943573, "learning_rate": 6.304456429179678e-05, "loss": 0.0022740768268704414, "step": 130250 }, { "epoch": 36.974169741697416, "grad_norm": 4.802194118499756, "learning_rate": 6.30417258018734e-05, "loss": 0.008049385994672776, "step": 130260 }, { "epoch": 36.97700823162078, "grad_norm": 11.787601470947266, "learning_rate": 6.303888731195005e-05, "loss": 0.007207253575325012, "step": 130270 }, { "epoch": 36.97984672154414, "grad_norm": 0.09446095675230026, "learning_rate": 6.303604882202669e-05, "loss": 0.001432538777589798, "step": 130280 }, { "epoch": 36.9826852114675, "grad_norm": 0.24842219054698944, "learning_rate": 6.303321033210331e-05, "loss": 0.0013066751882433892, "step": 130290 }, { "epoch": 36.98552370139086, "grad_norm": 0.7556681036949158, "learning_rate": 6.303037184217996e-05, "loss": 0.0037295877933502196, "step": 130300 }, { "epoch": 36.98836219131422, "grad_norm": 0.6380088925361633, "learning_rate": 6.302753335225661e-05, "loss": 0.00798134133219719, "step": 130310 }, { "epoch": 36.99120068123758, "grad_norm": 1.0664875507354736, "learning_rate": 6.302469486233324e-05, "loss": 0.0020342197269201277, "step": 130320 }, { "epoch": 36.99403917116094, "grad_norm": 3.7254788875579834, "learning_rate": 6.302185637240988e-05, "loss": 0.00406913086771965, "step": 130330 }, { "epoch": 36.996877661084305, "grad_norm": 1.4459738731384277, "learning_rate": 6.301901788248652e-05, "loss": 0.006305234879255295, "step": 130340 }, { "epoch": 36.99971615100766, "grad_norm": 0.06888779252767563, "learning_rate": 6.301617939256316e-05, "loss": 0.0030141718685626983, "step": 130350 }, { "epoch": 37.002554640931024, "grad_norm": 0.11672481894493103, "learning_rate": 6.301334090263979e-05, "loss": 0.0013808565214276313, "step": 130360 }, { "epoch": 37.00539313085439, "grad_norm": 0.5524692535400391, "learning_rate": 6.301050241271645e-05, "loss": 0.014604221284389495, "step": 130370 }, { "epoch": 37.00823162077775, "grad_norm": 0.11652400344610214, "learning_rate": 6.300766392279309e-05, "loss": 0.004034185409545898, "step": 130380 }, { "epoch": 37.011070110701105, "grad_norm": 0.07715091854333878, "learning_rate": 6.300482543286971e-05, "loss": 0.0007389606907963753, "step": 130390 }, { "epoch": 37.01390860062447, "grad_norm": 0.17035825550556183, "learning_rate": 6.300198694294636e-05, "loss": 0.005327817052602768, "step": 130400 }, { "epoch": 37.01674709054783, "grad_norm": 0.539364755153656, "learning_rate": 6.2999148453023e-05, "loss": 0.0017512964084744453, "step": 130410 }, { "epoch": 37.01958558047119, "grad_norm": 0.06850375980138779, "learning_rate": 6.299630996309963e-05, "loss": 0.0015660757198929787, "step": 130420 }, { "epoch": 37.02242407039455, "grad_norm": 0.20674671232700348, "learning_rate": 6.299347147317627e-05, "loss": 0.0029807770624756813, "step": 130430 }, { "epoch": 37.02526256031791, "grad_norm": 1.017618179321289, "learning_rate": 6.299063298325292e-05, "loss": 0.0009050402790307998, "step": 130440 }, { "epoch": 37.02810105024127, "grad_norm": 2.5087740421295166, "learning_rate": 6.298779449332955e-05, "loss": 0.0015734119340777397, "step": 130450 }, { "epoch": 37.03093954016463, "grad_norm": 7.713961124420166, "learning_rate": 6.298495600340619e-05, "loss": 0.002575315348803997, "step": 130460 }, { "epoch": 37.033778030087994, "grad_norm": 0.03889383748173714, "learning_rate": 6.298211751348283e-05, "loss": 0.002600409835577011, "step": 130470 }, { "epoch": 37.03661652001136, "grad_norm": 7.656311511993408, "learning_rate": 6.297927902355947e-05, "loss": 0.005116645619273186, "step": 130480 }, { "epoch": 37.03945500993471, "grad_norm": 0.04063142463564873, "learning_rate": 6.29764405336361e-05, "loss": 0.0005413470789790154, "step": 130490 }, { "epoch": 37.042293499858076, "grad_norm": 0.23168174922466278, "learning_rate": 6.297360204371274e-05, "loss": 0.0012384606525301933, "step": 130500 }, { "epoch": 37.042293499858076, "eval_accuracy": 0.9766007503020283, "eval_loss": 0.08327371627092361, "eval_runtime": 32.3173, "eval_samples_per_second": 486.644, "eval_steps_per_second": 7.612, "step": 130500 }, { "epoch": 37.04513198978144, "grad_norm": 0.24813707172870636, "learning_rate": 6.29707635537894e-05, "loss": 0.005601443350315094, "step": 130510 }, { "epoch": 37.047970479704794, "grad_norm": 5.603160381317139, "learning_rate": 6.296792506386603e-05, "loss": 0.0025729740038514136, "step": 130520 }, { "epoch": 37.05080896962816, "grad_norm": 1.1618857383728027, "learning_rate": 6.296508657394267e-05, "loss": 0.0021844714879989623, "step": 130530 }, { "epoch": 37.05364745955152, "grad_norm": 0.010869505815207958, "learning_rate": 6.296224808401931e-05, "loss": 0.0009341269731521606, "step": 130540 }, { "epoch": 37.056485949474876, "grad_norm": 0.26706835627555847, "learning_rate": 6.295940959409594e-05, "loss": 0.0025830110535025597, "step": 130550 }, { "epoch": 37.05932443939824, "grad_norm": 0.13113319873809814, "learning_rate": 6.295657110417258e-05, "loss": 0.0023242367431521417, "step": 130560 }, { "epoch": 37.0621629293216, "grad_norm": 0.12004057317972183, "learning_rate": 6.295373261424923e-05, "loss": 0.008387956023216247, "step": 130570 }, { "epoch": 37.065001419244965, "grad_norm": 4.2592692375183105, "learning_rate": 6.295089412432586e-05, "loss": 0.0021336326375603677, "step": 130580 }, { "epoch": 37.06783990916832, "grad_norm": 0.03199302777647972, "learning_rate": 6.29480556344025e-05, "loss": 0.0020484507083892824, "step": 130590 }, { "epoch": 37.07067839909168, "grad_norm": 0.16396111249923706, "learning_rate": 6.294521714447914e-05, "loss": 0.0009353829547762871, "step": 130600 }, { "epoch": 37.073516889015046, "grad_norm": 0.20396865904331207, "learning_rate": 6.294237865455578e-05, "loss": 0.0007122941315174103, "step": 130610 }, { "epoch": 37.0763553789384, "grad_norm": 0.14715063571929932, "learning_rate": 6.293954016463241e-05, "loss": 0.002794131264090538, "step": 130620 }, { "epoch": 37.079193868861765, "grad_norm": 0.18097048997879028, "learning_rate": 6.293670167470905e-05, "loss": 0.0017539788037538528, "step": 130630 }, { "epoch": 37.08203235878513, "grad_norm": 0.08686074614524841, "learning_rate": 6.29338631847857e-05, "loss": 0.0007848981767892838, "step": 130640 }, { "epoch": 37.08487084870849, "grad_norm": 1.241327166557312, "learning_rate": 6.293102469486234e-05, "loss": 0.0019084749743342399, "step": 130650 }, { "epoch": 37.087709338631846, "grad_norm": 0.35967665910720825, "learning_rate": 6.292818620493898e-05, "loss": 0.0025888266041874885, "step": 130660 }, { "epoch": 37.09054782855521, "grad_norm": 2.762559652328491, "learning_rate": 6.292534771501562e-05, "loss": 0.0031750939786434175, "step": 130670 }, { "epoch": 37.09338631847857, "grad_norm": 0.22140389680862427, "learning_rate": 6.292250922509225e-05, "loss": 0.0068203344941139225, "step": 130680 }, { "epoch": 37.09622480840193, "grad_norm": 12.226556777954102, "learning_rate": 6.291967073516889e-05, "loss": 0.005363423377275467, "step": 130690 }, { "epoch": 37.09906329832529, "grad_norm": 3.410160541534424, "learning_rate": 6.291683224524553e-05, "loss": 0.0042905699461698536, "step": 130700 }, { "epoch": 37.101901788248654, "grad_norm": 2.1112289428710938, "learning_rate": 6.291399375532217e-05, "loss": 0.0014471398666501046, "step": 130710 }, { "epoch": 37.10474027817201, "grad_norm": 0.09496190398931503, "learning_rate": 6.291115526539881e-05, "loss": 0.004353401437401772, "step": 130720 }, { "epoch": 37.10757876809537, "grad_norm": 0.034049637615680695, "learning_rate": 6.290831677547545e-05, "loss": 0.005679722875356674, "step": 130730 }, { "epoch": 37.110417258018735, "grad_norm": 0.053114213049411774, "learning_rate": 6.290547828555208e-05, "loss": 0.002184465527534485, "step": 130740 }, { "epoch": 37.1132557479421, "grad_norm": 1.4256616830825806, "learning_rate": 6.290263979562872e-05, "loss": 0.0043985359370708466, "step": 130750 }, { "epoch": 37.116094237865454, "grad_norm": 0.19390933215618134, "learning_rate": 6.289980130570536e-05, "loss": 0.008119472116231919, "step": 130760 }, { "epoch": 37.11893272778882, "grad_norm": 0.5172305107116699, "learning_rate": 6.2896962815782e-05, "loss": 0.004832695424556732, "step": 130770 }, { "epoch": 37.12177121771218, "grad_norm": 0.4204552173614502, "learning_rate": 6.289412432585865e-05, "loss": 0.003250330686569214, "step": 130780 }, { "epoch": 37.124609707635535, "grad_norm": 2.3222405910491943, "learning_rate": 6.289128583593529e-05, "loss": 0.001523638889193535, "step": 130790 }, { "epoch": 37.1274481975589, "grad_norm": 0.15209142863750458, "learning_rate": 6.288844734601193e-05, "loss": 0.0020307138562202453, "step": 130800 }, { "epoch": 37.13028668748226, "grad_norm": 0.00636715954169631, "learning_rate": 6.288560885608856e-05, "loss": 0.001542331837117672, "step": 130810 }, { "epoch": 37.13312517740562, "grad_norm": 0.9724351167678833, "learning_rate": 6.28827703661652e-05, "loss": 0.005102554336190224, "step": 130820 }, { "epoch": 37.13596366732898, "grad_norm": 0.2466776818037033, "learning_rate": 6.287993187624184e-05, "loss": 0.004802029579877853, "step": 130830 }, { "epoch": 37.13880215725234, "grad_norm": 0.03119310922920704, "learning_rate": 6.287709338631848e-05, "loss": 0.0008781157433986664, "step": 130840 }, { "epoch": 37.141640647175706, "grad_norm": 0.5078030824661255, "learning_rate": 6.287425489639512e-05, "loss": 0.003531506285071373, "step": 130850 }, { "epoch": 37.14447913709906, "grad_norm": 0.30165284872055054, "learning_rate": 6.287141640647176e-05, "loss": 0.0018650492653250693, "step": 130860 }, { "epoch": 37.147317627022424, "grad_norm": 2.7572038173675537, "learning_rate": 6.286857791654839e-05, "loss": 0.0033584870398044586, "step": 130870 }, { "epoch": 37.15015611694579, "grad_norm": 3.474074363708496, "learning_rate": 6.286573942662503e-05, "loss": 0.007018668949604035, "step": 130880 }, { "epoch": 37.15299460686914, "grad_norm": 0.03878495842218399, "learning_rate": 6.286290093670168e-05, "loss": 0.0035669267177581786, "step": 130890 }, { "epoch": 37.155833096792506, "grad_norm": 28.961444854736328, "learning_rate": 6.286006244677832e-05, "loss": 0.02030410021543503, "step": 130900 }, { "epoch": 37.15867158671587, "grad_norm": 0.2948603928089142, "learning_rate": 6.285722395685496e-05, "loss": 0.00799187421798706, "step": 130910 }, { "epoch": 37.161510076639225, "grad_norm": 0.05488612502813339, "learning_rate": 6.28543854669316e-05, "loss": 0.007093751430511474, "step": 130920 }, { "epoch": 37.16434856656259, "grad_norm": 5.548788070678711, "learning_rate": 6.285154697700824e-05, "loss": 0.002365946024656296, "step": 130930 }, { "epoch": 37.16718705648595, "grad_norm": 0.16475054621696472, "learning_rate": 6.284870848708487e-05, "loss": 0.0029524678364396097, "step": 130940 }, { "epoch": 37.17002554640931, "grad_norm": 11.743864059448242, "learning_rate": 6.284586999716151e-05, "loss": 0.012582795321941375, "step": 130950 }, { "epoch": 37.17286403633267, "grad_norm": 0.1441352814435959, "learning_rate": 6.284303150723815e-05, "loss": 0.0060310795903205875, "step": 130960 }, { "epoch": 37.17570252625603, "grad_norm": 1.0272866487503052, "learning_rate": 6.284019301731479e-05, "loss": 0.002225098945200443, "step": 130970 }, { "epoch": 37.178541016179395, "grad_norm": 2.3722569942474365, "learning_rate": 6.283735452739143e-05, "loss": 0.005828509852290153, "step": 130980 }, { "epoch": 37.18137950610275, "grad_norm": 0.10453873872756958, "learning_rate": 6.283451603746808e-05, "loss": 0.002018792927265167, "step": 130990 }, { "epoch": 37.18421799602611, "grad_norm": 1.2867122888565063, "learning_rate": 6.28316775475447e-05, "loss": 0.002536172792315483, "step": 131000 }, { "epoch": 37.18421799602611, "eval_accuracy": 0.9684618808418644, "eval_loss": 0.11624494940042496, "eval_runtime": 31.7395, "eval_samples_per_second": 495.503, "eval_steps_per_second": 7.751, "step": 131000 }, { "epoch": 37.187056485949476, "grad_norm": 0.9096093773841858, "learning_rate": 6.282883905762134e-05, "loss": 0.005689398199319839, "step": 131010 }, { "epoch": 37.18989497587283, "grad_norm": 2.2749664783477783, "learning_rate": 6.282600056769799e-05, "loss": 0.01010909229516983, "step": 131020 }, { "epoch": 37.192733465796195, "grad_norm": 0.16351597011089325, "learning_rate": 6.282316207777463e-05, "loss": 0.0020475007593631746, "step": 131030 }, { "epoch": 37.19557195571956, "grad_norm": 11.037381172180176, "learning_rate": 6.282032358785127e-05, "loss": 0.004963525384664535, "step": 131040 }, { "epoch": 37.19841044564292, "grad_norm": 0.036546166986227036, "learning_rate": 6.281748509792791e-05, "loss": 0.003959363698959351, "step": 131050 }, { "epoch": 37.20124893556628, "grad_norm": 0.3034837245941162, "learning_rate": 6.281464660800455e-05, "loss": 0.0020316850394010546, "step": 131060 }, { "epoch": 37.20408742548964, "grad_norm": 0.23153412342071533, "learning_rate": 6.281180811808118e-05, "loss": 0.0018282707780599595, "step": 131070 }, { "epoch": 37.206925915413, "grad_norm": 0.09012892097234726, "learning_rate": 6.280896962815782e-05, "loss": 0.0038454826921224595, "step": 131080 }, { "epoch": 37.20976440533636, "grad_norm": 5.106657981872559, "learning_rate": 6.280613113823446e-05, "loss": 0.0024235064163804056, "step": 131090 }, { "epoch": 37.21260289525972, "grad_norm": 4.090630054473877, "learning_rate": 6.280329264831109e-05, "loss": 0.0017037678509950638, "step": 131100 }, { "epoch": 37.215441385183084, "grad_norm": 0.12549522519111633, "learning_rate": 6.280045415838774e-05, "loss": 0.004420855641365051, "step": 131110 }, { "epoch": 37.21827987510645, "grad_norm": 0.2063904106616974, "learning_rate": 6.279761566846439e-05, "loss": 0.0009550334885716439, "step": 131120 }, { "epoch": 37.2211183650298, "grad_norm": 0.27155205607414246, "learning_rate": 6.279477717854101e-05, "loss": 0.008056730031967163, "step": 131130 }, { "epoch": 37.223956854953165, "grad_norm": 0.04324699193239212, "learning_rate": 6.279193868861766e-05, "loss": 0.0013077192008495331, "step": 131140 }, { "epoch": 37.22679534487653, "grad_norm": 1.5315698385238647, "learning_rate": 6.27891001986943e-05, "loss": 0.00098564513027668, "step": 131150 }, { "epoch": 37.229633834799884, "grad_norm": 0.23244625329971313, "learning_rate": 6.278626170877094e-05, "loss": 0.0010462000966072083, "step": 131160 }, { "epoch": 37.23247232472325, "grad_norm": 0.033028729259967804, "learning_rate": 6.278342321884758e-05, "loss": 0.003750918060541153, "step": 131170 }, { "epoch": 37.23531081464661, "grad_norm": 0.16220255196094513, "learning_rate": 6.278058472892422e-05, "loss": 0.001012563519179821, "step": 131180 }, { "epoch": 37.238149304569966, "grad_norm": 0.14952395856380463, "learning_rate": 6.277774623900086e-05, "loss": 0.004271767660975456, "step": 131190 }, { "epoch": 37.24098779449333, "grad_norm": 8.265573501586914, "learning_rate": 6.277490774907749e-05, "loss": 0.003409551829099655, "step": 131200 }, { "epoch": 37.24382628441669, "grad_norm": 0.04811207577586174, "learning_rate": 6.277206925915413e-05, "loss": 0.001033814065158367, "step": 131210 }, { "epoch": 37.246664774340054, "grad_norm": 0.06130290776491165, "learning_rate": 6.276923076923077e-05, "loss": 0.004459200054407119, "step": 131220 }, { "epoch": 37.24950326426341, "grad_norm": 5.857054233551025, "learning_rate": 6.27663922793074e-05, "loss": 0.0048430532217025755, "step": 131230 }, { "epoch": 37.25234175418677, "grad_norm": 0.901811957359314, "learning_rate": 6.276355378938406e-05, "loss": 0.010946673154830933, "step": 131240 }, { "epoch": 37.255180244110136, "grad_norm": 0.024879150092601776, "learning_rate": 6.27607152994607e-05, "loss": 0.0023593151941895484, "step": 131250 }, { "epoch": 37.25801873403349, "grad_norm": 0.037461698055267334, "learning_rate": 6.275787680953732e-05, "loss": 0.0031211191788315775, "step": 131260 }, { "epoch": 37.260857223956855, "grad_norm": 0.18380141258239746, "learning_rate": 6.275503831961397e-05, "loss": 0.0008625281974673271, "step": 131270 }, { "epoch": 37.26369571388022, "grad_norm": 1.1647462844848633, "learning_rate": 6.275219982969061e-05, "loss": 0.0018008915707468987, "step": 131280 }, { "epoch": 37.26653420380357, "grad_norm": 0.023566801100969315, "learning_rate": 6.274936133976725e-05, "loss": 0.001033123955130577, "step": 131290 }, { "epoch": 37.269372693726936, "grad_norm": 0.2482381910085678, "learning_rate": 6.274652284984388e-05, "loss": 0.002074230834841728, "step": 131300 }, { "epoch": 37.2722111836503, "grad_norm": 1.1313368082046509, "learning_rate": 6.274368435992053e-05, "loss": 0.001044745184481144, "step": 131310 }, { "epoch": 37.27504967357366, "grad_norm": 0.025125112384557724, "learning_rate": 6.274084586999717e-05, "loss": 0.001435760036110878, "step": 131320 }, { "epoch": 37.27788816349702, "grad_norm": 0.06331707537174225, "learning_rate": 6.27380073800738e-05, "loss": 0.005990580841898918, "step": 131330 }, { "epoch": 37.28072665342038, "grad_norm": 0.5997627973556519, "learning_rate": 6.273516889015044e-05, "loss": 0.0007789181545376778, "step": 131340 }, { "epoch": 37.28356514334374, "grad_norm": 0.22059839963912964, "learning_rate": 6.273233040022708e-05, "loss": 0.0006396165117621422, "step": 131350 }, { "epoch": 37.2864036332671, "grad_norm": 0.05978359654545784, "learning_rate": 6.272949191030371e-05, "loss": 0.0016806956380605697, "step": 131360 }, { "epoch": 37.28924212319046, "grad_norm": 0.11193135380744934, "learning_rate": 6.272665342038037e-05, "loss": 0.001833895407617092, "step": 131370 }, { "epoch": 37.292080613113825, "grad_norm": 2.195242404937744, "learning_rate": 6.272381493045701e-05, "loss": 0.0022886825725436212, "step": 131380 }, { "epoch": 37.29491910303718, "grad_norm": 0.3864086866378784, "learning_rate": 6.272097644053364e-05, "loss": 0.018172965943813325, "step": 131390 }, { "epoch": 37.297757592960544, "grad_norm": 0.077408567070961, "learning_rate": 6.271813795061028e-05, "loss": 0.012025503814220429, "step": 131400 }, { "epoch": 37.30059608288391, "grad_norm": 0.10832397639751434, "learning_rate": 6.271529946068692e-05, "loss": 0.004276793450117111, "step": 131410 }, { "epoch": 37.30343457280727, "grad_norm": 0.3043539822101593, "learning_rate": 6.271246097076356e-05, "loss": 0.009828600287437438, "step": 131420 }, { "epoch": 37.306273062730625, "grad_norm": 0.1570448875427246, "learning_rate": 6.270962248084019e-05, "loss": 0.024268218874931337, "step": 131430 }, { "epoch": 37.30911155265399, "grad_norm": 0.11421597003936768, "learning_rate": 6.270678399091684e-05, "loss": 0.0017327869310975075, "step": 131440 }, { "epoch": 37.31195004257735, "grad_norm": 0.1929979771375656, "learning_rate": 6.270394550099348e-05, "loss": 0.003777717798948288, "step": 131450 }, { "epoch": 37.31478853250071, "grad_norm": 0.5578688979148865, "learning_rate": 6.270110701107011e-05, "loss": 0.004466625303030014, "step": 131460 }, { "epoch": 37.31762702242407, "grad_norm": 0.1220560222864151, "learning_rate": 6.269826852114675e-05, "loss": 0.005531726032495498, "step": 131470 }, { "epoch": 37.32046551234743, "grad_norm": 0.7332008481025696, "learning_rate": 6.26954300312234e-05, "loss": 0.006013903766870499, "step": 131480 }, { "epoch": 37.323304002270795, "grad_norm": 0.1988469958305359, "learning_rate": 6.269259154130002e-05, "loss": 0.0027560507878661156, "step": 131490 }, { "epoch": 37.32614249219415, "grad_norm": 1.6989895105361938, "learning_rate": 6.268975305137666e-05, "loss": 0.0019672904163599014, "step": 131500 }, { "epoch": 37.32614249219415, "eval_accuracy": 0.9741209385133847, "eval_loss": 0.09308131784200668, "eval_runtime": 32.3375, "eval_samples_per_second": 486.34, "eval_steps_per_second": 7.607, "step": 131500 }, { "epoch": 37.328980982117514, "grad_norm": 1.309671401977539, "learning_rate": 6.268691456145332e-05, "loss": 0.002148168720304966, "step": 131510 }, { "epoch": 37.33181947204088, "grad_norm": 0.16804414987564087, "learning_rate": 6.268407607152995e-05, "loss": 0.006189702451229096, "step": 131520 }, { "epoch": 37.33465796196423, "grad_norm": 1.0050872564315796, "learning_rate": 6.268123758160659e-05, "loss": 0.002596258744597435, "step": 131530 }, { "epoch": 37.337496451887596, "grad_norm": 1.9548320770263672, "learning_rate": 6.267839909168323e-05, "loss": 0.0045948468148708345, "step": 131540 }, { "epoch": 37.34033494181096, "grad_norm": 0.19570277631282806, "learning_rate": 6.267556060175987e-05, "loss": 0.0014979248866438867, "step": 131550 }, { "epoch": 37.343173431734314, "grad_norm": 0.22376661002635956, "learning_rate": 6.26727221118365e-05, "loss": 0.0027515044435858726, "step": 131560 }, { "epoch": 37.34601192165768, "grad_norm": 1.8288902044296265, "learning_rate": 6.266988362191315e-05, "loss": 0.0031682685017585754, "step": 131570 }, { "epoch": 37.34885041158104, "grad_norm": 1.6682987213134766, "learning_rate": 6.266704513198978e-05, "loss": 0.018534722924232482, "step": 131580 }, { "epoch": 37.3516889015044, "grad_norm": 0.19539980590343475, "learning_rate": 6.266420664206642e-05, "loss": 0.005206095427274704, "step": 131590 }, { "epoch": 37.35452739142776, "grad_norm": 0.12350120395421982, "learning_rate": 6.266136815214306e-05, "loss": 0.004144991934299469, "step": 131600 }, { "epoch": 37.35736588135112, "grad_norm": 1.4572023153305054, "learning_rate": 6.26585296622197e-05, "loss": 0.0028891973197460175, "step": 131610 }, { "epoch": 37.360204371274484, "grad_norm": 0.15302659571170807, "learning_rate": 6.265569117229633e-05, "loss": 0.001631498523056507, "step": 131620 }, { "epoch": 37.36304286119784, "grad_norm": 0.03710472956299782, "learning_rate": 6.265285268237297e-05, "loss": 0.007481534779071808, "step": 131630 }, { "epoch": 37.3658813511212, "grad_norm": 0.7931613326072693, "learning_rate": 6.265001419244963e-05, "loss": 0.0028386315330863, "step": 131640 }, { "epoch": 37.368719841044566, "grad_norm": 0.24774785339832306, "learning_rate": 6.264717570252626e-05, "loss": 0.003350292146205902, "step": 131650 }, { "epoch": 37.37155833096792, "grad_norm": 4.3190765380859375, "learning_rate": 6.26443372126029e-05, "loss": 0.0019821641966700555, "step": 131660 }, { "epoch": 37.374396820891285, "grad_norm": 0.13862355053424835, "learning_rate": 6.264149872267954e-05, "loss": 0.017703670263290405, "step": 131670 }, { "epoch": 37.37723531081465, "grad_norm": 0.2711879014968872, "learning_rate": 6.263866023275617e-05, "loss": 0.005402345582842827, "step": 131680 }, { "epoch": 37.38007380073801, "grad_norm": 0.2839202284812927, "learning_rate": 6.263582174283281e-05, "loss": 0.003753827512264252, "step": 131690 }, { "epoch": 37.382912290661366, "grad_norm": 2.89367413520813, "learning_rate": 6.263298325290946e-05, "loss": 0.008917482197284698, "step": 131700 }, { "epoch": 37.38575078058473, "grad_norm": 2.1834144592285156, "learning_rate": 6.263014476298609e-05, "loss": 0.006561017036437989, "step": 131710 }, { "epoch": 37.38858927050809, "grad_norm": 0.17815791070461273, "learning_rate": 6.262730627306273e-05, "loss": 0.00616636760532856, "step": 131720 }, { "epoch": 37.39142776043145, "grad_norm": 0.4269798696041107, "learning_rate": 6.262446778313937e-05, "loss": 0.013853214681148529, "step": 131730 }, { "epoch": 37.39426625035481, "grad_norm": 10.929792404174805, "learning_rate": 6.262162929321602e-05, "loss": 0.009705546498298644, "step": 131740 }, { "epoch": 37.39710474027817, "grad_norm": 0.4829752743244171, "learning_rate": 6.261879080329264e-05, "loss": 0.002416595444083214, "step": 131750 }, { "epoch": 37.39994323020153, "grad_norm": 4.949604511260986, "learning_rate": 6.261595231336929e-05, "loss": 0.006063929200172425, "step": 131760 }, { "epoch": 37.40278172012489, "grad_norm": 0.8582398295402527, "learning_rate": 6.261311382344594e-05, "loss": 0.013048869371414185, "step": 131770 }, { "epoch": 37.405620210048255, "grad_norm": 8.469538688659668, "learning_rate": 6.261027533352257e-05, "loss": 0.006428474932909012, "step": 131780 }, { "epoch": 37.40845869997162, "grad_norm": 2.118295192718506, "learning_rate": 6.260743684359921e-05, "loss": 0.009956207871437073, "step": 131790 }, { "epoch": 37.411297189894974, "grad_norm": 0.4764280617237091, "learning_rate": 6.260459835367585e-05, "loss": 0.005274946987628937, "step": 131800 }, { "epoch": 37.41413567981834, "grad_norm": 3.7286934852600098, "learning_rate": 6.260175986375248e-05, "loss": 0.004202421382069587, "step": 131810 }, { "epoch": 37.4169741697417, "grad_norm": 2.1792564392089844, "learning_rate": 6.259892137382912e-05, "loss": 0.0013472307473421097, "step": 131820 }, { "epoch": 37.419812659665055, "grad_norm": 0.14795002341270447, "learning_rate": 6.259608288390576e-05, "loss": 0.011506377160549164, "step": 131830 }, { "epoch": 37.42265114958842, "grad_norm": 1.9586554765701294, "learning_rate": 6.25932443939824e-05, "loss": 0.004862635582685471, "step": 131840 }, { "epoch": 37.42548963951178, "grad_norm": 0.7770957350730896, "learning_rate": 6.259040590405904e-05, "loss": 0.0034213945269584655, "step": 131850 }, { "epoch": 37.428328129435144, "grad_norm": 0.6758635640144348, "learning_rate": 6.258756741413569e-05, "loss": 0.010343948751688004, "step": 131860 }, { "epoch": 37.4311666193585, "grad_norm": 1.428584337234497, "learning_rate": 6.258472892421233e-05, "loss": 0.010746560245752334, "step": 131870 }, { "epoch": 37.43400510928186, "grad_norm": 0.2744644284248352, "learning_rate": 6.258189043428895e-05, "loss": 0.002670061029493809, "step": 131880 }, { "epoch": 37.436843599205226, "grad_norm": 0.11933600902557373, "learning_rate": 6.25790519443656e-05, "loss": 0.0031556248664855956, "step": 131890 }, { "epoch": 37.43968208912858, "grad_norm": 2.065722703933716, "learning_rate": 6.257621345444225e-05, "loss": 0.0010117342695593833, "step": 131900 }, { "epoch": 37.442520579051944, "grad_norm": 15.049694061279297, "learning_rate": 6.257337496451888e-05, "loss": 0.00541214719414711, "step": 131910 }, { "epoch": 37.44535906897531, "grad_norm": 0.15314093232154846, "learning_rate": 6.257053647459552e-05, "loss": 0.0013120271265506744, "step": 131920 }, { "epoch": 37.44819755889866, "grad_norm": 0.596635103225708, "learning_rate": 6.256769798467216e-05, "loss": 0.0016798578202724458, "step": 131930 }, { "epoch": 37.451036048822026, "grad_norm": 0.0935182124376297, "learning_rate": 6.256485949474879e-05, "loss": 0.0035447575151920318, "step": 131940 }, { "epoch": 37.45387453874539, "grad_norm": 6.7385478019714355, "learning_rate": 6.256202100482543e-05, "loss": 0.0028651636093854904, "step": 131950 }, { "epoch": 37.45671302866875, "grad_norm": 0.2648725211620331, "learning_rate": 6.255918251490207e-05, "loss": 0.003851566091179848, "step": 131960 }, { "epoch": 37.45955151859211, "grad_norm": 11.077371597290039, "learning_rate": 6.255634402497871e-05, "loss": 0.007340320199728012, "step": 131970 }, { "epoch": 37.46239000851547, "grad_norm": 0.17729884386062622, "learning_rate": 6.255350553505535e-05, "loss": 0.0028548333793878555, "step": 131980 }, { "epoch": 37.46522849843883, "grad_norm": 0.38628140091896057, "learning_rate": 6.2550667045132e-05, "loss": 0.004797953739762306, "step": 131990 }, { "epoch": 37.46806698836219, "grad_norm": 1.0531806945800781, "learning_rate": 6.254782855520864e-05, "loss": 0.0011209480464458466, "step": 132000 }, { "epoch": 37.46806698836219, "eval_accuracy": 0.9743752781840147, "eval_loss": 0.09310868382453918, "eval_runtime": 32.362, "eval_samples_per_second": 485.972, "eval_steps_per_second": 7.602, "step": 132000 }, { "epoch": 37.47090547828555, "grad_norm": 0.09871648997068405, "learning_rate": 6.254499006528527e-05, "loss": 0.009373778849840165, "step": 132010 }, { "epoch": 37.473743968208915, "grad_norm": 0.10134980827569962, "learning_rate": 6.25421515753619e-05, "loss": 0.0011668138206005097, "step": 132020 }, { "epoch": 37.47658245813227, "grad_norm": 1.1469206809997559, "learning_rate": 6.253931308543855e-05, "loss": 0.0013335609808564186, "step": 132030 }, { "epoch": 37.47942094805563, "grad_norm": 1.8722736835479736, "learning_rate": 6.253647459551519e-05, "loss": 0.0031882777810096742, "step": 132040 }, { "epoch": 37.482259437978996, "grad_norm": 4.049002647399902, "learning_rate": 6.253363610559183e-05, "loss": 0.001576954498887062, "step": 132050 }, { "epoch": 37.48509792790236, "grad_norm": 3.817969799041748, "learning_rate": 6.253079761566847e-05, "loss": 0.0037019729614257814, "step": 132060 }, { "epoch": 37.487936417825715, "grad_norm": 2.039700746536255, "learning_rate": 6.25279591257451e-05, "loss": 0.0018210779875516892, "step": 132070 }, { "epoch": 37.49077490774908, "grad_norm": 0.34361350536346436, "learning_rate": 6.252512063582174e-05, "loss": 0.000877358764410019, "step": 132080 }, { "epoch": 37.49361339767244, "grad_norm": 0.20258112251758575, "learning_rate": 6.252228214589838e-05, "loss": 0.014889316260814666, "step": 132090 }, { "epoch": 37.496451887595796, "grad_norm": 2.361514091491699, "learning_rate": 6.251944365597502e-05, "loss": 0.00841614305973053, "step": 132100 }, { "epoch": 37.49929037751916, "grad_norm": 1.1773335933685303, "learning_rate": 6.251660516605167e-05, "loss": 0.003871069103479385, "step": 132110 }, { "epoch": 37.50212886744252, "grad_norm": 0.18657879531383514, "learning_rate": 6.251376667612831e-05, "loss": 0.0026958780363202095, "step": 132120 }, { "epoch": 37.50496735736588, "grad_norm": 6.229252338409424, "learning_rate": 6.251092818620495e-05, "loss": 0.001969832554459572, "step": 132130 }, { "epoch": 37.50780584728924, "grad_norm": 0.45797938108444214, "learning_rate": 6.250808969628158e-05, "loss": 0.0019026946276426316, "step": 132140 }, { "epoch": 37.510644337212604, "grad_norm": 0.5916337966918945, "learning_rate": 6.250525120635822e-05, "loss": 0.007045965641736984, "step": 132150 }, { "epoch": 37.51348282713597, "grad_norm": 0.041298478841781616, "learning_rate": 6.250241271643486e-05, "loss": 0.007796673476696015, "step": 132160 }, { "epoch": 37.51632131705932, "grad_norm": 0.1929798722267151, "learning_rate": 6.24995742265115e-05, "loss": 0.007389642298221588, "step": 132170 }, { "epoch": 37.519159806982685, "grad_norm": 0.6311588287353516, "learning_rate": 6.249673573658814e-05, "loss": 0.004484188556671142, "step": 132180 }, { "epoch": 37.52199829690605, "grad_norm": 18.881084442138672, "learning_rate": 6.249389724666478e-05, "loss": 0.005851928517222405, "step": 132190 }, { "epoch": 37.524836786829404, "grad_norm": 2.152219533920288, "learning_rate": 6.249105875674141e-05, "loss": 0.011918966472148896, "step": 132200 }, { "epoch": 37.52767527675277, "grad_norm": 3.280728816986084, "learning_rate": 6.248822026681805e-05, "loss": 0.007176411151885986, "step": 132210 }, { "epoch": 37.53051376667613, "grad_norm": 0.31383973360061646, "learning_rate": 6.24853817768947e-05, "loss": 0.005173692479729652, "step": 132220 }, { "epoch": 37.533352256599485, "grad_norm": 0.20483887195587158, "learning_rate": 6.248254328697133e-05, "loss": 0.005251907557249069, "step": 132230 }, { "epoch": 37.53619074652285, "grad_norm": 0.15790414810180664, "learning_rate": 6.247970479704798e-05, "loss": 0.0015483779832720756, "step": 132240 }, { "epoch": 37.53902923644621, "grad_norm": 1.085664987564087, "learning_rate": 6.247686630712462e-05, "loss": 0.006709817796945572, "step": 132250 }, { "epoch": 37.541867726369574, "grad_norm": 4.931957721710205, "learning_rate": 6.247402781720126e-05, "loss": 0.0018745260313153267, "step": 132260 }, { "epoch": 37.54470621629293, "grad_norm": 0.7235962152481079, "learning_rate": 6.247118932727789e-05, "loss": 0.015942877531051634, "step": 132270 }, { "epoch": 37.54754470621629, "grad_norm": 0.965523362159729, "learning_rate": 6.246835083735453e-05, "loss": 0.00424588993191719, "step": 132280 }, { "epoch": 37.550383196139656, "grad_norm": 0.08497064560651779, "learning_rate": 6.246551234743117e-05, "loss": 0.001767456904053688, "step": 132290 }, { "epoch": 37.55322168606301, "grad_norm": 4.354334831237793, "learning_rate": 6.246267385750781e-05, "loss": 0.0019675798714160917, "step": 132300 }, { "epoch": 37.556060175986374, "grad_norm": 1.51112699508667, "learning_rate": 6.245983536758445e-05, "loss": 0.0034618955105543137, "step": 132310 }, { "epoch": 37.55889866590974, "grad_norm": 2.4551963806152344, "learning_rate": 6.24569968776611e-05, "loss": 0.004053299129009247, "step": 132320 }, { "epoch": 37.5617371558331, "grad_norm": 0.8450458645820618, "learning_rate": 6.245415838773772e-05, "loss": 0.011071632802486419, "step": 132330 }, { "epoch": 37.564575645756456, "grad_norm": 0.1548750400543213, "learning_rate": 6.245131989781436e-05, "loss": 0.0032876778393983843, "step": 132340 }, { "epoch": 37.56741413567982, "grad_norm": 0.12953384220600128, "learning_rate": 6.2448481407891e-05, "loss": 0.0035818852484226227, "step": 132350 }, { "epoch": 37.57025262560318, "grad_norm": 0.18858279287815094, "learning_rate": 6.244564291796765e-05, "loss": 0.0031421855092048645, "step": 132360 }, { "epoch": 37.57309111552654, "grad_norm": 1.0477215051651, "learning_rate": 6.244280442804429e-05, "loss": 0.0027070142328739165, "step": 132370 }, { "epoch": 37.5759296054499, "grad_norm": 5.662510871887207, "learning_rate": 6.243996593812093e-05, "loss": 0.003234283998608589, "step": 132380 }, { "epoch": 37.57876809537326, "grad_norm": 0.20722918212413788, "learning_rate": 6.243712744819757e-05, "loss": 0.002323152311146259, "step": 132390 }, { "epoch": 37.58160658529662, "grad_norm": 0.2880652844905853, "learning_rate": 6.24342889582742e-05, "loss": 0.012477408349514007, "step": 132400 }, { "epoch": 37.58444507521998, "grad_norm": 0.29456761479377747, "learning_rate": 6.243145046835084e-05, "loss": 0.02442405968904495, "step": 132410 }, { "epoch": 37.587283565143345, "grad_norm": 0.1021864265203476, "learning_rate": 6.242861197842748e-05, "loss": 0.004192773252725601, "step": 132420 }, { "epoch": 37.59012205506671, "grad_norm": 0.7154480814933777, "learning_rate": 6.242577348850411e-05, "loss": 0.002216958999633789, "step": 132430 }, { "epoch": 37.59296054499006, "grad_norm": 1.254004716873169, "learning_rate": 6.242293499858076e-05, "loss": 0.0014498740434646606, "step": 132440 }, { "epoch": 37.595799034913426, "grad_norm": 0.41714709997177124, "learning_rate": 6.24200965086574e-05, "loss": 0.0025337714701890945, "step": 132450 }, { "epoch": 37.59863752483679, "grad_norm": 0.6684608459472656, "learning_rate": 6.241725801873403e-05, "loss": 0.0041950389742851256, "step": 132460 }, { "epoch": 37.601476014760145, "grad_norm": 0.4297643303871155, "learning_rate": 6.241441952881067e-05, "loss": 0.0021754536777734757, "step": 132470 }, { "epoch": 37.60431450468351, "grad_norm": 7.331753253936768, "learning_rate": 6.241158103888732e-05, "loss": 0.008207141607999801, "step": 132480 }, { "epoch": 37.60715299460687, "grad_norm": 10.085725784301758, "learning_rate": 6.240874254896396e-05, "loss": 0.024471521377563477, "step": 132490 }, { "epoch": 37.60999148453023, "grad_norm": 0.057595256716012955, "learning_rate": 6.24059040590406e-05, "loss": 0.008684705942869186, "step": 132500 }, { "epoch": 37.60999148453023, "eval_accuracy": 0.9724677306542888, "eval_loss": 0.09692180156707764, "eval_runtime": 32.112, "eval_samples_per_second": 489.754, "eval_steps_per_second": 7.661, "step": 132500 }, { "epoch": 37.61282997445359, "grad_norm": 0.2997278869152069, "learning_rate": 6.240306556911724e-05, "loss": 0.005181872844696045, "step": 132510 }, { "epoch": 37.61566846437695, "grad_norm": 0.3243466019630432, "learning_rate": 6.240022707919387e-05, "loss": 0.0038658004254102708, "step": 132520 }, { "epoch": 37.618506954300315, "grad_norm": 0.10478179901838303, "learning_rate": 6.239738858927051e-05, "loss": 0.005461958795785904, "step": 132530 }, { "epoch": 37.62134544422367, "grad_norm": 0.567054033279419, "learning_rate": 6.239455009934715e-05, "loss": 0.006647210568189621, "step": 132540 }, { "epoch": 37.624183934147034, "grad_norm": 0.359387069940567, "learning_rate": 6.239171160942379e-05, "loss": 0.001304195448756218, "step": 132550 }, { "epoch": 37.6270224240704, "grad_norm": 2.377549409866333, "learning_rate": 6.238887311950042e-05, "loss": 0.002070714719593525, "step": 132560 }, { "epoch": 37.62986091399375, "grad_norm": 2.464228630065918, "learning_rate": 6.238603462957707e-05, "loss": 0.0017978737130761146, "step": 132570 }, { "epoch": 37.632699403917115, "grad_norm": 0.514973521232605, "learning_rate": 6.238319613965372e-05, "loss": 0.002048587612807751, "step": 132580 }, { "epoch": 37.63553789384048, "grad_norm": 0.5121153593063354, "learning_rate": 6.238035764973034e-05, "loss": 0.011153070628643036, "step": 132590 }, { "epoch": 37.63837638376384, "grad_norm": 1.0856119394302368, "learning_rate": 6.237751915980698e-05, "loss": 0.016050618886947633, "step": 132600 }, { "epoch": 37.6412148736872, "grad_norm": 1.9948545694351196, "learning_rate": 6.237468066988363e-05, "loss": 0.007011393457651139, "step": 132610 }, { "epoch": 37.64405336361056, "grad_norm": 0.4396890103816986, "learning_rate": 6.237184217996025e-05, "loss": 0.002546379156410694, "step": 132620 }, { "epoch": 37.64689185353392, "grad_norm": 7.393730163574219, "learning_rate": 6.23690036900369e-05, "loss": 0.004048900306224823, "step": 132630 }, { "epoch": 37.64973034345728, "grad_norm": 0.06148531287908554, "learning_rate": 6.236616520011355e-05, "loss": 0.0010700980201363564, "step": 132640 }, { "epoch": 37.65256883338064, "grad_norm": 2.0977513790130615, "learning_rate": 6.236332671019018e-05, "loss": 0.0025764688849449157, "step": 132650 }, { "epoch": 37.655407323304004, "grad_norm": 0.14940853416919708, "learning_rate": 6.236048822026682e-05, "loss": 0.000983455777168274, "step": 132660 }, { "epoch": 37.65824581322736, "grad_norm": 0.19542144238948822, "learning_rate": 6.235764973034346e-05, "loss": 0.008302377909421921, "step": 132670 }, { "epoch": 37.66108430315072, "grad_norm": 0.7092940807342529, "learning_rate": 6.23548112404201e-05, "loss": 0.0015957662835717201, "step": 132680 }, { "epoch": 37.663922793074086, "grad_norm": 0.45525434613227844, "learning_rate": 6.235197275049673e-05, "loss": 0.002026466839015484, "step": 132690 }, { "epoch": 37.66676128299745, "grad_norm": 0.09463665634393692, "learning_rate": 6.234913426057338e-05, "loss": 0.002368542551994324, "step": 132700 }, { "epoch": 37.669599772920805, "grad_norm": 1.0793935060501099, "learning_rate": 6.234629577065003e-05, "loss": 0.002449742890894413, "step": 132710 }, { "epoch": 37.67243826284417, "grad_norm": 0.6311933994293213, "learning_rate": 6.234345728072665e-05, "loss": 0.004752984642982483, "step": 132720 }, { "epoch": 37.67527675276753, "grad_norm": 0.11249015480279922, "learning_rate": 6.23406187908033e-05, "loss": 0.0039205759763717655, "step": 132730 }, { "epoch": 37.678115242690886, "grad_norm": 6.136289596557617, "learning_rate": 6.233778030087994e-05, "loss": 0.01242113783955574, "step": 132740 }, { "epoch": 37.68095373261425, "grad_norm": 1.0341088771820068, "learning_rate": 6.233494181095656e-05, "loss": 0.017194324731826784, "step": 132750 }, { "epoch": 37.68379222253761, "grad_norm": 4.73037052154541, "learning_rate": 6.23321033210332e-05, "loss": 0.003039441630244255, "step": 132760 }, { "epoch": 37.68663071246097, "grad_norm": 13.117598533630371, "learning_rate": 6.232926483110986e-05, "loss": 0.007743534445762634, "step": 132770 }, { "epoch": 37.68946920238433, "grad_norm": 0.159669890999794, "learning_rate": 6.232642634118649e-05, "loss": 0.006506147980690003, "step": 132780 }, { "epoch": 37.69230769230769, "grad_norm": 0.028011225163936615, "learning_rate": 6.232358785126313e-05, "loss": 0.0007077228277921676, "step": 132790 }, { "epoch": 37.695146182231056, "grad_norm": 0.35714855790138245, "learning_rate": 6.232074936133977e-05, "loss": 0.0049905598163604735, "step": 132800 }, { "epoch": 37.69798467215441, "grad_norm": 1.3885563611984253, "learning_rate": 6.231791087141641e-05, "loss": 0.01562163382768631, "step": 132810 }, { "epoch": 37.700823162077775, "grad_norm": 5.245763301849365, "learning_rate": 6.231507238149304e-05, "loss": 0.0016142314299941063, "step": 132820 }, { "epoch": 37.70366165200114, "grad_norm": 1.7521334886550903, "learning_rate": 6.231223389156968e-05, "loss": 0.0013959165662527084, "step": 132830 }, { "epoch": 37.706500141924494, "grad_norm": 1.8494197130203247, "learning_rate": 6.230939540164634e-05, "loss": 0.0030511913821101187, "step": 132840 }, { "epoch": 37.70933863184786, "grad_norm": 0.2823350131511688, "learning_rate": 6.230655691172296e-05, "loss": 0.0056950576603412625, "step": 132850 }, { "epoch": 37.71217712177122, "grad_norm": 1.437185525894165, "learning_rate": 6.23037184217996e-05, "loss": 0.0032662130892276765, "step": 132860 }, { "epoch": 37.715015611694575, "grad_norm": 0.02005903795361519, "learning_rate": 6.230087993187625e-05, "loss": 0.0014844300225377084, "step": 132870 }, { "epoch": 37.71785410161794, "grad_norm": 0.16124209761619568, "learning_rate": 6.229804144195288e-05, "loss": 0.025777983665466308, "step": 132880 }, { "epoch": 37.7206925915413, "grad_norm": 2.0263826847076416, "learning_rate": 6.229520295202952e-05, "loss": 0.005041095614433289, "step": 132890 }, { "epoch": 37.723531081464664, "grad_norm": 2.158900737762451, "learning_rate": 6.229236446210617e-05, "loss": 0.012619401514530181, "step": 132900 }, { "epoch": 37.72636957138802, "grad_norm": 11.915014266967773, "learning_rate": 6.22895259721828e-05, "loss": 0.004944156482815742, "step": 132910 }, { "epoch": 37.72920806131138, "grad_norm": 0.14055761694908142, "learning_rate": 6.228668748225944e-05, "loss": 0.0059983029961586, "step": 132920 }, { "epoch": 37.732046551234745, "grad_norm": 0.11732164770364761, "learning_rate": 6.228384899233608e-05, "loss": 0.0019366150721907617, "step": 132930 }, { "epoch": 37.7348850411581, "grad_norm": 0.1532517373561859, "learning_rate": 6.228101050241272e-05, "loss": 0.004197642207145691, "step": 132940 }, { "epoch": 37.737723531081464, "grad_norm": 16.41945457458496, "learning_rate": 6.227817201248935e-05, "loss": 0.017532306909561157, "step": 132950 }, { "epoch": 37.74056202100483, "grad_norm": 0.09246093034744263, "learning_rate": 6.227533352256599e-05, "loss": 0.004091132059693336, "step": 132960 }, { "epoch": 37.74340051092818, "grad_norm": 0.28916335105895996, "learning_rate": 6.227249503264265e-05, "loss": 0.005170441418886185, "step": 132970 }, { "epoch": 37.746239000851546, "grad_norm": 0.37603336572647095, "learning_rate": 6.226965654271928e-05, "loss": 0.002044243551790714, "step": 132980 }, { "epoch": 37.74907749077491, "grad_norm": 8.819252014160156, "learning_rate": 6.226681805279592e-05, "loss": 0.012336017191410064, "step": 132990 }, { "epoch": 37.75191598069827, "grad_norm": 2.41961669921875, "learning_rate": 6.226397956287256e-05, "loss": 0.003954678773880005, "step": 133000 }, { "epoch": 37.75191598069827, "eval_accuracy": 0.9724041457366313, "eval_loss": 0.09708116948604584, "eval_runtime": 32.9351, "eval_samples_per_second": 477.514, "eval_steps_per_second": 7.469, "step": 133000 }, { "epoch": 37.75475447062163, "grad_norm": 0.12998168170452118, "learning_rate": 6.226114107294919e-05, "loss": 0.0009527182206511498, "step": 133010 }, { "epoch": 37.75759296054499, "grad_norm": 0.222847580909729, "learning_rate": 6.225830258302583e-05, "loss": 0.0050263553857803345, "step": 133020 }, { "epoch": 37.76043145046835, "grad_norm": 0.3734874725341797, "learning_rate": 6.225546409310248e-05, "loss": 0.0008156495168805122, "step": 133030 }, { "epoch": 37.76326994039171, "grad_norm": 0.24316823482513428, "learning_rate": 6.225262560317911e-05, "loss": 0.001613054983317852, "step": 133040 }, { "epoch": 37.76610843031507, "grad_norm": 0.06607042998075485, "learning_rate": 6.224978711325575e-05, "loss": 0.004872652143239975, "step": 133050 }, { "epoch": 37.768946920238434, "grad_norm": 0.31327736377716064, "learning_rate": 6.224694862333239e-05, "loss": 0.005223363637924194, "step": 133060 }, { "epoch": 37.7717854101618, "grad_norm": 2.095780372619629, "learning_rate": 6.224411013340903e-05, "loss": 0.010399572551250458, "step": 133070 }, { "epoch": 37.77462390008515, "grad_norm": 2.0874862670898438, "learning_rate": 6.224127164348566e-05, "loss": 0.003095918335020542, "step": 133080 }, { "epoch": 37.777462390008516, "grad_norm": 0.6543291211128235, "learning_rate": 6.22384331535623e-05, "loss": 0.017857806384563447, "step": 133090 }, { "epoch": 37.78030087993188, "grad_norm": 0.02607010491192341, "learning_rate": 6.223559466363896e-05, "loss": 0.0015245603397488594, "step": 133100 }, { "epoch": 37.783139369855235, "grad_norm": 0.1765156388282776, "learning_rate": 6.223275617371559e-05, "loss": 0.007004989683628083, "step": 133110 }, { "epoch": 37.7859778597786, "grad_norm": 0.40197938680648804, "learning_rate": 6.222991768379223e-05, "loss": 0.005685955286026001, "step": 133120 }, { "epoch": 37.78881634970196, "grad_norm": 2.2790653705596924, "learning_rate": 6.222707919386887e-05, "loss": 0.0015490902587771416, "step": 133130 }, { "epoch": 37.791654839625316, "grad_norm": 1.7385667562484741, "learning_rate": 6.22242407039455e-05, "loss": 0.010572496056556701, "step": 133140 }, { "epoch": 37.79449332954868, "grad_norm": 0.29737934470176697, "learning_rate": 6.222140221402214e-05, "loss": 0.004896527528762818, "step": 133150 }, { "epoch": 37.79733181947204, "grad_norm": 1.0798273086547852, "learning_rate": 6.221856372409878e-05, "loss": 0.0012504935264587402, "step": 133160 }, { "epoch": 37.800170309395405, "grad_norm": 4.986037731170654, "learning_rate": 6.221572523417542e-05, "loss": 0.0018505590036511422, "step": 133170 }, { "epoch": 37.80300879931876, "grad_norm": 0.07906194776296616, "learning_rate": 6.221288674425206e-05, "loss": 0.006117449700832367, "step": 133180 }, { "epoch": 37.805847289242124, "grad_norm": 0.07539534568786621, "learning_rate": 6.22100482543287e-05, "loss": 0.0040138278156518934, "step": 133190 }, { "epoch": 37.80868577916549, "grad_norm": 1.0952357053756714, "learning_rate": 6.220720976440534e-05, "loss": 0.0031634148210287092, "step": 133200 }, { "epoch": 37.81152426908884, "grad_norm": 7.378997802734375, "learning_rate": 6.220437127448197e-05, "loss": 0.005262964591383934, "step": 133210 }, { "epoch": 37.814362759012205, "grad_norm": 0.356638640165329, "learning_rate": 6.220153278455861e-05, "loss": 0.0005699669942259789, "step": 133220 }, { "epoch": 37.81720124893557, "grad_norm": 0.12266595661640167, "learning_rate": 6.219869429463527e-05, "loss": 0.004715427756309509, "step": 133230 }, { "epoch": 37.820039738858924, "grad_norm": 6.195380687713623, "learning_rate": 6.21958558047119e-05, "loss": 0.00301548857241869, "step": 133240 }, { "epoch": 37.82287822878229, "grad_norm": 0.21377554535865784, "learning_rate": 6.219301731478854e-05, "loss": 0.0037096865475177766, "step": 133250 }, { "epoch": 37.82571671870565, "grad_norm": 0.19940659403800964, "learning_rate": 6.219017882486518e-05, "loss": 0.012438584119081497, "step": 133260 }, { "epoch": 37.82855520862901, "grad_norm": 2.8873722553253174, "learning_rate": 6.218734033494181e-05, "loss": 0.001673341728746891, "step": 133270 }, { "epoch": 37.83139369855237, "grad_norm": 1.2639799118041992, "learning_rate": 6.218450184501845e-05, "loss": 0.0005596622824668885, "step": 133280 }, { "epoch": 37.83423218847573, "grad_norm": 0.6432152390480042, "learning_rate": 6.218166335509509e-05, "loss": 0.001684900000691414, "step": 133290 }, { "epoch": 37.837070678399094, "grad_norm": 0.30038389563560486, "learning_rate": 6.217882486517173e-05, "loss": 0.00300500076264143, "step": 133300 }, { "epoch": 37.83990916832245, "grad_norm": 0.21321283280849457, "learning_rate": 6.217598637524837e-05, "loss": 0.004956291988492012, "step": 133310 }, { "epoch": 37.84274765824581, "grad_norm": 0.12209369987249374, "learning_rate": 6.217314788532501e-05, "loss": 0.003255471959710121, "step": 133320 }, { "epoch": 37.845586148169176, "grad_norm": 0.2678591012954712, "learning_rate": 6.217030939540166e-05, "loss": 0.0021209243685007097, "step": 133330 }, { "epoch": 37.84842463809253, "grad_norm": 2.2129557132720947, "learning_rate": 6.216747090547828e-05, "loss": 0.004342694580554962, "step": 133340 }, { "epoch": 37.851263128015894, "grad_norm": 0.9906684160232544, "learning_rate": 6.216491626454726e-05, "loss": 0.03175324201583862, "step": 133350 }, { "epoch": 37.85410161793926, "grad_norm": 6.482047080993652, "learning_rate": 6.21620777746239e-05, "loss": 0.004478826373815537, "step": 133360 }, { "epoch": 37.85694010786262, "grad_norm": 0.8999180793762207, "learning_rate": 6.215923928470055e-05, "loss": 0.0024129973724484445, "step": 133370 }, { "epoch": 37.859778597785976, "grad_norm": 1.0929769277572632, "learning_rate": 6.215640079477719e-05, "loss": 0.0009130131453275681, "step": 133380 }, { "epoch": 37.86261708770934, "grad_norm": 0.9031277894973755, "learning_rate": 6.215356230485382e-05, "loss": 0.012177640199661255, "step": 133390 }, { "epoch": 37.8654555776327, "grad_norm": 1.5737007856369019, "learning_rate": 6.215072381493046e-05, "loss": 0.004903804883360863, "step": 133400 }, { "epoch": 37.86829406755606, "grad_norm": 0.889591634273529, "learning_rate": 6.21478853250071e-05, "loss": 0.0056223064661026, "step": 133410 }, { "epoch": 37.87113255747942, "grad_norm": 0.953218400478363, "learning_rate": 6.214504683508374e-05, "loss": 0.007409502565860748, "step": 133420 }, { "epoch": 37.87397104740278, "grad_norm": 1.053886890411377, "learning_rate": 6.214220834516038e-05, "loss": 0.00195941012352705, "step": 133430 }, { "epoch": 37.876809537326146, "grad_norm": 0.1118682324886322, "learning_rate": 6.213936985523702e-05, "loss": 0.0025756197050213815, "step": 133440 }, { "epoch": 37.8796480272495, "grad_norm": 0.1571532040834427, "learning_rate": 6.213653136531365e-05, "loss": 0.0010448066517710687, "step": 133450 }, { "epoch": 37.882486517172865, "grad_norm": 0.37357431650161743, "learning_rate": 6.213369287539029e-05, "loss": 0.002458176575601101, "step": 133460 }, { "epoch": 37.88532500709623, "grad_norm": 0.4780252277851105, "learning_rate": 6.213085438546693e-05, "loss": 0.0034547030925750734, "step": 133470 }, { "epoch": 37.88816349701958, "grad_norm": 0.059194665402173996, "learning_rate": 6.212801589554357e-05, "loss": 0.0010642634704709053, "step": 133480 }, { "epoch": 37.891001986942946, "grad_norm": 0.06451675295829773, "learning_rate": 6.212517740562022e-05, "loss": 0.004234927892684937, "step": 133490 }, { "epoch": 37.89384047686631, "grad_norm": 0.4888061285018921, "learning_rate": 6.212233891569686e-05, "loss": 0.031558871269226074, "step": 133500 }, { "epoch": 37.89384047686631, "eval_accuracy": 0.9761556558784257, "eval_loss": 0.08558957278728485, "eval_runtime": 31.9137, "eval_samples_per_second": 492.798, "eval_steps_per_second": 7.708, "step": 133500 }, { "epoch": 37.896678966789665, "grad_norm": 0.2645043432712555, "learning_rate": 6.211950042577348e-05, "loss": 0.004266504943370819, "step": 133510 }, { "epoch": 37.89951745671303, "grad_norm": 1.0531411170959473, "learning_rate": 6.211666193585013e-05, "loss": 0.0090446799993515, "step": 133520 }, { "epoch": 37.90235594663639, "grad_norm": 1.3234773874282837, "learning_rate": 6.211382344592677e-05, "loss": 0.0029716160148382186, "step": 133530 }, { "epoch": 37.90519443655975, "grad_norm": 1.9012441635131836, "learning_rate": 6.211098495600341e-05, "loss": 0.0021519467234611513, "step": 133540 }, { "epoch": 37.90803292648311, "grad_norm": 2.706608295440674, "learning_rate": 6.210814646608005e-05, "loss": 0.009835876524448395, "step": 133550 }, { "epoch": 37.91087141640647, "grad_norm": 3.6343371868133545, "learning_rate": 6.210530797615669e-05, "loss": 0.00712469220161438, "step": 133560 }, { "epoch": 37.913709906329835, "grad_norm": 1.2140740156173706, "learning_rate": 6.210246948623333e-05, "loss": 0.007036004215478897, "step": 133570 }, { "epoch": 37.91654839625319, "grad_norm": 0.5931113958358765, "learning_rate": 6.209963099630996e-05, "loss": 0.015692949295043945, "step": 133580 }, { "epoch": 37.919386886176554, "grad_norm": 1.4090523719787598, "learning_rate": 6.20967925063866e-05, "loss": 0.00514179989695549, "step": 133590 }, { "epoch": 37.92222537609992, "grad_norm": 3.1565916538238525, "learning_rate": 6.209395401646324e-05, "loss": 0.00160933006554842, "step": 133600 }, { "epoch": 37.92506386602327, "grad_norm": 0.09420684725046158, "learning_rate": 6.209111552653989e-05, "loss": 0.004977817833423615, "step": 133610 }, { "epoch": 37.927902355946635, "grad_norm": 0.20956964790821075, "learning_rate": 6.208827703661653e-05, "loss": 0.015343551337718964, "step": 133620 }, { "epoch": 37.93074084587, "grad_norm": 4.91356897354126, "learning_rate": 6.208543854669317e-05, "loss": 0.0038940757513046265, "step": 133630 }, { "epoch": 37.93357933579336, "grad_norm": 1.4136955738067627, "learning_rate": 6.20826000567698e-05, "loss": 0.0038405947387218477, "step": 133640 }, { "epoch": 37.93641782571672, "grad_norm": 0.9950119853019714, "learning_rate": 6.207976156684644e-05, "loss": 0.007612425833940506, "step": 133650 }, { "epoch": 37.93925631564008, "grad_norm": 0.37792518734931946, "learning_rate": 6.207692307692308e-05, "loss": 0.007347790896892548, "step": 133660 }, { "epoch": 37.94209480556344, "grad_norm": 0.08669348061084747, "learning_rate": 6.207408458699972e-05, "loss": 0.007537807524204254, "step": 133670 }, { "epoch": 37.9449332954868, "grad_norm": 1.3634811639785767, "learning_rate": 6.207124609707636e-05, "loss": 0.002638531103730202, "step": 133680 }, { "epoch": 37.94777178541016, "grad_norm": 1.146072268486023, "learning_rate": 6.2068407607153e-05, "loss": 0.0021408434957265856, "step": 133690 }, { "epoch": 37.950610275333524, "grad_norm": 0.2799375057220459, "learning_rate": 6.206556911722964e-05, "loss": 0.0016608472913503646, "step": 133700 }, { "epoch": 37.95344876525688, "grad_norm": 4.991639614105225, "learning_rate": 6.206273062730627e-05, "loss": 0.0023230288177728655, "step": 133710 }, { "epoch": 37.95628725518024, "grad_norm": 0.23622044920921326, "learning_rate": 6.205989213738291e-05, "loss": 0.010309036821126938, "step": 133720 }, { "epoch": 37.959125745103606, "grad_norm": 1.4886629581451416, "learning_rate": 6.205705364745955e-05, "loss": 0.002765454538166523, "step": 133730 }, { "epoch": 37.96196423502697, "grad_norm": 0.10436052083969116, "learning_rate": 6.205421515753618e-05, "loss": 0.001004292443394661, "step": 133740 }, { "epoch": 37.964802724950324, "grad_norm": 0.7403091192245483, "learning_rate": 6.205137666761284e-05, "loss": 0.0029820667579770086, "step": 133750 }, { "epoch": 37.96764121487369, "grad_norm": 4.340145587921143, "learning_rate": 6.204853817768948e-05, "loss": 0.007359112799167633, "step": 133760 }, { "epoch": 37.97047970479705, "grad_norm": 7.15266227722168, "learning_rate": 6.20456996877661e-05, "loss": 0.00341038778424263, "step": 133770 }, { "epoch": 37.973318194720406, "grad_norm": 0.22077172994613647, "learning_rate": 6.204286119784275e-05, "loss": 0.002966208755970001, "step": 133780 }, { "epoch": 37.97615668464377, "grad_norm": 0.2161400318145752, "learning_rate": 6.204002270791939e-05, "loss": 0.0014428023248910903, "step": 133790 }, { "epoch": 37.97899517456713, "grad_norm": 0.13567674160003662, "learning_rate": 6.203718421799603e-05, "loss": 0.006211702525615692, "step": 133800 }, { "epoch": 37.981833664490495, "grad_norm": 0.5223957896232605, "learning_rate": 6.203434572807267e-05, "loss": 0.0025690630078315734, "step": 133810 }, { "epoch": 37.98467215441385, "grad_norm": 3.2517518997192383, "learning_rate": 6.203150723814931e-05, "loss": 0.0032192133367061615, "step": 133820 }, { "epoch": 37.98751064433721, "grad_norm": 0.045168712735176086, "learning_rate": 6.202866874822595e-05, "loss": 0.0024882715195417406, "step": 133830 }, { "epoch": 37.990349134260576, "grad_norm": 0.5610141754150391, "learning_rate": 6.202583025830258e-05, "loss": 0.00626506432890892, "step": 133840 }, { "epoch": 37.99318762418393, "grad_norm": 1.4371424913406372, "learning_rate": 6.202299176837922e-05, "loss": 0.0032473891973495484, "step": 133850 }, { "epoch": 37.996026114107295, "grad_norm": 5.081765651702881, "learning_rate": 6.202015327845587e-05, "loss": 0.005523406714200973, "step": 133860 }, { "epoch": 37.99886460403066, "grad_norm": 9.350892066955566, "learning_rate": 6.201731478853249e-05, "loss": 0.0034673213958740234, "step": 133870 }, { "epoch": 38.00170309395401, "grad_norm": 0.051754266023635864, "learning_rate": 6.201447629860915e-05, "loss": 0.007218610495328903, "step": 133880 }, { "epoch": 38.004541583877376, "grad_norm": 0.7074530720710754, "learning_rate": 6.201163780868579e-05, "loss": 0.002277693711221218, "step": 133890 }, { "epoch": 38.00738007380074, "grad_norm": 0.12943114340305328, "learning_rate": 6.200879931876242e-05, "loss": 0.0024940351024270056, "step": 133900 }, { "epoch": 38.0102185637241, "grad_norm": 0.3673385679721832, "learning_rate": 6.200596082883906e-05, "loss": 0.009372051060199737, "step": 133910 }, { "epoch": 38.01305705364746, "grad_norm": 6.83774471282959, "learning_rate": 6.20031223389157e-05, "loss": 0.0028739402070641516, "step": 133920 }, { "epoch": 38.01589554357082, "grad_norm": 0.26884937286376953, "learning_rate": 6.200028384899234e-05, "loss": 0.0052588354796171185, "step": 133930 }, { "epoch": 38.018734033494184, "grad_norm": 6.7967047691345215, "learning_rate": 6.199744535906897e-05, "loss": 0.007299830764532089, "step": 133940 }, { "epoch": 38.02157252341754, "grad_norm": 0.5923227071762085, "learning_rate": 6.199460686914562e-05, "loss": 0.0042127460241317746, "step": 133950 }, { "epoch": 38.0244110133409, "grad_norm": 0.1493835151195526, "learning_rate": 6.199176837922227e-05, "loss": 0.001406092755496502, "step": 133960 }, { "epoch": 38.027249503264265, "grad_norm": 0.25654828548431396, "learning_rate": 6.198892988929889e-05, "loss": 0.0037756316363811494, "step": 133970 }, { "epoch": 38.03008799318762, "grad_norm": 0.04529784619808197, "learning_rate": 6.198609139937553e-05, "loss": 0.002508901245892048, "step": 133980 }, { "epoch": 38.032926483110984, "grad_norm": 0.19847343862056732, "learning_rate": 6.198325290945218e-05, "loss": 0.00103923287242651, "step": 133990 }, { "epoch": 38.03576497303435, "grad_norm": 0.6736425757408142, "learning_rate": 6.19804144195288e-05, "loss": 0.001282622665166855, "step": 134000 }, { "epoch": 38.03576497303435, "eval_accuracy": 0.9725949004896038, "eval_loss": 0.10176767408847809, "eval_runtime": 32.262, "eval_samples_per_second": 487.478, "eval_steps_per_second": 7.625, "step": 134000 }, { "epoch": 38.03860346295771, "grad_norm": 0.3281838595867157, "learning_rate": 6.197757592960546e-05, "loss": 0.0008872263133525848, "step": 134010 }, { "epoch": 38.041441952881065, "grad_norm": 0.3041618764400482, "learning_rate": 6.19747374396821e-05, "loss": 0.0015516072511672975, "step": 134020 }, { "epoch": 38.04428044280443, "grad_norm": 0.5932643413543701, "learning_rate": 6.197189894975873e-05, "loss": 0.0013360675424337388, "step": 134030 }, { "epoch": 38.04711893272779, "grad_norm": 0.8423849940299988, "learning_rate": 6.196906045983537e-05, "loss": 0.00694122314453125, "step": 134040 }, { "epoch": 38.04995742265115, "grad_norm": 0.18612664937973022, "learning_rate": 6.196622196991201e-05, "loss": 0.0027197122573852537, "step": 134050 }, { "epoch": 38.05279591257451, "grad_norm": 0.9791676998138428, "learning_rate": 6.196338347998865e-05, "loss": 0.004170769453048706, "step": 134060 }, { "epoch": 38.05563440249787, "grad_norm": 2.237821102142334, "learning_rate": 6.196054499006528e-05, "loss": 0.002253599651157856, "step": 134070 }, { "epoch": 38.05847289242123, "grad_norm": 0.1873857080936432, "learning_rate": 6.195770650014193e-05, "loss": 0.0007913874462246895, "step": 134080 }, { "epoch": 38.06131138234459, "grad_norm": 0.021373817697167397, "learning_rate": 6.195486801021858e-05, "loss": 0.001890280842781067, "step": 134090 }, { "epoch": 38.064149872267954, "grad_norm": 0.10305590182542801, "learning_rate": 6.19520295202952e-05, "loss": 0.003622611612081528, "step": 134100 }, { "epoch": 38.06698836219132, "grad_norm": 7.983122825622559, "learning_rate": 6.194919103037185e-05, "loss": 0.01153704673051834, "step": 134110 }, { "epoch": 38.06982685211467, "grad_norm": 0.015571177937090397, "learning_rate": 6.194635254044849e-05, "loss": 0.00203020628541708, "step": 134120 }, { "epoch": 38.072665342038036, "grad_norm": 0.13727964460849762, "learning_rate": 6.194351405052511e-05, "loss": 0.00445866659283638, "step": 134130 }, { "epoch": 38.0755038319614, "grad_norm": 1.0107402801513672, "learning_rate": 6.194067556060176e-05, "loss": 0.005461192131042481, "step": 134140 }, { "epoch": 38.078342321884755, "grad_norm": 0.05921114236116409, "learning_rate": 6.193783707067841e-05, "loss": 0.0036296255886554716, "step": 134150 }, { "epoch": 38.08118081180812, "grad_norm": 0.2927892804145813, "learning_rate": 6.193499858075504e-05, "loss": 0.0005807241424918175, "step": 134160 }, { "epoch": 38.08401930173148, "grad_norm": 0.0699792206287384, "learning_rate": 6.193216009083168e-05, "loss": 0.0020666634663939476, "step": 134170 }, { "epoch": 38.086857791654836, "grad_norm": 0.07194444537162781, "learning_rate": 6.192932160090832e-05, "loss": 0.0012159183621406554, "step": 134180 }, { "epoch": 38.0896962815782, "grad_norm": 2.000232219696045, "learning_rate": 6.192648311098496e-05, "loss": 0.0020729703828692436, "step": 134190 }, { "epoch": 38.09253477150156, "grad_norm": 0.27747848629951477, "learning_rate": 6.192364462106159e-05, "loss": 0.0008992934599518776, "step": 134200 }, { "epoch": 38.095373261424925, "grad_norm": 0.6077998876571655, "learning_rate": 6.192080613113825e-05, "loss": 0.00133347287774086, "step": 134210 }, { "epoch": 38.09821175134828, "grad_norm": 0.3227548897266388, "learning_rate": 6.191796764121487e-05, "loss": 0.003961642831563949, "step": 134220 }, { "epoch": 38.10105024127164, "grad_norm": 0.05675831064581871, "learning_rate": 6.191512915129151e-05, "loss": 0.0005013752728700638, "step": 134230 }, { "epoch": 38.103888731195006, "grad_norm": 0.4183262586593628, "learning_rate": 6.191229066136816e-05, "loss": 0.0006160566583275795, "step": 134240 }, { "epoch": 38.10672722111836, "grad_norm": 0.6983895301818848, "learning_rate": 6.19094521714448e-05, "loss": 0.000806819275021553, "step": 134250 }, { "epoch": 38.109565711041725, "grad_norm": 0.038608014583587646, "learning_rate": 6.190661368152143e-05, "loss": 0.000459945946931839, "step": 134260 }, { "epoch": 38.11240420096509, "grad_norm": 0.13304409384727478, "learning_rate": 6.190377519159807e-05, "loss": 0.004243259131908417, "step": 134270 }, { "epoch": 38.11524269088845, "grad_norm": 0.06416783481836319, "learning_rate": 6.190093670167472e-05, "loss": 0.005541495606303215, "step": 134280 }, { "epoch": 38.11808118081181, "grad_norm": 0.019597381353378296, "learning_rate": 6.189809821175135e-05, "loss": 0.01608901172876358, "step": 134290 }, { "epoch": 38.12091967073517, "grad_norm": 0.05220317468047142, "learning_rate": 6.189525972182799e-05, "loss": 0.00225713811814785, "step": 134300 }, { "epoch": 38.12375816065853, "grad_norm": 11.762308120727539, "learning_rate": 6.189242123190463e-05, "loss": 0.010987860709428787, "step": 134310 }, { "epoch": 38.12659665058189, "grad_norm": 0.7038143873214722, "learning_rate": 6.188958274198127e-05, "loss": 0.007453412562608719, "step": 134320 }, { "epoch": 38.12943514050525, "grad_norm": 0.1096457988023758, "learning_rate": 6.18867442520579e-05, "loss": 0.0036039143800735475, "step": 134330 }, { "epoch": 38.132273630428614, "grad_norm": 0.11033488810062408, "learning_rate": 6.188390576213454e-05, "loss": 0.005243409797549248, "step": 134340 }, { "epoch": 38.13511212035197, "grad_norm": 0.37281447649002075, "learning_rate": 6.188106727221118e-05, "loss": 0.0007205948233604432, "step": 134350 }, { "epoch": 38.13795061027533, "grad_norm": 0.043452195823192596, "learning_rate": 6.187822878228783e-05, "loss": 0.000520840473473072, "step": 134360 }, { "epoch": 38.140789100198695, "grad_norm": 0.19350898265838623, "learning_rate": 6.187539029236447e-05, "loss": 0.005626681447029114, "step": 134370 }, { "epoch": 38.14362759012206, "grad_norm": 1.6138943433761597, "learning_rate": 6.187255180244111e-05, "loss": 0.0021019140258431435, "step": 134380 }, { "epoch": 38.146466080045414, "grad_norm": 0.5595139265060425, "learning_rate": 6.186971331251774e-05, "loss": 0.015924552083015443, "step": 134390 }, { "epoch": 38.14930456996878, "grad_norm": 0.05853088200092316, "learning_rate": 6.186687482259438e-05, "loss": 0.004021376371383667, "step": 134400 }, { "epoch": 38.15214305989214, "grad_norm": 2.0732181072235107, "learning_rate": 6.186403633267103e-05, "loss": 0.0045388080179691315, "step": 134410 }, { "epoch": 38.154981549815496, "grad_norm": 0.06178136169910431, "learning_rate": 6.186119784274766e-05, "loss": 0.00310603529214859, "step": 134420 }, { "epoch": 38.15782003973886, "grad_norm": 0.07122131437063217, "learning_rate": 6.18583593528243e-05, "loss": 0.0036707647144794465, "step": 134430 }, { "epoch": 38.16065852966222, "grad_norm": 0.13076157867908478, "learning_rate": 6.185552086290094e-05, "loss": 0.004789078980684281, "step": 134440 }, { "epoch": 38.16349701958558, "grad_norm": 0.39343711733818054, "learning_rate": 6.185268237297757e-05, "loss": 0.0068543747067451475, "step": 134450 }, { "epoch": 38.16633550950894, "grad_norm": 11.603266716003418, "learning_rate": 6.184984388305421e-05, "loss": 0.005468378216028214, "step": 134460 }, { "epoch": 38.1691739994323, "grad_norm": 0.32400277256965637, "learning_rate": 6.184700539313085e-05, "loss": 0.015094992518424988, "step": 134470 }, { "epoch": 38.172012489355666, "grad_norm": 0.01852233149111271, "learning_rate": 6.18441669032075e-05, "loss": 0.000986446626484394, "step": 134480 }, { "epoch": 38.17485097927902, "grad_norm": 5.0386505126953125, "learning_rate": 6.184132841328414e-05, "loss": 0.001744224689900875, "step": 134490 }, { "epoch": 38.177689469202384, "grad_norm": 0.08164342492818832, "learning_rate": 6.183848992336078e-05, "loss": 0.0028729850426316263, "step": 134500 }, { "epoch": 38.177689469202384, "eval_accuracy": 0.9746932027723024, "eval_loss": 0.09170718491077423, "eval_runtime": 31.8122, "eval_samples_per_second": 494.371, "eval_steps_per_second": 7.733, "step": 134500 }, { "epoch": 38.18052795912575, "grad_norm": 0.08953797817230225, "learning_rate": 6.183565143343742e-05, "loss": 0.0027678653597831724, "step": 134510 }, { "epoch": 38.1833664490491, "grad_norm": 0.6210519075393677, "learning_rate": 6.183281294351405e-05, "loss": 0.0008255355060100555, "step": 134520 }, { "epoch": 38.186204938972466, "grad_norm": 0.7011472582817078, "learning_rate": 6.182997445359069e-05, "loss": 0.0018625002354383468, "step": 134530 }, { "epoch": 38.18904342889583, "grad_norm": 0.28650230169296265, "learning_rate": 6.182713596366733e-05, "loss": 0.0012908091768622399, "step": 134540 }, { "epoch": 38.191881918819185, "grad_norm": 1.9494290351867676, "learning_rate": 6.182429747374397e-05, "loss": 0.0010882403701543808, "step": 134550 }, { "epoch": 38.19472040874255, "grad_norm": 4.1134724617004395, "learning_rate": 6.182145898382061e-05, "loss": 0.003326386585831642, "step": 134560 }, { "epoch": 38.19755889866591, "grad_norm": 0.11450708657503128, "learning_rate": 6.181862049389725e-05, "loss": 0.0017510443925857545, "step": 134570 }, { "epoch": 38.20039738858927, "grad_norm": 0.6780282855033875, "learning_rate": 6.181578200397388e-05, "loss": 0.0007807288318872451, "step": 134580 }, { "epoch": 38.20323587851263, "grad_norm": 0.33603140711784363, "learning_rate": 6.181294351405052e-05, "loss": 0.002268545143306255, "step": 134590 }, { "epoch": 38.20607436843599, "grad_norm": 1.5957492589950562, "learning_rate": 6.181010502412716e-05, "loss": 0.003979887068271637, "step": 134600 }, { "epoch": 38.208912858359355, "grad_norm": 1.7548292875289917, "learning_rate": 6.18072665342038e-05, "loss": 0.004050934687256813, "step": 134610 }, { "epoch": 38.21175134828271, "grad_norm": 0.09020157903432846, "learning_rate": 6.180442804428045e-05, "loss": 0.0010388627648353576, "step": 134620 }, { "epoch": 38.214589838206074, "grad_norm": 0.036795664578676224, "learning_rate": 6.180158955435709e-05, "loss": 0.00508514791727066, "step": 134630 }, { "epoch": 38.21742832812944, "grad_norm": 0.4582359790802002, "learning_rate": 6.179875106443373e-05, "loss": 0.0007269438356161118, "step": 134640 }, { "epoch": 38.2202668180528, "grad_norm": 1.890023946762085, "learning_rate": 6.179591257451036e-05, "loss": 0.005734815821051597, "step": 134650 }, { "epoch": 38.223105307976155, "grad_norm": 0.40506550669670105, "learning_rate": 6.1793074084587e-05, "loss": 0.0010772382840514183, "step": 134660 }, { "epoch": 38.22594379789952, "grad_norm": 0.23205557465553284, "learning_rate": 6.179023559466364e-05, "loss": 0.003627569600939751, "step": 134670 }, { "epoch": 38.22878228782288, "grad_norm": 7.9705586433410645, "learning_rate": 6.178739710474028e-05, "loss": 0.012706337869167328, "step": 134680 }, { "epoch": 38.23162077774624, "grad_norm": 0.30798599123954773, "learning_rate": 6.178455861481692e-05, "loss": 0.0007306167855858803, "step": 134690 }, { "epoch": 38.2344592676696, "grad_norm": 14.042913436889648, "learning_rate": 6.178172012489356e-05, "loss": 0.006286744028329849, "step": 134700 }, { "epoch": 38.23729775759296, "grad_norm": 0.6448888778686523, "learning_rate": 6.177888163497019e-05, "loss": 0.012516550719738007, "step": 134710 }, { "epoch": 38.24013624751632, "grad_norm": 0.34193095564842224, "learning_rate": 6.177604314504683e-05, "loss": 0.0078007087111473085, "step": 134720 }, { "epoch": 38.24297473743968, "grad_norm": 0.7417977452278137, "learning_rate": 6.177320465512348e-05, "loss": 0.006073121726512909, "step": 134730 }, { "epoch": 38.245813227363044, "grad_norm": 1.441645860671997, "learning_rate": 6.177036616520012e-05, "loss": 0.00965147465467453, "step": 134740 }, { "epoch": 38.24865171728641, "grad_norm": 3.3172965049743652, "learning_rate": 6.176752767527676e-05, "loss": 0.0067450180649757385, "step": 134750 }, { "epoch": 38.25149020720976, "grad_norm": 0.4145173132419586, "learning_rate": 6.17646891853534e-05, "loss": 0.005305605381727219, "step": 134760 }, { "epoch": 38.254328697133126, "grad_norm": 0.9570725560188293, "learning_rate": 6.176185069543004e-05, "loss": 0.003194957971572876, "step": 134770 }, { "epoch": 38.25716718705649, "grad_norm": 0.12894731760025024, "learning_rate": 6.175901220550667e-05, "loss": 0.018111911416053773, "step": 134780 }, { "epoch": 38.260005676979844, "grad_norm": 0.1613883674144745, "learning_rate": 6.175617371558331e-05, "loss": 0.005432094633579254, "step": 134790 }, { "epoch": 38.26284416690321, "grad_norm": 0.8154504299163818, "learning_rate": 6.175333522565995e-05, "loss": 0.0064793616533279415, "step": 134800 }, { "epoch": 38.26568265682657, "grad_norm": 10.099153518676758, "learning_rate": 6.175049673573659e-05, "loss": 0.005156424269080162, "step": 134810 }, { "epoch": 38.268521146749926, "grad_norm": 4.767364025115967, "learning_rate": 6.174765824581323e-05, "loss": 0.020709221065044404, "step": 134820 }, { "epoch": 38.27135963667329, "grad_norm": 1.3279755115509033, "learning_rate": 6.174481975588988e-05, "loss": 0.0051625072956085205, "step": 134830 }, { "epoch": 38.27419812659665, "grad_norm": 5.663919925689697, "learning_rate": 6.17419812659665e-05, "loss": 0.004571446776390075, "step": 134840 }, { "epoch": 38.277036616520014, "grad_norm": 4.391523361206055, "learning_rate": 6.173914277604314e-05, "loss": 0.0019650917500257494, "step": 134850 }, { "epoch": 38.27987510644337, "grad_norm": 0.0959833413362503, "learning_rate": 6.173630428611979e-05, "loss": 0.0049162209033966064, "step": 134860 }, { "epoch": 38.28271359636673, "grad_norm": 0.12046205997467041, "learning_rate": 6.173346579619643e-05, "loss": 0.012587885558605193, "step": 134870 }, { "epoch": 38.285552086290096, "grad_norm": 10.680061340332031, "learning_rate": 6.173062730627307e-05, "loss": 0.0034802518784999847, "step": 134880 }, { "epoch": 38.28839057621345, "grad_norm": 0.814301609992981, "learning_rate": 6.172778881634971e-05, "loss": 0.0006178801879286766, "step": 134890 }, { "epoch": 38.291229066136815, "grad_norm": 0.19150963425636292, "learning_rate": 6.172495032642635e-05, "loss": 0.0017397027462720872, "step": 134900 }, { "epoch": 38.29406755606018, "grad_norm": 0.04701870679855347, "learning_rate": 6.172211183650298e-05, "loss": 0.0014583354815840722, "step": 134910 }, { "epoch": 38.29690604598353, "grad_norm": 14.860255241394043, "learning_rate": 6.171927334657962e-05, "loss": 0.008625441789627075, "step": 134920 }, { "epoch": 38.299744535906896, "grad_norm": 1.892711877822876, "learning_rate": 6.171643485665626e-05, "loss": 0.006551828235387802, "step": 134930 }, { "epoch": 38.30258302583026, "grad_norm": 8.801395416259766, "learning_rate": 6.17135963667329e-05, "loss": 0.0031756356358528135, "step": 134940 }, { "epoch": 38.30542151575362, "grad_norm": 0.056268416345119476, "learning_rate": 6.171075787680954e-05, "loss": 0.005384530872106552, "step": 134950 }, { "epoch": 38.30826000567698, "grad_norm": 0.11726606637239456, "learning_rate": 6.170791938688619e-05, "loss": 0.001880672201514244, "step": 134960 }, { "epoch": 38.31109849560034, "grad_norm": 14.109869956970215, "learning_rate": 6.170508089696281e-05, "loss": 0.04095170497894287, "step": 134970 }, { "epoch": 38.3139369855237, "grad_norm": 7.809932231903076, "learning_rate": 6.170224240703946e-05, "loss": 0.004560225084424019, "step": 134980 }, { "epoch": 38.31677547544706, "grad_norm": 0.07856812328100204, "learning_rate": 6.16994039171161e-05, "loss": 0.0033401377499103547, "step": 134990 }, { "epoch": 38.31961396537042, "grad_norm": 0.925116240978241, "learning_rate": 6.169656542719274e-05, "loss": 0.011678503453731537, "step": 135000 }, { "epoch": 38.31961396537042, "eval_accuracy": 0.9743752781840147, "eval_loss": 0.0941469818353653, "eval_runtime": 31.7565, "eval_samples_per_second": 495.238, "eval_steps_per_second": 7.746, "step": 135000 }, { "epoch": 38.322452455293785, "grad_norm": 0.105397529900074, "learning_rate": 6.169372693726938e-05, "loss": 0.009975657612085343, "step": 135010 }, { "epoch": 38.32529094521715, "grad_norm": 0.7006086707115173, "learning_rate": 6.169088844734602e-05, "loss": 0.0010053662583231926, "step": 135020 }, { "epoch": 38.328129435140504, "grad_norm": 0.46642282605171204, "learning_rate": 6.168804995742266e-05, "loss": 0.0025414397940039636, "step": 135030 }, { "epoch": 38.33096792506387, "grad_norm": 0.25644588470458984, "learning_rate": 6.168521146749929e-05, "loss": 0.003680461645126343, "step": 135040 }, { "epoch": 38.33380641498723, "grad_norm": 0.4130198061466217, "learning_rate": 6.168237297757593e-05, "loss": 0.010602089762687682, "step": 135050 }, { "epoch": 38.336644904910585, "grad_norm": 1.493390440940857, "learning_rate": 6.167953448765257e-05, "loss": 0.0018177447840571404, "step": 135060 }, { "epoch": 38.33948339483395, "grad_norm": 0.26527324318885803, "learning_rate": 6.16766959977292e-05, "loss": 0.00382680743932724, "step": 135070 }, { "epoch": 38.34232188475731, "grad_norm": 3.4446351528167725, "learning_rate": 6.167385750780586e-05, "loss": 0.0018974712118506431, "step": 135080 }, { "epoch": 38.34516037468067, "grad_norm": 0.5435878038406372, "learning_rate": 6.16710190178825e-05, "loss": 0.005870004743337631, "step": 135090 }, { "epoch": 38.34799886460403, "grad_norm": 0.056403566151857376, "learning_rate": 6.166818052795912e-05, "loss": 0.0030626475811004637, "step": 135100 }, { "epoch": 38.35083735452739, "grad_norm": 1.920772671699524, "learning_rate": 6.166534203803577e-05, "loss": 0.013771346211433411, "step": 135110 }, { "epoch": 38.353675844450756, "grad_norm": 1.9258034229278564, "learning_rate": 6.166250354811241e-05, "loss": 0.0019804190844297407, "step": 135120 }, { "epoch": 38.35651433437411, "grad_norm": 0.05152112618088722, "learning_rate": 6.165966505818905e-05, "loss": 0.0018835114315152168, "step": 135130 }, { "epoch": 38.359352824297474, "grad_norm": 0.11888402700424194, "learning_rate": 6.165682656826569e-05, "loss": 0.002815871685743332, "step": 135140 }, { "epoch": 38.36219131422084, "grad_norm": 0.9970793128013611, "learning_rate": 6.165398807834233e-05, "loss": 0.0035604190081357954, "step": 135150 }, { "epoch": 38.36502980414419, "grad_norm": 0.24656173586845398, "learning_rate": 6.165114958841896e-05, "loss": 0.0029092352837324143, "step": 135160 }, { "epoch": 38.367868294067556, "grad_norm": 1.7609609365463257, "learning_rate": 6.16483110984956e-05, "loss": 0.007068584114313126, "step": 135170 }, { "epoch": 38.37070678399092, "grad_norm": 0.03300812840461731, "learning_rate": 6.164547260857224e-05, "loss": 0.004900767281651497, "step": 135180 }, { "epoch": 38.373545273914274, "grad_norm": 0.08716969937086105, "learning_rate": 6.164263411864888e-05, "loss": 0.008443042635917664, "step": 135190 }, { "epoch": 38.37638376383764, "grad_norm": 0.12718762457370758, "learning_rate": 6.163979562872551e-05, "loss": 0.0075580857694149016, "step": 135200 }, { "epoch": 38.379222253761, "grad_norm": 1.444724678993225, "learning_rate": 6.163695713880217e-05, "loss": 0.0019911292940378187, "step": 135210 }, { "epoch": 38.38206074368436, "grad_norm": 0.08032118529081345, "learning_rate": 6.163411864887881e-05, "loss": 0.001308680884540081, "step": 135220 }, { "epoch": 38.38489923360772, "grad_norm": 1.1914821863174438, "learning_rate": 6.163128015895544e-05, "loss": 0.005745340138673782, "step": 135230 }, { "epoch": 38.38773772353108, "grad_norm": 0.24814899265766144, "learning_rate": 6.162844166903208e-05, "loss": 0.0017528858035802842, "step": 135240 }, { "epoch": 38.390576213454445, "grad_norm": 4.454397201538086, "learning_rate": 6.162560317910872e-05, "loss": 0.003952928632497787, "step": 135250 }, { "epoch": 38.3934147033778, "grad_norm": 0.06842509657144547, "learning_rate": 6.162276468918535e-05, "loss": 0.005411344021558762, "step": 135260 }, { "epoch": 38.39625319330116, "grad_norm": 3.2981350421905518, "learning_rate": 6.161992619926199e-05, "loss": 0.002562856115400791, "step": 135270 }, { "epoch": 38.399091683224526, "grad_norm": 13.176570892333984, "learning_rate": 6.161708770933864e-05, "loss": 0.008661288768053055, "step": 135280 }, { "epoch": 38.40193017314788, "grad_norm": 0.8192607164382935, "learning_rate": 6.161424921941527e-05, "loss": 0.007953906059265136, "step": 135290 }, { "epoch": 38.404768663071245, "grad_norm": 0.5461180806159973, "learning_rate": 6.161141072949191e-05, "loss": 0.005651094764471054, "step": 135300 }, { "epoch": 38.40760715299461, "grad_norm": 1.5097296237945557, "learning_rate": 6.160857223956855e-05, "loss": 0.001774081028997898, "step": 135310 }, { "epoch": 38.41044564291797, "grad_norm": 0.9418918490409851, "learning_rate": 6.16057337496452e-05, "loss": 0.006616681814193726, "step": 135320 }, { "epoch": 38.413284132841326, "grad_norm": 0.11012096703052521, "learning_rate": 6.160289525972182e-05, "loss": 0.002159501425921917, "step": 135330 }, { "epoch": 38.41612262276469, "grad_norm": 0.8838050365447998, "learning_rate": 6.160005676979848e-05, "loss": 0.0020592864602804184, "step": 135340 }, { "epoch": 38.41896111268805, "grad_norm": 0.08345706015825272, "learning_rate": 6.159721827987512e-05, "loss": 0.005993042513728142, "step": 135350 }, { "epoch": 38.42179960261141, "grad_norm": 0.6298748254776001, "learning_rate": 6.159437978995175e-05, "loss": 0.014646728336811066, "step": 135360 }, { "epoch": 38.42463809253477, "grad_norm": 0.4491177499294281, "learning_rate": 6.159154130002839e-05, "loss": 0.0010119926184415818, "step": 135370 }, { "epoch": 38.427476582458134, "grad_norm": 7.686692714691162, "learning_rate": 6.158870281010503e-05, "loss": 0.0043272413313388824, "step": 135380 }, { "epoch": 38.43031507238149, "grad_norm": 0.6566407680511475, "learning_rate": 6.158586432018166e-05, "loss": 0.0029710086062550546, "step": 135390 }, { "epoch": 38.43315356230485, "grad_norm": 0.031021684408187866, "learning_rate": 6.15830258302583e-05, "loss": 0.0018673669546842575, "step": 135400 }, { "epoch": 38.435992052228215, "grad_norm": 0.09514325112104416, "learning_rate": 6.158018734033495e-05, "loss": 0.0032774358987808228, "step": 135410 }, { "epoch": 38.43883054215158, "grad_norm": 0.050353869795799255, "learning_rate": 6.157734885041158e-05, "loss": 0.0014400530606508255, "step": 135420 }, { "epoch": 38.441669032074934, "grad_norm": 0.2416570633649826, "learning_rate": 6.157451036048822e-05, "loss": 0.009577415883541107, "step": 135430 }, { "epoch": 38.4445075219983, "grad_norm": 7.328619480133057, "learning_rate": 6.157167187056486e-05, "loss": 0.011363565921783447, "step": 135440 }, { "epoch": 38.44734601192166, "grad_norm": 1.306361436843872, "learning_rate": 6.15688333806415e-05, "loss": 0.008180183917284011, "step": 135450 }, { "epoch": 38.450184501845015, "grad_norm": 0.6085950136184692, "learning_rate": 6.156599489071813e-05, "loss": 0.013829648494720459, "step": 135460 }, { "epoch": 38.45302299176838, "grad_norm": 2.418565034866333, "learning_rate": 6.156315640079477e-05, "loss": 0.0010970816016197204, "step": 135470 }, { "epoch": 38.45586148169174, "grad_norm": 0.11012092977762222, "learning_rate": 6.156031791087143e-05, "loss": 0.001015191338956356, "step": 135480 }, { "epoch": 38.458699971615104, "grad_norm": 0.13875535130500793, "learning_rate": 6.155747942094806e-05, "loss": 0.004551955312490463, "step": 135490 }, { "epoch": 38.46153846153846, "grad_norm": 12.91567611694336, "learning_rate": 6.15546409310247e-05, "loss": 0.007015454769134522, "step": 135500 }, { "epoch": 38.46153846153846, "eval_accuracy": 0.9723405608189737, "eval_loss": 0.09708716720342636, "eval_runtime": 31.9768, "eval_samples_per_second": 491.825, "eval_steps_per_second": 7.693, "step": 135500 }, { "epoch": 38.46437695146182, "grad_norm": 0.1532626748085022, "learning_rate": 6.155180244110134e-05, "loss": 0.007028988748788834, "step": 135510 }, { "epoch": 38.467215441385186, "grad_norm": 0.14090636372566223, "learning_rate": 6.154896395117797e-05, "loss": 0.0077640779316425325, "step": 135520 }, { "epoch": 38.47005393130854, "grad_norm": 0.13461485505104065, "learning_rate": 6.154612546125461e-05, "loss": 0.016653384268283843, "step": 135530 }, { "epoch": 38.472892421231904, "grad_norm": 3.1759281158447266, "learning_rate": 6.154328697133126e-05, "loss": 0.0027630701661109924, "step": 135540 }, { "epoch": 38.47573091115527, "grad_norm": 0.9256454706192017, "learning_rate": 6.154044848140789e-05, "loss": 0.0050191331654787065, "step": 135550 }, { "epoch": 38.47856940107862, "grad_norm": 0.09942079335451126, "learning_rate": 6.153760999148453e-05, "loss": 0.0005018610507249832, "step": 135560 }, { "epoch": 38.481407891001986, "grad_norm": 0.15628722310066223, "learning_rate": 6.153477150156117e-05, "loss": 0.0029232488945126534, "step": 135570 }, { "epoch": 38.48424638092535, "grad_norm": 0.13773216307163239, "learning_rate": 6.153193301163782e-05, "loss": 0.0012789636850357056, "step": 135580 }, { "epoch": 38.48708487084871, "grad_norm": 0.39429011940956116, "learning_rate": 6.152909452171444e-05, "loss": 0.0008003953844308853, "step": 135590 }, { "epoch": 38.48992336077207, "grad_norm": 0.04783034324645996, "learning_rate": 6.152625603179108e-05, "loss": 0.0008941607549786568, "step": 135600 }, { "epoch": 38.49276185069543, "grad_norm": 0.1754629760980606, "learning_rate": 6.152341754186774e-05, "loss": 0.0018739040940999984, "step": 135610 }, { "epoch": 38.49560034061879, "grad_norm": 0.04675730690360069, "learning_rate": 6.152057905194437e-05, "loss": 0.0011871449649333953, "step": 135620 }, { "epoch": 38.49843883054215, "grad_norm": 0.028517620638012886, "learning_rate": 6.151774056202101e-05, "loss": 0.0008887562900781632, "step": 135630 }, { "epoch": 38.50127732046551, "grad_norm": 0.3093649744987488, "learning_rate": 6.151490207209765e-05, "loss": 0.0036879539489746093, "step": 135640 }, { "epoch": 38.504115810388875, "grad_norm": 2.7688379287719727, "learning_rate": 6.151206358217428e-05, "loss": 0.002188309095799923, "step": 135650 }, { "epoch": 38.50695430031223, "grad_norm": 0.022523559629917145, "learning_rate": 6.150922509225092e-05, "loss": 0.0018501771613955499, "step": 135660 }, { "epoch": 38.50979279023559, "grad_norm": 0.02356768399477005, "learning_rate": 6.150638660232756e-05, "loss": 0.010983145236968994, "step": 135670 }, { "epoch": 38.512631280158956, "grad_norm": 2.3858256340026855, "learning_rate": 6.15035481124042e-05, "loss": 0.0036229632794857027, "step": 135680 }, { "epoch": 38.51546977008232, "grad_norm": 0.12981124222278595, "learning_rate": 6.150070962248084e-05, "loss": 0.0038952045142650604, "step": 135690 }, { "epoch": 38.518308260005675, "grad_norm": 0.16601401567459106, "learning_rate": 6.149787113255749e-05, "loss": 0.0012140143662691116, "step": 135700 }, { "epoch": 38.52114674992904, "grad_norm": 0.3520420491695404, "learning_rate": 6.149503264263413e-05, "loss": 0.0020994456484913827, "step": 135710 }, { "epoch": 38.5239852398524, "grad_norm": 2.565340757369995, "learning_rate": 6.149219415271075e-05, "loss": 0.0010496702045202256, "step": 135720 }, { "epoch": 38.52682372977576, "grad_norm": 1.2032439708709717, "learning_rate": 6.14893556627874e-05, "loss": 0.0026920245960354804, "step": 135730 }, { "epoch": 38.52966221969912, "grad_norm": 28.78321075439453, "learning_rate": 6.148651717286405e-05, "loss": 0.017881177365779877, "step": 135740 }, { "epoch": 38.53250070962248, "grad_norm": 0.04024608060717583, "learning_rate": 6.148367868294068e-05, "loss": 0.00619904138147831, "step": 135750 }, { "epoch": 38.535339199545845, "grad_norm": 1.1123851537704468, "learning_rate": 6.148084019301732e-05, "loss": 0.0021531926468014717, "step": 135760 }, { "epoch": 38.5381776894692, "grad_norm": 0.3989243507385254, "learning_rate": 6.147800170309396e-05, "loss": 0.0010667555034160614, "step": 135770 }, { "epoch": 38.541016179392564, "grad_norm": 0.2814774811267853, "learning_rate": 6.147516321317059e-05, "loss": 0.0010479971766471862, "step": 135780 }, { "epoch": 38.54385466931593, "grad_norm": 0.3770263195037842, "learning_rate": 6.147232472324723e-05, "loss": 0.0008475968614220619, "step": 135790 }, { "epoch": 38.54669315923928, "grad_norm": 0.15267719328403473, "learning_rate": 6.146948623332387e-05, "loss": 0.001021953672170639, "step": 135800 }, { "epoch": 38.549531649162645, "grad_norm": 0.18413841724395752, "learning_rate": 6.146664774340051e-05, "loss": 0.0028862584382295608, "step": 135810 }, { "epoch": 38.55237013908601, "grad_norm": 0.7051997780799866, "learning_rate": 6.146380925347715e-05, "loss": 0.0021086476743221285, "step": 135820 }, { "epoch": 38.555208629009364, "grad_norm": 1.5941591262817383, "learning_rate": 6.14609707635538e-05, "loss": 0.006381133943796158, "step": 135830 }, { "epoch": 38.55804711893273, "grad_norm": 0.24233686923980713, "learning_rate": 6.145813227363044e-05, "loss": 0.0007729744538664818, "step": 135840 }, { "epoch": 38.56088560885609, "grad_norm": 0.1543876677751541, "learning_rate": 6.145529378370707e-05, "loss": 0.002078931592404842, "step": 135850 }, { "epoch": 38.56372409877945, "grad_norm": 0.5416765809059143, "learning_rate": 6.14524552937837e-05, "loss": 0.001722593791782856, "step": 135860 }, { "epoch": 38.56656258870281, "grad_norm": 0.35925862193107605, "learning_rate": 6.144961680386035e-05, "loss": 0.0046719186007976535, "step": 135870 }, { "epoch": 38.56940107862617, "grad_norm": 0.0941983312368393, "learning_rate": 6.144677831393699e-05, "loss": 0.0016037749126553535, "step": 135880 }, { "epoch": 38.572239568549534, "grad_norm": 4.017645835876465, "learning_rate": 6.144393982401363e-05, "loss": 0.0025598552078008653, "step": 135890 }, { "epoch": 38.57507805847289, "grad_norm": 0.13510210812091827, "learning_rate": 6.144110133409027e-05, "loss": 0.0008298946544528007, "step": 135900 }, { "epoch": 38.57791654839625, "grad_norm": 0.23703446984291077, "learning_rate": 6.14382628441669e-05, "loss": 0.002044939436018467, "step": 135910 }, { "epoch": 38.580755038319616, "grad_norm": 0.19244232773780823, "learning_rate": 6.143542435424354e-05, "loss": 0.0028385674580931664, "step": 135920 }, { "epoch": 38.58359352824297, "grad_norm": 0.09505518525838852, "learning_rate": 6.143258586432018e-05, "loss": 0.0015394626185297965, "step": 135930 }, { "epoch": 38.586432018166335, "grad_norm": 3.29378342628479, "learning_rate": 6.142974737439682e-05, "loss": 0.015290457010269164, "step": 135940 }, { "epoch": 38.5892705080897, "grad_norm": 0.8285510540008545, "learning_rate": 6.142690888447347e-05, "loss": 0.0037109963595867157, "step": 135950 }, { "epoch": 38.59210899801306, "grad_norm": 3.2156736850738525, "learning_rate": 6.14240703945501e-05, "loss": 0.001501442864537239, "step": 135960 }, { "epoch": 38.594947487936416, "grad_norm": 0.10440534353256226, "learning_rate": 6.142123190462675e-05, "loss": 0.004285608977079391, "step": 135970 }, { "epoch": 38.59778597785978, "grad_norm": 1.7456245422363281, "learning_rate": 6.141839341470338e-05, "loss": 0.0019434705376625062, "step": 135980 }, { "epoch": 38.60062446778314, "grad_norm": 0.05066037178039551, "learning_rate": 6.141555492478002e-05, "loss": 0.001310076005756855, "step": 135990 }, { "epoch": 38.6034629577065, "grad_norm": 0.11218142509460449, "learning_rate": 6.141271643485666e-05, "loss": 0.0029816806316375734, "step": 136000 }, { "epoch": 38.6034629577065, "eval_accuracy": 0.9762192407960831, "eval_loss": 0.08278802782297134, "eval_runtime": 32.1749, "eval_samples_per_second": 488.797, "eval_steps_per_second": 7.646, "step": 136000 }, { "epoch": 38.60630144762986, "grad_norm": 0.04110505059361458, "learning_rate": 6.14098779449333e-05, "loss": 0.00704413428902626, "step": 136010 }, { "epoch": 38.60913993755322, "grad_norm": 0.20143942534923553, "learning_rate": 6.140703945500994e-05, "loss": 0.0005810655653476715, "step": 136020 }, { "epoch": 38.61197842747658, "grad_norm": 5.58424711227417, "learning_rate": 6.140420096508658e-05, "loss": 0.009471157193183899, "step": 136030 }, { "epoch": 38.61481691739994, "grad_norm": 2.136950969696045, "learning_rate": 6.140136247516321e-05, "loss": 0.015326546132564544, "step": 136040 }, { "epoch": 38.617655407323305, "grad_norm": 0.05483608320355415, "learning_rate": 6.139852398523985e-05, "loss": 0.002490892820060253, "step": 136050 }, { "epoch": 38.62049389724667, "grad_norm": 0.054581888020038605, "learning_rate": 6.13956854953165e-05, "loss": 0.0021986329928040506, "step": 136060 }, { "epoch": 38.623332387170024, "grad_norm": 0.2335558980703354, "learning_rate": 6.139284700539313e-05, "loss": 0.0022899048402905465, "step": 136070 }, { "epoch": 38.62617087709339, "grad_norm": 1.161386489868164, "learning_rate": 6.139000851546978e-05, "loss": 0.0024105969816446303, "step": 136080 }, { "epoch": 38.62900936701675, "grad_norm": 0.3985218405723572, "learning_rate": 6.138717002554642e-05, "loss": 0.0021485598757863046, "step": 136090 }, { "epoch": 38.631847856940105, "grad_norm": 0.39690661430358887, "learning_rate": 6.138433153562305e-05, "loss": 0.0011351585388183593, "step": 136100 }, { "epoch": 38.63468634686347, "grad_norm": 0.9182251691818237, "learning_rate": 6.138149304569969e-05, "loss": 0.0016373202204704284, "step": 136110 }, { "epoch": 38.63752483678683, "grad_norm": 12.3392915725708, "learning_rate": 6.137865455577633e-05, "loss": 0.011935946345329285, "step": 136120 }, { "epoch": 38.64036332671019, "grad_norm": 0.05375846102833748, "learning_rate": 6.137581606585297e-05, "loss": 0.0032425083220005034, "step": 136130 }, { "epoch": 38.64320181663355, "grad_norm": 0.26288774609565735, "learning_rate": 6.137297757592961e-05, "loss": 0.008097366988658905, "step": 136140 }, { "epoch": 38.64604030655691, "grad_norm": 0.059297073632478714, "learning_rate": 6.137042293499858e-05, "loss": 0.022571422159671783, "step": 136150 }, { "epoch": 38.648878796480275, "grad_norm": 1.421044111251831, "learning_rate": 6.136758444507522e-05, "loss": 0.011364725977182388, "step": 136160 }, { "epoch": 38.65171728640363, "grad_norm": 0.16265228390693665, "learning_rate": 6.136474595515186e-05, "loss": 0.0038663528859615324, "step": 136170 }, { "epoch": 38.654555776326994, "grad_norm": 7.549160957336426, "learning_rate": 6.13619074652285e-05, "loss": 0.0059774771332740785, "step": 136180 }, { "epoch": 38.65739426625036, "grad_norm": 0.24436140060424805, "learning_rate": 6.135906897530514e-05, "loss": 0.006120540201663971, "step": 136190 }, { "epoch": 38.66023275617371, "grad_norm": 0.036731258034706116, "learning_rate": 6.135623048538178e-05, "loss": 0.006040460243821144, "step": 136200 }, { "epoch": 38.663071246097076, "grad_norm": 0.08173620700836182, "learning_rate": 6.135339199545843e-05, "loss": 0.005126922205090523, "step": 136210 }, { "epoch": 38.66590973602044, "grad_norm": 1.0930724143981934, "learning_rate": 6.135055350553505e-05, "loss": 0.001257179118692875, "step": 136220 }, { "epoch": 38.6687482259438, "grad_norm": 1.3666458129882812, "learning_rate": 6.13477150156117e-05, "loss": 0.009613914787769318, "step": 136230 }, { "epoch": 38.67158671586716, "grad_norm": 0.29617321491241455, "learning_rate": 6.134487652568834e-05, "loss": 0.013505506515502929, "step": 136240 }, { "epoch": 38.67442520579052, "grad_norm": 0.19346386194229126, "learning_rate": 6.134203803576496e-05, "loss": 0.006604056805372238, "step": 136250 }, { "epoch": 38.67726369571388, "grad_norm": 0.9535866975784302, "learning_rate": 6.133919954584162e-05, "loss": 0.0025373678654432297, "step": 136260 }, { "epoch": 38.68010218563724, "grad_norm": 2.5597715377807617, "learning_rate": 6.133636105591826e-05, "loss": 0.001963675394654274, "step": 136270 }, { "epoch": 38.6829406755606, "grad_norm": 0.18233366310596466, "learning_rate": 6.133352256599489e-05, "loss": 0.0017403900623321534, "step": 136280 }, { "epoch": 38.685779165483964, "grad_norm": 0.3750947117805481, "learning_rate": 6.133068407607153e-05, "loss": 0.0013620039448142053, "step": 136290 }, { "epoch": 38.68861765540732, "grad_norm": 0.03508025035262108, "learning_rate": 6.132784558614817e-05, "loss": 0.000747206062078476, "step": 136300 }, { "epoch": 38.69145614533068, "grad_norm": 9.164497375488281, "learning_rate": 6.132500709622481e-05, "loss": 0.002782498300075531, "step": 136310 }, { "epoch": 38.694294635254046, "grad_norm": 0.5890185832977295, "learning_rate": 6.132216860630145e-05, "loss": 0.0012815730646252633, "step": 136320 }, { "epoch": 38.69713312517741, "grad_norm": 0.29477575421333313, "learning_rate": 6.13193301163781e-05, "loss": 0.001544569805264473, "step": 136330 }, { "epoch": 38.699971615100765, "grad_norm": 0.16373345255851746, "learning_rate": 6.131649162645474e-05, "loss": 0.003889068216085434, "step": 136340 }, { "epoch": 38.70281010502413, "grad_norm": 0.0695229098200798, "learning_rate": 6.131365313653136e-05, "loss": 0.00486629381775856, "step": 136350 }, { "epoch": 38.70564859494749, "grad_norm": 2.305974006652832, "learning_rate": 6.1310814646608e-05, "loss": 0.0033214457333087923, "step": 136360 }, { "epoch": 38.708487084870846, "grad_norm": 0.14764226973056793, "learning_rate": 6.130797615668465e-05, "loss": 0.004053351655602455, "step": 136370 }, { "epoch": 38.71132557479421, "grad_norm": 10.6625394821167, "learning_rate": 6.130513766676127e-05, "loss": 0.006433658301830292, "step": 136380 }, { "epoch": 38.71416406471757, "grad_norm": 3.6864173412323, "learning_rate": 6.130229917683793e-05, "loss": 0.005073889344930649, "step": 136390 }, { "epoch": 38.71700255464093, "grad_norm": 0.9584582448005676, "learning_rate": 6.129946068691457e-05, "loss": 0.0029225749894976616, "step": 136400 }, { "epoch": 38.71984104456429, "grad_norm": 0.2951776087284088, "learning_rate": 6.12966221969912e-05, "loss": 0.0012059558182954788, "step": 136410 }, { "epoch": 38.72267953448765, "grad_norm": 0.6344748139381409, "learning_rate": 6.129378370706784e-05, "loss": 0.010846298187971115, "step": 136420 }, { "epoch": 38.725518024411016, "grad_norm": 0.1910606175661087, "learning_rate": 6.129094521714448e-05, "loss": 0.0012677567079663276, "step": 136430 }, { "epoch": 38.72835651433437, "grad_norm": 6.896583080291748, "learning_rate": 6.128810672722112e-05, "loss": 0.0036260675638914107, "step": 136440 }, { "epoch": 38.731195004257735, "grad_norm": 4.85918664932251, "learning_rate": 6.128526823729776e-05, "loss": 0.0026777951046824456, "step": 136450 }, { "epoch": 38.7340334941811, "grad_norm": 0.04556845501065254, "learning_rate": 6.12824297473744e-05, "loss": 0.006358266621828079, "step": 136460 }, { "epoch": 38.736871984104454, "grad_norm": 0.6142772436141968, "learning_rate": 6.127959125745105e-05, "loss": 0.002424268238246441, "step": 136470 }, { "epoch": 38.73971047402782, "grad_norm": 1.728261113166809, "learning_rate": 6.127675276752767e-05, "loss": 0.011589571833610535, "step": 136480 }, { "epoch": 38.74254896395118, "grad_norm": 0.058594170957803726, "learning_rate": 6.127391427760432e-05, "loss": 0.005717622488737107, "step": 136490 }, { "epoch": 38.745387453874535, "grad_norm": 0.03481850400567055, "learning_rate": 6.127107578768096e-05, "loss": 0.014588183164596558, "step": 136500 }, { "epoch": 38.745387453874535, "eval_accuracy": 0.9734215044191518, "eval_loss": 0.09129461646080017, "eval_runtime": 31.6834, "eval_samples_per_second": 496.379, "eval_steps_per_second": 7.764, "step": 136500 }, { "epoch": 38.7482259437979, "grad_norm": 0.11015772819519043, "learning_rate": 6.126823729775759e-05, "loss": 0.0039011433720588682, "step": 136510 }, { "epoch": 38.75106443372126, "grad_norm": 12.22962760925293, "learning_rate": 6.126539880783424e-05, "loss": 0.014401167631149292, "step": 136520 }, { "epoch": 38.753902923644624, "grad_norm": 2.4870171546936035, "learning_rate": 6.126256031791088e-05, "loss": 0.0054236166179180145, "step": 136530 }, { "epoch": 38.75674141356798, "grad_norm": 0.3709111511707306, "learning_rate": 6.125972182798751e-05, "loss": 0.0009145205840468407, "step": 136540 }, { "epoch": 38.75957990349134, "grad_norm": 0.06053968518972397, "learning_rate": 6.125688333806415e-05, "loss": 0.004316484555602074, "step": 136550 }, { "epoch": 38.762418393414706, "grad_norm": 10.312936782836914, "learning_rate": 6.125404484814079e-05, "loss": 0.00904449075460434, "step": 136560 }, { "epoch": 38.76525688333806, "grad_norm": 0.06283903121948242, "learning_rate": 6.125120635821743e-05, "loss": 0.002323652245104313, "step": 136570 }, { "epoch": 38.768095373261424, "grad_norm": 0.4236854910850525, "learning_rate": 6.124836786829406e-05, "loss": 0.004421686381101608, "step": 136580 }, { "epoch": 38.77093386318479, "grad_norm": 2.8406593799591064, "learning_rate": 6.124552937837072e-05, "loss": 0.003348906338214874, "step": 136590 }, { "epoch": 38.77377235310815, "grad_norm": 0.9530077576637268, "learning_rate": 6.124269088844736e-05, "loss": 0.0016872406005859375, "step": 136600 }, { "epoch": 38.776610843031506, "grad_norm": 0.20477396249771118, "learning_rate": 6.123985239852399e-05, "loss": 0.0012434175238013267, "step": 136610 }, { "epoch": 38.77944933295487, "grad_norm": 7.7630133628845215, "learning_rate": 6.123701390860063e-05, "loss": 0.002184251695871353, "step": 136620 }, { "epoch": 38.78228782287823, "grad_norm": 0.09650157392024994, "learning_rate": 6.123417541867727e-05, "loss": 0.0012489393353462218, "step": 136630 }, { "epoch": 38.78512631280159, "grad_norm": 0.19266565144062042, "learning_rate": 6.12313369287539e-05, "loss": 0.0038189269602298737, "step": 136640 }, { "epoch": 38.78796480272495, "grad_norm": 0.72478848695755, "learning_rate": 6.122849843883055e-05, "loss": 0.0020806048065423965, "step": 136650 }, { "epoch": 38.79080329264831, "grad_norm": 0.16608668863773346, "learning_rate": 6.122565994890719e-05, "loss": 0.002335289493203163, "step": 136660 }, { "epoch": 38.79364178257167, "grad_norm": 0.03468051180243492, "learning_rate": 6.122282145898382e-05, "loss": 0.0026918921619653704, "step": 136670 }, { "epoch": 38.79648027249503, "grad_norm": 0.2249375581741333, "learning_rate": 6.121998296906046e-05, "loss": 0.0068877778947353365, "step": 136680 }, { "epoch": 38.799318762418395, "grad_norm": 0.19205904006958008, "learning_rate": 6.12171444791371e-05, "loss": 0.004375426471233368, "step": 136690 }, { "epoch": 38.80215725234176, "grad_norm": 1.9620513916015625, "learning_rate": 6.121430598921374e-05, "loss": 0.002422507479786873, "step": 136700 }, { "epoch": 38.80499574226511, "grad_norm": 0.35271015763282776, "learning_rate": 6.121146749929037e-05, "loss": 0.0030752597376704216, "step": 136710 }, { "epoch": 38.807834232188476, "grad_norm": 0.3097192943096161, "learning_rate": 6.120862900936703e-05, "loss": 0.0034665547311306, "step": 136720 }, { "epoch": 38.81067272211184, "grad_norm": 0.17545726895332336, "learning_rate": 6.120579051944367e-05, "loss": 0.005319380015134811, "step": 136730 }, { "epoch": 38.813511212035195, "grad_norm": 0.2879747748374939, "learning_rate": 6.12029520295203e-05, "loss": 0.008793916553258896, "step": 136740 }, { "epoch": 38.81634970195856, "grad_norm": 0.45000073313713074, "learning_rate": 6.120011353959694e-05, "loss": 0.0033906400203704834, "step": 136750 }, { "epoch": 38.81918819188192, "grad_norm": 2.3715920448303223, "learning_rate": 6.119727504967358e-05, "loss": 0.01173495054244995, "step": 136760 }, { "epoch": 38.822026681805276, "grad_norm": 10.430177688598633, "learning_rate": 6.119443655975021e-05, "loss": 0.008167661726474762, "step": 136770 }, { "epoch": 38.82486517172864, "grad_norm": 0.3860531449317932, "learning_rate": 6.119159806982685e-05, "loss": 0.011427169293165207, "step": 136780 }, { "epoch": 38.827703661652, "grad_norm": 0.06467914581298828, "learning_rate": 6.11887595799035e-05, "loss": 0.010730832815170288, "step": 136790 }, { "epoch": 38.830542151575365, "grad_norm": 0.8924474716186523, "learning_rate": 6.118592108998013e-05, "loss": 0.009281785786151886, "step": 136800 }, { "epoch": 38.83338064149872, "grad_norm": 0.23478184640407562, "learning_rate": 6.118308260005677e-05, "loss": 0.021039161086082458, "step": 136810 }, { "epoch": 38.836219131422084, "grad_norm": 0.09967546910047531, "learning_rate": 6.118024411013341e-05, "loss": 0.018042591214179993, "step": 136820 }, { "epoch": 38.83905762134545, "grad_norm": 0.27845102548599243, "learning_rate": 6.117740562021006e-05, "loss": 0.015605142712593079, "step": 136830 }, { "epoch": 38.8418961112688, "grad_norm": 2.200705051422119, "learning_rate": 6.117456713028668e-05, "loss": 0.012432622909545898, "step": 136840 }, { "epoch": 38.844734601192165, "grad_norm": 0.06835105270147324, "learning_rate": 6.117172864036334e-05, "loss": 0.0038689129054546357, "step": 136850 }, { "epoch": 38.84757309111553, "grad_norm": 0.27165040373802185, "learning_rate": 6.116889015043998e-05, "loss": 0.006652887910604477, "step": 136860 }, { "epoch": 38.850411581038884, "grad_norm": 0.9472457766532898, "learning_rate": 6.116605166051661e-05, "loss": 0.0073095932602882385, "step": 136870 }, { "epoch": 38.85325007096225, "grad_norm": 3.0258140563964844, "learning_rate": 6.116321317059325e-05, "loss": 0.013051986694335938, "step": 136880 }, { "epoch": 38.85608856088561, "grad_norm": 0.17335623502731323, "learning_rate": 6.116037468066989e-05, "loss": 0.0005784519016742706, "step": 136890 }, { "epoch": 38.85892705080897, "grad_norm": 5.466840744018555, "learning_rate": 6.115753619074652e-05, "loss": 0.0043010756373405455, "step": 136900 }, { "epoch": 38.86176554073233, "grad_norm": 0.2209157943725586, "learning_rate": 6.115469770082316e-05, "loss": 0.003944623470306397, "step": 136910 }, { "epoch": 38.86460403065569, "grad_norm": 0.21972675621509552, "learning_rate": 6.115185921089981e-05, "loss": 0.018045033514499664, "step": 136920 }, { "epoch": 38.867442520579054, "grad_norm": 1.0783528089523315, "learning_rate": 6.114902072097644e-05, "loss": 0.017407304048538207, "step": 136930 }, { "epoch": 38.87028101050241, "grad_norm": 0.1905258446931839, "learning_rate": 6.114618223105308e-05, "loss": 0.007618250697851181, "step": 136940 }, { "epoch": 38.87311950042577, "grad_norm": 1.7849047183990479, "learning_rate": 6.114334374112972e-05, "loss": 0.006351820379495621, "step": 136950 }, { "epoch": 38.875957990349136, "grad_norm": 0.045467957854270935, "learning_rate": 6.114050525120637e-05, "loss": 0.0009369080886244774, "step": 136960 }, { "epoch": 38.8787964802725, "grad_norm": 1.6275182962417603, "learning_rate": 6.1137666761283e-05, "loss": 0.0020040601491928102, "step": 136970 }, { "epoch": 38.881634970195854, "grad_norm": 0.13557808101177216, "learning_rate": 6.113482827135964e-05, "loss": 0.014672042429447174, "step": 136980 }, { "epoch": 38.88447346011922, "grad_norm": 2.6658971309661865, "learning_rate": 6.113198978143628e-05, "loss": 0.01650681495666504, "step": 136990 }, { "epoch": 38.88731195004258, "grad_norm": 1.719480037689209, "learning_rate": 6.112915129151292e-05, "loss": 0.004433354735374451, "step": 137000 }, { "epoch": 38.88731195004258, "eval_accuracy": 0.9740573535957271, "eval_loss": 0.09641898423433304, "eval_runtime": 31.7762, "eval_samples_per_second": 494.93, "eval_steps_per_second": 7.742, "step": 137000 }, { "epoch": 38.890150439965936, "grad_norm": 7.064290523529053, "learning_rate": 6.112631280158956e-05, "loss": 0.009441594779491424, "step": 137010 }, { "epoch": 38.8929889298893, "grad_norm": 0.36176249384880066, "learning_rate": 6.11234743116662e-05, "loss": 0.0051873397082090374, "step": 137020 }, { "epoch": 38.89582741981266, "grad_norm": 9.650701522827148, "learning_rate": 6.112063582174283e-05, "loss": 0.0037537820637226103, "step": 137030 }, { "epoch": 38.89866590973602, "grad_norm": 0.9703720211982727, "learning_rate": 6.111779733181947e-05, "loss": 0.0056422889232635495, "step": 137040 }, { "epoch": 38.90150439965938, "grad_norm": 0.5276385545730591, "learning_rate": 6.111495884189612e-05, "loss": 0.006073486804962158, "step": 137050 }, { "epoch": 38.90434288958274, "grad_norm": 0.4081055819988251, "learning_rate": 6.111212035197275e-05, "loss": 0.0009991828352212905, "step": 137060 }, { "epoch": 38.907181379506106, "grad_norm": 0.36674511432647705, "learning_rate": 6.11092818620494e-05, "loss": 0.001810632459819317, "step": 137070 }, { "epoch": 38.91001986942946, "grad_norm": 0.038281768560409546, "learning_rate": 6.110644337212604e-05, "loss": 0.005989895015954971, "step": 137080 }, { "epoch": 38.912858359352825, "grad_norm": 0.1562529057264328, "learning_rate": 6.110360488220266e-05, "loss": 0.0038223348557949064, "step": 137090 }, { "epoch": 38.91569684927619, "grad_norm": 9.216922760009766, "learning_rate": 6.11007663922793e-05, "loss": 0.00597425177693367, "step": 137100 }, { "epoch": 38.91853533919954, "grad_norm": 0.08718281984329224, "learning_rate": 6.109792790235595e-05, "loss": 0.000975412130355835, "step": 137110 }, { "epoch": 38.921373829122906, "grad_norm": 0.22173075377941132, "learning_rate": 6.109508941243259e-05, "loss": 0.0017149850726127624, "step": 137120 }, { "epoch": 38.92421231904627, "grad_norm": 0.46883898973464966, "learning_rate": 6.109225092250923e-05, "loss": 0.008884665369987488, "step": 137130 }, { "epoch": 38.927050808969625, "grad_norm": 0.26036038994789124, "learning_rate": 6.108941243258587e-05, "loss": 0.004381699115037918, "step": 137140 }, { "epoch": 38.92988929889299, "grad_norm": 0.23351457715034485, "learning_rate": 6.108657394266251e-05, "loss": 0.0016026290133595467, "step": 137150 }, { "epoch": 38.93272778881635, "grad_norm": 0.12381860613822937, "learning_rate": 6.108373545273914e-05, "loss": 0.011141019314527512, "step": 137160 }, { "epoch": 38.935566278739714, "grad_norm": 5.0101752281188965, "learning_rate": 6.108089696281578e-05, "loss": 0.002365664765238762, "step": 137170 }, { "epoch": 38.93840476866307, "grad_norm": 0.6688987612724304, "learning_rate": 6.107805847289242e-05, "loss": 0.0030611764639616014, "step": 137180 }, { "epoch": 38.94124325858643, "grad_norm": 8.807551383972168, "learning_rate": 6.107521998296906e-05, "loss": 0.004492054507136345, "step": 137190 }, { "epoch": 38.944081748509795, "grad_norm": 0.2584785223007202, "learning_rate": 6.10723814930457e-05, "loss": 0.00657244622707367, "step": 137200 }, { "epoch": 38.94692023843315, "grad_norm": 16.21637535095215, "learning_rate": 6.106954300312235e-05, "loss": 0.010976333171129227, "step": 137210 }, { "epoch": 38.949758728356514, "grad_norm": 0.09858343750238419, "learning_rate": 6.106670451319897e-05, "loss": 0.0068354949355125426, "step": 137220 }, { "epoch": 38.95259721827988, "grad_norm": 0.296319842338562, "learning_rate": 6.106386602327562e-05, "loss": 0.005221719294786454, "step": 137230 }, { "epoch": 38.95543570820323, "grad_norm": 2.3170037269592285, "learning_rate": 6.106102753335226e-05, "loss": 0.0049011029303073885, "step": 137240 }, { "epoch": 38.958274198126595, "grad_norm": 0.7258913516998291, "learning_rate": 6.10581890434289e-05, "loss": 0.0026680197566747665, "step": 137250 }, { "epoch": 38.96111268804996, "grad_norm": 0.4000585079193115, "learning_rate": 6.105535055350554e-05, "loss": 0.0014177164062857627, "step": 137260 }, { "epoch": 38.96395117797332, "grad_norm": 0.6566436290740967, "learning_rate": 6.105251206358218e-05, "loss": 0.004951765388250351, "step": 137270 }, { "epoch": 38.96678966789668, "grad_norm": 0.5991978645324707, "learning_rate": 6.104967357365882e-05, "loss": 0.0011709747835993768, "step": 137280 }, { "epoch": 38.96962815782004, "grad_norm": 0.634044885635376, "learning_rate": 6.104683508373545e-05, "loss": 0.003566627949476242, "step": 137290 }, { "epoch": 38.9724666477434, "grad_norm": 0.2746218144893646, "learning_rate": 6.104399659381209e-05, "loss": 0.0034033864736557006, "step": 137300 }, { "epoch": 38.97530513766676, "grad_norm": 2.897240161895752, "learning_rate": 6.104115810388873e-05, "loss": 0.005538882315158844, "step": 137310 }, { "epoch": 38.97814362759012, "grad_norm": 0.22855786979198456, "learning_rate": 6.103831961396537e-05, "loss": 0.004798336327075959, "step": 137320 }, { "epoch": 38.980982117513484, "grad_norm": 0.2276849001646042, "learning_rate": 6.103548112404202e-05, "loss": 0.008087386190891267, "step": 137330 }, { "epoch": 38.98382060743684, "grad_norm": 0.25852397084236145, "learning_rate": 6.103264263411865e-05, "loss": 0.0025078503414988516, "step": 137340 }, { "epoch": 38.9866590973602, "grad_norm": 0.5816524624824524, "learning_rate": 6.102980414419529e-05, "loss": 0.004313325881958008, "step": 137350 }, { "epoch": 38.989497587283566, "grad_norm": 0.26520130038261414, "learning_rate": 6.1026965654271926e-05, "loss": 0.0020789306610822676, "step": 137360 }, { "epoch": 38.99233607720693, "grad_norm": 0.4833289086818695, "learning_rate": 6.102412716434857e-05, "loss": 0.0011721242219209671, "step": 137370 }, { "epoch": 38.995174567130285, "grad_norm": 0.5899637341499329, "learning_rate": 6.10212886744252e-05, "loss": 0.008053042739629746, "step": 137380 }, { "epoch": 38.99801305705365, "grad_norm": 0.8573595881462097, "learning_rate": 6.101845018450185e-05, "loss": 0.0019636169075965882, "step": 137390 }, { "epoch": 39.00085154697701, "grad_norm": 0.006774546578526497, "learning_rate": 6.101561169457849e-05, "loss": 0.0015545954927802085, "step": 137400 }, { "epoch": 39.003690036900366, "grad_norm": 0.10945258289575577, "learning_rate": 6.1012773204655126e-05, "loss": 0.0005326934158802032, "step": 137410 }, { "epoch": 39.00652852682373, "grad_norm": 0.0803971067070961, "learning_rate": 6.100993471473177e-05, "loss": 0.0009184282273054123, "step": 137420 }, { "epoch": 39.00936701674709, "grad_norm": 0.11306066811084747, "learning_rate": 6.10070962248084e-05, "loss": 0.005082246288657189, "step": 137430 }, { "epoch": 39.012205506670455, "grad_norm": 0.00984280277043581, "learning_rate": 6.100425773488504e-05, "loss": 0.0009250149130821228, "step": 137440 }, { "epoch": 39.01504399659381, "grad_norm": 0.7278012633323669, "learning_rate": 6.100141924496169e-05, "loss": 0.00113204438239336, "step": 137450 }, { "epoch": 39.01788248651717, "grad_norm": 0.08461342751979828, "learning_rate": 6.0998580755038326e-05, "loss": 0.0007893219590187073, "step": 137460 }, { "epoch": 39.020720976440536, "grad_norm": 0.06453245133161545, "learning_rate": 6.099574226511496e-05, "loss": 0.0024091882631182672, "step": 137470 }, { "epoch": 39.02355946636389, "grad_norm": 0.1434095799922943, "learning_rate": 6.09929037751916e-05, "loss": 0.0023527691140770913, "step": 137480 }, { "epoch": 39.026397956287255, "grad_norm": 0.2553968131542206, "learning_rate": 6.099006528526824e-05, "loss": 0.0008763695135712623, "step": 137490 }, { "epoch": 39.02923644621062, "grad_norm": 2.6161484718322754, "learning_rate": 6.098722679534488e-05, "loss": 0.0016762204468250275, "step": 137500 }, { "epoch": 39.02923644621062, "eval_accuracy": 0.9767279201373434, "eval_loss": 0.08128369599580765, "eval_runtime": 31.5043, "eval_samples_per_second": 499.202, "eval_steps_per_second": 7.808, "step": 137500 }, { "epoch": 39.032074936133974, "grad_norm": 0.09355601668357849, "learning_rate": 6.098438830542151e-05, "loss": 0.0012256275862455369, "step": 137510 }, { "epoch": 39.03491342605734, "grad_norm": 0.1080615296959877, "learning_rate": 6.098154981549816e-05, "loss": 0.0006028557196259499, "step": 137520 }, { "epoch": 39.0377519159807, "grad_norm": 0.3344254791736603, "learning_rate": 6.09787113255748e-05, "loss": 0.0006280548870563508, "step": 137530 }, { "epoch": 39.04059040590406, "grad_norm": 0.5869518518447876, "learning_rate": 6.097587283565144e-05, "loss": 0.0009614804759621621, "step": 137540 }, { "epoch": 39.04342889582742, "grad_norm": 0.05939723923802376, "learning_rate": 6.097303434572808e-05, "loss": 0.003757853806018829, "step": 137550 }, { "epoch": 39.04626738575078, "grad_norm": 3.4026780128479004, "learning_rate": 6.097019585580471e-05, "loss": 0.0031615853309631347, "step": 137560 }, { "epoch": 39.049105875674144, "grad_norm": 0.3942703306674957, "learning_rate": 6.096735736588135e-05, "loss": 0.012047493457794189, "step": 137570 }, { "epoch": 39.0519443655975, "grad_norm": 17.280351638793945, "learning_rate": 6.096451887595799e-05, "loss": 0.017882511019706726, "step": 137580 }, { "epoch": 39.05478285552086, "grad_norm": 1.0733215808868408, "learning_rate": 6.096168038603464e-05, "loss": 0.005104930698871612, "step": 137590 }, { "epoch": 39.057621345444225, "grad_norm": 3.197697162628174, "learning_rate": 6.095884189611127e-05, "loss": 0.002362445928156376, "step": 137600 }, { "epoch": 39.06045983536758, "grad_norm": 0.17124387621879578, "learning_rate": 6.095600340618791e-05, "loss": 0.0020661937072873116, "step": 137610 }, { "epoch": 39.063298325290944, "grad_norm": 0.1346268355846405, "learning_rate": 6.095316491626455e-05, "loss": 0.00855952724814415, "step": 137620 }, { "epoch": 39.06613681521431, "grad_norm": 0.13660840690135956, "learning_rate": 6.095032642634119e-05, "loss": 0.005910919979214668, "step": 137630 }, { "epoch": 39.06897530513767, "grad_norm": 0.09878029674291611, "learning_rate": 6.0947487936417823e-05, "loss": 0.0011193074285984039, "step": 137640 }, { "epoch": 39.071813795061026, "grad_norm": 0.03971888870000839, "learning_rate": 6.094464944649447e-05, "loss": 0.0014145774766802788, "step": 137650 }, { "epoch": 39.07465228498439, "grad_norm": 0.3018629252910614, "learning_rate": 6.094181095657111e-05, "loss": 0.011739248037338256, "step": 137660 }, { "epoch": 39.07749077490775, "grad_norm": 5.928213119506836, "learning_rate": 6.093897246664775e-05, "loss": 0.006136106699705124, "step": 137670 }, { "epoch": 39.08032926483111, "grad_norm": 9.246639251708984, "learning_rate": 6.093613397672438e-05, "loss": 0.006184373050928116, "step": 137680 }, { "epoch": 39.08316775475447, "grad_norm": 0.455435574054718, "learning_rate": 6.0933295486801024e-05, "loss": 0.0007635755464434624, "step": 137690 }, { "epoch": 39.08600624467783, "grad_norm": 0.10587795078754425, "learning_rate": 6.093045699687766e-05, "loss": 0.00048276036977767944, "step": 137700 }, { "epoch": 39.08884473460119, "grad_norm": 0.03420078009366989, "learning_rate": 6.09276185069543e-05, "loss": 0.0029030552133917807, "step": 137710 }, { "epoch": 39.09168322452455, "grad_norm": 0.2092326134443283, "learning_rate": 6.092478001703095e-05, "loss": 0.00044975243508815765, "step": 137720 }, { "epoch": 39.094521714447914, "grad_norm": 1.6473523378372192, "learning_rate": 6.092194152710758e-05, "loss": 0.00887780711054802, "step": 137730 }, { "epoch": 39.09736020437128, "grad_norm": 0.061115700751543045, "learning_rate": 6.0919103037184224e-05, "loss": 0.0009986110031604766, "step": 137740 }, { "epoch": 39.10019869429463, "grad_norm": 0.20632874965667725, "learning_rate": 6.091626454726086e-05, "loss": 0.00251407865434885, "step": 137750 }, { "epoch": 39.103037184217996, "grad_norm": 0.4865431785583496, "learning_rate": 6.09134260573375e-05, "loss": 0.0032470479607582094, "step": 137760 }, { "epoch": 39.10587567414136, "grad_norm": 0.30130788683891296, "learning_rate": 6.0910587567414134e-05, "loss": 0.0026256956160068514, "step": 137770 }, { "epoch": 39.108714164064715, "grad_norm": 0.8512123227119446, "learning_rate": 6.090774907749078e-05, "loss": 0.00871809795498848, "step": 137780 }, { "epoch": 39.11155265398808, "grad_norm": 0.936905026435852, "learning_rate": 6.0904910587567424e-05, "loss": 0.006030123680830002, "step": 137790 }, { "epoch": 39.11439114391144, "grad_norm": 0.3430178761482239, "learning_rate": 6.090207209764406e-05, "loss": 0.0007317159324884415, "step": 137800 }, { "epoch": 39.1172296338348, "grad_norm": 1.301710844039917, "learning_rate": 6.089923360772069e-05, "loss": 0.0037686169147491456, "step": 137810 }, { "epoch": 39.12006812375816, "grad_norm": 0.8421335816383362, "learning_rate": 6.0896395117797334e-05, "loss": 0.00961814671754837, "step": 137820 }, { "epoch": 39.12290661368152, "grad_norm": 1.4414587020874023, "learning_rate": 6.089355662787397e-05, "loss": 0.0007542893290519714, "step": 137830 }, { "epoch": 39.125745103604885, "grad_norm": 0.08410868048667908, "learning_rate": 6.089071813795061e-05, "loss": 0.003299996629357338, "step": 137840 }, { "epoch": 39.12858359352824, "grad_norm": 0.07982895523309708, "learning_rate": 6.088787964802726e-05, "loss": 0.0015040313825011252, "step": 137850 }, { "epoch": 39.131422083451604, "grad_norm": 0.16509340703487396, "learning_rate": 6.088504115810389e-05, "loss": 0.0018917864188551902, "step": 137860 }, { "epoch": 39.134260573374966, "grad_norm": 0.09333012998104095, "learning_rate": 6.0882202668180534e-05, "loss": 0.005116412043571472, "step": 137870 }, { "epoch": 39.13709906329832, "grad_norm": 0.16603335738182068, "learning_rate": 6.087936417825717e-05, "loss": 0.001081143505871296, "step": 137880 }, { "epoch": 39.139937553221685, "grad_norm": 2.381290912628174, "learning_rate": 6.087652568833381e-05, "loss": 0.0008555615320801734, "step": 137890 }, { "epoch": 39.14277604314505, "grad_norm": 0.10461077839136124, "learning_rate": 6.0873687198410445e-05, "loss": 0.0013933278620243072, "step": 137900 }, { "epoch": 39.14561453306841, "grad_norm": 0.012128238566219807, "learning_rate": 6.087084870848708e-05, "loss": 0.0016804268583655358, "step": 137910 }, { "epoch": 39.14845302299177, "grad_norm": 0.08353256434202194, "learning_rate": 6.0868010218563735e-05, "loss": 0.0022597117349505424, "step": 137920 }, { "epoch": 39.15129151291513, "grad_norm": 0.02323201112449169, "learning_rate": 6.086517172864037e-05, "loss": 0.00500510036945343, "step": 137930 }, { "epoch": 39.15413000283849, "grad_norm": 0.8645755648612976, "learning_rate": 6.0862333238717004e-05, "loss": 0.0008523259311914444, "step": 137940 }, { "epoch": 39.15696849276185, "grad_norm": 10.219121932983398, "learning_rate": 6.0859494748793645e-05, "loss": 0.004079770669341088, "step": 137950 }, { "epoch": 39.15980698268521, "grad_norm": 0.06384527683258057, "learning_rate": 6.085665625887028e-05, "loss": 0.0008188333362340927, "step": 137960 }, { "epoch": 39.162645472608574, "grad_norm": 0.04710692539811134, "learning_rate": 6.085381776894692e-05, "loss": 0.0014145579189062119, "step": 137970 }, { "epoch": 39.16548396253193, "grad_norm": 0.21720804274082184, "learning_rate": 6.085097927902357e-05, "loss": 0.000653439573943615, "step": 137980 }, { "epoch": 39.16832245245529, "grad_norm": 0.3116194009780884, "learning_rate": 6.0848140789100204e-05, "loss": 0.0036538906395435335, "step": 137990 }, { "epoch": 39.171160942378656, "grad_norm": 0.5946470499038696, "learning_rate": 6.0845302299176845e-05, "loss": 0.0011351684108376503, "step": 138000 }, { "epoch": 39.171160942378656, "eval_accuracy": 0.974883957525275, "eval_loss": 0.0871768519282341, "eval_runtime": 31.6531, "eval_samples_per_second": 496.855, "eval_steps_per_second": 7.772, "step": 138000 }, { "epoch": 39.17399943230202, "grad_norm": 0.023812010884284973, "learning_rate": 6.084246380925348e-05, "loss": 0.001424885354936123, "step": 138010 }, { "epoch": 39.176837922225374, "grad_norm": 1.167183756828308, "learning_rate": 6.083962531933012e-05, "loss": 0.0016846036538481711, "step": 138020 }, { "epoch": 39.17967641214874, "grad_norm": 0.06904592365026474, "learning_rate": 6.0836786829406756e-05, "loss": 0.0014289602637290954, "step": 138030 }, { "epoch": 39.1825149020721, "grad_norm": 0.4332338571548462, "learning_rate": 6.083394833948339e-05, "loss": 0.0045259203761816025, "step": 138040 }, { "epoch": 39.185353391995456, "grad_norm": 0.11826693266630173, "learning_rate": 6.083110984956004e-05, "loss": 0.000598406046628952, "step": 138050 }, { "epoch": 39.18819188191882, "grad_norm": 0.16060800850391388, "learning_rate": 6.082827135963668e-05, "loss": 0.0015125835314393043, "step": 138060 }, { "epoch": 39.19103037184218, "grad_norm": 0.36900851130485535, "learning_rate": 6.0825432869713315e-05, "loss": 0.001500137336552143, "step": 138070 }, { "epoch": 39.19386886176554, "grad_norm": 0.4478924572467804, "learning_rate": 6.0822594379789956e-05, "loss": 0.004307914525270462, "step": 138080 }, { "epoch": 39.1967073516889, "grad_norm": 0.10561967641115189, "learning_rate": 6.081975588986659e-05, "loss": 0.0012365575879812241, "step": 138090 }, { "epoch": 39.19954584161226, "grad_norm": 0.42396971583366394, "learning_rate": 6.081691739994323e-05, "loss": 0.0037472926080226896, "step": 138100 }, { "epoch": 39.202384331535626, "grad_norm": 3.2284858226776123, "learning_rate": 6.0814078910019866e-05, "loss": 0.004078273475170135, "step": 138110 }, { "epoch": 39.20522282145898, "grad_norm": 3.336428642272949, "learning_rate": 6.0811240420096515e-05, "loss": 0.0034972336143255234, "step": 138120 }, { "epoch": 39.208061311382345, "grad_norm": 0.5801212191581726, "learning_rate": 6.0808401930173156e-05, "loss": 0.003932987153530121, "step": 138130 }, { "epoch": 39.21089980130571, "grad_norm": 1.5830198526382446, "learning_rate": 6.080556344024979e-05, "loss": 0.005855109542608261, "step": 138140 }, { "epoch": 39.21373829122906, "grad_norm": 0.1764943152666092, "learning_rate": 6.0802724950326425e-05, "loss": 0.007715848088264465, "step": 138150 }, { "epoch": 39.216576781152426, "grad_norm": 3.043212890625, "learning_rate": 6.0799886460403067e-05, "loss": 0.0018861185759305953, "step": 138160 }, { "epoch": 39.21941527107579, "grad_norm": 16.60807991027832, "learning_rate": 6.07970479704797e-05, "loss": 0.012019102275371552, "step": 138170 }, { "epoch": 39.22225376099915, "grad_norm": 0.0162091925740242, "learning_rate": 6.079420948055635e-05, "loss": 0.0028994901105761526, "step": 138180 }, { "epoch": 39.22509225092251, "grad_norm": 2.6232378482818604, "learning_rate": 6.079137099063299e-05, "loss": 0.00649256631731987, "step": 138190 }, { "epoch": 39.22793074084587, "grad_norm": 0.5794569253921509, "learning_rate": 6.0788532500709625e-05, "loss": 0.0012230440974235536, "step": 138200 }, { "epoch": 39.23076923076923, "grad_norm": 0.153490349650383, "learning_rate": 6.078569401078627e-05, "loss": 0.0009051540866494179, "step": 138210 }, { "epoch": 39.23360772069259, "grad_norm": 1.236511468887329, "learning_rate": 6.07828555208629e-05, "loss": 0.002259830757975578, "step": 138220 }, { "epoch": 39.23644621061595, "grad_norm": 0.0276777483522892, "learning_rate": 6.078001703093954e-05, "loss": 0.00093733761459589, "step": 138230 }, { "epoch": 39.239284700539315, "grad_norm": 0.31755301356315613, "learning_rate": 6.077717854101618e-05, "loss": 0.0020687647163867952, "step": 138240 }, { "epoch": 39.24212319046267, "grad_norm": 0.5143074989318848, "learning_rate": 6.0774340051092825e-05, "loss": 0.003288868069648743, "step": 138250 }, { "epoch": 39.244961680386034, "grad_norm": 0.6853947639465332, "learning_rate": 6.077150156116947e-05, "loss": 0.0055705726146698, "step": 138260 }, { "epoch": 39.2478001703094, "grad_norm": 0.015403758734464645, "learning_rate": 6.07686630712461e-05, "loss": 0.0014558464288711547, "step": 138270 }, { "epoch": 39.25063866023276, "grad_norm": 7.742937088012695, "learning_rate": 6.0765824581322736e-05, "loss": 0.010842492431402206, "step": 138280 }, { "epoch": 39.253477150156115, "grad_norm": 0.04685800150036812, "learning_rate": 6.076298609139938e-05, "loss": 0.0014076102524995804, "step": 138290 }, { "epoch": 39.25631564007948, "grad_norm": 3.1685903072357178, "learning_rate": 6.076014760147601e-05, "loss": 0.004139552265405655, "step": 138300 }, { "epoch": 39.25915413000284, "grad_norm": 0.022206230089068413, "learning_rate": 6.075730911155265e-05, "loss": 0.0065355576574802395, "step": 138310 }, { "epoch": 39.2619926199262, "grad_norm": 20.028385162353516, "learning_rate": 6.07544706216293e-05, "loss": 0.014773531258106232, "step": 138320 }, { "epoch": 39.26483110984956, "grad_norm": 0.039726655930280685, "learning_rate": 6.0751632131705936e-05, "loss": 0.0036755047738552095, "step": 138330 }, { "epoch": 39.26766959977292, "grad_norm": 1.9771883487701416, "learning_rate": 6.074879364178258e-05, "loss": 0.0033871538937091826, "step": 138340 }, { "epoch": 39.27050808969628, "grad_norm": 0.07113926857709885, "learning_rate": 6.074595515185921e-05, "loss": 0.002391861379146576, "step": 138350 }, { "epoch": 39.27334657961964, "grad_norm": 17.838214874267578, "learning_rate": 6.0743116661935853e-05, "loss": 0.01301446110010147, "step": 138360 }, { "epoch": 39.276185069543004, "grad_norm": 1.5915685892105103, "learning_rate": 6.074027817201249e-05, "loss": 0.0026036594063043594, "step": 138370 }, { "epoch": 39.27902355946637, "grad_norm": 0.4081057608127594, "learning_rate": 6.0737439682089136e-05, "loss": 0.0028338827192783355, "step": 138380 }, { "epoch": 39.28186204938972, "grad_norm": 2.8502750396728516, "learning_rate": 6.073460119216578e-05, "loss": 0.005361150950193405, "step": 138390 }, { "epoch": 39.284700539313086, "grad_norm": 0.7020703554153442, "learning_rate": 6.073176270224241e-05, "loss": 0.0009952127933502198, "step": 138400 }, { "epoch": 39.28753902923645, "grad_norm": 0.11265525966882706, "learning_rate": 6.072892421231905e-05, "loss": 0.0034624986350536347, "step": 138410 }, { "epoch": 39.290377519159804, "grad_norm": 0.9328380823135376, "learning_rate": 6.072608572239569e-05, "loss": 0.005276872217655182, "step": 138420 }, { "epoch": 39.29321600908317, "grad_norm": 0.03923322632908821, "learning_rate": 6.072324723247232e-05, "loss": 0.0022132568061351775, "step": 138430 }, { "epoch": 39.29605449900653, "grad_norm": 0.11271873861551285, "learning_rate": 6.0720408742548964e-05, "loss": 0.0007368421182036399, "step": 138440 }, { "epoch": 39.298892988929886, "grad_norm": 0.5709341168403625, "learning_rate": 6.0717854101617944e-05, "loss": 0.006585609912872314, "step": 138450 }, { "epoch": 39.30173147885325, "grad_norm": 1.7319859266281128, "learning_rate": 6.071501561169458e-05, "loss": 0.0020401835441589355, "step": 138460 }, { "epoch": 39.30456996877661, "grad_norm": 0.35285666584968567, "learning_rate": 6.071217712177122e-05, "loss": 0.0028673205524683, "step": 138470 }, { "epoch": 39.307408458699975, "grad_norm": 0.11032412946224213, "learning_rate": 6.0709338631847855e-05, "loss": 0.005562671273946762, "step": 138480 }, { "epoch": 39.31024694862333, "grad_norm": 0.1384722739458084, "learning_rate": 6.0706500141924496e-05, "loss": 0.011051515489816666, "step": 138490 }, { "epoch": 39.31308543854669, "grad_norm": 0.12851504981517792, "learning_rate": 6.0703661652001144e-05, "loss": 0.0017360903322696686, "step": 138500 }, { "epoch": 39.31308543854669, "eval_accuracy": 0.9756469765371654, "eval_loss": 0.08536995202302933, "eval_runtime": 32.0601, "eval_samples_per_second": 490.548, "eval_steps_per_second": 7.673, "step": 138500 }, { "epoch": 39.315923928470056, "grad_norm": 0.20889928936958313, "learning_rate": 6.070082316207778e-05, "loss": 0.0011299841105937957, "step": 138510 }, { "epoch": 39.31876241839341, "grad_norm": 0.18569187819957733, "learning_rate": 6.069798467215442e-05, "loss": 0.0051317159086465836, "step": 138520 }, { "epoch": 39.321600908316775, "grad_norm": 0.5365309715270996, "learning_rate": 6.0695146182231055e-05, "loss": 0.0025659797713160513, "step": 138530 }, { "epoch": 39.32443939824014, "grad_norm": 0.15547697246074677, "learning_rate": 6.0692307692307696e-05, "loss": 0.003668425977230072, "step": 138540 }, { "epoch": 39.32727788816349, "grad_norm": 0.056269995868206024, "learning_rate": 6.068946920238433e-05, "loss": 0.0006794262677431107, "step": 138550 }, { "epoch": 39.330116378086856, "grad_norm": 0.4943416714668274, "learning_rate": 6.068663071246098e-05, "loss": 0.0022031737491488455, "step": 138560 }, { "epoch": 39.33295486801022, "grad_norm": 0.16368018090724945, "learning_rate": 6.0683792222537613e-05, "loss": 0.00043320301920175553, "step": 138570 }, { "epoch": 39.33579335793358, "grad_norm": 0.06819257140159607, "learning_rate": 6.0680953732614255e-05, "loss": 0.0007342305034399033, "step": 138580 }, { "epoch": 39.33863184785694, "grad_norm": 5.623698711395264, "learning_rate": 6.067811524269089e-05, "loss": 0.008658371865749359, "step": 138590 }, { "epoch": 39.3414703377803, "grad_norm": 0.03333594277501106, "learning_rate": 6.067527675276753e-05, "loss": 0.006858749687671662, "step": 138600 }, { "epoch": 39.344308827703664, "grad_norm": 0.1830771118402481, "learning_rate": 6.0672438262844165e-05, "loss": 0.0006727039813995362, "step": 138610 }, { "epoch": 39.34714731762702, "grad_norm": 0.06880036741495132, "learning_rate": 6.066959977292081e-05, "loss": 0.0008875031024217605, "step": 138620 }, { "epoch": 39.34998580755038, "grad_norm": 0.935390055179596, "learning_rate": 6.0666761282997455e-05, "loss": 0.004733657836914063, "step": 138630 }, { "epoch": 39.352824297473745, "grad_norm": 0.6304066181182861, "learning_rate": 6.066392279307409e-05, "loss": 0.0005578668788075447, "step": 138640 }, { "epoch": 39.35566278739711, "grad_norm": 0.27680617570877075, "learning_rate": 6.066108430315073e-05, "loss": 0.0006093733012676239, "step": 138650 }, { "epoch": 39.358501277320464, "grad_norm": 0.03157861903309822, "learning_rate": 6.0658245813227365e-05, "loss": 0.0005899008363485336, "step": 138660 }, { "epoch": 39.36133976724383, "grad_norm": 1.552221417427063, "learning_rate": 6.0655407323304e-05, "loss": 0.0014631872996687888, "step": 138670 }, { "epoch": 39.36417825716719, "grad_norm": 0.7908819913864136, "learning_rate": 6.065256883338064e-05, "loss": 0.0007385041564702988, "step": 138680 }, { "epoch": 39.367016747090545, "grad_norm": 4.120865345001221, "learning_rate": 6.0649730343457276e-05, "loss": 0.0030291903764009475, "step": 138690 }, { "epoch": 39.36985523701391, "grad_norm": 0.03769755735993385, "learning_rate": 6.0646891853533924e-05, "loss": 0.011622709035873414, "step": 138700 }, { "epoch": 39.37269372693727, "grad_norm": 0.10677117854356766, "learning_rate": 6.0644053363610566e-05, "loss": 0.004477967321872711, "step": 138710 }, { "epoch": 39.37553221686063, "grad_norm": 0.08403872698545456, "learning_rate": 6.06412148736872e-05, "loss": 0.0025386758148670196, "step": 138720 }, { "epoch": 39.37837070678399, "grad_norm": 1.9870363473892212, "learning_rate": 6.063837638376384e-05, "loss": 0.003840316832065582, "step": 138730 }, { "epoch": 39.38120919670735, "grad_norm": 0.14122192561626434, "learning_rate": 6.0635537893840476e-05, "loss": 0.0011181939393281938, "step": 138740 }, { "epoch": 39.384047686630716, "grad_norm": 0.12941376864910126, "learning_rate": 6.063269940391712e-05, "loss": 0.0021334828808903692, "step": 138750 }, { "epoch": 39.38688617655407, "grad_norm": 0.3391941785812378, "learning_rate": 6.0629860913993766e-05, "loss": 0.0004289405420422554, "step": 138760 }, { "epoch": 39.389724666477434, "grad_norm": 0.03545178845524788, "learning_rate": 6.06270224240704e-05, "loss": 0.002675817534327507, "step": 138770 }, { "epoch": 39.3925631564008, "grad_norm": 0.330510675907135, "learning_rate": 6.062418393414704e-05, "loss": 0.01122249811887741, "step": 138780 }, { "epoch": 39.39540164632415, "grad_norm": 0.6019308567047119, "learning_rate": 6.0621345444223676e-05, "loss": 0.0034800685942173006, "step": 138790 }, { "epoch": 39.398240136247516, "grad_norm": 0.13632014393806458, "learning_rate": 6.061850695430031e-05, "loss": 0.0010219585150480271, "step": 138800 }, { "epoch": 39.40107862617088, "grad_norm": 0.44659972190856934, "learning_rate": 6.061566846437695e-05, "loss": 0.001603710651397705, "step": 138810 }, { "epoch": 39.403917116094235, "grad_norm": 0.0122533543035388, "learning_rate": 6.061282997445359e-05, "loss": 0.001211416907608509, "step": 138820 }, { "epoch": 39.4067556060176, "grad_norm": 0.7950981259346008, "learning_rate": 6.0609991484530235e-05, "loss": 0.008475624769926072, "step": 138830 }, { "epoch": 39.40959409594096, "grad_norm": 0.06071178615093231, "learning_rate": 6.0607152994606876e-05, "loss": 0.0018236760050058364, "step": 138840 }, { "epoch": 39.41243258586432, "grad_norm": 0.1182396411895752, "learning_rate": 6.060431450468351e-05, "loss": 0.0005002036690711975, "step": 138850 }, { "epoch": 39.41527107578768, "grad_norm": 0.05120754987001419, "learning_rate": 6.060147601476015e-05, "loss": 0.0015990445390343666, "step": 138860 }, { "epoch": 39.41810956571104, "grad_norm": 1.545422077178955, "learning_rate": 6.059863752483679e-05, "loss": 0.007767621427774429, "step": 138870 }, { "epoch": 39.420948055634405, "grad_norm": 5.893726348876953, "learning_rate": 6.059579903491343e-05, "loss": 0.012136691808700561, "step": 138880 }, { "epoch": 39.42378654555776, "grad_norm": 5.254700183868408, "learning_rate": 6.059296054499006e-05, "loss": 0.004616284742951393, "step": 138890 }, { "epoch": 39.42662503548112, "grad_norm": 5.23870325088501, "learning_rate": 6.059012205506671e-05, "loss": 0.005939216911792755, "step": 138900 }, { "epoch": 39.429463525404486, "grad_norm": 1.1079213619232178, "learning_rate": 6.058728356514335e-05, "loss": 0.0015039023011922837, "step": 138910 }, { "epoch": 39.43230201532784, "grad_norm": 0.04870188236236572, "learning_rate": 6.058444507521999e-05, "loss": 0.0006049912422895432, "step": 138920 }, { "epoch": 39.435140505251205, "grad_norm": 0.37689369916915894, "learning_rate": 6.058160658529662e-05, "loss": 0.003204350918531418, "step": 138930 }, { "epoch": 39.43797899517457, "grad_norm": 13.268476486206055, "learning_rate": 6.057876809537326e-05, "loss": 0.0030410535633563994, "step": 138940 }, { "epoch": 39.44081748509793, "grad_norm": 3.697195053100586, "learning_rate": 6.05759296054499e-05, "loss": 0.0026168907061219215, "step": 138950 }, { "epoch": 39.44365597502129, "grad_norm": 1.7088240385055542, "learning_rate": 6.0573091115526546e-05, "loss": 0.003200293704867363, "step": 138960 }, { "epoch": 39.44649446494465, "grad_norm": 0.10265671461820602, "learning_rate": 6.057025262560319e-05, "loss": 0.010031526535749435, "step": 138970 }, { "epoch": 39.44933295486801, "grad_norm": 1.5349925756454468, "learning_rate": 6.056741413567982e-05, "loss": 0.004440639168024063, "step": 138980 }, { "epoch": 39.45217144479137, "grad_norm": 0.041087791323661804, "learning_rate": 6.056457564575646e-05, "loss": 0.013201980292797089, "step": 138990 }, { "epoch": 39.45500993471473, "grad_norm": 0.24191564321517944, "learning_rate": 6.05617371558331e-05, "loss": 0.0029610762372612952, "step": 139000 }, { "epoch": 39.45500993471473, "eval_accuracy": 0.9732307496661792, "eval_loss": 0.09388997405767441, "eval_runtime": 32.1935, "eval_samples_per_second": 488.515, "eval_steps_per_second": 7.641, "step": 139000 }, { "epoch": 39.457848424638094, "grad_norm": 0.12238845974206924, "learning_rate": 6.055889866590974e-05, "loss": 0.004166174679994583, "step": 139010 }, { "epoch": 39.46068691456146, "grad_norm": 10.061746597290039, "learning_rate": 6.0556060175986374e-05, "loss": 0.019758693873882294, "step": 139020 }, { "epoch": 39.46352540448481, "grad_norm": 0.1553926318883896, "learning_rate": 6.055322168606302e-05, "loss": 0.0042705446481704715, "step": 139030 }, { "epoch": 39.466363894408175, "grad_norm": 17.36062240600586, "learning_rate": 6.0550383196139656e-05, "loss": 0.009921664744615555, "step": 139040 }, { "epoch": 39.46920238433154, "grad_norm": 0.49641841650009155, "learning_rate": 6.05475447062163e-05, "loss": 0.0020416708663105964, "step": 139050 }, { "epoch": 39.472040874254894, "grad_norm": 0.168116495013237, "learning_rate": 6.054470621629293e-05, "loss": 0.003057621419429779, "step": 139060 }, { "epoch": 39.47487936417826, "grad_norm": 0.07465113699436188, "learning_rate": 6.0541867726369574e-05, "loss": 0.007385559380054474, "step": 139070 }, { "epoch": 39.47771785410162, "grad_norm": 0.6764495968818665, "learning_rate": 6.053902923644621e-05, "loss": 0.0035275742411613464, "step": 139080 }, { "epoch": 39.480556344024976, "grad_norm": 4.340326309204102, "learning_rate": 6.053619074652285e-05, "loss": 0.008909739553928375, "step": 139090 }, { "epoch": 39.48339483394834, "grad_norm": 0.1992167830467224, "learning_rate": 6.05333522565995e-05, "loss": 0.008172467350959778, "step": 139100 }, { "epoch": 39.4862333238717, "grad_norm": 0.3296763300895691, "learning_rate": 6.053051376667613e-05, "loss": 0.005192354321479797, "step": 139110 }, { "epoch": 39.489071813795064, "grad_norm": 10.435248374938965, "learning_rate": 6.0527675276752774e-05, "loss": 0.018149109184741975, "step": 139120 }, { "epoch": 39.49191030371842, "grad_norm": 0.2836419343948364, "learning_rate": 6.052483678682941e-05, "loss": 0.00365847572684288, "step": 139130 }, { "epoch": 39.49474879364178, "grad_norm": 0.15508265793323517, "learning_rate": 6.052199829690604e-05, "loss": 0.006747350096702576, "step": 139140 }, { "epoch": 39.497587283565146, "grad_norm": 2.163086175918579, "learning_rate": 6.0519159806982684e-05, "loss": 0.0023795176297426225, "step": 139150 }, { "epoch": 39.5004257734885, "grad_norm": 1.1789687871932983, "learning_rate": 6.051632131705933e-05, "loss": 0.007911676168441772, "step": 139160 }, { "epoch": 39.503264263411864, "grad_norm": 0.538994312286377, "learning_rate": 6.051348282713597e-05, "loss": 0.0031716935336589813, "step": 139170 }, { "epoch": 39.50610275333523, "grad_norm": 11.735756874084473, "learning_rate": 6.051064433721261e-05, "loss": 0.012116793543100357, "step": 139180 }, { "epoch": 39.50894124325858, "grad_norm": 0.1775570958852768, "learning_rate": 6.050780584728924e-05, "loss": 0.0034813590347766874, "step": 139190 }, { "epoch": 39.511779733181946, "grad_norm": 2.986088275909424, "learning_rate": 6.0504967357365884e-05, "loss": 0.0022414546459913255, "step": 139200 }, { "epoch": 39.51461822310531, "grad_norm": 19.068557739257812, "learning_rate": 6.050212886744252e-05, "loss": 0.01525673121213913, "step": 139210 }, { "epoch": 39.51745671302867, "grad_norm": 0.4115059971809387, "learning_rate": 6.049929037751916e-05, "loss": 0.0007110729813575745, "step": 139220 }, { "epoch": 39.52029520295203, "grad_norm": 0.10299807041883469, "learning_rate": 6.049645188759581e-05, "loss": 0.0017435448244214058, "step": 139230 }, { "epoch": 39.52313369287539, "grad_norm": 0.06878902018070221, "learning_rate": 6.049361339767244e-05, "loss": 0.005529165267944336, "step": 139240 }, { "epoch": 39.52597218279875, "grad_norm": 0.6363896727561951, "learning_rate": 6.0490774907749085e-05, "loss": 0.0014021214097738265, "step": 139250 }, { "epoch": 39.52881067272211, "grad_norm": 2.591054677963257, "learning_rate": 6.048793641782572e-05, "loss": 0.0031134381890296934, "step": 139260 }, { "epoch": 39.53164916264547, "grad_norm": 0.11640580743551254, "learning_rate": 6.0485097927902354e-05, "loss": 0.0015712512657046318, "step": 139270 }, { "epoch": 39.534487652568835, "grad_norm": 0.12123462557792664, "learning_rate": 6.0482259437978995e-05, "loss": 0.009001583606004716, "step": 139280 }, { "epoch": 39.53732614249219, "grad_norm": 14.135578155517578, "learning_rate": 6.047942094805564e-05, "loss": 0.005289077013731003, "step": 139290 }, { "epoch": 39.540164632415554, "grad_norm": 10.1737060546875, "learning_rate": 6.047658245813228e-05, "loss": 0.0061090394854545595, "step": 139300 }, { "epoch": 39.54300312233892, "grad_norm": 0.24329543113708496, "learning_rate": 6.047374396820892e-05, "loss": 0.001027282141149044, "step": 139310 }, { "epoch": 39.54584161226228, "grad_norm": 0.15187223255634308, "learning_rate": 6.0470905478285554e-05, "loss": 0.0018104445189237595, "step": 139320 }, { "epoch": 39.548680102185635, "grad_norm": 0.12122810631990433, "learning_rate": 6.0468066988362195e-05, "loss": 0.0032040536403656005, "step": 139330 }, { "epoch": 39.551518592109, "grad_norm": 0.03264455869793892, "learning_rate": 6.046522849843883e-05, "loss": 0.0015556132420897484, "step": 139340 }, { "epoch": 39.55435708203236, "grad_norm": 1.6046301126480103, "learning_rate": 6.046239000851547e-05, "loss": 0.001749090850353241, "step": 139350 }, { "epoch": 39.55719557195572, "grad_norm": 0.2947319447994232, "learning_rate": 6.045955151859212e-05, "loss": 0.0008201774209737778, "step": 139360 }, { "epoch": 39.56003406187908, "grad_norm": 1.223129391670227, "learning_rate": 6.0456713028668754e-05, "loss": 0.0019808651879429815, "step": 139370 }, { "epoch": 39.56287255180244, "grad_norm": 0.9171944260597229, "learning_rate": 6.0453874538745395e-05, "loss": 0.0030045343562960626, "step": 139380 }, { "epoch": 39.565711041725805, "grad_norm": 0.19484840333461761, "learning_rate": 6.045103604882203e-05, "loss": 0.001980600319802761, "step": 139390 }, { "epoch": 39.56854953164916, "grad_norm": 7.515980243682861, "learning_rate": 6.0448197558898665e-05, "loss": 0.0042570605874061584, "step": 139400 }, { "epoch": 39.571388021572524, "grad_norm": 0.40401598811149597, "learning_rate": 6.0445359068975306e-05, "loss": 0.00739060714840889, "step": 139410 }, { "epoch": 39.57422651149589, "grad_norm": 2.4137284755706787, "learning_rate": 6.044252057905194e-05, "loss": 0.0021855276077985763, "step": 139420 }, { "epoch": 39.57706500141924, "grad_norm": 0.06560931354761124, "learning_rate": 6.043968208912859e-05, "loss": 0.00098164863884449, "step": 139430 }, { "epoch": 39.579903491342606, "grad_norm": 4.309883117675781, "learning_rate": 6.043684359920523e-05, "loss": 0.00350627563893795, "step": 139440 }, { "epoch": 39.58274198126597, "grad_norm": 0.059342317283153534, "learning_rate": 6.0434005109281865e-05, "loss": 0.00507737547159195, "step": 139450 }, { "epoch": 39.585580471189324, "grad_norm": 3.8229546546936035, "learning_rate": 6.0431166619358506e-05, "loss": 0.0023159828037023545, "step": 139460 }, { "epoch": 39.58841896111269, "grad_norm": 0.06483537703752518, "learning_rate": 6.042832812943514e-05, "loss": 0.00431598462164402, "step": 139470 }, { "epoch": 39.59125745103605, "grad_norm": 0.057901959866285324, "learning_rate": 6.042548963951178e-05, "loss": 0.01255812644958496, "step": 139480 }, { "epoch": 39.59409594095941, "grad_norm": 8.8146333694458, "learning_rate": 6.042265114958843e-05, "loss": 0.007699554413557052, "step": 139490 }, { "epoch": 39.59693443088277, "grad_norm": 0.7698335647583008, "learning_rate": 6.0419812659665065e-05, "loss": 0.0015274036675691604, "step": 139500 }, { "epoch": 39.59693443088277, "eval_accuracy": 0.9713867870541107, "eval_loss": 0.10272873193025589, "eval_runtime": 32.0554, "eval_samples_per_second": 490.619, "eval_steps_per_second": 7.674, "step": 139500 }, { "epoch": 39.59977292080613, "grad_norm": 3.913221836090088, "learning_rate": 6.04169741697417e-05, "loss": 0.0021123886108398438, "step": 139510 }, { "epoch": 39.602611410729494, "grad_norm": 0.12325819581747055, "learning_rate": 6.041413567981834e-05, "loss": 0.005768696963787079, "step": 139520 }, { "epoch": 39.60544990065285, "grad_norm": 0.8495907187461853, "learning_rate": 6.0411297189894975e-05, "loss": 0.0016661401838064193, "step": 139530 }, { "epoch": 39.60828839057621, "grad_norm": 0.13071849942207336, "learning_rate": 6.040845869997162e-05, "loss": 0.0024972803890705108, "step": 139540 }, { "epoch": 39.611126880499576, "grad_norm": 0.41720494627952576, "learning_rate": 6.040562021004825e-05, "loss": 0.0015149889513850212, "step": 139550 }, { "epoch": 39.61396537042293, "grad_norm": 0.01639995165169239, "learning_rate": 6.04027817201249e-05, "loss": 0.0017783386632800103, "step": 139560 }, { "epoch": 39.616803860346295, "grad_norm": 0.15964235365390778, "learning_rate": 6.039994323020154e-05, "loss": 0.0022891581058502196, "step": 139570 }, { "epoch": 39.61964235026966, "grad_norm": 1.8176294565200806, "learning_rate": 6.0397104740278175e-05, "loss": 0.016858980059623718, "step": 139580 }, { "epoch": 39.62248084019302, "grad_norm": 2.083268642425537, "learning_rate": 6.039426625035482e-05, "loss": 0.0023907598108053206, "step": 139590 }, { "epoch": 39.625319330116376, "grad_norm": 0.6479378342628479, "learning_rate": 6.039142776043145e-05, "loss": 0.0036281563341617586, "step": 139600 }, { "epoch": 39.62815782003974, "grad_norm": 7.420990943908691, "learning_rate": 6.0388589270508086e-05, "loss": 0.007852285355329513, "step": 139610 }, { "epoch": 39.6309963099631, "grad_norm": 3.2397515773773193, "learning_rate": 6.038575078058473e-05, "loss": 0.007737544178962707, "step": 139620 }, { "epoch": 39.63383479988646, "grad_norm": 0.9483978152275085, "learning_rate": 6.0382912290661376e-05, "loss": 0.0075049236416816715, "step": 139630 }, { "epoch": 39.63667328980982, "grad_norm": 0.06129680573940277, "learning_rate": 6.038007380073801e-05, "loss": 0.005858098715543747, "step": 139640 }, { "epoch": 39.63951177973318, "grad_norm": 1.0665252208709717, "learning_rate": 6.037723531081465e-05, "loss": 0.0031964600086212156, "step": 139650 }, { "epoch": 39.64235026965654, "grad_norm": 2.998321294784546, "learning_rate": 6.0374396820891286e-05, "loss": 0.004929675534367561, "step": 139660 }, { "epoch": 39.6451887595799, "grad_norm": 1.9591948986053467, "learning_rate": 6.037155833096793e-05, "loss": 0.0023342397063970564, "step": 139670 }, { "epoch": 39.648027249503265, "grad_norm": 0.08371206372976303, "learning_rate": 6.036871984104456e-05, "loss": 0.008088162541389466, "step": 139680 }, { "epoch": 39.65086573942663, "grad_norm": 3.0946784019470215, "learning_rate": 6.036588135112121e-05, "loss": 0.0017350615933537483, "step": 139690 }, { "epoch": 39.653704229349984, "grad_norm": 9.415877342224121, "learning_rate": 6.036304286119785e-05, "loss": 0.006233911216259003, "step": 139700 }, { "epoch": 39.65654271927335, "grad_norm": 0.035978496074676514, "learning_rate": 6.0360204371274486e-05, "loss": 0.0028254522010684015, "step": 139710 }, { "epoch": 39.65938120919671, "grad_norm": 5.757728576660156, "learning_rate": 6.035736588135113e-05, "loss": 0.004225365072488785, "step": 139720 }, { "epoch": 39.662219699120065, "grad_norm": 1.8212720155715942, "learning_rate": 6.035452739142776e-05, "loss": 0.003016148693859577, "step": 139730 }, { "epoch": 39.66505818904343, "grad_norm": 0.06038428097963333, "learning_rate": 6.03516889015044e-05, "loss": 0.007064750790596009, "step": 139740 }, { "epoch": 39.66789667896679, "grad_norm": 1.093817114830017, "learning_rate": 6.034885041158104e-05, "loss": 0.0026820652186870576, "step": 139750 }, { "epoch": 39.67073516889015, "grad_norm": 1.3431864976882935, "learning_rate": 6.0346011921657686e-05, "loss": 0.004437118396162986, "step": 139760 }, { "epoch": 39.67357365881351, "grad_norm": 0.06486725807189941, "learning_rate": 6.034317343173432e-05, "loss": 0.004336366429924965, "step": 139770 }, { "epoch": 39.67641214873687, "grad_norm": 0.02308625355362892, "learning_rate": 6.034033494181096e-05, "loss": 0.005778943002223968, "step": 139780 }, { "epoch": 39.679250638660235, "grad_norm": 0.32858359813690186, "learning_rate": 6.03374964518876e-05, "loss": 0.002763346582651138, "step": 139790 }, { "epoch": 39.68208912858359, "grad_norm": 0.20044772326946259, "learning_rate": 6.033465796196424e-05, "loss": 0.0023469334468245507, "step": 139800 }, { "epoch": 39.684927618506954, "grad_norm": 0.035473112016916275, "learning_rate": 6.033181947204087e-05, "loss": 0.0018551722168922424, "step": 139810 }, { "epoch": 39.68776610843032, "grad_norm": 0.032913029193878174, "learning_rate": 6.0328980982117514e-05, "loss": 0.006726323068141938, "step": 139820 }, { "epoch": 39.69060459835367, "grad_norm": 0.0315186083316803, "learning_rate": 6.032614249219416e-05, "loss": 0.0021501323208212852, "step": 139830 }, { "epoch": 39.693443088277036, "grad_norm": 0.055103130638599396, "learning_rate": 6.03233040022708e-05, "loss": 0.001127505674958229, "step": 139840 }, { "epoch": 39.6962815782004, "grad_norm": 1.3189668655395508, "learning_rate": 6.032046551234743e-05, "loss": 0.003320043534040451, "step": 139850 }, { "epoch": 39.69912006812376, "grad_norm": 1.2478957176208496, "learning_rate": 6.031762702242407e-05, "loss": 0.0030379649251699448, "step": 139860 }, { "epoch": 39.70195855804712, "grad_norm": 0.041675593703985214, "learning_rate": 6.031478853250071e-05, "loss": 0.0013714443892240524, "step": 139870 }, { "epoch": 39.70479704797048, "grad_norm": 0.10999451577663422, "learning_rate": 6.031195004257735e-05, "loss": 0.002758191153407097, "step": 139880 }, { "epoch": 39.70763553789384, "grad_norm": 1.0315507650375366, "learning_rate": 6.0309111552654e-05, "loss": 0.0023459285497665406, "step": 139890 }, { "epoch": 39.7104740278172, "grad_norm": 0.010173357091844082, "learning_rate": 6.030627306273063e-05, "loss": 0.0033757761120796204, "step": 139900 }, { "epoch": 39.71331251774056, "grad_norm": 12.188918113708496, "learning_rate": 6.030343457280727e-05, "loss": 0.0052807372063398365, "step": 139910 }, { "epoch": 39.716151007663925, "grad_norm": 0.020379696041345596, "learning_rate": 6.030059608288391e-05, "loss": 0.0008846750482916832, "step": 139920 }, { "epoch": 39.71898949758728, "grad_norm": 5.913230895996094, "learning_rate": 6.029775759296055e-05, "loss": 0.006001229956746101, "step": 139930 }, { "epoch": 39.72182798751064, "grad_norm": 9.762709617614746, "learning_rate": 6.0294919103037184e-05, "loss": 0.006464720517396927, "step": 139940 }, { "epoch": 39.724666477434006, "grad_norm": 0.3091679811477661, "learning_rate": 6.0292080613113825e-05, "loss": 0.0035059597343206407, "step": 139950 }, { "epoch": 39.72750496735737, "grad_norm": 0.03917648643255234, "learning_rate": 6.028924212319047e-05, "loss": 0.009939504414796829, "step": 139960 }, { "epoch": 39.730343457280725, "grad_norm": 3.0805511474609375, "learning_rate": 6.028640363326711e-05, "loss": 0.005064672231674195, "step": 139970 }, { "epoch": 39.73318194720409, "grad_norm": 0.5887700915336609, "learning_rate": 6.028356514334374e-05, "loss": 0.0012383176013827323, "step": 139980 }, { "epoch": 39.73602043712745, "grad_norm": 0.11344687640666962, "learning_rate": 6.0280726653420384e-05, "loss": 0.002821212261915207, "step": 139990 }, { "epoch": 39.738858927050806, "grad_norm": 0.05641164258122444, "learning_rate": 6.027788816349702e-05, "loss": 0.0049358226358890535, "step": 140000 }, { "epoch": 39.738858927050806, "eval_accuracy": 0.974883957525275, "eval_loss": 0.08956696093082428, "eval_runtime": 31.793, "eval_samples_per_second": 494.668, "eval_steps_per_second": 7.738, "step": 140000 }, { "epoch": 39.74169741697417, "grad_norm": 1.2411690950393677, "learning_rate": 6.027504967357366e-05, "loss": 0.003394593298435211, "step": 140010 }, { "epoch": 39.74453590689753, "grad_norm": 0.051181625574827194, "learning_rate": 6.0272211183650294e-05, "loss": 0.0035420209169387817, "step": 140020 }, { "epoch": 39.74737439682089, "grad_norm": 4.276035308837891, "learning_rate": 6.026937269372694e-05, "loss": 0.009138558804988862, "step": 140030 }, { "epoch": 39.75021288674425, "grad_norm": 0.19671708345413208, "learning_rate": 6.0266534203803584e-05, "loss": 0.006965765357017517, "step": 140040 }, { "epoch": 39.753051376667614, "grad_norm": 10.033811569213867, "learning_rate": 6.026369571388022e-05, "loss": 0.0063288785517215725, "step": 140050 }, { "epoch": 39.75588986659098, "grad_norm": 0.8704296946525574, "learning_rate": 6.026085722395686e-05, "loss": 0.00313953161239624, "step": 140060 }, { "epoch": 39.75872835651433, "grad_norm": 0.06608618050813675, "learning_rate": 6.0258018734033494e-05, "loss": 0.0021096281707286835, "step": 140070 }, { "epoch": 39.761566846437695, "grad_norm": 2.4122865200042725, "learning_rate": 6.025518024411013e-05, "loss": 0.005744149163365364, "step": 140080 }, { "epoch": 39.76440533636106, "grad_norm": 0.36474162340164185, "learning_rate": 6.0252341754186784e-05, "loss": 0.0022863009944558144, "step": 140090 }, { "epoch": 39.767243826284414, "grad_norm": 0.3941896855831146, "learning_rate": 6.024950326426342e-05, "loss": 0.004908876866102219, "step": 140100 }, { "epoch": 39.77008231620778, "grad_norm": 2.551473379135132, "learning_rate": 6.024666477434005e-05, "loss": 0.018268853425979614, "step": 140110 }, { "epoch": 39.77292080613114, "grad_norm": 0.9494673013687134, "learning_rate": 6.0243826284416694e-05, "loss": 0.0062050651758909225, "step": 140120 }, { "epoch": 39.7757592960545, "grad_norm": 0.06125468760728836, "learning_rate": 6.024098779449333e-05, "loss": 0.002519560232758522, "step": 140130 }, { "epoch": 39.77859778597786, "grad_norm": 0.5896734595298767, "learning_rate": 6.023814930456997e-05, "loss": 0.003685469925403595, "step": 140140 }, { "epoch": 39.78143627590122, "grad_norm": 0.10069131851196289, "learning_rate": 6.0235310814646605e-05, "loss": 0.0008576733991503716, "step": 140150 }, { "epoch": 39.784274765824584, "grad_norm": 0.05000810697674751, "learning_rate": 6.023247232472325e-05, "loss": 0.007351435720920563, "step": 140160 }, { "epoch": 39.78711325574794, "grad_norm": 0.09790735691785812, "learning_rate": 6.0229633834799895e-05, "loss": 0.0039072509855031965, "step": 140170 }, { "epoch": 39.7899517456713, "grad_norm": 5.382041931152344, "learning_rate": 6.022679534487653e-05, "loss": 0.0069732718169689175, "step": 140180 }, { "epoch": 39.792790235594666, "grad_norm": 0.09428535401821136, "learning_rate": 6.022395685495317e-05, "loss": 0.0019293110817670822, "step": 140190 }, { "epoch": 39.79562872551802, "grad_norm": 1.814720869064331, "learning_rate": 6.0221118365029805e-05, "loss": 0.007151990383863449, "step": 140200 }, { "epoch": 39.798467215441384, "grad_norm": 0.6311657428741455, "learning_rate": 6.021827987510644e-05, "loss": 0.014634586870670319, "step": 140210 }, { "epoch": 39.80130570536475, "grad_norm": 0.3648786246776581, "learning_rate": 6.021544138518308e-05, "loss": 0.008969998359680176, "step": 140220 }, { "epoch": 39.80414419528811, "grad_norm": 1.6945114135742188, "learning_rate": 6.021260289525973e-05, "loss": 0.004368625581264496, "step": 140230 }, { "epoch": 39.806982685211466, "grad_norm": 0.49376946687698364, "learning_rate": 6.0209764405336364e-05, "loss": 0.00094765555113554, "step": 140240 }, { "epoch": 39.80982117513483, "grad_norm": 0.027799341827630997, "learning_rate": 6.0206925915413005e-05, "loss": 0.0003513531759381294, "step": 140250 }, { "epoch": 39.81265966505819, "grad_norm": 0.28534236550331116, "learning_rate": 6.020408742548964e-05, "loss": 0.0050453715026378635, "step": 140260 }, { "epoch": 39.81549815498155, "grad_norm": 13.636035919189453, "learning_rate": 6.020124893556628e-05, "loss": 0.00275521632283926, "step": 140270 }, { "epoch": 39.81833664490491, "grad_norm": 0.1367509365081787, "learning_rate": 6.0198410445642916e-05, "loss": 0.0012105245143175126, "step": 140280 }, { "epoch": 39.82117513482827, "grad_norm": 1.6144416332244873, "learning_rate": 6.0195571955719564e-05, "loss": 0.0010772377252578735, "step": 140290 }, { "epoch": 39.82401362475163, "grad_norm": 3.217334032058716, "learning_rate": 6.0192733465796205e-05, "loss": 0.002232023328542709, "step": 140300 }, { "epoch": 39.82685211467499, "grad_norm": 0.11124135553836823, "learning_rate": 6.018989497587284e-05, "loss": 0.013311173021793365, "step": 140310 }, { "epoch": 39.829690604598355, "grad_norm": 1.462989091873169, "learning_rate": 6.0187056485949475e-05, "loss": 0.004180317372083664, "step": 140320 }, { "epoch": 39.83252909452172, "grad_norm": 0.1094597578048706, "learning_rate": 6.0184217996026116e-05, "loss": 0.006835350394248962, "step": 140330 }, { "epoch": 39.83536758444507, "grad_norm": 0.4314813017845154, "learning_rate": 6.018137950610275e-05, "loss": 0.0024197274819016457, "step": 140340 }, { "epoch": 39.838206074368436, "grad_norm": 0.7982630133628845, "learning_rate": 6.017854101617939e-05, "loss": 0.0026509566232562065, "step": 140350 }, { "epoch": 39.8410445642918, "grad_norm": 0.605013906955719, "learning_rate": 6.017570252625604e-05, "loss": 0.0019555581733584405, "step": 140360 }, { "epoch": 39.843883054215155, "grad_norm": 5.26009464263916, "learning_rate": 6.0172864036332675e-05, "loss": 0.0020079949870705606, "step": 140370 }, { "epoch": 39.84672154413852, "grad_norm": 1.0180621147155762, "learning_rate": 6.0170025546409316e-05, "loss": 0.00843769684433937, "step": 140380 }, { "epoch": 39.84956003406188, "grad_norm": 0.3391980826854706, "learning_rate": 6.016718705648595e-05, "loss": 0.007211767137050629, "step": 140390 }, { "epoch": 39.85239852398524, "grad_norm": 0.13601602613925934, "learning_rate": 6.016434856656259e-05, "loss": 0.0024050241336226463, "step": 140400 }, { "epoch": 39.8552370139086, "grad_norm": 0.4353223145008087, "learning_rate": 6.0161510076639227e-05, "loss": 0.0009262898936867713, "step": 140410 }, { "epoch": 39.85807550383196, "grad_norm": 10.732467651367188, "learning_rate": 6.015867158671587e-05, "loss": 0.007705754041671753, "step": 140420 }, { "epoch": 39.860913993755325, "grad_norm": 1.2364544868469238, "learning_rate": 6.0155833096792516e-05, "loss": 0.0016876265406608581, "step": 140430 }, { "epoch": 39.86375248367868, "grad_norm": 0.07032128423452377, "learning_rate": 6.015299460686915e-05, "loss": 0.0006453802809119225, "step": 140440 }, { "epoch": 39.866590973602044, "grad_norm": 0.636163055896759, "learning_rate": 6.0150156116945785e-05, "loss": 0.0051218122243881226, "step": 140450 }, { "epoch": 39.86942946352541, "grad_norm": 0.6675548553466797, "learning_rate": 6.014731762702243e-05, "loss": 0.0008221242576837539, "step": 140460 }, { "epoch": 39.87226795344876, "grad_norm": 10.39493465423584, "learning_rate": 6.014447913709906e-05, "loss": 0.005090107768774032, "step": 140470 }, { "epoch": 39.875106443372125, "grad_norm": 0.10883325338363647, "learning_rate": 6.01416406471757e-05, "loss": 0.0008152389898896217, "step": 140480 }, { "epoch": 39.87794493329549, "grad_norm": 18.141572952270508, "learning_rate": 6.013880215725235e-05, "loss": 0.011504334211349488, "step": 140490 }, { "epoch": 39.880783423218844, "grad_norm": 13.353707313537598, "learning_rate": 6.0135963667328985e-05, "loss": 0.009438977390527726, "step": 140500 }, { "epoch": 39.880783423218844, "eval_accuracy": 0.9767279201373434, "eval_loss": 0.08431672304868698, "eval_runtime": 32.0394, "eval_samples_per_second": 490.864, "eval_steps_per_second": 7.678, "step": 140500 }, { "epoch": 39.88362191314221, "grad_norm": 0.6881870627403259, "learning_rate": 6.013312517740563e-05, "loss": 0.0026777882128953935, "step": 140510 }, { "epoch": 39.88646040306557, "grad_norm": 0.5353898406028748, "learning_rate": 6.013028668748226e-05, "loss": 0.007203893363475799, "step": 140520 }, { "epoch": 39.88929889298893, "grad_norm": 0.04074109345674515, "learning_rate": 6.01274481975589e-05, "loss": 0.0034690462052822113, "step": 140530 }, { "epoch": 39.89213738291229, "grad_norm": 0.07092523574829102, "learning_rate": 6.012460970763554e-05, "loss": 0.0026951268315315246, "step": 140540 }, { "epoch": 39.89497587283565, "grad_norm": 2.031587839126587, "learning_rate": 6.012177121771217e-05, "loss": 0.00539964959025383, "step": 140550 }, { "epoch": 39.897814362759014, "grad_norm": 2.4877140522003174, "learning_rate": 6.011893272778883e-05, "loss": 0.001929086074233055, "step": 140560 }, { "epoch": 39.90065285268237, "grad_norm": 1.518911361694336, "learning_rate": 6.011609423786546e-05, "loss": 0.0015271568670868873, "step": 140570 }, { "epoch": 39.90349134260573, "grad_norm": 0.06446368247270584, "learning_rate": 6.0113255747942096e-05, "loss": 0.0006911342963576317, "step": 140580 }, { "epoch": 39.906329832529096, "grad_norm": 0.36873260140419006, "learning_rate": 6.011041725801874e-05, "loss": 0.002900325693190098, "step": 140590 }, { "epoch": 39.90916832245246, "grad_norm": 0.02814585343003273, "learning_rate": 6.010757876809537e-05, "loss": 0.0015156226232647895, "step": 140600 }, { "epoch": 39.912006812375814, "grad_norm": 9.48901081085205, "learning_rate": 6.0104740278172013e-05, "loss": 0.007503252476453781, "step": 140610 }, { "epoch": 39.91484530229918, "grad_norm": 0.4717351794242859, "learning_rate": 6.010190178824866e-05, "loss": 0.00471775159239769, "step": 140620 }, { "epoch": 39.91768379222254, "grad_norm": 0.2773877680301666, "learning_rate": 6.0099063298325296e-05, "loss": 0.0036538586020469664, "step": 140630 }, { "epoch": 39.920522282145896, "grad_norm": 0.043775565922260284, "learning_rate": 6.009622480840194e-05, "loss": 0.000935385748744011, "step": 140640 }, { "epoch": 39.92336077206926, "grad_norm": 0.1913825422525406, "learning_rate": 6.009338631847857e-05, "loss": 0.0012554600834846497, "step": 140650 }, { "epoch": 39.92619926199262, "grad_norm": 0.5335685610771179, "learning_rate": 6.0090547828555213e-05, "loss": 0.006734833121299744, "step": 140660 }, { "epoch": 39.92903775191598, "grad_norm": 1.2815186977386475, "learning_rate": 6.008770933863185e-05, "loss": 0.002618023753166199, "step": 140670 }, { "epoch": 39.93187624183934, "grad_norm": 0.039956893771886826, "learning_rate": 6.008487084870848e-05, "loss": 0.0031925756484270096, "step": 140680 }, { "epoch": 39.9347147317627, "grad_norm": 0.2748216390609741, "learning_rate": 6.008203235878513e-05, "loss": 0.0044944554567337034, "step": 140690 }, { "epoch": 39.937553221686066, "grad_norm": 0.08194872736930847, "learning_rate": 6.007919386886177e-05, "loss": 0.005241891369223595, "step": 140700 }, { "epoch": 39.94039171160942, "grad_norm": 0.13116778433322906, "learning_rate": 6.007635537893841e-05, "loss": 0.008138033747673034, "step": 140710 }, { "epoch": 39.943230201532785, "grad_norm": 11.924056053161621, "learning_rate": 6.007351688901505e-05, "loss": 0.012098485976457596, "step": 140720 }, { "epoch": 39.94606869145615, "grad_norm": 0.14996901154518127, "learning_rate": 6.007067839909168e-05, "loss": 0.000800822488963604, "step": 140730 }, { "epoch": 39.948907181379504, "grad_norm": 0.981072723865509, "learning_rate": 6.0067839909168324e-05, "loss": 0.0014003431424498558, "step": 140740 }, { "epoch": 39.95174567130287, "grad_norm": 1.7009402513504028, "learning_rate": 6.006500141924496e-05, "loss": 0.005894765257835388, "step": 140750 }, { "epoch": 39.95458416122623, "grad_norm": 10.593342781066895, "learning_rate": 6.006216292932161e-05, "loss": 0.0149017333984375, "step": 140760 }, { "epoch": 39.957422651149585, "grad_norm": 1.5137261152267456, "learning_rate": 6.005932443939825e-05, "loss": 0.009461023658514024, "step": 140770 }, { "epoch": 39.96026114107295, "grad_norm": 0.6203153729438782, "learning_rate": 6.005648594947488e-05, "loss": 0.007053401321172714, "step": 140780 }, { "epoch": 39.96309963099631, "grad_norm": 0.07132931798696518, "learning_rate": 6.005364745955152e-05, "loss": 0.002069216966629028, "step": 140790 }, { "epoch": 39.965938120919674, "grad_norm": 0.5278233885765076, "learning_rate": 6.005080896962816e-05, "loss": 0.0014589808881282807, "step": 140800 }, { "epoch": 39.96877661084303, "grad_norm": 12.345726013183594, "learning_rate": 6.0047970479704793e-05, "loss": 0.007135550677776337, "step": 140810 }, { "epoch": 39.97161510076639, "grad_norm": 0.5323654413223267, "learning_rate": 6.004513198978144e-05, "loss": 0.006078997254371643, "step": 140820 }, { "epoch": 39.974453590689755, "grad_norm": 0.4918469786643982, "learning_rate": 6.004229349985808e-05, "loss": 0.00623975470662117, "step": 140830 }, { "epoch": 39.97729208061311, "grad_norm": 0.12139986455440521, "learning_rate": 6.003945500993472e-05, "loss": 0.0022832226008176804, "step": 140840 }, { "epoch": 39.980130570536474, "grad_norm": 0.24073602259159088, "learning_rate": 6.003661652001136e-05, "loss": 0.0028021719306707384, "step": 140850 }, { "epoch": 39.98296906045984, "grad_norm": 0.03108965791761875, "learning_rate": 6.0033778030087994e-05, "loss": 0.014249612390995026, "step": 140860 }, { "epoch": 39.98580755038319, "grad_norm": 0.6516008377075195, "learning_rate": 6.0030939540164635e-05, "loss": 0.012614621222019196, "step": 140870 }, { "epoch": 39.988646040306556, "grad_norm": 0.26067066192626953, "learning_rate": 6.002810105024127e-05, "loss": 0.0032917670905590057, "step": 140880 }, { "epoch": 39.99148453022992, "grad_norm": 0.04338408634066582, "learning_rate": 6.002526256031792e-05, "loss": 0.002953190356492996, "step": 140890 }, { "epoch": 39.99432302015328, "grad_norm": 0.06802965700626373, "learning_rate": 6.002242407039456e-05, "loss": 0.0036380767822265627, "step": 140900 }, { "epoch": 39.99716151007664, "grad_norm": 9.522027969360352, "learning_rate": 6.0019585580471194e-05, "loss": 0.00637759193778038, "step": 140910 }, { "epoch": 40.0, "grad_norm": 0.28387269377708435, "learning_rate": 6.001674709054783e-05, "loss": 0.008763550221920014, "step": 140920 }, { "epoch": 40.00283848992336, "grad_norm": 0.8514469861984253, "learning_rate": 6.001390860062447e-05, "loss": 0.01131848394870758, "step": 140930 }, { "epoch": 40.00567697984672, "grad_norm": 4.396357536315918, "learning_rate": 6.0011070110701104e-05, "loss": 0.005232750624418259, "step": 140940 }, { "epoch": 40.00851546977008, "grad_norm": 1.8497380018234253, "learning_rate": 6.0008231620777746e-05, "loss": 0.0025484314188361167, "step": 140950 }, { "epoch": 40.011353959693444, "grad_norm": 0.0686957985162735, "learning_rate": 6.0005393130854394e-05, "loss": 0.0011997101828455925, "step": 140960 }, { "epoch": 40.01419244961681, "grad_norm": 0.09986752271652222, "learning_rate": 6.000255464093103e-05, "loss": 0.001767115667462349, "step": 140970 }, { "epoch": 40.01703093954016, "grad_norm": 0.04710081219673157, "learning_rate": 5.999971615100767e-05, "loss": 0.0010882586240768433, "step": 140980 }, { "epoch": 40.019869429463526, "grad_norm": 6.7900004386901855, "learning_rate": 5.9996877661084304e-05, "loss": 0.004949909448623657, "step": 140990 }, { "epoch": 40.02270791938689, "grad_norm": 3.7599222660064697, "learning_rate": 5.9994039171160946e-05, "loss": 0.0025281870737671854, "step": 141000 }, { "epoch": 40.02270791938689, "eval_accuracy": 0.9753290519488778, "eval_loss": 0.08811134099960327, "eval_runtime": 31.655, "eval_samples_per_second": 496.824, "eval_steps_per_second": 7.771, "step": 141000 }, { "epoch": 40.025546409310245, "grad_norm": 1.5409983396530151, "learning_rate": 5.999120068123758e-05, "loss": 0.002070675604045391, "step": 141010 }, { "epoch": 40.02838489923361, "grad_norm": 0.9793306589126587, "learning_rate": 5.998836219131423e-05, "loss": 0.001387464813888073, "step": 141020 }, { "epoch": 40.03122338915697, "grad_norm": 0.11398293823003769, "learning_rate": 5.998552370139087e-05, "loss": 0.001777113974094391, "step": 141030 }, { "epoch": 40.034061879080326, "grad_norm": 3.822636842727661, "learning_rate": 5.9982685211467504e-05, "loss": 0.0023824451491236687, "step": 141040 }, { "epoch": 40.03690036900369, "grad_norm": 0.03092256933450699, "learning_rate": 5.997984672154414e-05, "loss": 0.00811997652053833, "step": 141050 }, { "epoch": 40.03973885892705, "grad_norm": 1.8151283264160156, "learning_rate": 5.997700823162078e-05, "loss": 0.0037443444132804872, "step": 141060 }, { "epoch": 40.042577348850415, "grad_norm": 0.0816049575805664, "learning_rate": 5.9974169741697415e-05, "loss": 0.003933548927307129, "step": 141070 }, { "epoch": 40.04541583877377, "grad_norm": 1.2705925703048706, "learning_rate": 5.9971331251774056e-05, "loss": 0.011086380481719971, "step": 141080 }, { "epoch": 40.04825432869713, "grad_norm": 0.4045654833316803, "learning_rate": 5.9968492761850705e-05, "loss": 0.0018155571073293687, "step": 141090 }, { "epoch": 40.051092818620496, "grad_norm": 0.02475903369486332, "learning_rate": 5.996565427192734e-05, "loss": 0.00466834157705307, "step": 141100 }, { "epoch": 40.05393130854385, "grad_norm": 7.658180236816406, "learning_rate": 5.996281578200398e-05, "loss": 0.007724925130605698, "step": 141110 }, { "epoch": 40.056769798467215, "grad_norm": 12.927838325500488, "learning_rate": 5.9959977292080615e-05, "loss": 0.009171854704618454, "step": 141120 }, { "epoch": 40.05960828839058, "grad_norm": Infinity, "learning_rate": 5.9957138802157256e-05, "loss": 0.014033439755439758, "step": 141130 }, { "epoch": 40.062446778313934, "grad_norm": 5.594430446624756, "learning_rate": 5.9954584161226236e-05, "loss": 0.00665440559387207, "step": 141140 }, { "epoch": 40.0652852682373, "grad_norm": 2.498629331588745, "learning_rate": 5.995174567130287e-05, "loss": 0.003264225646853447, "step": 141150 }, { "epoch": 40.06812375816066, "grad_norm": 1.520716905593872, "learning_rate": 5.994890718137951e-05, "loss": 0.009837794303894042, "step": 141160 }, { "epoch": 40.07096224808402, "grad_norm": 0.09265720844268799, "learning_rate": 5.994606869145615e-05, "loss": 0.0010774070397019386, "step": 141170 }, { "epoch": 40.07380073800738, "grad_norm": 7.2474870681762695, "learning_rate": 5.994323020153279e-05, "loss": 0.0023976681753993035, "step": 141180 }, { "epoch": 40.07663922793074, "grad_norm": 0.14696305990219116, "learning_rate": 5.994039171160942e-05, "loss": 0.00358167439699173, "step": 141190 }, { "epoch": 40.079477717854104, "grad_norm": 0.3328649401664734, "learning_rate": 5.993755322168607e-05, "loss": 0.0019380806013941764, "step": 141200 }, { "epoch": 40.08231620777746, "grad_norm": 6.6797990798950195, "learning_rate": 5.9934714731762706e-05, "loss": 0.0036746986210346223, "step": 141210 }, { "epoch": 40.08515469770082, "grad_norm": 1.7957972288131714, "learning_rate": 5.993187624183935e-05, "loss": 0.002004554681479931, "step": 141220 }, { "epoch": 40.087993187624186, "grad_norm": 0.16373403370380402, "learning_rate": 5.992903775191598e-05, "loss": 0.002941415086388588, "step": 141230 }, { "epoch": 40.09083167754754, "grad_norm": 0.03368532657623291, "learning_rate": 5.992619926199262e-05, "loss": 0.0030576467514038088, "step": 141240 }, { "epoch": 40.093670167470904, "grad_norm": 0.062392618507146835, "learning_rate": 5.992336077206926e-05, "loss": 0.004089279472827912, "step": 141250 }, { "epoch": 40.09650865739427, "grad_norm": 0.3097897171974182, "learning_rate": 5.99205222821459e-05, "loss": 0.0013048615306615829, "step": 141260 }, { "epoch": 40.09934714731763, "grad_norm": 0.103785939514637, "learning_rate": 5.991768379222255e-05, "loss": 0.006063215434551239, "step": 141270 }, { "epoch": 40.102185637240986, "grad_norm": 2.2910470962524414, "learning_rate": 5.991484530229918e-05, "loss": 0.009402237087488174, "step": 141280 }, { "epoch": 40.10502412716435, "grad_norm": 0.4142440855503082, "learning_rate": 5.991200681237582e-05, "loss": 0.002428651973605156, "step": 141290 }, { "epoch": 40.10786261708771, "grad_norm": 0.07694739103317261, "learning_rate": 5.990916832245246e-05, "loss": 0.0013912970200181008, "step": 141300 }, { "epoch": 40.11070110701107, "grad_norm": 15.2948579788208, "learning_rate": 5.990632983252909e-05, "loss": 0.01813354641199112, "step": 141310 }, { "epoch": 40.11353959693443, "grad_norm": 0.0976259857416153, "learning_rate": 5.9903491342605734e-05, "loss": 0.0032697319984436035, "step": 141320 }, { "epoch": 40.11637808685779, "grad_norm": 0.23160488903522491, "learning_rate": 5.990065285268237e-05, "loss": 0.004273773357272148, "step": 141330 }, { "epoch": 40.119216576781156, "grad_norm": 3.36407732963562, "learning_rate": 5.9897814362759016e-05, "loss": 0.0016891650855541229, "step": 141340 }, { "epoch": 40.12205506670451, "grad_norm": 0.12114007771015167, "learning_rate": 5.989497587283566e-05, "loss": 0.0010487152263522147, "step": 141350 }, { "epoch": 40.124893556627875, "grad_norm": 0.5600854754447937, "learning_rate": 5.989213738291229e-05, "loss": 0.0018773723393678665, "step": 141360 }, { "epoch": 40.12773204655124, "grad_norm": 2.2968366146087646, "learning_rate": 5.9889298892988934e-05, "loss": 0.002494002506136894, "step": 141370 }, { "epoch": 40.13057053647459, "grad_norm": 0.09448488801717758, "learning_rate": 5.988646040306557e-05, "loss": 0.0030395571142435075, "step": 141380 }, { "epoch": 40.133409026397956, "grad_norm": 2.960134744644165, "learning_rate": 5.988362191314221e-05, "loss": 0.002326612547039986, "step": 141390 }, { "epoch": 40.13624751632132, "grad_norm": 0.20933572947978973, "learning_rate": 5.988078342321886e-05, "loss": 0.0021002670750021935, "step": 141400 }, { "epoch": 40.139086006244675, "grad_norm": 0.07288416475057602, "learning_rate": 5.987794493329549e-05, "loss": 0.0024751640856266023, "step": 141410 }, { "epoch": 40.14192449616804, "grad_norm": 0.3387976288795471, "learning_rate": 5.9875106443372134e-05, "loss": 0.0009743316099047661, "step": 141420 }, { "epoch": 40.1447629860914, "grad_norm": 2.274984121322632, "learning_rate": 5.987226795344877e-05, "loss": 0.007703641057014465, "step": 141430 }, { "epoch": 40.14760147601476, "grad_norm": 0.3951001763343811, "learning_rate": 5.98694294635254e-05, "loss": 0.0011407285928726196, "step": 141440 }, { "epoch": 40.15043996593812, "grad_norm": 0.35357916355133057, "learning_rate": 5.9866590973602044e-05, "loss": 0.0029900169000029566, "step": 141450 }, { "epoch": 40.15327845586148, "grad_norm": 0.04896289482712746, "learning_rate": 5.986375248367868e-05, "loss": 0.0015193462371826173, "step": 141460 }, { "epoch": 40.156116945784845, "grad_norm": 0.541138231754303, "learning_rate": 5.986091399375533e-05, "loss": 0.0006352784112095833, "step": 141470 }, { "epoch": 40.1589554357082, "grad_norm": 1.0357104539871216, "learning_rate": 5.985807550383197e-05, "loss": 0.008746492862701415, "step": 141480 }, { "epoch": 40.161793925631564, "grad_norm": 0.454870343208313, "learning_rate": 5.98552370139086e-05, "loss": 0.0009992912411689759, "step": 141490 }, { "epoch": 40.16463241555493, "grad_norm": 0.28350916504859924, "learning_rate": 5.9852398523985245e-05, "loss": 0.004488542675971985, "step": 141500 }, { "epoch": 40.16463241555493, "eval_accuracy": 0.9769822598079736, "eval_loss": 0.07938940078020096, "eval_runtime": 31.9266, "eval_samples_per_second": 492.598, "eval_steps_per_second": 7.705, "step": 141500 }, { "epoch": 40.16747090547828, "grad_norm": 0.029779765754938126, "learning_rate": 5.984956003406188e-05, "loss": 0.000894186832010746, "step": 141510 }, { "epoch": 40.170309395401645, "grad_norm": 0.33482983708381653, "learning_rate": 5.984672154413852e-05, "loss": 0.0012454506009817123, "step": 141520 }, { "epoch": 40.17314788532501, "grad_norm": 0.13806049525737762, "learning_rate": 5.9843883054215155e-05, "loss": 0.0006639633327722549, "step": 141530 }, { "epoch": 40.17598637524837, "grad_norm": 0.671042263507843, "learning_rate": 5.98410445642918e-05, "loss": 0.0007220933213829995, "step": 141540 }, { "epoch": 40.17882486517173, "grad_norm": 0.781765878200531, "learning_rate": 5.9838206074368445e-05, "loss": 0.000602009892463684, "step": 141550 }, { "epoch": 40.18166335509509, "grad_norm": 0.01682823896408081, "learning_rate": 5.983536758444508e-05, "loss": 0.0032154031097888945, "step": 141560 }, { "epoch": 40.18450184501845, "grad_norm": 0.4468253254890442, "learning_rate": 5.9832529094521714e-05, "loss": 0.004222622141242027, "step": 141570 }, { "epoch": 40.18734033494181, "grad_norm": 0.03703214228153229, "learning_rate": 5.9829690604598355e-05, "loss": 0.0003958847373723984, "step": 141580 }, { "epoch": 40.19017882486517, "grad_norm": 1.3852514028549194, "learning_rate": 5.982685211467499e-05, "loss": 0.0016364442184567452, "step": 141590 }, { "epoch": 40.193017314788534, "grad_norm": 0.3018795847892761, "learning_rate": 5.982401362475164e-05, "loss": 0.002281428873538971, "step": 141600 }, { "epoch": 40.19585580471189, "grad_norm": 5.125916004180908, "learning_rate": 5.982117513482828e-05, "loss": 0.01114809736609459, "step": 141610 }, { "epoch": 40.19869429463525, "grad_norm": 1.877244234085083, "learning_rate": 5.9818336644904914e-05, "loss": 0.0007845491170883178, "step": 141620 }, { "epoch": 40.201532784558616, "grad_norm": 0.3787298798561096, "learning_rate": 5.9815498154981555e-05, "loss": 0.0005596024915575981, "step": 141630 }, { "epoch": 40.20437127448198, "grad_norm": 0.07247426360845566, "learning_rate": 5.981265966505819e-05, "loss": 0.004596986621618271, "step": 141640 }, { "epoch": 40.207209764405334, "grad_norm": 0.042944468557834625, "learning_rate": 5.980982117513483e-05, "loss": 0.001783546805381775, "step": 141650 }, { "epoch": 40.2100482543287, "grad_norm": 0.4978560209274292, "learning_rate": 5.9806982685211466e-05, "loss": 0.012850983440876007, "step": 141660 }, { "epoch": 40.21288674425206, "grad_norm": 13.367024421691895, "learning_rate": 5.9804144195288114e-05, "loss": 0.00840640664100647, "step": 141670 }, { "epoch": 40.215725234175416, "grad_norm": 0.14834816753864288, "learning_rate": 5.980130570536475e-05, "loss": 0.0025252092629671095, "step": 141680 }, { "epoch": 40.21856372409878, "grad_norm": 0.09729539602994919, "learning_rate": 5.979846721544139e-05, "loss": 0.0019758220762014387, "step": 141690 }, { "epoch": 40.22140221402214, "grad_norm": 20.349626541137695, "learning_rate": 5.9795628725518025e-05, "loss": 0.01949440687894821, "step": 141700 }, { "epoch": 40.2242407039455, "grad_norm": 0.037174321711063385, "learning_rate": 5.9792790235594666e-05, "loss": 0.001443978026509285, "step": 141710 }, { "epoch": 40.22707919386886, "grad_norm": 1.1710386276245117, "learning_rate": 5.97899517456713e-05, "loss": 0.00927068516612053, "step": 141720 }, { "epoch": 40.22991768379222, "grad_norm": 0.09592163562774658, "learning_rate": 5.978711325574794e-05, "loss": 0.0022571787238121034, "step": 141730 }, { "epoch": 40.232756173715586, "grad_norm": 0.17454227805137634, "learning_rate": 5.978427476582459e-05, "loss": 0.0012124985456466674, "step": 141740 }, { "epoch": 40.23559466363894, "grad_norm": 0.08364821970462799, "learning_rate": 5.9781436275901225e-05, "loss": 0.001541801542043686, "step": 141750 }, { "epoch": 40.238433153562305, "grad_norm": 0.0061753592453897, "learning_rate": 5.9778597785977866e-05, "loss": 0.0006104664877057076, "step": 141760 }, { "epoch": 40.24127164348567, "grad_norm": 0.4816244840621948, "learning_rate": 5.97757592960545e-05, "loss": 0.0006246676668524743, "step": 141770 }, { "epoch": 40.24411013340902, "grad_norm": 0.14365136623382568, "learning_rate": 5.9772920806131135e-05, "loss": 0.0023251485079526903, "step": 141780 }, { "epoch": 40.246948623332386, "grad_norm": 0.29194337129592896, "learning_rate": 5.977008231620778e-05, "loss": 0.00385005921125412, "step": 141790 }, { "epoch": 40.24978711325575, "grad_norm": 0.1193542331457138, "learning_rate": 5.9767243826284425e-05, "loss": 0.004979455843567848, "step": 141800 }, { "epoch": 40.25262560317911, "grad_norm": 0.4120159447193146, "learning_rate": 5.976440533636106e-05, "loss": 0.0018924063071608544, "step": 141810 }, { "epoch": 40.25546409310247, "grad_norm": 10.911212921142578, "learning_rate": 5.97615668464377e-05, "loss": 0.009361256659030915, "step": 141820 }, { "epoch": 40.25830258302583, "grad_norm": 18.51230812072754, "learning_rate": 5.9758728356514335e-05, "loss": 0.00924486294388771, "step": 141830 }, { "epoch": 40.261141072949194, "grad_norm": 8.473150253295898, "learning_rate": 5.975588986659098e-05, "loss": 0.0069797798991203305, "step": 141840 }, { "epoch": 40.26397956287255, "grad_norm": 0.31222599744796753, "learning_rate": 5.975305137666761e-05, "loss": 0.0031681660562753677, "step": 141850 }, { "epoch": 40.26681805279591, "grad_norm": 0.06635484099388123, "learning_rate": 5.975021288674425e-05, "loss": 0.002317035011947155, "step": 141860 }, { "epoch": 40.269656542719275, "grad_norm": 14.258094787597656, "learning_rate": 5.97473743968209e-05, "loss": 0.006613697856664658, "step": 141870 }, { "epoch": 40.27249503264263, "grad_norm": 0.02786726877093315, "learning_rate": 5.9744535906897536e-05, "loss": 0.0020290933549404143, "step": 141880 }, { "epoch": 40.275333522565994, "grad_norm": 0.33624267578125, "learning_rate": 5.974169741697418e-05, "loss": 0.0017712401226162911, "step": 141890 }, { "epoch": 40.27817201248936, "grad_norm": 0.7553533315658569, "learning_rate": 5.973885892705081e-05, "loss": 0.0025712938979268072, "step": 141900 }, { "epoch": 40.28101050241272, "grad_norm": 0.2746584117412567, "learning_rate": 5.9736020437127446e-05, "loss": 0.0008062634617090226, "step": 141910 }, { "epoch": 40.283848992336075, "grad_norm": 13.061668395996094, "learning_rate": 5.973318194720409e-05, "loss": 0.008780070394277573, "step": 141920 }, { "epoch": 40.28668748225944, "grad_norm": 22.92772102355957, "learning_rate": 5.973034345728072e-05, "loss": 0.015335842967033386, "step": 141930 }, { "epoch": 40.2895259721828, "grad_norm": 2.1875548362731934, "learning_rate": 5.972750496735737e-05, "loss": 0.012350553274154663, "step": 141940 }, { "epoch": 40.29236446210616, "grad_norm": 0.283885657787323, "learning_rate": 5.972466647743401e-05, "loss": 0.0013665201142430306, "step": 141950 }, { "epoch": 40.29520295202952, "grad_norm": 0.3917623460292816, "learning_rate": 5.9721827987510646e-05, "loss": 0.000819229707121849, "step": 141960 }, { "epoch": 40.29804144195288, "grad_norm": 16.855709075927734, "learning_rate": 5.971898949758729e-05, "loss": 0.018814995884895325, "step": 141970 }, { "epoch": 40.30087993187624, "grad_norm": 0.5393671989440918, "learning_rate": 5.971615100766392e-05, "loss": 0.002152710407972336, "step": 141980 }, { "epoch": 40.3037184217996, "grad_norm": 0.6449386477470398, "learning_rate": 5.9713312517740564e-05, "loss": 0.002814857475459576, "step": 141990 }, { "epoch": 40.306556911722964, "grad_norm": 0.241722971200943, "learning_rate": 5.971047402781721e-05, "loss": 0.004410463944077491, "step": 142000 }, { "epoch": 40.306556911722964, "eval_accuracy": 0.9745660329369873, "eval_loss": 0.09066623449325562, "eval_runtime": 31.5119, "eval_samples_per_second": 499.081, "eval_steps_per_second": 7.807, "step": 142000 }, { "epoch": 40.30939540164633, "grad_norm": 1.0706580877304077, "learning_rate": 5.9707635537893846e-05, "loss": 0.007949984073638916, "step": 142010 }, { "epoch": 40.31223389156968, "grad_norm": 8.754439353942871, "learning_rate": 5.970479704797049e-05, "loss": 0.0062861904501914975, "step": 142020 }, { "epoch": 40.315072381493046, "grad_norm": 0.10794955492019653, "learning_rate": 5.970195855804712e-05, "loss": 0.001417907141149044, "step": 142030 }, { "epoch": 40.31791087141641, "grad_norm": 3.55732798576355, "learning_rate": 5.969912006812376e-05, "loss": 0.0011975906789302825, "step": 142040 }, { "epoch": 40.320749361339765, "grad_norm": 0.2522185742855072, "learning_rate": 5.96962815782004e-05, "loss": 0.001125376671552658, "step": 142050 }, { "epoch": 40.32358785126313, "grad_norm": 10.290528297424316, "learning_rate": 5.969344308827703e-05, "loss": 0.006360298395156861, "step": 142060 }, { "epoch": 40.32642634118649, "grad_norm": 1.0646207332611084, "learning_rate": 5.969060459835368e-05, "loss": 0.012793755531311036, "step": 142070 }, { "epoch": 40.329264831109846, "grad_norm": 0.09985020011663437, "learning_rate": 5.968776610843032e-05, "loss": 0.006348150968551636, "step": 142080 }, { "epoch": 40.33210332103321, "grad_norm": 0.13184960186481476, "learning_rate": 5.968492761850696e-05, "loss": 0.010489391535520554, "step": 142090 }, { "epoch": 40.33494181095657, "grad_norm": 15.567636489868164, "learning_rate": 5.96820891285836e-05, "loss": 0.00817161351442337, "step": 142100 }, { "epoch": 40.337780300879935, "grad_norm": 0.763323187828064, "learning_rate": 5.967925063866023e-05, "loss": 0.0026276268064975737, "step": 142110 }, { "epoch": 40.34061879080329, "grad_norm": 0.24279308319091797, "learning_rate": 5.9676412148736874e-05, "loss": 0.010016703605651855, "step": 142120 }, { "epoch": 40.34345728072665, "grad_norm": 0.8303472995758057, "learning_rate": 5.967357365881351e-05, "loss": 0.008593074977397919, "step": 142130 }, { "epoch": 40.346295770650016, "grad_norm": 11.335101127624512, "learning_rate": 5.967073516889016e-05, "loss": 0.006815989315509796, "step": 142140 }, { "epoch": 40.34913426057337, "grad_norm": 0.20901642739772797, "learning_rate": 5.966789667896679e-05, "loss": 0.007770717889070511, "step": 142150 }, { "epoch": 40.351972750496735, "grad_norm": 14.806703567504883, "learning_rate": 5.966505818904343e-05, "loss": 0.007448455691337586, "step": 142160 }, { "epoch": 40.3548112404201, "grad_norm": 3.6475741863250732, "learning_rate": 5.966221969912007e-05, "loss": 0.010018229484558105, "step": 142170 }, { "epoch": 40.35764973034346, "grad_norm": 1.5319020748138428, "learning_rate": 5.965938120919671e-05, "loss": 0.004115867614746094, "step": 142180 }, { "epoch": 40.36048822026682, "grad_norm": 0.436472088098526, "learning_rate": 5.9656542719273344e-05, "loss": 0.004052790999412537, "step": 142190 }, { "epoch": 40.36332671019018, "grad_norm": 19.666156768798828, "learning_rate": 5.965370422934999e-05, "loss": 0.008727312088012695, "step": 142200 }, { "epoch": 40.36616520011354, "grad_norm": 0.3000084161758423, "learning_rate": 5.965086573942663e-05, "loss": 0.0011414213106036187, "step": 142210 }, { "epoch": 40.3690036900369, "grad_norm": 0.504807710647583, "learning_rate": 5.964802724950327e-05, "loss": 0.003113292343914509, "step": 142220 }, { "epoch": 40.37184217996026, "grad_norm": 0.4487690329551697, "learning_rate": 5.964518875957991e-05, "loss": 0.007101792097091675, "step": 142230 }, { "epoch": 40.374680669883624, "grad_norm": 0.13410568237304688, "learning_rate": 5.9642350269656544e-05, "loss": 0.00459064319729805, "step": 142240 }, { "epoch": 40.37751915980698, "grad_norm": 0.06903411448001862, "learning_rate": 5.963951177973318e-05, "loss": 0.005993295460939407, "step": 142250 }, { "epoch": 40.38035764973034, "grad_norm": 0.2009345442056656, "learning_rate": 5.963667328980982e-05, "loss": 0.002538302168250084, "step": 142260 }, { "epoch": 40.383196139653705, "grad_norm": 0.08502268046140671, "learning_rate": 5.963383479988647e-05, "loss": 0.012985649704933166, "step": 142270 }, { "epoch": 40.38603462957707, "grad_norm": 0.4799043536186218, "learning_rate": 5.96309963099631e-05, "loss": 0.0014283055439591407, "step": 142280 }, { "epoch": 40.388873119500424, "grad_norm": 6.5437798500061035, "learning_rate": 5.9628157820039744e-05, "loss": 0.00463481992483139, "step": 142290 }, { "epoch": 40.39171160942379, "grad_norm": 6.778109073638916, "learning_rate": 5.962531933011638e-05, "loss": 0.013238084316253663, "step": 142300 }, { "epoch": 40.39455009934715, "grad_norm": 0.1805339902639389, "learning_rate": 5.962248084019302e-05, "loss": 0.004393396154046059, "step": 142310 }, { "epoch": 40.397388589270506, "grad_norm": 3.6597604751586914, "learning_rate": 5.9619642350269654e-05, "loss": 0.004605831205844879, "step": 142320 }, { "epoch": 40.40022707919387, "grad_norm": 0.11904043704271317, "learning_rate": 5.96168038603463e-05, "loss": 0.003317001089453697, "step": 142330 }, { "epoch": 40.40306556911723, "grad_norm": 2.5841944217681885, "learning_rate": 5.9613965370422944e-05, "loss": 0.0045594900846481325, "step": 142340 }, { "epoch": 40.40590405904059, "grad_norm": 0.11179997771978378, "learning_rate": 5.961112688049958e-05, "loss": 0.008069639652967453, "step": 142350 }, { "epoch": 40.40874254896395, "grad_norm": 0.7843412160873413, "learning_rate": 5.960828839057622e-05, "loss": 0.0018729304894804955, "step": 142360 }, { "epoch": 40.41158103888731, "grad_norm": 2.934800863265991, "learning_rate": 5.9605449900652854e-05, "loss": 0.019526392221450806, "step": 142370 }, { "epoch": 40.414419528810676, "grad_norm": 0.5769354701042175, "learning_rate": 5.960261141072949e-05, "loss": 0.012909942865371704, "step": 142380 }, { "epoch": 40.41725801873403, "grad_norm": 0.25136059522628784, "learning_rate": 5.959977292080613e-05, "loss": 0.009529231488704682, "step": 142390 }, { "epoch": 40.420096508657394, "grad_norm": 0.08988066762685776, "learning_rate": 5.959693443088278e-05, "loss": 0.005256969109177589, "step": 142400 }, { "epoch": 40.42293499858076, "grad_norm": 0.7548783421516418, "learning_rate": 5.959409594095941e-05, "loss": 0.0011375119909644126, "step": 142410 }, { "epoch": 40.42577348850411, "grad_norm": 3.813415050506592, "learning_rate": 5.9591257451036055e-05, "loss": 0.004427648335695267, "step": 142420 }, { "epoch": 40.428611978427476, "grad_norm": 0.06868291646242142, "learning_rate": 5.958841896111269e-05, "loss": 0.0034062966704368593, "step": 142430 }, { "epoch": 40.43145046835084, "grad_norm": 1.811394214630127, "learning_rate": 5.958558047118933e-05, "loss": 0.002454865537583828, "step": 142440 }, { "epoch": 40.434288958274195, "grad_norm": 1.7922894954681396, "learning_rate": 5.9582741981265965e-05, "loss": 0.002000577375292778, "step": 142450 }, { "epoch": 40.43712744819756, "grad_norm": 0.5963754057884216, "learning_rate": 5.9579903491342606e-05, "loss": 0.004449887573719025, "step": 142460 }, { "epoch": 40.43996593812092, "grad_norm": 0.10476581752300262, "learning_rate": 5.9577065001419255e-05, "loss": 0.00043170023709535597, "step": 142470 }, { "epoch": 40.44280442804428, "grad_norm": 0.3502050042152405, "learning_rate": 5.957422651149589e-05, "loss": 0.001779523491859436, "step": 142480 }, { "epoch": 40.44564291796764, "grad_norm": 0.03826627880334854, "learning_rate": 5.957138802157253e-05, "loss": 0.0007922479882836342, "step": 142490 }, { "epoch": 40.448481407891, "grad_norm": 4.65405797958374, "learning_rate": 5.9568549531649165e-05, "loss": 0.0019172223284840584, "step": 142500 }, { "epoch": 40.448481407891, "eval_accuracy": 0.9725949004896038, "eval_loss": 0.09436613321304321, "eval_runtime": 32.2473, "eval_samples_per_second": 487.7, "eval_steps_per_second": 7.629, "step": 142500 }, { "epoch": 40.451319897814365, "grad_norm": 0.7506247758865356, "learning_rate": 5.95657110417258e-05, "loss": 0.0015973871573805808, "step": 142510 }, { "epoch": 40.45415838773772, "grad_norm": 2.9844815731048584, "learning_rate": 5.956287255180244e-05, "loss": 0.005705595761537552, "step": 142520 }, { "epoch": 40.45699687766108, "grad_norm": 1.3179161548614502, "learning_rate": 5.956003406187909e-05, "loss": 0.003897494077682495, "step": 142530 }, { "epoch": 40.459835367584446, "grad_norm": 1.0199309587478638, "learning_rate": 5.9557195571955724e-05, "loss": 0.007392019033432007, "step": 142540 }, { "epoch": 40.46267385750781, "grad_norm": 0.9979227781295776, "learning_rate": 5.9554357082032365e-05, "loss": 0.0027575120329856873, "step": 142550 }, { "epoch": 40.465512347431165, "grad_norm": 1.0810099840164185, "learning_rate": 5.9551518592109e-05, "loss": 0.0050247333943843845, "step": 142560 }, { "epoch": 40.46835083735453, "grad_norm": 0.1577218621969223, "learning_rate": 5.954868010218564e-05, "loss": 0.002727147191762924, "step": 142570 }, { "epoch": 40.47118932727789, "grad_norm": 0.08516820520162582, "learning_rate": 5.9545841612262276e-05, "loss": 0.004151012748479843, "step": 142580 }, { "epoch": 40.47402781720125, "grad_norm": 0.803595244884491, "learning_rate": 5.954300312233892e-05, "loss": 0.002104494534432888, "step": 142590 }, { "epoch": 40.47686630712461, "grad_norm": 0.5441530346870422, "learning_rate": 5.9540164632415565e-05, "loss": 0.01703992784023285, "step": 142600 }, { "epoch": 40.47970479704797, "grad_norm": 0.2445831149816513, "learning_rate": 5.95373261424922e-05, "loss": 0.0015179546549916267, "step": 142610 }, { "epoch": 40.48254328697133, "grad_norm": 0.27345046401023865, "learning_rate": 5.9534487652568835e-05, "loss": 0.004393500089645386, "step": 142620 }, { "epoch": 40.48538177689469, "grad_norm": 3.094712734222412, "learning_rate": 5.9531649162645476e-05, "loss": 0.004758062213659287, "step": 142630 }, { "epoch": 40.488220266818054, "grad_norm": 0.22535870969295502, "learning_rate": 5.952881067272211e-05, "loss": 0.0016810072585940361, "step": 142640 }, { "epoch": 40.49105875674142, "grad_norm": 1.0743430852890015, "learning_rate": 5.952597218279875e-05, "loss": 0.005776835978031159, "step": 142650 }, { "epoch": 40.49389724666477, "grad_norm": 0.6973803639411926, "learning_rate": 5.9523133692875387e-05, "loss": 0.005281873792409897, "step": 142660 }, { "epoch": 40.496735736588136, "grad_norm": 0.02622833661735058, "learning_rate": 5.9520295202952035e-05, "loss": 0.0017042310908436775, "step": 142670 }, { "epoch": 40.4995742265115, "grad_norm": 3.8924121856689453, "learning_rate": 5.9517456713028676e-05, "loss": 0.009185433387756348, "step": 142680 }, { "epoch": 40.502412716434854, "grad_norm": 1.2406048774719238, "learning_rate": 5.951461822310531e-05, "loss": 0.007903410494327546, "step": 142690 }, { "epoch": 40.50525120635822, "grad_norm": 0.7078983187675476, "learning_rate": 5.951177973318195e-05, "loss": 0.004825123399496078, "step": 142700 }, { "epoch": 40.50808969628158, "grad_norm": 0.41203054785728455, "learning_rate": 5.950894124325859e-05, "loss": 0.010903126001358033, "step": 142710 }, { "epoch": 40.510928186204936, "grad_norm": 0.4151240885257721, "learning_rate": 5.950610275333522e-05, "loss": 0.0050274964421987535, "step": 142720 }, { "epoch": 40.5137666761283, "grad_norm": 0.245313361287117, "learning_rate": 5.9503264263411876e-05, "loss": 0.008467765152454376, "step": 142730 }, { "epoch": 40.51660516605166, "grad_norm": 0.15165676176548004, "learning_rate": 5.950042577348851e-05, "loss": 0.0008470991626381875, "step": 142740 }, { "epoch": 40.519443655975024, "grad_norm": 1.4660048484802246, "learning_rate": 5.9497587283565145e-05, "loss": 0.007968025654554367, "step": 142750 }, { "epoch": 40.52228214589838, "grad_norm": 0.2969275116920471, "learning_rate": 5.949474879364179e-05, "loss": 0.0014995524659752845, "step": 142760 }, { "epoch": 40.52512063582174, "grad_norm": 1.2491763830184937, "learning_rate": 5.949191030371842e-05, "loss": 0.0022022584453225136, "step": 142770 }, { "epoch": 40.527959125745106, "grad_norm": 0.3539761006832123, "learning_rate": 5.948907181379506e-05, "loss": 0.004901013150811195, "step": 142780 }, { "epoch": 40.53079761566846, "grad_norm": 0.2932395040988922, "learning_rate": 5.94862333238717e-05, "loss": 0.0009664127603173256, "step": 142790 }, { "epoch": 40.533636105591825, "grad_norm": 0.06991647928953171, "learning_rate": 5.9483394833948345e-05, "loss": 0.0027611522004008294, "step": 142800 }, { "epoch": 40.53647459551519, "grad_norm": 0.04482040926814079, "learning_rate": 5.948055634402499e-05, "loss": 0.0039312392473220825, "step": 142810 }, { "epoch": 40.53931308543854, "grad_norm": 10.150687217712402, "learning_rate": 5.947771785410162e-05, "loss": 0.005960384756326676, "step": 142820 }, { "epoch": 40.542151575361906, "grad_norm": 0.01725679822266102, "learning_rate": 5.947487936417826e-05, "loss": 0.003008277155458927, "step": 142830 }, { "epoch": 40.54499006528527, "grad_norm": 8.811765670776367, "learning_rate": 5.94720408742549e-05, "loss": 0.008335787057876586, "step": 142840 }, { "epoch": 40.54782855520863, "grad_norm": 0.2974540591239929, "learning_rate": 5.946920238433153e-05, "loss": 0.010828736424446105, "step": 142850 }, { "epoch": 40.55066704513199, "grad_norm": 0.5259221792221069, "learning_rate": 5.9466363894408173e-05, "loss": 0.0023151716217398644, "step": 142860 }, { "epoch": 40.55350553505535, "grad_norm": 0.025457030162215233, "learning_rate": 5.946352540448482e-05, "loss": 0.000605616346001625, "step": 142870 }, { "epoch": 40.55634402497871, "grad_norm": 0.6669510006904602, "learning_rate": 5.9460686914561456e-05, "loss": 0.0019593492150306703, "step": 142880 }, { "epoch": 40.55918251490207, "grad_norm": 1.8184717893600464, "learning_rate": 5.94578484246381e-05, "loss": 0.001833096705377102, "step": 142890 }, { "epoch": 40.56202100482543, "grad_norm": 0.07129436731338501, "learning_rate": 5.945500993471473e-05, "loss": 0.0073227077722549435, "step": 142900 }, { "epoch": 40.564859494748795, "grad_norm": 15.05155086517334, "learning_rate": 5.9452171444791373e-05, "loss": 0.00830138623714447, "step": 142910 }, { "epoch": 40.56769798467215, "grad_norm": 0.0586596354842186, "learning_rate": 5.944933295486801e-05, "loss": 0.0008426051586866379, "step": 142920 }, { "epoch": 40.570536474595514, "grad_norm": 0.06396622955799103, "learning_rate": 5.9446494464944656e-05, "loss": 0.003173793479800224, "step": 142930 }, { "epoch": 40.57337496451888, "grad_norm": 0.7168685793876648, "learning_rate": 5.94436559750213e-05, "loss": 0.0038959700614213943, "step": 142940 }, { "epoch": 40.57621345444224, "grad_norm": 0.30210697650909424, "learning_rate": 5.944081748509793e-05, "loss": 0.0015396114438772201, "step": 142950 }, { "epoch": 40.579051944365595, "grad_norm": 1.1743474006652832, "learning_rate": 5.9437978995174574e-05, "loss": 0.0014950692653656005, "step": 142960 }, { "epoch": 40.58189043428896, "grad_norm": 1.883886694908142, "learning_rate": 5.943514050525121e-05, "loss": 0.000945562869310379, "step": 142970 }, { "epoch": 40.58472892421232, "grad_norm": 0.27309176325798035, "learning_rate": 5.943230201532784e-05, "loss": 0.0006550701335072517, "step": 142980 }, { "epoch": 40.58756741413568, "grad_norm": 0.8929429054260254, "learning_rate": 5.9429463525404484e-05, "loss": 0.010310186445713044, "step": 142990 }, { "epoch": 40.59040590405904, "grad_norm": 0.5350882411003113, "learning_rate": 5.942662503548113e-05, "loss": 0.002197693847119808, "step": 143000 }, { "epoch": 40.59040590405904, "eval_accuracy": 0.9751382971959052, "eval_loss": 0.09259281307458878, "eval_runtime": 31.7159, "eval_samples_per_second": 495.872, "eval_steps_per_second": 7.756, "step": 143000 }, { "epoch": 40.5932443939824, "grad_norm": 0.37987083196640015, "learning_rate": 5.942378654555777e-05, "loss": 0.00469839945435524, "step": 143010 }, { "epoch": 40.596082883905765, "grad_norm": 9.071054458618164, "learning_rate": 5.942094805563441e-05, "loss": 0.0034980453550815583, "step": 143020 }, { "epoch": 40.59892137382912, "grad_norm": 0.37976956367492676, "learning_rate": 5.941810956571104e-05, "loss": 0.00638132244348526, "step": 143030 }, { "epoch": 40.601759863752484, "grad_norm": 0.4288770854473114, "learning_rate": 5.9415271075787684e-05, "loss": 0.0011041268706321716, "step": 143040 }, { "epoch": 40.60459835367585, "grad_norm": 0.32534340023994446, "learning_rate": 5.941243258586432e-05, "loss": 0.0007978539913892746, "step": 143050 }, { "epoch": 40.6074368435992, "grad_norm": 0.9897381663322449, "learning_rate": 5.940959409594096e-05, "loss": 0.000856412760913372, "step": 143060 }, { "epoch": 40.610275333522566, "grad_norm": 0.5289235711097717, "learning_rate": 5.940675560601761e-05, "loss": 0.0027045389637351037, "step": 143070 }, { "epoch": 40.61311382344593, "grad_norm": 0.018197203055024147, "learning_rate": 5.940391711609424e-05, "loss": 0.002272966876626015, "step": 143080 }, { "epoch": 40.615952313369284, "grad_norm": 0.2511475682258606, "learning_rate": 5.940107862617088e-05, "loss": 0.005710091441869736, "step": 143090 }, { "epoch": 40.61879080329265, "grad_norm": 0.25161951780319214, "learning_rate": 5.939824013624752e-05, "loss": 0.005178947374224663, "step": 143100 }, { "epoch": 40.62162929321601, "grad_norm": 0.30159807205200195, "learning_rate": 5.9395401646324154e-05, "loss": 0.0008851200342178345, "step": 143110 }, { "epoch": 40.62446778313937, "grad_norm": 4.385353088378906, "learning_rate": 5.9392563156400795e-05, "loss": 0.0027853500097990034, "step": 143120 }, { "epoch": 40.62730627306273, "grad_norm": 0.12101732194423676, "learning_rate": 5.938972466647744e-05, "loss": 0.0026125997304916383, "step": 143130 }, { "epoch": 40.63014476298609, "grad_norm": 0.7842867970466614, "learning_rate": 5.938688617655408e-05, "loss": 0.0013510072603821754, "step": 143140 }, { "epoch": 40.632983252909455, "grad_norm": 0.029502004384994507, "learning_rate": 5.938404768663072e-05, "loss": 0.0018030934035778046, "step": 143150 }, { "epoch": 40.63582174283281, "grad_norm": 0.04478999599814415, "learning_rate": 5.9381209196707354e-05, "loss": 0.0013331590220332147, "step": 143160 }, { "epoch": 40.63866023275617, "grad_norm": 19.59030532836914, "learning_rate": 5.9378370706783995e-05, "loss": 0.013360127806663513, "step": 143170 }, { "epoch": 40.641498722679536, "grad_norm": 0.6162864565849304, "learning_rate": 5.937553221686063e-05, "loss": 0.0010724138468503953, "step": 143180 }, { "epoch": 40.64433721260289, "grad_norm": 0.12171696871519089, "learning_rate": 5.9372693726937264e-05, "loss": 0.0011655807495117188, "step": 143190 }, { "epoch": 40.647175702526255, "grad_norm": 16.83856201171875, "learning_rate": 5.936985523701392e-05, "loss": 0.006465972959995269, "step": 143200 }, { "epoch": 40.65001419244962, "grad_norm": 8.244622230529785, "learning_rate": 5.9367016747090554e-05, "loss": 0.004117476567625999, "step": 143210 }, { "epoch": 40.65285268237298, "grad_norm": 1.1474430561065674, "learning_rate": 5.936417825716719e-05, "loss": 0.01116764172911644, "step": 143220 }, { "epoch": 40.655691172296336, "grad_norm": 0.09357480704784393, "learning_rate": 5.936133976724383e-05, "loss": 0.003266451507806778, "step": 143230 }, { "epoch": 40.6585296622197, "grad_norm": 0.09653209894895554, "learning_rate": 5.9358501277320464e-05, "loss": 0.004706088453531265, "step": 143240 }, { "epoch": 40.66136815214306, "grad_norm": 0.44743612408638, "learning_rate": 5.9355662787397106e-05, "loss": 0.0010750528424978256, "step": 143250 }, { "epoch": 40.66420664206642, "grad_norm": 2.37954044342041, "learning_rate": 5.935282429747374e-05, "loss": 0.009068956971168518, "step": 143260 }, { "epoch": 40.66704513198978, "grad_norm": 1.0571801662445068, "learning_rate": 5.934998580755039e-05, "loss": 0.0034874562174081803, "step": 143270 }, { "epoch": 40.669883621913144, "grad_norm": 14.328583717346191, "learning_rate": 5.934714731762703e-05, "loss": 0.007115014642477035, "step": 143280 }, { "epoch": 40.67272211183651, "grad_norm": 0.07091622054576874, "learning_rate": 5.9344308827703664e-05, "loss": 0.0014853779226541518, "step": 143290 }, { "epoch": 40.67556060175986, "grad_norm": 23.17186164855957, "learning_rate": 5.934175418677264e-05, "loss": 0.02061387002468109, "step": 143300 }, { "epoch": 40.678399091683225, "grad_norm": 0.5094237327575684, "learning_rate": 5.9338915696849286e-05, "loss": 0.006845498085021972, "step": 143310 }, { "epoch": 40.68123758160659, "grad_norm": 0.054741036146879196, "learning_rate": 5.933607720692592e-05, "loss": 0.010843484848737716, "step": 143320 }, { "epoch": 40.684076071529944, "grad_norm": 0.17377549409866333, "learning_rate": 5.933323871700256e-05, "loss": 0.006014581024646759, "step": 143330 }, { "epoch": 40.68691456145331, "grad_norm": 8.716096878051758, "learning_rate": 5.9330400227079196e-05, "loss": 0.009864570200443267, "step": 143340 }, { "epoch": 40.68975305137667, "grad_norm": 16.881797790527344, "learning_rate": 5.932756173715584e-05, "loss": 0.007041013985872269, "step": 143350 }, { "epoch": 40.692591541300025, "grad_norm": 0.269989550113678, "learning_rate": 5.932472324723247e-05, "loss": 0.0024568244814872743, "step": 143360 }, { "epoch": 40.69543003122339, "grad_norm": 0.391430139541626, "learning_rate": 5.932188475730911e-05, "loss": 0.0036700524389743803, "step": 143370 }, { "epoch": 40.69826852114675, "grad_norm": 6.283731460571289, "learning_rate": 5.9319046267385755e-05, "loss": 0.0034056290984153747, "step": 143380 }, { "epoch": 40.701107011070114, "grad_norm": 0.13294091820716858, "learning_rate": 5.9316207777462396e-05, "loss": 0.004032780230045318, "step": 143390 }, { "epoch": 40.70394550099347, "grad_norm": 4.740020751953125, "learning_rate": 5.931336928753903e-05, "loss": 0.008626027405261994, "step": 143400 }, { "epoch": 40.70678399091683, "grad_norm": 1.1918461322784424, "learning_rate": 5.931053079761567e-05, "loss": 0.019481292366981505, "step": 143410 }, { "epoch": 40.709622480840196, "grad_norm": 0.389034241437912, "learning_rate": 5.930769230769231e-05, "loss": 0.003907892853021622, "step": 143420 }, { "epoch": 40.71246097076355, "grad_norm": 0.27757883071899414, "learning_rate": 5.930485381776895e-05, "loss": 0.007003211230039596, "step": 143430 }, { "epoch": 40.715299460686914, "grad_norm": 0.1613476425409317, "learning_rate": 5.930201532784558e-05, "loss": 0.010487939417362212, "step": 143440 }, { "epoch": 40.71813795061028, "grad_norm": 0.48295992612838745, "learning_rate": 5.929917683792223e-05, "loss": 0.000825997069478035, "step": 143450 }, { "epoch": 40.72097644053363, "grad_norm": 0.0925155058503151, "learning_rate": 5.929633834799887e-05, "loss": 0.009538304805755616, "step": 143460 }, { "epoch": 40.723814930456996, "grad_norm": 1.1203057765960693, "learning_rate": 5.929349985807551e-05, "loss": 0.004477202147245407, "step": 143470 }, { "epoch": 40.72665342038036, "grad_norm": 0.14477191865444183, "learning_rate": 5.929066136815214e-05, "loss": 0.006990453600883484, "step": 143480 }, { "epoch": 40.72949191030372, "grad_norm": 0.19251132011413574, "learning_rate": 5.928782287822878e-05, "loss": 0.0024141045287251472, "step": 143490 }, { "epoch": 40.73233040022708, "grad_norm": 0.47015950083732605, "learning_rate": 5.928498438830542e-05, "loss": 0.024002668261528016, "step": 143500 }, { "epoch": 40.73233040022708, "eval_accuracy": 0.9729128250778916, "eval_loss": 0.09465668350458145, "eval_runtime": 32.0739, "eval_samples_per_second": 490.336, "eval_steps_per_second": 7.67, "step": 143500 }, { "epoch": 40.73516889015044, "grad_norm": 0.09650905430316925, "learning_rate": 5.9282145898382066e-05, "loss": 0.0011524861678481103, "step": 143510 }, { "epoch": 40.7380073800738, "grad_norm": 0.7886379957199097, "learning_rate": 5.927930740845871e-05, "loss": 0.0030128732323646545, "step": 143520 }, { "epoch": 40.74084586999716, "grad_norm": 1.2624696493148804, "learning_rate": 5.927646891853534e-05, "loss": 0.004453198611736297, "step": 143530 }, { "epoch": 40.74368435992052, "grad_norm": 2.1769087314605713, "learning_rate": 5.927363042861198e-05, "loss": 0.004307749122381211, "step": 143540 }, { "epoch": 40.746522849843885, "grad_norm": 0.29042473435401917, "learning_rate": 5.927079193868862e-05, "loss": 0.007949556410312652, "step": 143550 }, { "epoch": 40.74936133976724, "grad_norm": 0.4124404489994049, "learning_rate": 5.926795344876526e-05, "loss": 0.00828683227300644, "step": 143560 }, { "epoch": 40.7521998296906, "grad_norm": 0.41829487681388855, "learning_rate": 5.9265114958841894e-05, "loss": 0.004325699433684349, "step": 143570 }, { "epoch": 40.755038319613966, "grad_norm": 6.501859664916992, "learning_rate": 5.926227646891854e-05, "loss": 0.0022047311067581177, "step": 143580 }, { "epoch": 40.75787680953733, "grad_norm": 0.3197261393070221, "learning_rate": 5.925943797899518e-05, "loss": 0.001522185280919075, "step": 143590 }, { "epoch": 40.760715299460685, "grad_norm": 0.036058973520994186, "learning_rate": 5.925659948907182e-05, "loss": 0.004107113182544708, "step": 143600 }, { "epoch": 40.76355378938405, "grad_norm": 1.0390346050262451, "learning_rate": 5.925376099914845e-05, "loss": 0.0022269533947110176, "step": 143610 }, { "epoch": 40.76639227930741, "grad_norm": 16.896867752075195, "learning_rate": 5.9250922509225094e-05, "loss": 0.010248854756355286, "step": 143620 }, { "epoch": 40.76923076923077, "grad_norm": 0.5223643183708191, "learning_rate": 5.924808401930173e-05, "loss": 0.0008883951231837273, "step": 143630 }, { "epoch": 40.77206925915413, "grad_norm": 17.652469635009766, "learning_rate": 5.924524552937837e-05, "loss": 0.013144730031490326, "step": 143640 }, { "epoch": 40.77490774907749, "grad_norm": 0.915235698223114, "learning_rate": 5.924240703945502e-05, "loss": 0.001236843317747116, "step": 143650 }, { "epoch": 40.77774623900085, "grad_norm": 0.07645450532436371, "learning_rate": 5.923956854953165e-05, "loss": 0.003057604283094406, "step": 143660 }, { "epoch": 40.78058472892421, "grad_norm": 1.5591310262680054, "learning_rate": 5.9236730059608294e-05, "loss": 0.006219648569822311, "step": 143670 }, { "epoch": 40.783423218847574, "grad_norm": 1.6420620679855347, "learning_rate": 5.923389156968493e-05, "loss": 0.0022735850885510444, "step": 143680 }, { "epoch": 40.78626170877094, "grad_norm": 16.00141143798828, "learning_rate": 5.923105307976157e-05, "loss": 0.0214938759803772, "step": 143690 }, { "epoch": 40.78910019869429, "grad_norm": 0.6556671261787415, "learning_rate": 5.9228214589838204e-05, "loss": 0.004216181486845017, "step": 143700 }, { "epoch": 40.791938688617655, "grad_norm": 1.5717039108276367, "learning_rate": 5.922537609991485e-05, "loss": 0.0013555359095335008, "step": 143710 }, { "epoch": 40.79477717854102, "grad_norm": 0.12915366888046265, "learning_rate": 5.9222537609991494e-05, "loss": 0.001456248201429844, "step": 143720 }, { "epoch": 40.797615668464374, "grad_norm": 0.18152378499507904, "learning_rate": 5.921969912006813e-05, "loss": 0.0017806293442845344, "step": 143730 }, { "epoch": 40.80045415838774, "grad_norm": 0.02659328281879425, "learning_rate": 5.921686063014476e-05, "loss": 0.0010754212737083435, "step": 143740 }, { "epoch": 40.8032926483111, "grad_norm": 0.06944055110216141, "learning_rate": 5.9214022140221405e-05, "loss": 0.000644514337182045, "step": 143750 }, { "epoch": 40.80613113823446, "grad_norm": 0.09241548180580139, "learning_rate": 5.921118365029804e-05, "loss": 0.0017124557867646216, "step": 143760 }, { "epoch": 40.80896962815782, "grad_norm": 6.628686904907227, "learning_rate": 5.920834516037468e-05, "loss": 0.0020653253421187403, "step": 143770 }, { "epoch": 40.81180811808118, "grad_norm": 0.5456545948982239, "learning_rate": 5.920550667045133e-05, "loss": 0.0012607796117663384, "step": 143780 }, { "epoch": 40.814646608004544, "grad_norm": 17.83376693725586, "learning_rate": 5.920266818052796e-05, "loss": 0.009623174369335175, "step": 143790 }, { "epoch": 40.8174850979279, "grad_norm": 0.06481852382421494, "learning_rate": 5.9199829690604605e-05, "loss": 0.0019452426582574843, "step": 143800 }, { "epoch": 40.82032358785126, "grad_norm": 0.07820414751768112, "learning_rate": 5.919699120068124e-05, "loss": 0.001421624980866909, "step": 143810 }, { "epoch": 40.823162077774626, "grad_norm": 0.0670999139547348, "learning_rate": 5.919415271075788e-05, "loss": 0.001543007232248783, "step": 143820 }, { "epoch": 40.82600056769798, "grad_norm": 0.13607297837734222, "learning_rate": 5.9191314220834515e-05, "loss": 0.0052397135645151135, "step": 143830 }, { "epoch": 40.828839057621344, "grad_norm": 0.1956016719341278, "learning_rate": 5.918847573091115e-05, "loss": 0.0017496682703495026, "step": 143840 }, { "epoch": 40.83167754754471, "grad_norm": 5.186715602874756, "learning_rate": 5.91856372409878e-05, "loss": 0.00398058295249939, "step": 143850 }, { "epoch": 40.83451603746807, "grad_norm": 0.18321655690670013, "learning_rate": 5.918279875106444e-05, "loss": 0.0027581514790654183, "step": 143860 }, { "epoch": 40.837354527391426, "grad_norm": 1.7091448307037354, "learning_rate": 5.9179960261141074e-05, "loss": 0.0009309165179729462, "step": 143870 }, { "epoch": 40.84019301731479, "grad_norm": 7.4326653480529785, "learning_rate": 5.9177121771217715e-05, "loss": 0.004746191948652267, "step": 143880 }, { "epoch": 40.84303150723815, "grad_norm": 0.03949809819459915, "learning_rate": 5.917428328129435e-05, "loss": 0.0022472869604825974, "step": 143890 }, { "epoch": 40.84586999716151, "grad_norm": 0.11706339567899704, "learning_rate": 5.917144479137099e-05, "loss": 0.0025604210793972017, "step": 143900 }, { "epoch": 40.84870848708487, "grad_norm": 4.989575386047363, "learning_rate": 5.916860630144764e-05, "loss": 0.0019094107672572135, "step": 143910 }, { "epoch": 40.85154697700823, "grad_norm": 0.0716421976685524, "learning_rate": 5.9165767811524274e-05, "loss": 0.0038660924881696703, "step": 143920 }, { "epoch": 40.85438546693159, "grad_norm": 0.13984334468841553, "learning_rate": 5.9162929321600915e-05, "loss": 0.00573047436773777, "step": 143930 }, { "epoch": 40.85722395685495, "grad_norm": 0.0861579105257988, "learning_rate": 5.916009083167755e-05, "loss": 0.004090435802936554, "step": 143940 }, { "epoch": 40.860062446778315, "grad_norm": 0.12914298474788666, "learning_rate": 5.9157252341754185e-05, "loss": 0.006060747802257538, "step": 143950 }, { "epoch": 40.86290093670168, "grad_norm": 0.5987483859062195, "learning_rate": 5.9154413851830826e-05, "loss": 0.0017599912360310554, "step": 143960 }, { "epoch": 40.865739426625034, "grad_norm": 0.5651119947433472, "learning_rate": 5.915157536190746e-05, "loss": 0.0009978052228689193, "step": 143970 }, { "epoch": 40.868577916548396, "grad_norm": 3.598417043685913, "learning_rate": 5.914873687198411e-05, "loss": 0.002420127019286156, "step": 143980 }, { "epoch": 40.87141640647176, "grad_norm": 0.5995482802391052, "learning_rate": 5.914589838206075e-05, "loss": 0.002202806994318962, "step": 143990 }, { "epoch": 40.874254896395115, "grad_norm": 0.7933487296104431, "learning_rate": 5.9143059892137385e-05, "loss": 0.0027261780574917792, "step": 144000 }, { "epoch": 40.874254896395115, "eval_accuracy": 0.9737394290074395, "eval_loss": 0.09021281450986862, "eval_runtime": 31.4668, "eval_samples_per_second": 499.796, "eval_steps_per_second": 7.818, "step": 144000 }, { "epoch": 40.87709338631848, "grad_norm": 1.2335987091064453, "learning_rate": 5.9140221402214026e-05, "loss": 0.004290033876895904, "step": 144010 }, { "epoch": 40.87993187624184, "grad_norm": 0.2803191840648651, "learning_rate": 5.913738291229066e-05, "loss": 0.003197193518280983, "step": 144020 }, { "epoch": 40.8827703661652, "grad_norm": 0.6220667958259583, "learning_rate": 5.91345444223673e-05, "loss": 0.003210219740867615, "step": 144030 }, { "epoch": 40.88560885608856, "grad_norm": 19.223304748535156, "learning_rate": 5.913170593244395e-05, "loss": 0.01699031889438629, "step": 144040 }, { "epoch": 40.88844734601192, "grad_norm": 0.05284702032804489, "learning_rate": 5.9128867442520585e-05, "loss": 0.0030202146619558333, "step": 144050 }, { "epoch": 40.891285835935285, "grad_norm": 0.20078560709953308, "learning_rate": 5.9126028952597226e-05, "loss": 0.002349603921175003, "step": 144060 }, { "epoch": 40.89412432585864, "grad_norm": 0.7740545272827148, "learning_rate": 5.912319046267386e-05, "loss": 0.0013783114030957223, "step": 144070 }, { "epoch": 40.896962815782004, "grad_norm": 1.0110328197479248, "learning_rate": 5.9120351972750495e-05, "loss": 0.0038494981825351717, "step": 144080 }, { "epoch": 40.89980130570537, "grad_norm": 0.07562289386987686, "learning_rate": 5.911751348282714e-05, "loss": 0.0016973508521914483, "step": 144090 }, { "epoch": 40.90263979562872, "grad_norm": 0.3942168056964874, "learning_rate": 5.911467499290377e-05, "loss": 0.0029261182993650437, "step": 144100 }, { "epoch": 40.905478285552086, "grad_norm": 0.034911178052425385, "learning_rate": 5.911183650298042e-05, "loss": 0.0006117725744843483, "step": 144110 }, { "epoch": 40.90831677547545, "grad_norm": 0.12756870687007904, "learning_rate": 5.910899801305706e-05, "loss": 0.0014733750373125075, "step": 144120 }, { "epoch": 40.91115526539881, "grad_norm": 13.804348945617676, "learning_rate": 5.9106159523133696e-05, "loss": 0.006341956555843353, "step": 144130 }, { "epoch": 40.91399375532217, "grad_norm": 0.037310294806957245, "learning_rate": 5.910332103321034e-05, "loss": 0.0033818788826465605, "step": 144140 }, { "epoch": 40.91683224524553, "grad_norm": 0.13073444366455078, "learning_rate": 5.910048254328697e-05, "loss": 0.0013555899262428285, "step": 144150 }, { "epoch": 40.91967073516889, "grad_norm": 0.06129813939332962, "learning_rate": 5.909764405336361e-05, "loss": 0.0014847632497549056, "step": 144160 }, { "epoch": 40.92250922509225, "grad_norm": 0.08728554099798203, "learning_rate": 5.909480556344025e-05, "loss": 0.0020656703040003777, "step": 144170 }, { "epoch": 40.92534771501561, "grad_norm": 6.159107208251953, "learning_rate": 5.9091967073516896e-05, "loss": 0.0031618215143680573, "step": 144180 }, { "epoch": 40.928186204938974, "grad_norm": 0.3490411341190338, "learning_rate": 5.908912858359354e-05, "loss": 0.0013254201039671898, "step": 144190 }, { "epoch": 40.93102469486233, "grad_norm": 0.09766576439142227, "learning_rate": 5.908629009367017e-05, "loss": 0.0020377801731228827, "step": 144200 }, { "epoch": 40.93386318478569, "grad_norm": 0.9051247239112854, "learning_rate": 5.9083451603746806e-05, "loss": 0.00801626518368721, "step": 144210 }, { "epoch": 40.936701674709056, "grad_norm": 0.4656331241130829, "learning_rate": 5.908061311382345e-05, "loss": 0.001072445698082447, "step": 144220 }, { "epoch": 40.93954016463242, "grad_norm": 11.59216022491455, "learning_rate": 5.907777462390008e-05, "loss": 0.007193699479103088, "step": 144230 }, { "epoch": 40.942378654555775, "grad_norm": 0.25008267164230347, "learning_rate": 5.907493613397673e-05, "loss": 0.0019127871841192246, "step": 144240 }, { "epoch": 40.94521714447914, "grad_norm": 0.6627295017242432, "learning_rate": 5.907209764405337e-05, "loss": 0.005394937843084336, "step": 144250 }, { "epoch": 40.9480556344025, "grad_norm": 2.252340316772461, "learning_rate": 5.9069259154130006e-05, "loss": 0.0020184533670544623, "step": 144260 }, { "epoch": 40.950894124325856, "grad_norm": 5.645776748657227, "learning_rate": 5.906642066420665e-05, "loss": 0.0031109314411878588, "step": 144270 }, { "epoch": 40.95373261424922, "grad_norm": 0.11262688785791397, "learning_rate": 5.906358217428328e-05, "loss": 0.002817433327436447, "step": 144280 }, { "epoch": 40.95657110417258, "grad_norm": 1.9388688802719116, "learning_rate": 5.9060743684359924e-05, "loss": 0.00715155228972435, "step": 144290 }, { "epoch": 40.95940959409594, "grad_norm": 3.580753803253174, "learning_rate": 5.905790519443656e-05, "loss": 0.004485449939966202, "step": 144300 }, { "epoch": 40.9622480840193, "grad_norm": 0.1425146758556366, "learning_rate": 5.9055066704513206e-05, "loss": 0.002665933407843113, "step": 144310 }, { "epoch": 40.96508657394266, "grad_norm": 0.10770668834447861, "learning_rate": 5.905222821458984e-05, "loss": 0.003633125871419907, "step": 144320 }, { "epoch": 40.967925063866026, "grad_norm": 0.26043370366096497, "learning_rate": 5.904938972466648e-05, "loss": 0.0013338970020413399, "step": 144330 }, { "epoch": 40.97076355378938, "grad_norm": 0.18556566536426544, "learning_rate": 5.904655123474312e-05, "loss": 0.01361810863018036, "step": 144340 }, { "epoch": 40.973602043712745, "grad_norm": 2.348299503326416, "learning_rate": 5.904371274481976e-05, "loss": 0.01884114444255829, "step": 144350 }, { "epoch": 40.97644053363611, "grad_norm": 6.988702774047852, "learning_rate": 5.904087425489639e-05, "loss": 0.0031696990132331847, "step": 144360 }, { "epoch": 40.979279023559464, "grad_norm": 0.3378346562385559, "learning_rate": 5.9038035764973034e-05, "loss": 0.007236155867576599, "step": 144370 }, { "epoch": 40.98211751348283, "grad_norm": 1.0769144296646118, "learning_rate": 5.903519727504968e-05, "loss": 0.009124004095792771, "step": 144380 }, { "epoch": 40.98495600340619, "grad_norm": 0.3540835380554199, "learning_rate": 5.903235878512632e-05, "loss": 0.0022158462554216383, "step": 144390 }, { "epoch": 40.987794493329545, "grad_norm": 0.15500855445861816, "learning_rate": 5.902952029520296e-05, "loss": 0.001619946025311947, "step": 144400 }, { "epoch": 40.99063298325291, "grad_norm": 1.889005184173584, "learning_rate": 5.902668180527959e-05, "loss": 0.00350106880068779, "step": 144410 }, { "epoch": 40.99347147317627, "grad_norm": 0.9555891752243042, "learning_rate": 5.902384331535623e-05, "loss": 0.0017476914450526237, "step": 144420 }, { "epoch": 40.996309963099634, "grad_norm": 0.1884075254201889, "learning_rate": 5.902100482543287e-05, "loss": 0.006235598027706147, "step": 144430 }, { "epoch": 40.99914845302299, "grad_norm": 1.1192851066589355, "learning_rate": 5.901816633550952e-05, "loss": 0.0019264807924628258, "step": 144440 }, { "epoch": 41.00198694294635, "grad_norm": 0.46357545256614685, "learning_rate": 5.901532784558615e-05, "loss": 0.0009607197716832161, "step": 144450 }, { "epoch": 41.004825432869715, "grad_norm": 0.15065976977348328, "learning_rate": 5.901248935566279e-05, "loss": 0.0011963650584220886, "step": 144460 }, { "epoch": 41.00766392279307, "grad_norm": 0.2063741534948349, "learning_rate": 5.900965086573943e-05, "loss": 0.0011081535369157792, "step": 144470 }, { "epoch": 41.010502412716434, "grad_norm": 2.1283223628997803, "learning_rate": 5.900681237581607e-05, "loss": 0.007787109911441803, "step": 144480 }, { "epoch": 41.0133409026398, "grad_norm": 0.027833949774503708, "learning_rate": 5.9003973885892704e-05, "loss": 0.003902086988091469, "step": 144490 }, { "epoch": 41.01617939256316, "grad_norm": 0.1856052279472351, "learning_rate": 5.9001135395969345e-05, "loss": 0.00402202382683754, "step": 144500 }, { "epoch": 41.01617939256316, "eval_accuracy": 0.9760920709607681, "eval_loss": 0.0826275423169136, "eval_runtime": 31.6204, "eval_samples_per_second": 497.368, "eval_steps_per_second": 7.78, "step": 144500 }, { "epoch": 41.019017882486516, "grad_norm": 0.2354753464460373, "learning_rate": 5.899829690604599e-05, "loss": 0.0023992141708731652, "step": 144510 }, { "epoch": 41.02185637240988, "grad_norm": 0.15165922045707703, "learning_rate": 5.899545841612263e-05, "loss": 0.004209460318088531, "step": 144520 }, { "epoch": 41.02469486233324, "grad_norm": 0.3885304629802704, "learning_rate": 5.899261992619927e-05, "loss": 0.0012431671842932702, "step": 144530 }, { "epoch": 41.0275333522566, "grad_norm": 0.042866360396146774, "learning_rate": 5.8989781436275904e-05, "loss": 0.004140231758356094, "step": 144540 }, { "epoch": 41.03037184217996, "grad_norm": 0.03467678278684616, "learning_rate": 5.898694294635254e-05, "loss": 0.00139390230178833, "step": 144550 }, { "epoch": 41.03321033210332, "grad_norm": 0.38611334562301636, "learning_rate": 5.898410445642918e-05, "loss": 0.0009662073105573654, "step": 144560 }, { "epoch": 41.03604882202668, "grad_norm": 0.3164624273777008, "learning_rate": 5.8981265966505814e-05, "loss": 0.00044748857617378236, "step": 144570 }, { "epoch": 41.03888731195004, "grad_norm": 0.07682400196790695, "learning_rate": 5.897842747658246e-05, "loss": 0.002089514955878258, "step": 144580 }, { "epoch": 41.041725801873405, "grad_norm": 0.41576650738716125, "learning_rate": 5.8975588986659104e-05, "loss": 0.002344292029738426, "step": 144590 }, { "epoch": 41.04456429179677, "grad_norm": 0.11612515896558762, "learning_rate": 5.897275049673574e-05, "loss": 0.019361433386802674, "step": 144600 }, { "epoch": 41.04740278172012, "grad_norm": 0.18328799307346344, "learning_rate": 5.896991200681238e-05, "loss": 0.006509527564048767, "step": 144610 }, { "epoch": 41.050241271643486, "grad_norm": 0.043397288769483566, "learning_rate": 5.8967073516889014e-05, "loss": 0.00677272081375122, "step": 144620 }, { "epoch": 41.05307976156685, "grad_norm": 0.7659888863563538, "learning_rate": 5.8964235026965656e-05, "loss": 0.0016362672671675683, "step": 144630 }, { "epoch": 41.055918251490205, "grad_norm": 0.47028297185897827, "learning_rate": 5.8961396537042304e-05, "loss": 0.0068118274211883545, "step": 144640 }, { "epoch": 41.05875674141357, "grad_norm": 0.030550427734851837, "learning_rate": 5.895855804711894e-05, "loss": 0.0008579207584261894, "step": 144650 }, { "epoch": 41.06159523133693, "grad_norm": 15.132512092590332, "learning_rate": 5.895571955719558e-05, "loss": 0.01919366419315338, "step": 144660 }, { "epoch": 41.064433721260286, "grad_norm": 21.779508590698242, "learning_rate": 5.8952881067272215e-05, "loss": 0.01040000021457672, "step": 144670 }, { "epoch": 41.06727221118365, "grad_norm": 0.026017799973487854, "learning_rate": 5.895004257734885e-05, "loss": 0.0019133087247610093, "step": 144680 }, { "epoch": 41.07011070110701, "grad_norm": 0.6309369206428528, "learning_rate": 5.894720408742549e-05, "loss": 0.0006341924890875816, "step": 144690 }, { "epoch": 41.072949191030375, "grad_norm": 0.012568322941660881, "learning_rate": 5.8944365597502125e-05, "loss": 0.0006431559100747109, "step": 144700 }, { "epoch": 41.07578768095373, "grad_norm": 0.0832361951470375, "learning_rate": 5.894152710757877e-05, "loss": 0.00044695138931274416, "step": 144710 }, { "epoch": 41.078626170877094, "grad_norm": 0.7290443778038025, "learning_rate": 5.8938688617655415e-05, "loss": 0.0009395722299814224, "step": 144720 }, { "epoch": 41.08146466080046, "grad_norm": 0.05872228369116783, "learning_rate": 5.893585012773205e-05, "loss": 0.0034778788685798645, "step": 144730 }, { "epoch": 41.08430315072381, "grad_norm": 0.0184700358659029, "learning_rate": 5.893301163780869e-05, "loss": 0.0009902486577630042, "step": 144740 }, { "epoch": 41.087141640647175, "grad_norm": 2.1326634883880615, "learning_rate": 5.8930173147885325e-05, "loss": 0.00894305258989334, "step": 144750 }, { "epoch": 41.08998013057054, "grad_norm": 0.19816449284553528, "learning_rate": 5.8927334657961967e-05, "loss": 0.006394730508327484, "step": 144760 }, { "epoch": 41.092818620493894, "grad_norm": 0.10723547637462616, "learning_rate": 5.89244961680386e-05, "loss": 0.009548346698284148, "step": 144770 }, { "epoch": 41.09565711041726, "grad_norm": 0.2586078941822052, "learning_rate": 5.892165767811525e-05, "loss": 0.0029551828280091285, "step": 144780 }, { "epoch": 41.09849560034062, "grad_norm": 0.006448034197092056, "learning_rate": 5.8918819188191884e-05, "loss": 0.0050257608294487, "step": 144790 }, { "epoch": 41.10133409026398, "grad_norm": 0.2986944019794464, "learning_rate": 5.8915980698268525e-05, "loss": 0.008844950050115586, "step": 144800 }, { "epoch": 41.10417258018734, "grad_norm": 0.00902221817523241, "learning_rate": 5.891314220834516e-05, "loss": 0.007380933314561844, "step": 144810 }, { "epoch": 41.1070110701107, "grad_norm": 0.11151455342769623, "learning_rate": 5.89103037184218e-05, "loss": 0.002139158546924591, "step": 144820 }, { "epoch": 41.109849560034064, "grad_norm": 1.2141203880310059, "learning_rate": 5.8907465228498436e-05, "loss": 0.009808076173067093, "step": 144830 }, { "epoch": 41.11268804995742, "grad_norm": 7.722042083740234, "learning_rate": 5.8904626738575084e-05, "loss": 0.025947058200836183, "step": 144840 }, { "epoch": 41.11552653988078, "grad_norm": 1.430345892906189, "learning_rate": 5.8901788248651725e-05, "loss": 0.006334872543811798, "step": 144850 }, { "epoch": 41.118365029804146, "grad_norm": 0.153959721326828, "learning_rate": 5.889894975872836e-05, "loss": 0.0007213087752461434, "step": 144860 }, { "epoch": 41.1212035197275, "grad_norm": 0.7330533862113953, "learning_rate": 5.8896111268805e-05, "loss": 0.0019706562161445618, "step": 144870 }, { "epoch": 41.124042009650864, "grad_norm": 0.06862904131412506, "learning_rate": 5.8893272778881636e-05, "loss": 0.007902867347002029, "step": 144880 }, { "epoch": 41.12688049957423, "grad_norm": 14.054582595825195, "learning_rate": 5.889043428895827e-05, "loss": 0.01762775331735611, "step": 144890 }, { "epoch": 41.12971898949759, "grad_norm": 0.5380801558494568, "learning_rate": 5.888759579903491e-05, "loss": 0.002777547389268875, "step": 144900 }, { "epoch": 41.132557479420946, "grad_norm": 0.7241384387016296, "learning_rate": 5.888475730911156e-05, "loss": 0.0009766379371285439, "step": 144910 }, { "epoch": 41.13539596934431, "grad_norm": 0.174872025847435, "learning_rate": 5.8881918819188195e-05, "loss": 0.0022920602932572365, "step": 144920 }, { "epoch": 41.13823445926767, "grad_norm": 0.44176143407821655, "learning_rate": 5.8879080329264836e-05, "loss": 0.010026943683624268, "step": 144930 }, { "epoch": 41.14107294919103, "grad_norm": 9.389435768127441, "learning_rate": 5.887624183934147e-05, "loss": 0.00444253571331501, "step": 144940 }, { "epoch": 41.14391143911439, "grad_norm": 0.1750490367412567, "learning_rate": 5.887340334941811e-05, "loss": 0.0013035297393798827, "step": 144950 }, { "epoch": 41.14674992903775, "grad_norm": 0.43471288681030273, "learning_rate": 5.887056485949475e-05, "loss": 0.0019206492230296134, "step": 144960 }, { "epoch": 41.149588418961116, "grad_norm": 0.39026525616645813, "learning_rate": 5.886772636957139e-05, "loss": 0.009408944100141526, "step": 144970 }, { "epoch": 41.15242690888447, "grad_norm": 0.15557347238063812, "learning_rate": 5.8864887879648036e-05, "loss": 0.012084005773067475, "step": 144980 }, { "epoch": 41.155265398807835, "grad_norm": 0.24035830795764923, "learning_rate": 5.886204938972467e-05, "loss": 0.0006706902757287025, "step": 144990 }, { "epoch": 41.1581038887312, "grad_norm": 0.021844398230314255, "learning_rate": 5.885921089980131e-05, "loss": 0.0008088205009698868, "step": 145000 }, { "epoch": 41.1581038887312, "eval_accuracy": 0.9741845234310421, "eval_loss": 0.09154852479696274, "eval_runtime": 31.6504, "eval_samples_per_second": 496.898, "eval_steps_per_second": 7.772, "step": 145000 }, { "epoch": 41.16094237865455, "grad_norm": 0.04944935068488121, "learning_rate": 5.885637240987795e-05, "loss": 0.002171516790986061, "step": 145010 }, { "epoch": 41.163780868577916, "grad_norm": 5.444542407989502, "learning_rate": 5.885353391995458e-05, "loss": 0.0021147722378373145, "step": 145020 }, { "epoch": 41.16661935850128, "grad_norm": 0.28274980187416077, "learning_rate": 5.885069543003122e-05, "loss": 0.0013165883719921112, "step": 145030 }, { "epoch": 41.169457848424635, "grad_norm": 1.0897352695465088, "learning_rate": 5.884785694010787e-05, "loss": 0.0014064969494938851, "step": 145040 }, { "epoch": 41.172296338348, "grad_norm": 0.014121860265731812, "learning_rate": 5.8845018450184505e-05, "loss": 0.0024939673021435738, "step": 145050 }, { "epoch": 41.17513482827136, "grad_norm": 0.0752185583114624, "learning_rate": 5.884217996026115e-05, "loss": 0.0042482312768697735, "step": 145060 }, { "epoch": 41.177973318194724, "grad_norm": 0.6332480311393738, "learning_rate": 5.883934147033778e-05, "loss": 0.0029303392395377157, "step": 145070 }, { "epoch": 41.18081180811808, "grad_norm": 0.1931389719247818, "learning_rate": 5.883650298041442e-05, "loss": 0.00168503075838089, "step": 145080 }, { "epoch": 41.18365029804144, "grad_norm": 2.711085081100464, "learning_rate": 5.883366449049106e-05, "loss": 0.0037287812680006026, "step": 145090 }, { "epoch": 41.186488787964805, "grad_norm": 2.196972608566284, "learning_rate": 5.88308260005677e-05, "loss": 0.0017890317365527153, "step": 145100 }, { "epoch": 41.18932727788816, "grad_norm": 0.3981809616088867, "learning_rate": 5.882798751064435e-05, "loss": 0.010249429941177368, "step": 145110 }, { "epoch": 41.192165767811524, "grad_norm": 20.459148406982422, "learning_rate": 5.882514902072098e-05, "loss": 0.013088300824165344, "step": 145120 }, { "epoch": 41.19500425773489, "grad_norm": 0.2179378718137741, "learning_rate": 5.882231053079762e-05, "loss": 0.0017420345917344092, "step": 145130 }, { "epoch": 41.19784274765824, "grad_norm": 0.22374498844146729, "learning_rate": 5.881947204087426e-05, "loss": 0.0007976058870553971, "step": 145140 }, { "epoch": 41.200681237581605, "grad_norm": 0.10189680755138397, "learning_rate": 5.881663355095089e-05, "loss": 0.0005658766254782677, "step": 145150 }, { "epoch": 41.20351972750497, "grad_norm": 1.3007010221481323, "learning_rate": 5.8813795061027533e-05, "loss": 0.002078183926641941, "step": 145160 }, { "epoch": 41.20635821742833, "grad_norm": 0.15351806581020355, "learning_rate": 5.881095657110417e-05, "loss": 0.0015081293880939485, "step": 145170 }, { "epoch": 41.20919670735169, "grad_norm": 12.227166175842285, "learning_rate": 5.8808118081180816e-05, "loss": 0.0032019950449466705, "step": 145180 }, { "epoch": 41.21203519727505, "grad_norm": 0.04119537025690079, "learning_rate": 5.880527959125746e-05, "loss": 0.0008252900093793869, "step": 145190 }, { "epoch": 41.21487368719841, "grad_norm": 0.12294608354568481, "learning_rate": 5.880244110133409e-05, "loss": 0.0005951128900051117, "step": 145200 }, { "epoch": 41.21771217712177, "grad_norm": 0.018754906952381134, "learning_rate": 5.8799602611410734e-05, "loss": 0.008452561497688294, "step": 145210 }, { "epoch": 41.22055066704513, "grad_norm": 0.1233539804816246, "learning_rate": 5.879676412148737e-05, "loss": 0.0011247556656599044, "step": 145220 }, { "epoch": 41.223389156968494, "grad_norm": 0.1056794673204422, "learning_rate": 5.879392563156401e-05, "loss": 0.0011659959331154824, "step": 145230 }, { "epoch": 41.22622764689185, "grad_norm": 0.024493688717484474, "learning_rate": 5.879108714164066e-05, "loss": 0.002005003020167351, "step": 145240 }, { "epoch": 41.22906613681521, "grad_norm": 0.3306806683540344, "learning_rate": 5.878824865171729e-05, "loss": 0.0010999446734786034, "step": 145250 }, { "epoch": 41.231904626738576, "grad_norm": 7.67410135269165, "learning_rate": 5.878541016179393e-05, "loss": 0.005765287578105927, "step": 145260 }, { "epoch": 41.23474311666194, "grad_norm": 0.2873842716217041, "learning_rate": 5.878257167187057e-05, "loss": 0.008141850680112838, "step": 145270 }, { "epoch": 41.237581606585294, "grad_norm": 0.6817256808280945, "learning_rate": 5.87797331819472e-05, "loss": 0.0006693607196211815, "step": 145280 }, { "epoch": 41.24042009650866, "grad_norm": 0.21207401156425476, "learning_rate": 5.8776894692023844e-05, "loss": 0.005440269410610199, "step": 145290 }, { "epoch": 41.24325858643202, "grad_norm": 0.6745044589042664, "learning_rate": 5.877405620210048e-05, "loss": 0.003842515125870705, "step": 145300 }, { "epoch": 41.246097076355376, "grad_norm": 0.0258292555809021, "learning_rate": 5.877121771217713e-05, "loss": 0.0012391526252031326, "step": 145310 }, { "epoch": 41.24893556627874, "grad_norm": 7.156068801879883, "learning_rate": 5.876837922225377e-05, "loss": 0.008458669483661651, "step": 145320 }, { "epoch": 41.2517740562021, "grad_norm": 0.9577639698982239, "learning_rate": 5.87655407323304e-05, "loss": 0.0007464131340384483, "step": 145330 }, { "epoch": 41.254612546125465, "grad_norm": 0.06425510346889496, "learning_rate": 5.8762702242407044e-05, "loss": 0.008060353249311447, "step": 145340 }, { "epoch": 41.25745103604882, "grad_norm": 0.21420449018478394, "learning_rate": 5.875986375248368e-05, "loss": 0.009614725410938264, "step": 145350 }, { "epoch": 41.26028952597218, "grad_norm": 0.5821102261543274, "learning_rate": 5.8757025262560314e-05, "loss": 0.0005190258845686913, "step": 145360 }, { "epoch": 41.263128015895546, "grad_norm": 0.008450837805867195, "learning_rate": 5.875418677263697e-05, "loss": 0.0007219363003969192, "step": 145370 }, { "epoch": 41.2659665058189, "grad_norm": 0.1311962902545929, "learning_rate": 5.87513482827136e-05, "loss": 0.0009339587762951851, "step": 145380 }, { "epoch": 41.268804995742265, "grad_norm": 0.0633564293384552, "learning_rate": 5.874850979279024e-05, "loss": 0.0006327318027615547, "step": 145390 }, { "epoch": 41.27164348566563, "grad_norm": 9.734660148620605, "learning_rate": 5.874567130286688e-05, "loss": 0.007412242889404297, "step": 145400 }, { "epoch": 41.274481975588984, "grad_norm": 0.03324989229440689, "learning_rate": 5.8742832812943514e-05, "loss": 0.001157754845917225, "step": 145410 }, { "epoch": 41.27732046551235, "grad_norm": 0.07201370596885681, "learning_rate": 5.8739994323020155e-05, "loss": 0.004410015046596527, "step": 145420 }, { "epoch": 41.28015895543571, "grad_norm": 0.029034489765763283, "learning_rate": 5.873715583309679e-05, "loss": 0.001518348976969719, "step": 145430 }, { "epoch": 41.28299744535907, "grad_norm": 0.7649355530738831, "learning_rate": 5.873431734317344e-05, "loss": 0.0031650718301534653, "step": 145440 }, { "epoch": 41.28583593528243, "grad_norm": 0.2929200530052185, "learning_rate": 5.873147885325008e-05, "loss": 0.0032642263919115066, "step": 145450 }, { "epoch": 41.28867442520579, "grad_norm": 0.11481241136789322, "learning_rate": 5.8728640363326714e-05, "loss": 0.003931598365306854, "step": 145460 }, { "epoch": 41.291512915129154, "grad_norm": 4.839142799377441, "learning_rate": 5.8725801873403355e-05, "loss": 0.0013753734529018402, "step": 145470 }, { "epoch": 41.29435140505251, "grad_norm": 9.920478820800781, "learning_rate": 5.872296338347999e-05, "loss": 0.0020329605787992476, "step": 145480 }, { "epoch": 41.29718989497587, "grad_norm": 6.25946569442749, "learning_rate": 5.8720124893556624e-05, "loss": 0.004577324539422989, "step": 145490 }, { "epoch": 41.300028384899235, "grad_norm": 0.11472012102603912, "learning_rate": 5.8717286403633266e-05, "loss": 0.005163762718439102, "step": 145500 }, { "epoch": 41.300028384899235, "eval_accuracy": 0.9755198067018503, "eval_loss": 0.09338675439357758, "eval_runtime": 32.2479, "eval_samples_per_second": 487.691, "eval_steps_per_second": 7.628, "step": 145500 }, { "epoch": 41.30286687482259, "grad_norm": 0.07173950970172882, "learning_rate": 5.8714447913709914e-05, "loss": 0.010778900235891342, "step": 145510 }, { "epoch": 41.305705364745954, "grad_norm": 1.6479077339172363, "learning_rate": 5.871160942378655e-05, "loss": 0.0010015567764639854, "step": 145520 }, { "epoch": 41.30854385466932, "grad_norm": 0.04712911695241928, "learning_rate": 5.870877093386319e-05, "loss": 0.011802864074707032, "step": 145530 }, { "epoch": 41.31138234459268, "grad_norm": 0.3111684322357178, "learning_rate": 5.8705932443939824e-05, "loss": 0.002516550198197365, "step": 145540 }, { "epoch": 41.314220834516036, "grad_norm": 9.086713790893555, "learning_rate": 5.8703093954016466e-05, "loss": 0.006249541044235229, "step": 145550 }, { "epoch": 41.3170593244394, "grad_norm": 0.04649857059121132, "learning_rate": 5.87002554640931e-05, "loss": 0.0026647591963410377, "step": 145560 }, { "epoch": 41.31989781436276, "grad_norm": 1.4562132358551025, "learning_rate": 5.869741697416975e-05, "loss": 0.0009696928784251214, "step": 145570 }, { "epoch": 41.32273630428612, "grad_norm": 4.884853839874268, "learning_rate": 5.869457848424639e-05, "loss": 0.004829346761107444, "step": 145580 }, { "epoch": 41.32557479420948, "grad_norm": 0.13710953295230865, "learning_rate": 5.8691739994323025e-05, "loss": 0.013420160114765167, "step": 145590 }, { "epoch": 41.32841328413284, "grad_norm": 1.368688702583313, "learning_rate": 5.8688901504399666e-05, "loss": 0.0017476556822657585, "step": 145600 }, { "epoch": 41.3312517740562, "grad_norm": 0.7805861234664917, "learning_rate": 5.86860630144763e-05, "loss": 0.0015975465998053552, "step": 145610 }, { "epoch": 41.33409026397956, "grad_norm": 0.08170569688081741, "learning_rate": 5.8683224524552935e-05, "loss": 0.01065317764878273, "step": 145620 }, { "epoch": 41.336928753902924, "grad_norm": 2.0264453887939453, "learning_rate": 5.8680386034629576e-05, "loss": 0.005311264842748642, "step": 145630 }, { "epoch": 41.33976724382629, "grad_norm": 0.6953568458557129, "learning_rate": 5.8677547544706225e-05, "loss": 0.0026627188548445702, "step": 145640 }, { "epoch": 41.34260573374964, "grad_norm": 1.5391521453857422, "learning_rate": 5.867470905478286e-05, "loss": 0.0055102802813053135, "step": 145650 }, { "epoch": 41.345444223673006, "grad_norm": 0.07290791720151901, "learning_rate": 5.86718705648595e-05, "loss": 0.0025353509932756423, "step": 145660 }, { "epoch": 41.34828271359637, "grad_norm": 0.09601853787899017, "learning_rate": 5.8669032074936135e-05, "loss": 0.0034578032791614532, "step": 145670 }, { "epoch": 41.351121203519725, "grad_norm": 1.0158658027648926, "learning_rate": 5.8666193585012777e-05, "loss": 0.0020593130961060525, "step": 145680 }, { "epoch": 41.35395969344309, "grad_norm": 5.684053421020508, "learning_rate": 5.866335509508941e-05, "loss": 0.004532975703477859, "step": 145690 }, { "epoch": 41.35679818336645, "grad_norm": 0.06394979357719421, "learning_rate": 5.866051660516605e-05, "loss": 0.007678359001874924, "step": 145700 }, { "epoch": 41.35963667328981, "grad_norm": 0.7588818073272705, "learning_rate": 5.86576781152427e-05, "loss": 0.0011769847944378853, "step": 145710 }, { "epoch": 41.36247516321317, "grad_norm": 0.534053385257721, "learning_rate": 5.8654839625319335e-05, "loss": 0.00214748103171587, "step": 145720 }, { "epoch": 41.36531365313653, "grad_norm": 0.1367609053850174, "learning_rate": 5.865200113539597e-05, "loss": 0.0035141266882419584, "step": 145730 }, { "epoch": 41.368152143059895, "grad_norm": 0.217321515083313, "learning_rate": 5.864916264547261e-05, "loss": 0.0026524193584918977, "step": 145740 }, { "epoch": 41.37099063298325, "grad_norm": 0.034695882350206375, "learning_rate": 5.8646324155549246e-05, "loss": 0.007649475336074829, "step": 145750 }, { "epoch": 41.37382912290661, "grad_norm": 3.788834810256958, "learning_rate": 5.864348566562589e-05, "loss": 0.003025222010910511, "step": 145760 }, { "epoch": 41.376667612829976, "grad_norm": 1.6785097122192383, "learning_rate": 5.8640647175702535e-05, "loss": 0.007743267714977265, "step": 145770 }, { "epoch": 41.37950610275333, "grad_norm": 0.09142900258302689, "learning_rate": 5.863780868577917e-05, "loss": 0.0012349803000688553, "step": 145780 }, { "epoch": 41.382344592676695, "grad_norm": 0.8324795961380005, "learning_rate": 5.863497019585581e-05, "loss": 0.003918953984975815, "step": 145790 }, { "epoch": 41.38518308260006, "grad_norm": 0.1989109069108963, "learning_rate": 5.8632131705932446e-05, "loss": 0.00940801203250885, "step": 145800 }, { "epoch": 41.38802157252342, "grad_norm": 2.302684783935547, "learning_rate": 5.862929321600909e-05, "loss": 0.015047499537467956, "step": 145810 }, { "epoch": 41.39086006244678, "grad_norm": 2.939088821411133, "learning_rate": 5.862645472608572e-05, "loss": 0.009790368378162384, "step": 145820 }, { "epoch": 41.39369855237014, "grad_norm": 0.38063910603523254, "learning_rate": 5.8623616236162357e-05, "loss": 0.0014207422733306884, "step": 145830 }, { "epoch": 41.3965370422935, "grad_norm": 0.25288718938827515, "learning_rate": 5.862077774623901e-05, "loss": 0.006240326911211014, "step": 145840 }, { "epoch": 41.39937553221686, "grad_norm": 34.06725311279297, "learning_rate": 5.8617939256315646e-05, "loss": 0.020991221070289612, "step": 145850 }, { "epoch": 41.40221402214022, "grad_norm": 0.08132422715425491, "learning_rate": 5.861510076639228e-05, "loss": 0.013836082816123963, "step": 145860 }, { "epoch": 41.405052512063584, "grad_norm": 0.782006561756134, "learning_rate": 5.861226227646892e-05, "loss": 0.004109282791614532, "step": 145870 }, { "epoch": 41.40789100198694, "grad_norm": 0.8225546479225159, "learning_rate": 5.860942378654556e-05, "loss": 0.0011184751987457276, "step": 145880 }, { "epoch": 41.4107294919103, "grad_norm": 0.42617157101631165, "learning_rate": 5.86065852966222e-05, "loss": 0.0021033253520727157, "step": 145890 }, { "epoch": 41.413567981833665, "grad_norm": 0.9703125953674316, "learning_rate": 5.860374680669883e-05, "loss": 0.0019806453958153723, "step": 145900 }, { "epoch": 41.41640647175703, "grad_norm": 0.03499014675617218, "learning_rate": 5.860090831677548e-05, "loss": 0.010398822277784348, "step": 145910 }, { "epoch": 41.419244961680384, "grad_norm": 2.9516637325286865, "learning_rate": 5.859806982685212e-05, "loss": 0.01679158806800842, "step": 145920 }, { "epoch": 41.42208345160375, "grad_norm": 0.27762582898139954, "learning_rate": 5.859523133692876e-05, "loss": 0.005511163175106049, "step": 145930 }, { "epoch": 41.42492194152711, "grad_norm": 0.32349109649658203, "learning_rate": 5.85923928470054e-05, "loss": 0.0019127661362290382, "step": 145940 }, { "epoch": 41.427760431450466, "grad_norm": 0.8312756419181824, "learning_rate": 5.858955435708203e-05, "loss": 0.0010938579216599464, "step": 145950 }, { "epoch": 41.43059892137383, "grad_norm": 0.03454333171248436, "learning_rate": 5.858671586715867e-05, "loss": 0.001368466205894947, "step": 145960 }, { "epoch": 41.43343741129719, "grad_norm": 1.7128342390060425, "learning_rate": 5.858387737723532e-05, "loss": 0.001388213410973549, "step": 145970 }, { "epoch": 41.43627590122055, "grad_norm": 0.6372775435447693, "learning_rate": 5.858103888731196e-05, "loss": 0.0012451563030481338, "step": 145980 }, { "epoch": 41.43911439114391, "grad_norm": 0.5016331076622009, "learning_rate": 5.857820039738859e-05, "loss": 0.0017490141093730927, "step": 145990 }, { "epoch": 41.44195288106727, "grad_norm": 2.7045888900756836, "learning_rate": 5.857536190746523e-05, "loss": 0.0021271197125315665, "step": 146000 }, { "epoch": 41.44195288106727, "eval_accuracy": 0.9755833916195078, "eval_loss": 0.08457674086093903, "eval_runtime": 32.2163, "eval_samples_per_second": 488.169, "eval_steps_per_second": 7.636, "step": 146000 }, { "epoch": 41.444791370990636, "grad_norm": 0.15012149512767792, "learning_rate": 5.857252341754187e-05, "loss": 0.00483151376247406, "step": 146010 }, { "epoch": 41.44762986091399, "grad_norm": 2.36712908744812, "learning_rate": 5.856968492761851e-05, "loss": 0.0020745106041431425, "step": 146020 }, { "epoch": 41.450468350837355, "grad_norm": 0.8119973540306091, "learning_rate": 5.856684643769514e-05, "loss": 0.002600039727985859, "step": 146030 }, { "epoch": 41.45330684076072, "grad_norm": 1.290583848953247, "learning_rate": 5.856400794777179e-05, "loss": 0.0017417054623365402, "step": 146040 }, { "epoch": 41.45614533068407, "grad_norm": 2.629918336868286, "learning_rate": 5.856116945784843e-05, "loss": 0.012116938829421997, "step": 146050 }, { "epoch": 41.458983820607436, "grad_norm": 0.017288506031036377, "learning_rate": 5.855833096792507e-05, "loss": 0.0030928194522857667, "step": 146060 }, { "epoch": 41.4618223105308, "grad_norm": 0.027421049773693085, "learning_rate": 5.855549247800171e-05, "loss": 0.003031834028661251, "step": 146070 }, { "epoch": 41.464660800454155, "grad_norm": 0.6058752536773682, "learning_rate": 5.8552653988078343e-05, "loss": 0.008507491648197174, "step": 146080 }, { "epoch": 41.46749929037752, "grad_norm": 0.07510282099246979, "learning_rate": 5.854981549815498e-05, "loss": 0.0008268224075436593, "step": 146090 }, { "epoch": 41.47033778030088, "grad_norm": 1.5502657890319824, "learning_rate": 5.854697700823162e-05, "loss": 0.0015598092228174209, "step": 146100 }, { "epoch": 41.47317627022424, "grad_norm": 0.13633672893047333, "learning_rate": 5.854413851830827e-05, "loss": 0.0009845592081546783, "step": 146110 }, { "epoch": 41.4760147601476, "grad_norm": 0.07037965208292007, "learning_rate": 5.85413000283849e-05, "loss": 0.003234618902206421, "step": 146120 }, { "epoch": 41.47885325007096, "grad_norm": 0.558222770690918, "learning_rate": 5.8538461538461544e-05, "loss": 0.001115960069000721, "step": 146130 }, { "epoch": 41.481691739994325, "grad_norm": 1.0038466453552246, "learning_rate": 5.853562304853818e-05, "loss": 0.0006118463352322578, "step": 146140 }, { "epoch": 41.48453022991768, "grad_norm": 2.4307286739349365, "learning_rate": 5.853278455861482e-05, "loss": 0.0014129804447293281, "step": 146150 }, { "epoch": 41.487368719841044, "grad_norm": 0.030612953007221222, "learning_rate": 5.8529946068691454e-05, "loss": 0.001954994536936283, "step": 146160 }, { "epoch": 41.49020720976441, "grad_norm": 1.3565865755081177, "learning_rate": 5.85271075787681e-05, "loss": 0.006973596662282944, "step": 146170 }, { "epoch": 41.49304569968777, "grad_norm": 0.11380746215581894, "learning_rate": 5.8524269088844744e-05, "loss": 0.001620149053633213, "step": 146180 }, { "epoch": 41.495884189611125, "grad_norm": 0.09980122745037079, "learning_rate": 5.852143059892138e-05, "loss": 0.003427586704492569, "step": 146190 }, { "epoch": 41.49872267953449, "grad_norm": 0.3162451982498169, "learning_rate": 5.851859210899801e-05, "loss": 0.006603902578353882, "step": 146200 }, { "epoch": 41.50156116945785, "grad_norm": 0.037389349192380905, "learning_rate": 5.8515753619074654e-05, "loss": 0.0033258195966482163, "step": 146210 }, { "epoch": 41.50439965938121, "grad_norm": 0.0722975879907608, "learning_rate": 5.851291512915129e-05, "loss": 0.0030925540253520013, "step": 146220 }, { "epoch": 41.50723814930457, "grad_norm": 0.941077470779419, "learning_rate": 5.851007663922793e-05, "loss": 0.003653224930167198, "step": 146230 }, { "epoch": 41.51007663922793, "grad_norm": 0.14868809282779694, "learning_rate": 5.850723814930458e-05, "loss": 0.0018859857693314553, "step": 146240 }, { "epoch": 41.51291512915129, "grad_norm": 0.26543790102005005, "learning_rate": 5.850439965938121e-05, "loss": 0.0016311092302203178, "step": 146250 }, { "epoch": 41.51575361907465, "grad_norm": 0.6739418506622314, "learning_rate": 5.8501561169457854e-05, "loss": 0.002829033508896828, "step": 146260 }, { "epoch": 41.518592108998014, "grad_norm": 0.05034260079264641, "learning_rate": 5.849872267953449e-05, "loss": 0.006151802837848663, "step": 146270 }, { "epoch": 41.52143059892138, "grad_norm": 0.18017621338367462, "learning_rate": 5.849588418961113e-05, "loss": 0.001409577578306198, "step": 146280 }, { "epoch": 41.52426908884473, "grad_norm": 0.020883165299892426, "learning_rate": 5.8493045699687765e-05, "loss": 0.0012053649872541428, "step": 146290 }, { "epoch": 41.527107578768096, "grad_norm": 0.027409229427576065, "learning_rate": 5.84902072097644e-05, "loss": 0.0016215600073337555, "step": 146300 }, { "epoch": 41.52994606869146, "grad_norm": 0.2754102945327759, "learning_rate": 5.8487368719841054e-05, "loss": 0.003755996376276016, "step": 146310 }, { "epoch": 41.532784558614814, "grad_norm": 0.04513297602534294, "learning_rate": 5.848453022991769e-05, "loss": 0.0024851767346262934, "step": 146320 }, { "epoch": 41.53562304853818, "grad_norm": 0.10702929645776749, "learning_rate": 5.8481691739994324e-05, "loss": 0.0011313427239656448, "step": 146330 }, { "epoch": 41.53846153846154, "grad_norm": 0.024906206876039505, "learning_rate": 5.8478853250070965e-05, "loss": 0.0014658907428383827, "step": 146340 }, { "epoch": 41.541300028384896, "grad_norm": 0.018419666215777397, "learning_rate": 5.84760147601476e-05, "loss": 0.0027098411694169043, "step": 146350 }, { "epoch": 41.54413851830826, "grad_norm": 0.6486563086509705, "learning_rate": 5.847317627022424e-05, "loss": 0.006957846134901047, "step": 146360 }, { "epoch": 41.54697700823162, "grad_norm": 0.34428128600120544, "learning_rate": 5.847033778030089e-05, "loss": 0.0010294463485479356, "step": 146370 }, { "epoch": 41.549815498154985, "grad_norm": 5.296970367431641, "learning_rate": 5.8467499290377524e-05, "loss": 0.0021519895642995836, "step": 146380 }, { "epoch": 41.55265398807834, "grad_norm": 0.21933986246585846, "learning_rate": 5.8464660800454165e-05, "loss": 0.0022130172699689867, "step": 146390 }, { "epoch": 41.5554924780017, "grad_norm": 0.6236490607261658, "learning_rate": 5.84618223105308e-05, "loss": 0.004703926295042038, "step": 146400 }, { "epoch": 41.558330967925066, "grad_norm": 1.012663722038269, "learning_rate": 5.845898382060744e-05, "loss": 0.001440688967704773, "step": 146410 }, { "epoch": 41.56116945784842, "grad_norm": 0.08192196488380432, "learning_rate": 5.8456145330684076e-05, "loss": 0.0007352463901042939, "step": 146420 }, { "epoch": 41.564007947771785, "grad_norm": 3.1840624809265137, "learning_rate": 5.845330684076071e-05, "loss": 0.0022382942959666254, "step": 146430 }, { "epoch": 41.56684643769515, "grad_norm": 13.333020210266113, "learning_rate": 5.8450468350837365e-05, "loss": 0.0022849526256322862, "step": 146440 }, { "epoch": 41.56968492761851, "grad_norm": 1.0773590803146362, "learning_rate": 5.8447629860914e-05, "loss": 0.005852021276950836, "step": 146450 }, { "epoch": 41.572523417541866, "grad_norm": 3.876779556274414, "learning_rate": 5.8444791370990634e-05, "loss": 0.00789552479982376, "step": 146460 }, { "epoch": 41.57536190746523, "grad_norm": 0.21269336342811584, "learning_rate": 5.8441952881067276e-05, "loss": 0.0036786511540412904, "step": 146470 }, { "epoch": 41.57820039738859, "grad_norm": 0.03821628540754318, "learning_rate": 5.843911439114391e-05, "loss": 0.0013611899688839913, "step": 146480 }, { "epoch": 41.58103888731195, "grad_norm": 0.045921362936496735, "learning_rate": 5.843627590122055e-05, "loss": 0.00150004792958498, "step": 146490 }, { "epoch": 41.58387737723531, "grad_norm": 1.1227059364318848, "learning_rate": 5.8433437411297186e-05, "loss": 0.0010045304894447326, "step": 146500 }, { "epoch": 41.58387737723531, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.08926868438720703, "eval_runtime": 32.4508, "eval_samples_per_second": 484.642, "eval_steps_per_second": 7.581, "step": 146500 }, { "epoch": 41.586715867158674, "grad_norm": 0.06089174374938011, "learning_rate": 5.8430598921373834e-05, "loss": 0.007467126846313477, "step": 146510 }, { "epoch": 41.58955435708203, "grad_norm": 0.19324269890785217, "learning_rate": 5.8427760431450476e-05, "loss": 0.007337193936109543, "step": 146520 }, { "epoch": 41.59239284700539, "grad_norm": 0.8590618371963501, "learning_rate": 5.842492194152711e-05, "loss": 0.015850162506103514, "step": 146530 }, { "epoch": 41.595231336928755, "grad_norm": 0.10166192054748535, "learning_rate": 5.842208345160375e-05, "loss": 0.0023206565529108047, "step": 146540 }, { "epoch": 41.59806982685212, "grad_norm": 0.02678696997463703, "learning_rate": 5.8419244961680386e-05, "loss": 0.0014745475724339484, "step": 146550 }, { "epoch": 41.600908316775474, "grad_norm": 0.15094992518424988, "learning_rate": 5.841640647175702e-05, "loss": 0.0025883780792355536, "step": 146560 }, { "epoch": 41.60374680669884, "grad_norm": 0.1696808785200119, "learning_rate": 5.841356798183367e-05, "loss": 0.0011387221515178681, "step": 146570 }, { "epoch": 41.6065852966222, "grad_norm": 0.1471494436264038, "learning_rate": 5.841072949191031e-05, "loss": 0.000934501364827156, "step": 146580 }, { "epoch": 41.609423786545555, "grad_norm": 0.0824422538280487, "learning_rate": 5.8407891001986945e-05, "loss": 0.0021930107846856116, "step": 146590 }, { "epoch": 41.61226227646892, "grad_norm": 3.8588407039642334, "learning_rate": 5.8405052512063587e-05, "loss": 0.005647891014814377, "step": 146600 }, { "epoch": 41.61510076639228, "grad_norm": 0.5680772662162781, "learning_rate": 5.840221402214022e-05, "loss": 0.0016940992325544357, "step": 146610 }, { "epoch": 41.61793925631564, "grad_norm": 0.15707533061504364, "learning_rate": 5.839937553221686e-05, "loss": 0.001939178816974163, "step": 146620 }, { "epoch": 41.620777746239, "grad_norm": 0.7400066256523132, "learning_rate": 5.83965370422935e-05, "loss": 0.00835677981376648, "step": 146630 }, { "epoch": 41.62361623616236, "grad_norm": 0.18991097807884216, "learning_rate": 5.8393698552370145e-05, "loss": 0.004536560922861099, "step": 146640 }, { "epoch": 41.626454726085726, "grad_norm": 0.8818489909172058, "learning_rate": 5.839086006244679e-05, "loss": 0.0011925742030143738, "step": 146650 }, { "epoch": 41.62929321600908, "grad_norm": 0.5600959658622742, "learning_rate": 5.838802157252342e-05, "loss": 0.004391982033848762, "step": 146660 }, { "epoch": 41.632131705932444, "grad_norm": 1.530693531036377, "learning_rate": 5.8385183082600056e-05, "loss": 0.004468155652284622, "step": 146670 }, { "epoch": 41.63497019585581, "grad_norm": 1.5133119821548462, "learning_rate": 5.83823445926767e-05, "loss": 0.002784373238682747, "step": 146680 }, { "epoch": 41.63780868577916, "grad_norm": 0.01541772298514843, "learning_rate": 5.837950610275333e-05, "loss": 0.0006218397989869118, "step": 146690 }, { "epoch": 41.640647175702526, "grad_norm": 0.1743476837873459, "learning_rate": 5.837666761282998e-05, "loss": 0.0012261386960744857, "step": 146700 }, { "epoch": 41.64348566562589, "grad_norm": 0.04073362052440643, "learning_rate": 5.837382912290662e-05, "loss": 0.0010330639779567719, "step": 146710 }, { "epoch": 41.646324155549244, "grad_norm": 0.21755410730838776, "learning_rate": 5.8370990632983256e-05, "loss": 0.0016678448766469956, "step": 146720 }, { "epoch": 41.64916264547261, "grad_norm": 1.0872743129730225, "learning_rate": 5.83681521430599e-05, "loss": 0.0011772913858294487, "step": 146730 }, { "epoch": 41.65200113539597, "grad_norm": 3.5790693759918213, "learning_rate": 5.836531365313653e-05, "loss": 0.005790837854146957, "step": 146740 }, { "epoch": 41.65483962531933, "grad_norm": 16.24040412902832, "learning_rate": 5.836247516321317e-05, "loss": 0.010630351305007935, "step": 146750 }, { "epoch": 41.65767811524269, "grad_norm": 0.74370276927948, "learning_rate": 5.835963667328981e-05, "loss": 0.0034596666693687437, "step": 146760 }, { "epoch": 41.66051660516605, "grad_norm": 0.079166479408741, "learning_rate": 5.8356798183366456e-05, "loss": 0.009169553965330124, "step": 146770 }, { "epoch": 41.663355095089415, "grad_norm": 0.9231542348861694, "learning_rate": 5.83539596934431e-05, "loss": 0.001522885262966156, "step": 146780 }, { "epoch": 41.66619358501277, "grad_norm": 0.6771502494812012, "learning_rate": 5.835112120351973e-05, "loss": 0.0025657746940851212, "step": 146790 }, { "epoch": 41.66903207493613, "grad_norm": 13.296609878540039, "learning_rate": 5.8348282713596367e-05, "loss": 0.009668388962745666, "step": 146800 }, { "epoch": 41.671870564859496, "grad_norm": 0.15599903464317322, "learning_rate": 5.834544422367301e-05, "loss": 0.017988750338554384, "step": 146810 }, { "epoch": 41.67470905478285, "grad_norm": 0.08326566219329834, "learning_rate": 5.834260573374964e-05, "loss": 0.004128555208444596, "step": 146820 }, { "epoch": 41.677547544706215, "grad_norm": 1.728201150894165, "learning_rate": 5.8339767243826284e-05, "loss": 0.0039325617253780365, "step": 146830 }, { "epoch": 41.68038603462958, "grad_norm": 0.33730950951576233, "learning_rate": 5.833692875390293e-05, "loss": 0.005415488034486771, "step": 146840 }, { "epoch": 41.68322452455294, "grad_norm": 0.17004677653312683, "learning_rate": 5.833409026397957e-05, "loss": 0.0012051627039909364, "step": 146850 }, { "epoch": 41.6860630144763, "grad_norm": 0.032009221613407135, "learning_rate": 5.833125177405621e-05, "loss": 0.004847260564565659, "step": 146860 }, { "epoch": 41.68890150439966, "grad_norm": 0.035009924322366714, "learning_rate": 5.832841328413284e-05, "loss": 0.003128740191459656, "step": 146870 }, { "epoch": 41.69173999432302, "grad_norm": 1.0089366436004639, "learning_rate": 5.8325574794209484e-05, "loss": 0.0009750666096806527, "step": 146880 }, { "epoch": 41.69457848424638, "grad_norm": 0.11992795765399933, "learning_rate": 5.832273630428612e-05, "loss": 0.003372279927134514, "step": 146890 }, { "epoch": 41.69741697416974, "grad_norm": 4.18795919418335, "learning_rate": 5.831989781436277e-05, "loss": 0.004128644615411759, "step": 146900 }, { "epoch": 41.700255464093104, "grad_norm": 0.07851298898458481, "learning_rate": 5.831705932443941e-05, "loss": 0.005390677601099014, "step": 146910 }, { "epoch": 41.70309395401647, "grad_norm": 0.33531221747398376, "learning_rate": 5.831422083451604e-05, "loss": 0.0010899268090724945, "step": 146920 }, { "epoch": 41.70593244393982, "grad_norm": 0.868411660194397, "learning_rate": 5.831138234459268e-05, "loss": 0.003384992852807045, "step": 146930 }, { "epoch": 41.708770933863185, "grad_norm": 0.737566351890564, "learning_rate": 5.830854385466932e-05, "loss": 0.002056899666786194, "step": 146940 }, { "epoch": 41.71160942378655, "grad_norm": 0.03186449408531189, "learning_rate": 5.830570536474595e-05, "loss": 0.0067067302763462065, "step": 146950 }, { "epoch": 41.714447913709904, "grad_norm": 3.8335111141204834, "learning_rate": 5.8302866874822595e-05, "loss": 0.002956207096576691, "step": 146960 }, { "epoch": 41.71728640363327, "grad_norm": 3.4212467670440674, "learning_rate": 5.830002838489924e-05, "loss": 0.011097586154937744, "step": 146970 }, { "epoch": 41.72012489355663, "grad_norm": 3.9659836292266846, "learning_rate": 5.829718989497588e-05, "loss": 0.015092048048973083, "step": 146980 }, { "epoch": 41.722963383479986, "grad_norm": 11.982810974121094, "learning_rate": 5.829435140505252e-05, "loss": 0.012914462387561798, "step": 146990 }, { "epoch": 41.72580187340335, "grad_norm": 0.0876314714550972, "learning_rate": 5.8291512915129153e-05, "loss": 0.0031192868947982786, "step": 147000 }, { "epoch": 41.72580187340335, "eval_accuracy": 0.972976409995549, "eval_loss": 0.09423615783452988, "eval_runtime": 32.4026, "eval_samples_per_second": 485.363, "eval_steps_per_second": 7.592, "step": 147000 }, { "epoch": 41.72864036332671, "grad_norm": 0.08131708204746246, "learning_rate": 5.8288674425205795e-05, "loss": 0.003147163242101669, "step": 147010 }, { "epoch": 41.731478853250074, "grad_norm": 0.33748137950897217, "learning_rate": 5.828583593528243e-05, "loss": 0.0013698210939764976, "step": 147020 }, { "epoch": 41.73431734317343, "grad_norm": 0.6632359027862549, "learning_rate": 5.8282997445359064e-05, "loss": 0.0026838269084692003, "step": 147030 }, { "epoch": 41.73715583309679, "grad_norm": 0.24051140248775482, "learning_rate": 5.828015895543571e-05, "loss": 0.001899159513413906, "step": 147040 }, { "epoch": 41.739994323020156, "grad_norm": 0.6021842360496521, "learning_rate": 5.8277320465512354e-05, "loss": 0.013184882700443268, "step": 147050 }, { "epoch": 41.74283281294351, "grad_norm": 0.7475828528404236, "learning_rate": 5.827448197558899e-05, "loss": 0.009713547676801682, "step": 147060 }, { "epoch": 41.745671302866874, "grad_norm": 0.5117128491401672, "learning_rate": 5.827164348566563e-05, "loss": 0.015313288569450379, "step": 147070 }, { "epoch": 41.74850979279024, "grad_norm": 0.11252349615097046, "learning_rate": 5.8268804995742264e-05, "loss": 0.002850516326725483, "step": 147080 }, { "epoch": 41.75134828271359, "grad_norm": 0.8572073578834534, "learning_rate": 5.8265966505818905e-05, "loss": 0.0026715762913227083, "step": 147090 }, { "epoch": 41.754186772636956, "grad_norm": 0.11815090477466583, "learning_rate": 5.8263128015895554e-05, "loss": 0.001083856076002121, "step": 147100 }, { "epoch": 41.75702526256032, "grad_norm": 3.042510509490967, "learning_rate": 5.826028952597219e-05, "loss": 0.0012336423620581626, "step": 147110 }, { "epoch": 41.75986375248368, "grad_norm": 0.2670920491218567, "learning_rate": 5.825745103604883e-05, "loss": 0.0025270368903875353, "step": 147120 }, { "epoch": 41.76270224240704, "grad_norm": 0.047469254583120346, "learning_rate": 5.8254612546125464e-05, "loss": 0.004080427438020706, "step": 147130 }, { "epoch": 41.7655407323304, "grad_norm": 0.6935328245162964, "learning_rate": 5.82517740562021e-05, "loss": 0.004740985110402107, "step": 147140 }, { "epoch": 41.76837922225376, "grad_norm": 4.168476581573486, "learning_rate": 5.824893556627874e-05, "loss": 0.002882787026464939, "step": 147150 }, { "epoch": 41.77121771217712, "grad_norm": 0.0887054055929184, "learning_rate": 5.8246097076355375e-05, "loss": 0.008554118871688842, "step": 147160 }, { "epoch": 41.77405620210048, "grad_norm": 0.2847028970718384, "learning_rate": 5.824325858643202e-05, "loss": 0.0011478766798973083, "step": 147170 }, { "epoch": 41.776894692023845, "grad_norm": 0.40821418166160583, "learning_rate": 5.8240420096508664e-05, "loss": 0.012484822422266006, "step": 147180 }, { "epoch": 41.7797331819472, "grad_norm": 0.44162291288375854, "learning_rate": 5.82375816065853e-05, "loss": 0.0021684063598513604, "step": 147190 }, { "epoch": 41.78257167187056, "grad_norm": 0.19811448454856873, "learning_rate": 5.823474311666194e-05, "loss": 0.01060350239276886, "step": 147200 }, { "epoch": 41.785410161793926, "grad_norm": 12.074902534484863, "learning_rate": 5.8231904626738575e-05, "loss": 0.017477259039878845, "step": 147210 }, { "epoch": 41.78824865171729, "grad_norm": 0.6531860828399658, "learning_rate": 5.8229066136815216e-05, "loss": 0.006938712298870086, "step": 147220 }, { "epoch": 41.791087141640645, "grad_norm": 0.047674279659986496, "learning_rate": 5.822622764689185e-05, "loss": 0.0006639098748564721, "step": 147230 }, { "epoch": 41.79392563156401, "grad_norm": 1.0376085042953491, "learning_rate": 5.82233891569685e-05, "loss": 0.005062695965170861, "step": 147240 }, { "epoch": 41.79676412148737, "grad_norm": 0.05884583666920662, "learning_rate": 5.822055066704514e-05, "loss": 0.002892335504293442, "step": 147250 }, { "epoch": 41.79960261141073, "grad_norm": 0.2725791931152344, "learning_rate": 5.8217712177121775e-05, "loss": 0.0029735064134001734, "step": 147260 }, { "epoch": 41.80244110133409, "grad_norm": 0.23383000493049622, "learning_rate": 5.821487368719841e-05, "loss": 0.002071506716310978, "step": 147270 }, { "epoch": 41.80527959125745, "grad_norm": 0.11522078514099121, "learning_rate": 5.821203519727505e-05, "loss": 0.004184079170227051, "step": 147280 }, { "epoch": 41.808118081180815, "grad_norm": 0.07482937723398209, "learning_rate": 5.8209196707351686e-05, "loss": 0.009014660865068436, "step": 147290 }, { "epoch": 41.81095657110417, "grad_norm": 0.04252932593226433, "learning_rate": 5.8206358217428334e-05, "loss": 0.0013432646170258521, "step": 147300 }, { "epoch": 41.813795061027534, "grad_norm": 2.7575271129608154, "learning_rate": 5.8203519727504975e-05, "loss": 0.0031303130090236664, "step": 147310 }, { "epoch": 41.8166335509509, "grad_norm": 0.19590429961681366, "learning_rate": 5.820096508657394e-05, "loss": 0.01184888780117035, "step": 147320 }, { "epoch": 41.81947204087425, "grad_norm": 0.23850500583648682, "learning_rate": 5.819812659665058e-05, "loss": 0.004408773779869079, "step": 147330 }, { "epoch": 41.822310530797616, "grad_norm": 7.02516508102417, "learning_rate": 5.819528810672722e-05, "loss": 0.010765761137008667, "step": 147340 }, { "epoch": 41.82514902072098, "grad_norm": 0.7308682203292847, "learning_rate": 5.8192449616803866e-05, "loss": 0.008822530508041382, "step": 147350 }, { "epoch": 41.827987510644334, "grad_norm": 0.1495169699192047, "learning_rate": 5.818961112688051e-05, "loss": 0.0010842572897672652, "step": 147360 }, { "epoch": 41.8308260005677, "grad_norm": 0.287382036447525, "learning_rate": 5.818677263695714e-05, "loss": 0.0037784580141305923, "step": 147370 }, { "epoch": 41.83366449049106, "grad_norm": 0.020530765876173973, "learning_rate": 5.818393414703378e-05, "loss": 0.0010303573682904244, "step": 147380 }, { "epoch": 41.83650298041442, "grad_norm": 6.735590934753418, "learning_rate": 5.818109565711042e-05, "loss": 0.005411159992218017, "step": 147390 }, { "epoch": 41.83934147033778, "grad_norm": 18.0218563079834, "learning_rate": 5.817825716718706e-05, "loss": 0.020626306533813477, "step": 147400 }, { "epoch": 41.84217996026114, "grad_norm": 0.13333527743816376, "learning_rate": 5.8175418677263693e-05, "loss": 0.0016832556575536728, "step": 147410 }, { "epoch": 41.845018450184504, "grad_norm": 3.54260516166687, "learning_rate": 5.817258018734034e-05, "loss": 0.005826597288250923, "step": 147420 }, { "epoch": 41.84785694010786, "grad_norm": 0.11311717331409454, "learning_rate": 5.8169741697416976e-05, "loss": 0.0017257191240787507, "step": 147430 }, { "epoch": 41.85069543003122, "grad_norm": 0.14873036742210388, "learning_rate": 5.816690320749362e-05, "loss": 0.014115546643733979, "step": 147440 }, { "epoch": 41.853533919954586, "grad_norm": 0.7668888568878174, "learning_rate": 5.816406471757025e-05, "loss": 0.004536485671997071, "step": 147450 }, { "epoch": 41.85637240987794, "grad_norm": 0.2620188891887665, "learning_rate": 5.8161226227646894e-05, "loss": 0.005364926531910896, "step": 147460 }, { "epoch": 41.859210899801305, "grad_norm": 0.5208867788314819, "learning_rate": 5.815838773772353e-05, "loss": 0.003286202996969223, "step": 147470 }, { "epoch": 41.86204938972467, "grad_norm": 0.3200452923774719, "learning_rate": 5.8155549247800176e-05, "loss": 0.006648755818605423, "step": 147480 }, { "epoch": 41.86488787964803, "grad_norm": 7.516880512237549, "learning_rate": 5.815271075787682e-05, "loss": 0.007744076102972031, "step": 147490 }, { "epoch": 41.867726369571386, "grad_norm": 0.26730769872665405, "learning_rate": 5.814987226795345e-05, "loss": 0.000565139576792717, "step": 147500 }, { "epoch": 41.867726369571386, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.08290290087461472, "eval_runtime": 32.6819, "eval_samples_per_second": 481.214, "eval_steps_per_second": 7.527, "step": 147500 }, { "epoch": 41.87056485949475, "grad_norm": 1.3433256149291992, "learning_rate": 5.8147033778030094e-05, "loss": 0.0023064883425831795, "step": 147510 }, { "epoch": 41.87340334941811, "grad_norm": 0.3978353440761566, "learning_rate": 5.814419528810673e-05, "loss": 0.0038914754986763, "step": 147520 }, { "epoch": 41.87624183934147, "grad_norm": 9.57226276397705, "learning_rate": 5.814135679818336e-05, "loss": 0.012706468999385833, "step": 147530 }, { "epoch": 41.87908032926483, "grad_norm": 0.041330594569444656, "learning_rate": 5.8138518308260004e-05, "loss": 0.0021071359515190125, "step": 147540 }, { "epoch": 41.88191881918819, "grad_norm": 5.490805149078369, "learning_rate": 5.813567981833665e-05, "loss": 0.0053326129913330075, "step": 147550 }, { "epoch": 41.88475730911155, "grad_norm": 0.4416806101799011, "learning_rate": 5.813284132841329e-05, "loss": 0.003304172307252884, "step": 147560 }, { "epoch": 41.88759579903491, "grad_norm": 0.06793039292097092, "learning_rate": 5.813000283848993e-05, "loss": 0.007199781388044358, "step": 147570 }, { "epoch": 41.890434288958275, "grad_norm": 0.3708445727825165, "learning_rate": 5.812716434856656e-05, "loss": 0.008208294957876205, "step": 147580 }, { "epoch": 41.89327277888164, "grad_norm": 4.664440155029297, "learning_rate": 5.8124325858643204e-05, "loss": 0.002201572060585022, "step": 147590 }, { "epoch": 41.896111268804994, "grad_norm": 0.04257483780384064, "learning_rate": 5.812148736871984e-05, "loss": 0.0008619504049420356, "step": 147600 }, { "epoch": 41.89894975872836, "grad_norm": 2.6341915130615234, "learning_rate": 5.811864887879648e-05, "loss": 0.0020460978150367735, "step": 147610 }, { "epoch": 41.90178824865172, "grad_norm": 0.07851394265890121, "learning_rate": 5.811581038887313e-05, "loss": 0.0030582845211029053, "step": 147620 }, { "epoch": 41.904626738575075, "grad_norm": 4.1485819816589355, "learning_rate": 5.811297189894976e-05, "loss": 0.003317224979400635, "step": 147630 }, { "epoch": 41.90746522849844, "grad_norm": 1.9572997093200684, "learning_rate": 5.8110133409026404e-05, "loss": 0.007461913675069809, "step": 147640 }, { "epoch": 41.9103037184218, "grad_norm": 1.7077418565750122, "learning_rate": 5.810729491910304e-05, "loss": 0.005768641829490662, "step": 147650 }, { "epoch": 41.913142208345164, "grad_norm": 3.030463457107544, "learning_rate": 5.8104456429179674e-05, "loss": 0.004807484894990921, "step": 147660 }, { "epoch": 41.91598069826852, "grad_norm": 0.41401901841163635, "learning_rate": 5.8101617939256315e-05, "loss": 0.008545652776956559, "step": 147670 }, { "epoch": 41.91881918819188, "grad_norm": 0.0818423181772232, "learning_rate": 5.809877944933296e-05, "loss": 0.004737967997789383, "step": 147680 }, { "epoch": 41.921657678115245, "grad_norm": 0.5672314167022705, "learning_rate": 5.80959409594096e-05, "loss": 0.009779351949691772, "step": 147690 }, { "epoch": 41.9244961680386, "grad_norm": 2.996091842651367, "learning_rate": 5.809310246948624e-05, "loss": 0.0027373740449547768, "step": 147700 }, { "epoch": 41.927334657961964, "grad_norm": 3.0213868618011475, "learning_rate": 5.8090263979562874e-05, "loss": 0.003720492497086525, "step": 147710 }, { "epoch": 41.93017314788533, "grad_norm": 1.7075763940811157, "learning_rate": 5.8087425489639515e-05, "loss": 0.0017891475930809976, "step": 147720 }, { "epoch": 41.93301163780868, "grad_norm": 5.161940574645996, "learning_rate": 5.808458699971615e-05, "loss": 0.0019787611439824105, "step": 147730 }, { "epoch": 41.935850127732046, "grad_norm": 0.11445596069097519, "learning_rate": 5.808174850979279e-05, "loss": 0.008735276758670807, "step": 147740 }, { "epoch": 41.93868861765541, "grad_norm": 0.6976454854011536, "learning_rate": 5.807891001986944e-05, "loss": 0.0025481412187218664, "step": 147750 }, { "epoch": 41.94152710757877, "grad_norm": 0.6270613670349121, "learning_rate": 5.8076071529946074e-05, "loss": 0.003670267015695572, "step": 147760 }, { "epoch": 41.94436559750213, "grad_norm": 4.4429450035095215, "learning_rate": 5.8073233040022715e-05, "loss": 0.008809293806552886, "step": 147770 }, { "epoch": 41.94720408742549, "grad_norm": 1.2670964002609253, "learning_rate": 5.807039455009935e-05, "loss": 0.00708872526884079, "step": 147780 }, { "epoch": 41.95004257734885, "grad_norm": 0.062080785632133484, "learning_rate": 5.8067556060175984e-05, "loss": 0.005391184613108635, "step": 147790 }, { "epoch": 41.95288106727221, "grad_norm": 0.05487651750445366, "learning_rate": 5.8064717570252626e-05, "loss": 0.002111133560538292, "step": 147800 }, { "epoch": 41.95571955719557, "grad_norm": 0.3206935226917267, "learning_rate": 5.806187908032926e-05, "loss": 0.0010101672261953354, "step": 147810 }, { "epoch": 41.958558047118935, "grad_norm": 0.04768664762377739, "learning_rate": 5.805904059040591e-05, "loss": 0.0011282308027148248, "step": 147820 }, { "epoch": 41.96139653704229, "grad_norm": 14.992984771728516, "learning_rate": 5.805620210048255e-05, "loss": 0.0044780105352401735, "step": 147830 }, { "epoch": 41.96423502696565, "grad_norm": 0.9442441463470459, "learning_rate": 5.8053363610559185e-05, "loss": 0.0007804796099662781, "step": 147840 }, { "epoch": 41.967073516889016, "grad_norm": 0.10974982380867004, "learning_rate": 5.8050525120635826e-05, "loss": 0.0007784070447087287, "step": 147850 }, { "epoch": 41.96991200681238, "grad_norm": 0.34406018257141113, "learning_rate": 5.804768663071246e-05, "loss": 0.0025797396898269652, "step": 147860 }, { "epoch": 41.972750496735735, "grad_norm": 0.03177465498447418, "learning_rate": 5.80448481407891e-05, "loss": 0.00403272807598114, "step": 147870 }, { "epoch": 41.9755889866591, "grad_norm": 0.03622500225901604, "learning_rate": 5.804200965086575e-05, "loss": 0.010548439621925355, "step": 147880 }, { "epoch": 41.97842747658246, "grad_norm": 0.034628067165613174, "learning_rate": 5.8039171160942385e-05, "loss": 0.0008944230154156685, "step": 147890 }, { "epoch": 41.981265966505816, "grad_norm": 0.20137465000152588, "learning_rate": 5.803633267101902e-05, "loss": 0.0024202339351177215, "step": 147900 }, { "epoch": 41.98410445642918, "grad_norm": 1.4294395446777344, "learning_rate": 5.803349418109566e-05, "loss": 0.007550927251577378, "step": 147910 }, { "epoch": 41.98694294635254, "grad_norm": 4.568727970123291, "learning_rate": 5.8030655691172295e-05, "loss": 0.004716400429606438, "step": 147920 }, { "epoch": 41.9897814362759, "grad_norm": 0.061145152896642685, "learning_rate": 5.8027817201248937e-05, "loss": 0.0014157941564917564, "step": 147930 }, { "epoch": 41.99261992619926, "grad_norm": 0.06142732873558998, "learning_rate": 5.802497871132557e-05, "loss": 0.009902379661798476, "step": 147940 }, { "epoch": 41.995458416122624, "grad_norm": 0.016658145934343338, "learning_rate": 5.802214022140222e-05, "loss": 0.005079532414674759, "step": 147950 }, { "epoch": 41.99829690604599, "grad_norm": 0.11378389596939087, "learning_rate": 5.801930173147886e-05, "loss": 0.0011573072522878646, "step": 147960 }, { "epoch": 42.00113539596934, "grad_norm": 10.588242530822754, "learning_rate": 5.8016463241555495e-05, "loss": 0.002886012941598892, "step": 147970 }, { "epoch": 42.003973885892705, "grad_norm": 0.22206243872642517, "learning_rate": 5.801362475163214e-05, "loss": 0.0018182601779699326, "step": 147980 }, { "epoch": 42.00681237581607, "grad_norm": 0.05613575875759125, "learning_rate": 5.801078626170877e-05, "loss": 0.0010436916723847388, "step": 147990 }, { "epoch": 42.009650865739424, "grad_norm": 0.167624369263649, "learning_rate": 5.8007947771785406e-05, "loss": 0.003317735716700554, "step": 148000 }, { "epoch": 42.009650865739424, "eval_accuracy": 0.974883957525275, "eval_loss": 0.09121189266443253, "eval_runtime": 31.741, "eval_samples_per_second": 495.48, "eval_steps_per_second": 7.75, "step": 148000 }, { "epoch": 42.01248935566279, "grad_norm": 5.4926347732543945, "learning_rate": 5.800510928186205e-05, "loss": 0.0019615299999713896, "step": 148010 }, { "epoch": 42.01532784558615, "grad_norm": 5.0397467613220215, "learning_rate": 5.8002270791938695e-05, "loss": 0.003204669803380966, "step": 148020 }, { "epoch": 42.018166335509505, "grad_norm": 0.20917385816574097, "learning_rate": 5.799943230201533e-05, "loss": 0.0020218903198838236, "step": 148030 }, { "epoch": 42.02100482543287, "grad_norm": 0.01600181870162487, "learning_rate": 5.799659381209197e-05, "loss": 0.002963532507419586, "step": 148040 }, { "epoch": 42.02384331535623, "grad_norm": 0.07229290902614594, "learning_rate": 5.7993755322168606e-05, "loss": 0.0038999214768409727, "step": 148050 }, { "epoch": 42.026681805279594, "grad_norm": 0.9045941233634949, "learning_rate": 5.799091683224525e-05, "loss": 0.0007863270118832588, "step": 148060 }, { "epoch": 42.02952029520295, "grad_norm": 0.12249844521284103, "learning_rate": 5.798807834232188e-05, "loss": 0.00138645451515913, "step": 148070 }, { "epoch": 42.03235878512631, "grad_norm": 0.03424160182476044, "learning_rate": 5.798523985239853e-05, "loss": 0.004904663190245628, "step": 148080 }, { "epoch": 42.035197275049676, "grad_norm": 0.40684759616851807, "learning_rate": 5.798240136247517e-05, "loss": 0.0022047240287065505, "step": 148090 }, { "epoch": 42.03803576497303, "grad_norm": 0.14067214727401733, "learning_rate": 5.7979562872551806e-05, "loss": 0.004714854434132576, "step": 148100 }, { "epoch": 42.040874254896394, "grad_norm": 4.484437942504883, "learning_rate": 5.797672438262845e-05, "loss": 0.003201736509799957, "step": 148110 }, { "epoch": 42.04371274481976, "grad_norm": 0.16209560632705688, "learning_rate": 5.797388589270508e-05, "loss": 0.0010487783700227737, "step": 148120 }, { "epoch": 42.04655123474312, "grad_norm": 0.01927056722342968, "learning_rate": 5.797104740278172e-05, "loss": 0.0006518881767988205, "step": 148130 }, { "epoch": 42.049389724666476, "grad_norm": 0.5525190830230713, "learning_rate": 5.796820891285836e-05, "loss": 0.004366601258516312, "step": 148140 }, { "epoch": 42.05222821458984, "grad_norm": 0.0631304606795311, "learning_rate": 5.7965370422935006e-05, "loss": 0.0025670694187283514, "step": 148150 }, { "epoch": 42.0550667045132, "grad_norm": 1.1734358072280884, "learning_rate": 5.796253193301164e-05, "loss": 0.005134715884923935, "step": 148160 }, { "epoch": 42.05790519443656, "grad_norm": 2.5967254638671875, "learning_rate": 5.795969344308828e-05, "loss": 0.004553484916687012, "step": 148170 }, { "epoch": 42.06074368435992, "grad_norm": 0.06469224393367767, "learning_rate": 5.795685495316492e-05, "loss": 0.0013846702873706817, "step": 148180 }, { "epoch": 42.06358217428328, "grad_norm": 0.07613036036491394, "learning_rate": 5.795401646324156e-05, "loss": 0.0019763268530368803, "step": 148190 }, { "epoch": 42.06642066420664, "grad_norm": 4.161798000335693, "learning_rate": 5.795117797331819e-05, "loss": 0.0025999821722507478, "step": 148200 }, { "epoch": 42.06925915413, "grad_norm": 0.09685018658638, "learning_rate": 5.7948339483394834e-05, "loss": 0.003942451626062393, "step": 148210 }, { "epoch": 42.072097644053365, "grad_norm": 0.13936004042625427, "learning_rate": 5.794550099347148e-05, "loss": 0.002917243354022503, "step": 148220 }, { "epoch": 42.07493613397673, "grad_norm": 0.09791680425405502, "learning_rate": 5.794266250354812e-05, "loss": 0.0016802605241537094, "step": 148230 }, { "epoch": 42.07777462390008, "grad_norm": 0.16870345175266266, "learning_rate": 5.793982401362476e-05, "loss": 0.00816027894616127, "step": 148240 }, { "epoch": 42.080613113823446, "grad_norm": 1.217581033706665, "learning_rate": 5.793698552370139e-05, "loss": 0.0022849004715681078, "step": 148250 }, { "epoch": 42.08345160374681, "grad_norm": 0.06864102929830551, "learning_rate": 5.793414703377803e-05, "loss": 0.0018223153427243232, "step": 148260 }, { "epoch": 42.086290093670165, "grad_norm": 0.252546489238739, "learning_rate": 5.793130854385467e-05, "loss": 0.0015053559094667436, "step": 148270 }, { "epoch": 42.08912858359353, "grad_norm": 0.006149183958768845, "learning_rate": 5.792847005393132e-05, "loss": 0.0008971454575657844, "step": 148280 }, { "epoch": 42.09196707351689, "grad_norm": 0.1221720352768898, "learning_rate": 5.792563156400795e-05, "loss": 0.0007475227117538452, "step": 148290 }, { "epoch": 42.09480556344025, "grad_norm": 1.5749468803405762, "learning_rate": 5.792279307408459e-05, "loss": 0.0032606054097414016, "step": 148300 }, { "epoch": 42.09764405336361, "grad_norm": 0.5113258957862854, "learning_rate": 5.791995458416123e-05, "loss": 0.009348827600479125, "step": 148310 }, { "epoch": 42.10048254328697, "grad_norm": 0.14693041145801544, "learning_rate": 5.791711609423787e-05, "loss": 0.0007658191025257111, "step": 148320 }, { "epoch": 42.103321033210335, "grad_norm": 0.8705531358718872, "learning_rate": 5.7914277604314503e-05, "loss": 0.005033469200134278, "step": 148330 }, { "epoch": 42.10615952313369, "grad_norm": 0.11414027959108353, "learning_rate": 5.7911439114391145e-05, "loss": 0.0024085933342576026, "step": 148340 }, { "epoch": 42.108998013057054, "grad_norm": 0.21188539266586304, "learning_rate": 5.790860062446779e-05, "loss": 0.001377679780125618, "step": 148350 }, { "epoch": 42.11183650298042, "grad_norm": 0.5982687473297119, "learning_rate": 5.790576213454443e-05, "loss": 0.0022996239364147185, "step": 148360 }, { "epoch": 42.11467499290377, "grad_norm": 8.45866870880127, "learning_rate": 5.790292364462106e-05, "loss": 0.006784467399120331, "step": 148370 }, { "epoch": 42.117513482827135, "grad_norm": 0.1945953071117401, "learning_rate": 5.7900085154697704e-05, "loss": 0.0046535424888134004, "step": 148380 }, { "epoch": 42.1203519727505, "grad_norm": 1.637442946434021, "learning_rate": 5.789724666477434e-05, "loss": 0.0013331301510334016, "step": 148390 }, { "epoch": 42.123190462673854, "grad_norm": 0.034490346908569336, "learning_rate": 5.789440817485098e-05, "loss": 0.0010250095278024674, "step": 148400 }, { "epoch": 42.12602895259722, "grad_norm": 0.16968096792697906, "learning_rate": 5.789156968492763e-05, "loss": 0.0010447947308421135, "step": 148410 }, { "epoch": 42.12886744252058, "grad_norm": 0.012737770564854145, "learning_rate": 5.788873119500426e-05, "loss": 0.0008038455620408058, "step": 148420 }, { "epoch": 42.13170593244394, "grad_norm": 0.522477924823761, "learning_rate": 5.7885892705080904e-05, "loss": 0.0010614242404699326, "step": 148430 }, { "epoch": 42.1345444223673, "grad_norm": 0.09368687123060226, "learning_rate": 5.788305421515754e-05, "loss": 0.017077405750751496, "step": 148440 }, { "epoch": 42.13738291229066, "grad_norm": 0.23763950169086456, "learning_rate": 5.788021572523418e-05, "loss": 0.0013159574940800668, "step": 148450 }, { "epoch": 42.140221402214024, "grad_norm": 0.023820364847779274, "learning_rate": 5.7877377235310814e-05, "loss": 0.0026591250672936438, "step": 148460 }, { "epoch": 42.14305989213738, "grad_norm": 0.09343444555997849, "learning_rate": 5.787453874538745e-05, "loss": 0.0009782956913113594, "step": 148470 }, { "epoch": 42.14589838206074, "grad_norm": 3.2588257789611816, "learning_rate": 5.7871700255464104e-05, "loss": 0.002280403859913349, "step": 148480 }, { "epoch": 42.148736871984106, "grad_norm": 9.96806526184082, "learning_rate": 5.786886176554074e-05, "loss": 0.008225031197071075, "step": 148490 }, { "epoch": 42.15157536190747, "grad_norm": 0.048505425453186035, "learning_rate": 5.786602327561737e-05, "loss": 0.000473700650036335, "step": 148500 }, { "epoch": 42.15157536190747, "eval_accuracy": 0.975837731290138, "eval_loss": 0.08783867955207825, "eval_runtime": 32.0658, "eval_samples_per_second": 490.461, "eval_steps_per_second": 7.672, "step": 148500 }, { "epoch": 42.154413851830824, "grad_norm": 0.02808949165046215, "learning_rate": 5.7863184785694014e-05, "loss": 0.00038874801248311996, "step": 148510 }, { "epoch": 42.15725234175419, "grad_norm": 0.12483259290456772, "learning_rate": 5.786034629577065e-05, "loss": 0.0015161557123064995, "step": 148520 }, { "epoch": 42.16009083167755, "grad_norm": 0.06408767402172089, "learning_rate": 5.785750780584729e-05, "loss": 0.0007976977154612541, "step": 148530 }, { "epoch": 42.162929321600906, "grad_norm": 0.18530237674713135, "learning_rate": 5.7854669315923925e-05, "loss": 0.0013058725744485855, "step": 148540 }, { "epoch": 42.16576781152427, "grad_norm": 0.23273755609989166, "learning_rate": 5.785183082600057e-05, "loss": 0.0030816461890935897, "step": 148550 }, { "epoch": 42.16860630144763, "grad_norm": 1.2946865558624268, "learning_rate": 5.7848992336077214e-05, "loss": 0.0039606474339962, "step": 148560 }, { "epoch": 42.17144479137099, "grad_norm": 0.20483577251434326, "learning_rate": 5.784615384615385e-05, "loss": 0.001383386179804802, "step": 148570 }, { "epoch": 42.17428328129435, "grad_norm": 0.9227098822593689, "learning_rate": 5.784331535623049e-05, "loss": 0.0020934173837304114, "step": 148580 }, { "epoch": 42.17712177121771, "grad_norm": 19.145475387573242, "learning_rate": 5.7840476866307125e-05, "loss": 0.005884546041488648, "step": 148590 }, { "epoch": 42.179960261141076, "grad_norm": 0.8967348337173462, "learning_rate": 5.783763837638376e-05, "loss": 0.0015535371378064156, "step": 148600 }, { "epoch": 42.18279875106443, "grad_norm": 2.5110926628112793, "learning_rate": 5.7834799886460415e-05, "loss": 0.002668975479900837, "step": 148610 }, { "epoch": 42.185637240987795, "grad_norm": 0.17472758889198303, "learning_rate": 5.783196139653705e-05, "loss": 0.002374446578323841, "step": 148620 }, { "epoch": 42.18847573091116, "grad_norm": 0.21558703482151031, "learning_rate": 5.7829122906613684e-05, "loss": 0.0027756338939070703, "step": 148630 }, { "epoch": 42.19131422083451, "grad_norm": 0.5122671723365784, "learning_rate": 5.7826284416690325e-05, "loss": 0.0006461162120103836, "step": 148640 }, { "epoch": 42.194152710757876, "grad_norm": 0.1394033581018448, "learning_rate": 5.782344592676696e-05, "loss": 0.001151873730123043, "step": 148650 }, { "epoch": 42.19699120068124, "grad_norm": 7.446272373199463, "learning_rate": 5.78206074368436e-05, "loss": 0.006092076003551483, "step": 148660 }, { "epoch": 42.199829690604595, "grad_norm": 5.922327518463135, "learning_rate": 5.7817768946920236e-05, "loss": 0.003573869913816452, "step": 148670 }, { "epoch": 42.20266818052796, "grad_norm": 9.825607299804688, "learning_rate": 5.7814930456996884e-05, "loss": 0.008025586605072021, "step": 148680 }, { "epoch": 42.20550667045132, "grad_norm": 0.8440441489219666, "learning_rate": 5.7812091967073525e-05, "loss": 0.0035113126039505007, "step": 148690 }, { "epoch": 42.208345160374684, "grad_norm": 0.029959499835968018, "learning_rate": 5.780925347715016e-05, "loss": 0.0037767499685287475, "step": 148700 }, { "epoch": 42.21118365029804, "grad_norm": 1.2642771005630493, "learning_rate": 5.78064149872268e-05, "loss": 0.000986599363386631, "step": 148710 }, { "epoch": 42.2140221402214, "grad_norm": 2.8234050273895264, "learning_rate": 5.7803576497303436e-05, "loss": 0.00487886555492878, "step": 148720 }, { "epoch": 42.216860630144765, "grad_norm": 0.03957867994904518, "learning_rate": 5.780073800738007e-05, "loss": 0.006504786014556885, "step": 148730 }, { "epoch": 42.21969912006812, "grad_norm": 0.427886962890625, "learning_rate": 5.779789951745671e-05, "loss": 0.004809509590268135, "step": 148740 }, { "epoch": 42.222537609991484, "grad_norm": 0.7828105092048645, "learning_rate": 5.779506102753336e-05, "loss": 0.004443696141242981, "step": 148750 }, { "epoch": 42.22537609991485, "grad_norm": 0.1608571708202362, "learning_rate": 5.7792222537609994e-05, "loss": 0.0009651016443967819, "step": 148760 }, { "epoch": 42.2282145898382, "grad_norm": 0.06307506561279297, "learning_rate": 5.7789384047686636e-05, "loss": 0.0010277647525072098, "step": 148770 }, { "epoch": 42.231053079761566, "grad_norm": 0.650701642036438, "learning_rate": 5.778654555776327e-05, "loss": 0.0035609032958745955, "step": 148780 }, { "epoch": 42.23389156968493, "grad_norm": 0.16893111169338226, "learning_rate": 5.778370706783991e-05, "loss": 0.0019431231543421746, "step": 148790 }, { "epoch": 42.23673005960829, "grad_norm": 3.2165372371673584, "learning_rate": 5.7780868577916546e-05, "loss": 0.007633565366268158, "step": 148800 }, { "epoch": 42.23956854953165, "grad_norm": 15.186038970947266, "learning_rate": 5.7778030087993195e-05, "loss": 0.004476289451122284, "step": 148810 }, { "epoch": 42.24240703945501, "grad_norm": 0.12604351341724396, "learning_rate": 5.7775191598069836e-05, "loss": 0.00441405177116394, "step": 148820 }, { "epoch": 42.24524552937837, "grad_norm": 0.07461309432983398, "learning_rate": 5.777235310814647e-05, "loss": 0.0020631251856684683, "step": 148830 }, { "epoch": 42.24808401930173, "grad_norm": 0.2549339532852173, "learning_rate": 5.7769514618223105e-05, "loss": 0.002142311818897724, "step": 148840 }, { "epoch": 42.25092250922509, "grad_norm": 0.19857609272003174, "learning_rate": 5.7766676128299747e-05, "loss": 0.0020988911390304567, "step": 148850 }, { "epoch": 42.253760999148454, "grad_norm": 3.5835609436035156, "learning_rate": 5.776383763837638e-05, "loss": 0.004204310476779938, "step": 148860 }, { "epoch": 42.25659948907182, "grad_norm": 0.9174870252609253, "learning_rate": 5.776099914845302e-05, "loss": 0.0028860537335276605, "step": 148870 }, { "epoch": 42.25943797899517, "grad_norm": 0.7222262620925903, "learning_rate": 5.775816065852967e-05, "loss": 0.0025588532909750937, "step": 148880 }, { "epoch": 42.262276468918536, "grad_norm": 0.4497431516647339, "learning_rate": 5.7755322168606305e-05, "loss": 0.0009511243551969528, "step": 148890 }, { "epoch": 42.2651149588419, "grad_norm": 0.19622796773910522, "learning_rate": 5.775248367868295e-05, "loss": 0.0010238831862807273, "step": 148900 }, { "epoch": 42.267953448765255, "grad_norm": 0.05724845081567764, "learning_rate": 5.774964518875958e-05, "loss": 0.004121816903352738, "step": 148910 }, { "epoch": 42.27079193868862, "grad_norm": 0.6709813475608826, "learning_rate": 5.774680669883622e-05, "loss": 0.009487597644329071, "step": 148920 }, { "epoch": 42.27363042861198, "grad_norm": 4.7786712646484375, "learning_rate": 5.774396820891286e-05, "loss": 0.007756906002759934, "step": 148930 }, { "epoch": 42.276468918535336, "grad_norm": 1.7194970846176147, "learning_rate": 5.774112971898949e-05, "loss": 0.005990612506866455, "step": 148940 }, { "epoch": 42.2793074084587, "grad_norm": 0.8138684630393982, "learning_rate": 5.773829122906615e-05, "loss": 0.003585950285196304, "step": 148950 }, { "epoch": 42.28214589838206, "grad_norm": 0.024683451279997826, "learning_rate": 5.773545273914278e-05, "loss": 0.00307918693870306, "step": 148960 }, { "epoch": 42.284984388305425, "grad_norm": 0.06129902973771095, "learning_rate": 5.7732614249219416e-05, "loss": 0.0009402599185705185, "step": 148970 }, { "epoch": 42.28782287822878, "grad_norm": 0.28746408224105835, "learning_rate": 5.772977575929606e-05, "loss": 0.007334737479686737, "step": 148980 }, { "epoch": 42.29066136815214, "grad_norm": 0.262344628572464, "learning_rate": 5.772693726937269e-05, "loss": 0.005848598480224609, "step": 148990 }, { "epoch": 42.293499858075506, "grad_norm": 0.3501192629337311, "learning_rate": 5.772409877944933e-05, "loss": 0.0011028772220015525, "step": 149000 }, { "epoch": 42.293499858075506, "eval_accuracy": 0.9762828257137407, "eval_loss": 0.08106570690870285, "eval_runtime": 33.8397, "eval_samples_per_second": 464.75, "eval_steps_per_second": 7.27, "step": 149000 }, { "epoch": 42.29633834799886, "grad_norm": 1.8995906114578247, "learning_rate": 5.772126028952598e-05, "loss": 0.0007258778437972069, "step": 149010 }, { "epoch": 42.299176837922225, "grad_norm": 1.8555222749710083, "learning_rate": 5.7718421799602616e-05, "loss": 0.0010192519053816794, "step": 149020 }, { "epoch": 42.30201532784559, "grad_norm": 1.5609384775161743, "learning_rate": 5.771586715867159e-05, "loss": 0.004942493140697479, "step": 149030 }, { "epoch": 42.304853817768944, "grad_norm": 6.585638523101807, "learning_rate": 5.7713028668748224e-05, "loss": 0.0029562147334218024, "step": 149040 }, { "epoch": 42.30769230769231, "grad_norm": 0.07576531171798706, "learning_rate": 5.7710190178824865e-05, "loss": 0.0008905645459890366, "step": 149050 }, { "epoch": 42.31053079761567, "grad_norm": 4.283070087432861, "learning_rate": 5.770735168890151e-05, "loss": 0.00501292496919632, "step": 149060 }, { "epoch": 42.31336928753903, "grad_norm": 0.49264270067214966, "learning_rate": 5.770451319897815e-05, "loss": 0.0007249932736158371, "step": 149070 }, { "epoch": 42.31620777746239, "grad_norm": 0.05113029107451439, "learning_rate": 5.770167470905479e-05, "loss": 0.004156504943966866, "step": 149080 }, { "epoch": 42.31904626738575, "grad_norm": 9.798721313476562, "learning_rate": 5.7698836219131424e-05, "loss": 0.003379834443330765, "step": 149090 }, { "epoch": 42.321884757309114, "grad_norm": 14.896623611450195, "learning_rate": 5.7695997729208065e-05, "loss": 0.005084022134542465, "step": 149100 }, { "epoch": 42.32472324723247, "grad_norm": 0.18293990194797516, "learning_rate": 5.76931592392847e-05, "loss": 0.0008425790816545487, "step": 149110 }, { "epoch": 42.32756173715583, "grad_norm": 0.1553041636943817, "learning_rate": 5.7690320749361334e-05, "loss": 0.0030866006389260294, "step": 149120 }, { "epoch": 42.330400227079195, "grad_norm": 0.11500255018472672, "learning_rate": 5.768748225943799e-05, "loss": 0.0015179915353655815, "step": 149130 }, { "epoch": 42.33323871700255, "grad_norm": 0.3087509572505951, "learning_rate": 5.7684643769514624e-05, "loss": 0.0034201137721538545, "step": 149140 }, { "epoch": 42.336077206925914, "grad_norm": 0.45559585094451904, "learning_rate": 5.768180527959126e-05, "loss": 0.0027576066553592683, "step": 149150 }, { "epoch": 42.33891569684928, "grad_norm": 0.18280380964279175, "learning_rate": 5.76789667896679e-05, "loss": 0.001021822914481163, "step": 149160 }, { "epoch": 42.34175418677264, "grad_norm": 0.04084552451968193, "learning_rate": 5.7676128299744535e-05, "loss": 0.0037411779165267946, "step": 149170 }, { "epoch": 42.344592676695996, "grad_norm": 0.19830608367919922, "learning_rate": 5.7673289809821176e-05, "loss": 0.006974570453166962, "step": 149180 }, { "epoch": 42.34743116661936, "grad_norm": 0.027047844603657722, "learning_rate": 5.7670451319897824e-05, "loss": 0.002917725592851639, "step": 149190 }, { "epoch": 42.35026965654272, "grad_norm": 0.038492824882268906, "learning_rate": 5.766761282997446e-05, "loss": 0.0018636330962181092, "step": 149200 }, { "epoch": 42.35310814646608, "grad_norm": 1.1602259874343872, "learning_rate": 5.76647743400511e-05, "loss": 0.0006949957460165024, "step": 149210 }, { "epoch": 42.35594663638944, "grad_norm": 0.71578049659729, "learning_rate": 5.7661935850127735e-05, "loss": 0.003599078580737114, "step": 149220 }, { "epoch": 42.3587851263128, "grad_norm": 0.07188554853200912, "learning_rate": 5.7659097360204376e-05, "loss": 0.0013271400704979897, "step": 149230 }, { "epoch": 42.36162361623616, "grad_norm": 0.08176995068788528, "learning_rate": 5.765625887028101e-05, "loss": 0.002788268029689789, "step": 149240 }, { "epoch": 42.36446210615952, "grad_norm": 0.20479156076908112, "learning_rate": 5.7653420380357645e-05, "loss": 0.005922697857022286, "step": 149250 }, { "epoch": 42.367300596082885, "grad_norm": 0.03780616447329521, "learning_rate": 5.765058189043429e-05, "loss": 0.0005988942459225655, "step": 149260 }, { "epoch": 42.37013908600625, "grad_norm": 0.044834546744823456, "learning_rate": 5.7647743400510935e-05, "loss": 0.0007186215370893478, "step": 149270 }, { "epoch": 42.3729775759296, "grad_norm": 0.365369588136673, "learning_rate": 5.764490491058757e-05, "loss": 0.0010452115908265113, "step": 149280 }, { "epoch": 42.375816065852966, "grad_norm": 0.9095590710639954, "learning_rate": 5.764206642066421e-05, "loss": 0.0015563469380140305, "step": 149290 }, { "epoch": 42.37865455577633, "grad_norm": 0.15202443301677704, "learning_rate": 5.7639227930740845e-05, "loss": 0.003445339947938919, "step": 149300 }, { "epoch": 42.381493045699685, "grad_norm": 0.41718608140945435, "learning_rate": 5.763638944081749e-05, "loss": 0.0013163993135094643, "step": 149310 }, { "epoch": 42.38433153562305, "grad_norm": 0.046813055872917175, "learning_rate": 5.763355095089412e-05, "loss": 0.0007233038544654846, "step": 149320 }, { "epoch": 42.38717002554641, "grad_norm": 0.0664995014667511, "learning_rate": 5.763071246097077e-05, "loss": 0.0009822048246860503, "step": 149330 }, { "epoch": 42.39000851546977, "grad_norm": 3.7311480045318604, "learning_rate": 5.762787397104741e-05, "loss": 0.00882490649819374, "step": 149340 }, { "epoch": 42.39284700539313, "grad_norm": 2.3043625354766846, "learning_rate": 5.7625035481124045e-05, "loss": 0.0015574658289551734, "step": 149350 }, { "epoch": 42.39568549531649, "grad_norm": 0.6846745014190674, "learning_rate": 5.762219699120068e-05, "loss": 0.0026110684499144553, "step": 149360 }, { "epoch": 42.398523985239855, "grad_norm": 0.061022449284791946, "learning_rate": 5.761935850127732e-05, "loss": 0.0007597656920552253, "step": 149370 }, { "epoch": 42.40136247516321, "grad_norm": 0.16794158518314362, "learning_rate": 5.7616520011353956e-05, "loss": 0.0011022666469216347, "step": 149380 }, { "epoch": 42.404200965086574, "grad_norm": 0.013561318628489971, "learning_rate": 5.7613681521430604e-05, "loss": 0.002890249527990818, "step": 149390 }, { "epoch": 42.40703945500994, "grad_norm": 1.4052187204360962, "learning_rate": 5.7610843031507246e-05, "loss": 0.0014119157567620277, "step": 149400 }, { "epoch": 42.40987794493329, "grad_norm": 0.48844704031944275, "learning_rate": 5.760800454158388e-05, "loss": 0.0015898007899522782, "step": 149410 }, { "epoch": 42.412716434856655, "grad_norm": 0.1818181872367859, "learning_rate": 5.760516605166052e-05, "loss": 0.0011587318032979966, "step": 149420 }, { "epoch": 42.41555492478002, "grad_norm": 0.013466368429362774, "learning_rate": 5.7602327561737156e-05, "loss": 0.0006939476355910301, "step": 149430 }, { "epoch": 42.41839341470338, "grad_norm": 0.266481876373291, "learning_rate": 5.75994890718138e-05, "loss": 0.0009322015568614006, "step": 149440 }, { "epoch": 42.42123190462674, "grad_norm": 0.08928917348384857, "learning_rate": 5.759665058189043e-05, "loss": 0.0011673543602228164, "step": 149450 }, { "epoch": 42.4240703945501, "grad_norm": 0.0835476890206337, "learning_rate": 5.759381209196708e-05, "loss": 0.0018524067476391793, "step": 149460 }, { "epoch": 42.42690888447346, "grad_norm": 0.07462149858474731, "learning_rate": 5.759097360204372e-05, "loss": 0.0008162502199411392, "step": 149470 }, { "epoch": 42.42974737439682, "grad_norm": 0.1084233745932579, "learning_rate": 5.7588135112120356e-05, "loss": 0.0011632420122623444, "step": 149480 }, { "epoch": 42.43258586432018, "grad_norm": 0.16020548343658447, "learning_rate": 5.758529662219699e-05, "loss": 0.0006304403766989708, "step": 149490 }, { "epoch": 42.435424354243544, "grad_norm": 0.025915855541825294, "learning_rate": 5.758245813227363e-05, "loss": 0.000703650526702404, "step": 149500 }, { "epoch": 42.435424354243544, "eval_accuracy": 0.9786990525847269, "eval_loss": 0.07884743809700012, "eval_runtime": 32.481, "eval_samples_per_second": 484.19, "eval_steps_per_second": 7.574, "step": 149500 }, { "epoch": 42.4382628441669, "grad_norm": 0.03452825918793678, "learning_rate": 5.757961964235027e-05, "loss": 0.0020655402913689613, "step": 149510 }, { "epoch": 42.44110133409026, "grad_norm": 2.167642116546631, "learning_rate": 5.757678115242691e-05, "loss": 0.003572450578212738, "step": 149520 }, { "epoch": 42.443939824013626, "grad_norm": 0.11725883185863495, "learning_rate": 5.7573942662503556e-05, "loss": 0.0007973648607730865, "step": 149530 }, { "epoch": 42.44677831393699, "grad_norm": 0.240860253572464, "learning_rate": 5.757110417258019e-05, "loss": 0.0015113582834601403, "step": 149540 }, { "epoch": 42.449616803860344, "grad_norm": 0.03504910692572594, "learning_rate": 5.756826568265683e-05, "loss": 0.0011814264580607414, "step": 149550 }, { "epoch": 42.45245529378371, "grad_norm": 1.6692795753479004, "learning_rate": 5.756542719273347e-05, "loss": 0.0020489107817411424, "step": 149560 }, { "epoch": 42.45529378370707, "grad_norm": 1.7897883653640747, "learning_rate": 5.756258870281011e-05, "loss": 0.006054595857858658, "step": 149570 }, { "epoch": 42.458132273630426, "grad_norm": 0.07541073113679886, "learning_rate": 5.755975021288674e-05, "loss": 0.009281496703624725, "step": 149580 }, { "epoch": 42.46097076355379, "grad_norm": 0.0854191705584526, "learning_rate": 5.755691172296339e-05, "loss": 0.005772685259580612, "step": 149590 }, { "epoch": 42.46380925347715, "grad_norm": 0.29860568046569824, "learning_rate": 5.755407323304003e-05, "loss": 0.01062425822019577, "step": 149600 }, { "epoch": 42.46664774340051, "grad_norm": 0.0560116171836853, "learning_rate": 5.755123474311667e-05, "loss": 0.006598441302776337, "step": 149610 }, { "epoch": 42.46948623332387, "grad_norm": 6.561726093292236, "learning_rate": 5.75483962531933e-05, "loss": 0.005295054987072945, "step": 149620 }, { "epoch": 42.47232472324723, "grad_norm": 0.17114068567752838, "learning_rate": 5.754555776326994e-05, "loss": 0.006758679449558258, "step": 149630 }, { "epoch": 42.475163213170596, "grad_norm": 0.5224401354789734, "learning_rate": 5.754271927334658e-05, "loss": 0.007378491014242173, "step": 149640 }, { "epoch": 42.47800170309395, "grad_norm": 2.0284016132354736, "learning_rate": 5.753988078342322e-05, "loss": 0.001416935957968235, "step": 149650 }, { "epoch": 42.480840193017315, "grad_norm": 1.306514024734497, "learning_rate": 5.753704229349987e-05, "loss": 0.002282816916704178, "step": 149660 }, { "epoch": 42.48367868294068, "grad_norm": 18.141342163085938, "learning_rate": 5.75342038035765e-05, "loss": 0.010499419271945953, "step": 149670 }, { "epoch": 42.48651717286403, "grad_norm": 0.9916077256202698, "learning_rate": 5.753136531365314e-05, "loss": 0.00396006777882576, "step": 149680 }, { "epoch": 42.489355662787396, "grad_norm": 1.541317105293274, "learning_rate": 5.752852682372978e-05, "loss": 0.006942304223775864, "step": 149690 }, { "epoch": 42.49219415271076, "grad_norm": 0.05948227271437645, "learning_rate": 5.752568833380642e-05, "loss": 0.0006588274613022804, "step": 149700 }, { "epoch": 42.49503264263412, "grad_norm": 0.27741312980651855, "learning_rate": 5.7522849843883054e-05, "loss": 0.00031501520425081253, "step": 149710 }, { "epoch": 42.49787113255748, "grad_norm": 7.813920497894287, "learning_rate": 5.752001135395969e-05, "loss": 0.007956470549106597, "step": 149720 }, { "epoch": 42.50070962248084, "grad_norm": 1.3294410705566406, "learning_rate": 5.7517172864036336e-05, "loss": 0.0014052055776119232, "step": 149730 }, { "epoch": 42.503548112404204, "grad_norm": 0.13576719164848328, "learning_rate": 5.751433437411298e-05, "loss": 0.004079211503267288, "step": 149740 }, { "epoch": 42.50638660232756, "grad_norm": 1.4054192304611206, "learning_rate": 5.751149588418961e-05, "loss": 0.004032426699995995, "step": 149750 }, { "epoch": 42.50922509225092, "grad_norm": 0.37282854318618774, "learning_rate": 5.7508657394266254e-05, "loss": 0.0011860737577080727, "step": 149760 }, { "epoch": 42.512063582174285, "grad_norm": 0.33757326006889343, "learning_rate": 5.750581890434289e-05, "loss": 0.0034485340118408205, "step": 149770 }, { "epoch": 42.51490207209764, "grad_norm": 6.6259870529174805, "learning_rate": 5.750298041441953e-05, "loss": 0.00277415718883276, "step": 149780 }, { "epoch": 42.517740562021004, "grad_norm": 0.14565500617027283, "learning_rate": 5.750014192449618e-05, "loss": 0.001903793402016163, "step": 149790 }, { "epoch": 42.52057905194437, "grad_norm": 0.20912469923496246, "learning_rate": 5.749730343457281e-05, "loss": 0.0032007738947868345, "step": 149800 }, { "epoch": 42.52341754186773, "grad_norm": 0.1471225917339325, "learning_rate": 5.7494464944649454e-05, "loss": 0.0014086702838540077, "step": 149810 }, { "epoch": 42.526256031791085, "grad_norm": 0.08750477433204651, "learning_rate": 5.749162645472609e-05, "loss": 0.007166118174791336, "step": 149820 }, { "epoch": 42.52909452171445, "grad_norm": 0.20422011613845825, "learning_rate": 5.748878796480272e-05, "loss": 0.00940827578306198, "step": 149830 }, { "epoch": 42.53193301163781, "grad_norm": 0.20312394201755524, "learning_rate": 5.7485949474879364e-05, "loss": 0.0023881040513515474, "step": 149840 }, { "epoch": 42.53477150156117, "grad_norm": 2.5897226333618164, "learning_rate": 5.7483110984956e-05, "loss": 0.001211434043943882, "step": 149850 }, { "epoch": 42.53760999148453, "grad_norm": 0.012378913350403309, "learning_rate": 5.748027249503265e-05, "loss": 0.0029115533456206323, "step": 149860 }, { "epoch": 42.54044848140789, "grad_norm": 0.17284952104091644, "learning_rate": 5.747743400510929e-05, "loss": 0.007810847461223602, "step": 149870 }, { "epoch": 42.54328697133125, "grad_norm": 4.997737407684326, "learning_rate": 5.747459551518592e-05, "loss": 0.0017485266551375389, "step": 149880 }, { "epoch": 42.54612546125461, "grad_norm": 0.11255983263254166, "learning_rate": 5.7471757025262564e-05, "loss": 0.0005200183019042015, "step": 149890 }, { "epoch": 42.548963951177974, "grad_norm": 0.08184505254030228, "learning_rate": 5.74689185353392e-05, "loss": 0.0038756825029850005, "step": 149900 }, { "epoch": 42.55180244110134, "grad_norm": 0.3472974896430969, "learning_rate": 5.746608004541584e-05, "loss": 0.0008942432701587677, "step": 149910 }, { "epoch": 42.55464093102469, "grad_norm": 0.1257934719324112, "learning_rate": 5.7463241555492475e-05, "loss": 0.009531313925981522, "step": 149920 }, { "epoch": 42.557479420948056, "grad_norm": 0.24506719410419464, "learning_rate": 5.746040306556912e-05, "loss": 0.0013905897736549377, "step": 149930 }, { "epoch": 42.56031791087142, "grad_norm": 1.3813766241073608, "learning_rate": 5.7457564575645765e-05, "loss": 0.0020813683047890665, "step": 149940 }, { "epoch": 42.563156400794774, "grad_norm": 0.283517986536026, "learning_rate": 5.74547260857224e-05, "loss": 0.0022549683228135107, "step": 149950 }, { "epoch": 42.56599489071814, "grad_norm": 0.18578699231147766, "learning_rate": 5.7451887595799034e-05, "loss": 0.0062153637409210205, "step": 149960 }, { "epoch": 42.5688333806415, "grad_norm": 0.3587241768836975, "learning_rate": 5.7449049105875675e-05, "loss": 0.012299595773220063, "step": 149970 }, { "epoch": 42.571671870564856, "grad_norm": 0.07118772715330124, "learning_rate": 5.744621061595231e-05, "loss": 0.002682354301214218, "step": 149980 }, { "epoch": 42.57451036048822, "grad_norm": 8.508957862854004, "learning_rate": 5.744337212602896e-05, "loss": 0.015873529016971588, "step": 149990 }, { "epoch": 42.57734885041158, "grad_norm": 0.6336022019386292, "learning_rate": 5.74405336361056e-05, "loss": 0.006864231079816818, "step": 150000 }, { "epoch": 42.57734885041158, "eval_accuracy": 0.9753290519488778, "eval_loss": 0.09033339470624924, "eval_runtime": 32.0947, "eval_samples_per_second": 490.019, "eval_steps_per_second": 7.665, "step": 150000 }, { "epoch": 42.580187340334945, "grad_norm": 0.3210078775882721, "learning_rate": 5.7437695146182234e-05, "loss": 0.0005479916930198669, "step": 150010 }, { "epoch": 42.5830258302583, "grad_norm": 0.12213665246963501, "learning_rate": 5.7434856656258875e-05, "loss": 0.002155088447034359, "step": 150020 }, { "epoch": 42.58586432018166, "grad_norm": 0.07583698630332947, "learning_rate": 5.743201816633551e-05, "loss": 0.0016990404576063157, "step": 150030 }, { "epoch": 42.588702810105026, "grad_norm": 0.045593857765197754, "learning_rate": 5.742917967641215e-05, "loss": 0.0010635141283273698, "step": 150040 }, { "epoch": 42.59154130002838, "grad_norm": 0.012724583968520164, "learning_rate": 5.7426341186488786e-05, "loss": 0.0017008380964398384, "step": 150050 }, { "epoch": 42.594379789951745, "grad_norm": 0.01829829439520836, "learning_rate": 5.7423502696565434e-05, "loss": 0.0013929100707173348, "step": 150060 }, { "epoch": 42.59721827987511, "grad_norm": 0.19252076745033264, "learning_rate": 5.742066420664207e-05, "loss": 0.0010853147134184838, "step": 150070 }, { "epoch": 42.60005676979847, "grad_norm": 0.20845939218997955, "learning_rate": 5.741782571671871e-05, "loss": 0.0044466637074947355, "step": 150080 }, { "epoch": 42.60289525972183, "grad_norm": 0.08697447180747986, "learning_rate": 5.7414987226795345e-05, "loss": 0.00136849507689476, "step": 150090 }, { "epoch": 42.60573374964519, "grad_norm": 0.30494001507759094, "learning_rate": 5.7412148736871986e-05, "loss": 0.00639970526099205, "step": 150100 }, { "epoch": 42.60857223956855, "grad_norm": 0.14865972101688385, "learning_rate": 5.740931024694862e-05, "loss": 0.01313437670469284, "step": 150110 }, { "epoch": 42.61141072949191, "grad_norm": 1.243951678276062, "learning_rate": 5.740647175702527e-05, "loss": 0.008478510379791259, "step": 150120 }, { "epoch": 42.61424921941527, "grad_norm": 0.16383923590183258, "learning_rate": 5.740363326710191e-05, "loss": 0.0009566636756062508, "step": 150130 }, { "epoch": 42.617087709338634, "grad_norm": 0.15019963681697845, "learning_rate": 5.7400794777178545e-05, "loss": 0.004200580716133118, "step": 150140 }, { "epoch": 42.61992619926199, "grad_norm": 0.644383430480957, "learning_rate": 5.7397956287255186e-05, "loss": 0.0029541900381445884, "step": 150150 }, { "epoch": 42.62276468918535, "grad_norm": 0.0352880097925663, "learning_rate": 5.739511779733182e-05, "loss": 0.0013392327353358268, "step": 150160 }, { "epoch": 42.625603179108715, "grad_norm": 0.3577956557273865, "learning_rate": 5.739227930740846e-05, "loss": 0.007454320788383484, "step": 150170 }, { "epoch": 42.62844166903208, "grad_norm": 0.45475128293037415, "learning_rate": 5.7389440817485097e-05, "loss": 0.001341792568564415, "step": 150180 }, { "epoch": 42.631280158955434, "grad_norm": 0.7097508311271667, "learning_rate": 5.7386602327561745e-05, "loss": 0.0022515781223773957, "step": 150190 }, { "epoch": 42.6341186488788, "grad_norm": 0.07850458472967148, "learning_rate": 5.738376383763838e-05, "loss": 0.0009548047557473183, "step": 150200 }, { "epoch": 42.63695713880216, "grad_norm": 0.03114534541964531, "learning_rate": 5.738092534771502e-05, "loss": 0.0025249719619750977, "step": 150210 }, { "epoch": 42.639795628725516, "grad_norm": 2.694411516189575, "learning_rate": 5.7378086857791655e-05, "loss": 0.001954171434044838, "step": 150220 }, { "epoch": 42.64263411864888, "grad_norm": 0.32755333185195923, "learning_rate": 5.73752483678683e-05, "loss": 0.0014858070760965347, "step": 150230 }, { "epoch": 42.64547260857224, "grad_norm": 0.21322500705718994, "learning_rate": 5.737240987794493e-05, "loss": 0.0011690650135278702, "step": 150240 }, { "epoch": 42.6483110984956, "grad_norm": 0.05923840403556824, "learning_rate": 5.736957138802157e-05, "loss": 0.001018458418548107, "step": 150250 }, { "epoch": 42.65114958841896, "grad_norm": 3.1690590381622314, "learning_rate": 5.736673289809822e-05, "loss": 0.0017630696296691895, "step": 150260 }, { "epoch": 42.65398807834232, "grad_norm": 0.5497409701347351, "learning_rate": 5.7363894408174855e-05, "loss": 0.0017364714294672012, "step": 150270 }, { "epoch": 42.656826568265686, "grad_norm": 0.15725433826446533, "learning_rate": 5.73610559182515e-05, "loss": 0.0007915511727333069, "step": 150280 }, { "epoch": 42.65966505818904, "grad_norm": 0.1343006193637848, "learning_rate": 5.735821742832813e-05, "loss": 0.0009281590580940246, "step": 150290 }, { "epoch": 42.662503548112404, "grad_norm": 2.792534112930298, "learning_rate": 5.7355378938404766e-05, "loss": 0.0007065312936902046, "step": 150300 }, { "epoch": 42.66534203803577, "grad_norm": 0.5772119164466858, "learning_rate": 5.735254044848141e-05, "loss": 0.0074326939880847934, "step": 150310 }, { "epoch": 42.66818052795912, "grad_norm": 3.087285280227661, "learning_rate": 5.7349701958558055e-05, "loss": 0.0037145815789699555, "step": 150320 }, { "epoch": 42.671019017882486, "grad_norm": 0.14270351827144623, "learning_rate": 5.734686346863469e-05, "loss": 0.005557875335216522, "step": 150330 }, { "epoch": 42.67385750780585, "grad_norm": 0.413318395614624, "learning_rate": 5.734402497871133e-05, "loss": 0.0016404284164309503, "step": 150340 }, { "epoch": 42.676695997729205, "grad_norm": 0.2032013237476349, "learning_rate": 5.7341186488787966e-05, "loss": 0.0016983469948172569, "step": 150350 }, { "epoch": 42.67953448765257, "grad_norm": 1.5514073371887207, "learning_rate": 5.733834799886461e-05, "loss": 0.0027741821482777595, "step": 150360 }, { "epoch": 42.68237297757593, "grad_norm": 4.071971893310547, "learning_rate": 5.733550950894124e-05, "loss": 0.0010571483522653579, "step": 150370 }, { "epoch": 42.68521146749929, "grad_norm": 0.16359339654445648, "learning_rate": 5.733267101901788e-05, "loss": 0.0008526347577571869, "step": 150380 }, { "epoch": 42.68804995742265, "grad_norm": 6.108848571777344, "learning_rate": 5.732983252909453e-05, "loss": 0.0035112112760543822, "step": 150390 }, { "epoch": 42.69088844734601, "grad_norm": 1.1128220558166504, "learning_rate": 5.7326994039171166e-05, "loss": 0.0015738485381007195, "step": 150400 }, { "epoch": 42.693726937269375, "grad_norm": 2.535426378250122, "learning_rate": 5.732415554924781e-05, "loss": 0.003577391058206558, "step": 150410 }, { "epoch": 42.69656542719273, "grad_norm": 0.21612073481082916, "learning_rate": 5.732131705932444e-05, "loss": 0.0022890446707606316, "step": 150420 }, { "epoch": 42.69940391711609, "grad_norm": 0.7193753123283386, "learning_rate": 5.731847856940108e-05, "loss": 0.0017169078812003136, "step": 150430 }, { "epoch": 42.702242407039456, "grad_norm": 3.320112943649292, "learning_rate": 5.731564007947772e-05, "loss": 0.0021342338994145395, "step": 150440 }, { "epoch": 42.70508089696281, "grad_norm": 1.8155882358551025, "learning_rate": 5.731280158955435e-05, "loss": 0.0017885973677039147, "step": 150450 }, { "epoch": 42.707919386886175, "grad_norm": 0.8427953720092773, "learning_rate": 5.7309963099631e-05, "loss": 0.004203182086348534, "step": 150460 }, { "epoch": 42.71075787680954, "grad_norm": 0.27758195996284485, "learning_rate": 5.730712460970764e-05, "loss": 0.004022154211997986, "step": 150470 }, { "epoch": 42.7135963667329, "grad_norm": 12.116328239440918, "learning_rate": 5.730428611978428e-05, "loss": 0.003638196736574173, "step": 150480 }, { "epoch": 42.71643485665626, "grad_norm": 0.4715937674045563, "learning_rate": 5.730144762986092e-05, "loss": 0.0016477832570672036, "step": 150490 }, { "epoch": 42.71927334657962, "grad_norm": 0.6735902428627014, "learning_rate": 5.729860913993755e-05, "loss": 0.003999575227499008, "step": 150500 }, { "epoch": 42.71927334657962, "eval_accuracy": 0.9752018821135626, "eval_loss": 0.09470957517623901, "eval_runtime": 31.8729, "eval_samples_per_second": 493.429, "eval_steps_per_second": 7.718, "step": 150500 }, { "epoch": 42.72211183650298, "grad_norm": 0.1094418317079544, "learning_rate": 5.7295770650014194e-05, "loss": 0.012765003740787506, "step": 150510 }, { "epoch": 42.72495032642634, "grad_norm": 0.18066728115081787, "learning_rate": 5.729293216009084e-05, "loss": 0.0011145541444420814, "step": 150520 }, { "epoch": 42.7277888163497, "grad_norm": 0.04154348373413086, "learning_rate": 5.729009367016748e-05, "loss": 0.0031032875180244447, "step": 150530 }, { "epoch": 42.730627306273064, "grad_norm": 1.1184370517730713, "learning_rate": 5.728725518024411e-05, "loss": 0.0032140046358108522, "step": 150540 }, { "epoch": 42.73346579619643, "grad_norm": 1.3468046188354492, "learning_rate": 5.728441669032075e-05, "loss": 0.0019706591963768007, "step": 150550 }, { "epoch": 42.73630428611978, "grad_norm": 0.034683480858802795, "learning_rate": 5.728157820039739e-05, "loss": 0.0010897317901253701, "step": 150560 }, { "epoch": 42.739142776043145, "grad_norm": 0.03907227888703346, "learning_rate": 5.727873971047403e-05, "loss": 0.0004331288859248161, "step": 150570 }, { "epoch": 42.74198126596651, "grad_norm": 4.75922155380249, "learning_rate": 5.7275901220550663e-05, "loss": 0.012294979393482208, "step": 150580 }, { "epoch": 42.744819755889864, "grad_norm": 0.07869251817464828, "learning_rate": 5.727306273062731e-05, "loss": 0.0014568734914064407, "step": 150590 }, { "epoch": 42.74765824581323, "grad_norm": 0.0386231429874897, "learning_rate": 5.727022424070395e-05, "loss": 0.0008156327530741692, "step": 150600 }, { "epoch": 42.75049673573659, "grad_norm": 0.7213430404663086, "learning_rate": 5.726738575078059e-05, "loss": 0.001945074461400509, "step": 150610 }, { "epoch": 42.753335225659946, "grad_norm": 17.490074157714844, "learning_rate": 5.726454726085723e-05, "loss": 0.00801035389304161, "step": 150620 }, { "epoch": 42.75617371558331, "grad_norm": 0.049720119684934616, "learning_rate": 5.7261708770933864e-05, "loss": 0.009720776975154877, "step": 150630 }, { "epoch": 42.75901220550667, "grad_norm": 6.460530757904053, "learning_rate": 5.7258870281010505e-05, "loss": 0.00235158484429121, "step": 150640 }, { "epoch": 42.761850695430034, "grad_norm": 0.12171939760446548, "learning_rate": 5.725603179108714e-05, "loss": 0.0057321939617395404, "step": 150650 }, { "epoch": 42.76468918535339, "grad_norm": 1.604061484336853, "learning_rate": 5.725319330116379e-05, "loss": 0.0013187101110816001, "step": 150660 }, { "epoch": 42.76752767527675, "grad_norm": 0.10278558731079102, "learning_rate": 5.725035481124042e-05, "loss": 0.0035054072737693786, "step": 150670 }, { "epoch": 42.770366165200116, "grad_norm": 0.358737975358963, "learning_rate": 5.7247516321317064e-05, "loss": 0.004805418476462364, "step": 150680 }, { "epoch": 42.77320465512347, "grad_norm": 8.513472557067871, "learning_rate": 5.72446778313937e-05, "loss": 0.0061926361173391346, "step": 150690 }, { "epoch": 42.776043145046835, "grad_norm": 1.5776017904281616, "learning_rate": 5.724183934147034e-05, "loss": 0.005824965983629226, "step": 150700 }, { "epoch": 42.7788816349702, "grad_norm": 1.6449989080429077, "learning_rate": 5.7239000851546974e-05, "loss": 0.0040501363575458525, "step": 150710 }, { "epoch": 42.78172012489355, "grad_norm": 1.2675914764404297, "learning_rate": 5.723616236162362e-05, "loss": 0.0009574921801686287, "step": 150720 }, { "epoch": 42.784558614816916, "grad_norm": 0.08177550882101059, "learning_rate": 5.7233323871700264e-05, "loss": 0.0078052222728729245, "step": 150730 }, { "epoch": 42.78739710474028, "grad_norm": 0.12202019989490509, "learning_rate": 5.72304853817769e-05, "loss": 0.0019467292353510857, "step": 150740 }, { "epoch": 42.79023559466364, "grad_norm": 0.7652762532234192, "learning_rate": 5.722764689185354e-05, "loss": 0.015189316868782044, "step": 150750 }, { "epoch": 42.793074084587, "grad_norm": 1.027755856513977, "learning_rate": 5.7224808401930174e-05, "loss": 0.0024920105934143066, "step": 150760 }, { "epoch": 42.79591257451036, "grad_norm": 0.9882604479789734, "learning_rate": 5.722196991200681e-05, "loss": 0.012038751691579818, "step": 150770 }, { "epoch": 42.79875106443372, "grad_norm": 10.733354568481445, "learning_rate": 5.721913142208345e-05, "loss": 0.024357083439826965, "step": 150780 }, { "epoch": 42.80158955435708, "grad_norm": 0.34257903695106506, "learning_rate": 5.72162929321601e-05, "loss": 0.012957312166690826, "step": 150790 }, { "epoch": 42.80442804428044, "grad_norm": 0.46132591366767883, "learning_rate": 5.721345444223673e-05, "loss": 0.006321241706609726, "step": 150800 }, { "epoch": 42.807266534203805, "grad_norm": 0.35794907808303833, "learning_rate": 5.7210615952313374e-05, "loss": 0.0012566056102514267, "step": 150810 }, { "epoch": 42.81010502412717, "grad_norm": 0.31972241401672363, "learning_rate": 5.720777746239001e-05, "loss": 0.0013749055564403535, "step": 150820 }, { "epoch": 42.812943514050524, "grad_norm": 0.7825968265533447, "learning_rate": 5.720493897246665e-05, "loss": 0.0016559494659304618, "step": 150830 }, { "epoch": 42.81578200397389, "grad_norm": 0.058416660875082016, "learning_rate": 5.7202100482543285e-05, "loss": 0.009011832624673843, "step": 150840 }, { "epoch": 42.81862049389725, "grad_norm": 10.921631813049316, "learning_rate": 5.7199261992619926e-05, "loss": 0.002360529825091362, "step": 150850 }, { "epoch": 42.821458983820605, "grad_norm": 0.14741648733615875, "learning_rate": 5.7196423502696575e-05, "loss": 0.0036463100463151933, "step": 150860 }, { "epoch": 42.82429747374397, "grad_norm": 11.011202812194824, "learning_rate": 5.719358501277321e-05, "loss": 0.003688524663448334, "step": 150870 }, { "epoch": 42.82713596366733, "grad_norm": 11.639580726623535, "learning_rate": 5.719074652284985e-05, "loss": 0.0025018757209181786, "step": 150880 }, { "epoch": 42.82997445359069, "grad_norm": 1.2539687156677246, "learning_rate": 5.7187908032926485e-05, "loss": 0.0008635060861706733, "step": 150890 }, { "epoch": 42.83281294351405, "grad_norm": 0.31407153606414795, "learning_rate": 5.718506954300312e-05, "loss": 0.004352733492851257, "step": 150900 }, { "epoch": 42.83565143343741, "grad_norm": 0.06852952390909195, "learning_rate": 5.718223105307976e-05, "loss": 0.0017024286091327668, "step": 150910 }, { "epoch": 42.838489923360775, "grad_norm": 12.521977424621582, "learning_rate": 5.717939256315641e-05, "loss": 0.0035060569643974304, "step": 150920 }, { "epoch": 42.84132841328413, "grad_norm": 0.6829904913902283, "learning_rate": 5.7176554073233044e-05, "loss": 0.0024214066565036775, "step": 150930 }, { "epoch": 42.844166903207494, "grad_norm": 0.10814104229211807, "learning_rate": 5.7173715583309685e-05, "loss": 0.00471544899046421, "step": 150940 }, { "epoch": 42.84700539313086, "grad_norm": 2.7155392169952393, "learning_rate": 5.717087709338632e-05, "loss": 0.007820738852024079, "step": 150950 }, { "epoch": 42.84984388305421, "grad_norm": 1.3649444580078125, "learning_rate": 5.716803860346296e-05, "loss": 0.0033947251737117766, "step": 150960 }, { "epoch": 42.852682372977576, "grad_norm": 0.07427273690700531, "learning_rate": 5.7165200113539596e-05, "loss": 0.002572057209908962, "step": 150970 }, { "epoch": 42.85552086290094, "grad_norm": 1.8192777633666992, "learning_rate": 5.716236162361624e-05, "loss": 0.006395651400089264, "step": 150980 }, { "epoch": 42.858359352824294, "grad_norm": 0.9430938363075256, "learning_rate": 5.7159523133692885e-05, "loss": 0.003343820944428444, "step": 150990 }, { "epoch": 42.86119784274766, "grad_norm": 1.028078556060791, "learning_rate": 5.715668464376952e-05, "loss": 0.001557229645550251, "step": 151000 }, { "epoch": 42.86119784274766, "eval_accuracy": 0.9722133909836587, "eval_loss": 0.10417237132787704, "eval_runtime": 31.6951, "eval_samples_per_second": 496.197, "eval_steps_per_second": 7.761, "step": 151000 }, { "epoch": 42.86403633267102, "grad_norm": 0.6586612462997437, "learning_rate": 5.7153846153846154e-05, "loss": 0.0047128133475780485, "step": 151010 }, { "epoch": 42.86687482259438, "grad_norm": 4.868896961212158, "learning_rate": 5.7151007663922796e-05, "loss": 0.0025026831775903703, "step": 151020 }, { "epoch": 42.86971331251774, "grad_norm": 3.928346872329712, "learning_rate": 5.714816917399943e-05, "loss": 0.0031259231269359588, "step": 151030 }, { "epoch": 42.8725518024411, "grad_norm": 0.04308502748608589, "learning_rate": 5.714533068407607e-05, "loss": 0.0005053285509347916, "step": 151040 }, { "epoch": 42.875390292364465, "grad_norm": 0.033393073827028275, "learning_rate": 5.7142492194152706e-05, "loss": 0.004916827380657196, "step": 151050 }, { "epoch": 42.87822878228782, "grad_norm": 0.1288062483072281, "learning_rate": 5.7139653704229355e-05, "loss": 0.000999310053884983, "step": 151060 }, { "epoch": 42.88106727221118, "grad_norm": 0.020401980727910995, "learning_rate": 5.7136815214305996e-05, "loss": 0.00369303897023201, "step": 151070 }, { "epoch": 42.883905762134546, "grad_norm": 0.49744778871536255, "learning_rate": 5.713397672438263e-05, "loss": 0.005060420930385589, "step": 151080 }, { "epoch": 42.8867442520579, "grad_norm": 0.28286388516426086, "learning_rate": 5.713113823445927e-05, "loss": 0.002316769398748875, "step": 151090 }, { "epoch": 42.889582741981265, "grad_norm": 0.4040142893791199, "learning_rate": 5.7128299744535907e-05, "loss": 0.0017907632514834403, "step": 151100 }, { "epoch": 42.89242123190463, "grad_norm": 8.743243217468262, "learning_rate": 5.712546125461254e-05, "loss": 0.005298207327723503, "step": 151110 }, { "epoch": 42.89525972182799, "grad_norm": 0.48302075266838074, "learning_rate": 5.7122622764689196e-05, "loss": 0.003266366571187973, "step": 151120 }, { "epoch": 42.898098211751346, "grad_norm": 0.40087440609931946, "learning_rate": 5.711978427476583e-05, "loss": 0.00104418583214283, "step": 151130 }, { "epoch": 42.90093670167471, "grad_norm": 0.7117441892623901, "learning_rate": 5.7116945784842465e-05, "loss": 0.00856827050447464, "step": 151140 }, { "epoch": 42.90377519159807, "grad_norm": 0.9934229850769043, "learning_rate": 5.7114107294919107e-05, "loss": 0.0017387747764587402, "step": 151150 }, { "epoch": 42.90661368152143, "grad_norm": 0.20940840244293213, "learning_rate": 5.711126880499574e-05, "loss": 0.004924660176038742, "step": 151160 }, { "epoch": 42.90945217144479, "grad_norm": 2.7715225219726562, "learning_rate": 5.710843031507238e-05, "loss": 0.0010767662897706032, "step": 151170 }, { "epoch": 42.912290661368154, "grad_norm": 0.8113084435462952, "learning_rate": 5.710559182514902e-05, "loss": 0.0011831255629658698, "step": 151180 }, { "epoch": 42.91512915129151, "grad_norm": 0.140447735786438, "learning_rate": 5.7102753335225665e-05, "loss": 0.005987203493714332, "step": 151190 }, { "epoch": 42.91796764121487, "grad_norm": 0.8365229964256287, "learning_rate": 5.709991484530231e-05, "loss": 0.0024534158408641817, "step": 151200 }, { "epoch": 42.920806131138235, "grad_norm": 0.20525787770748138, "learning_rate": 5.709707635537894e-05, "loss": 0.0019706249237060546, "step": 151210 }, { "epoch": 42.9236446210616, "grad_norm": 0.922799289226532, "learning_rate": 5.709423786545558e-05, "loss": 0.0012364571914076806, "step": 151220 }, { "epoch": 42.926483110984954, "grad_norm": 0.5661875605583191, "learning_rate": 5.709139937553222e-05, "loss": 0.00219468642026186, "step": 151230 }, { "epoch": 42.92932160090832, "grad_norm": 0.14702697098255157, "learning_rate": 5.708856088560885e-05, "loss": 0.00042118672281503676, "step": 151240 }, { "epoch": 42.93216009083168, "grad_norm": 4.3587846755981445, "learning_rate": 5.708572239568549e-05, "loss": 0.007931852340698242, "step": 151250 }, { "epoch": 42.934998580755035, "grad_norm": 0.2341473400592804, "learning_rate": 5.708288390576214e-05, "loss": 0.012785941362380981, "step": 151260 }, { "epoch": 42.9378370706784, "grad_norm": 0.7541271448135376, "learning_rate": 5.7080045415838776e-05, "loss": 0.003152751922607422, "step": 151270 }, { "epoch": 42.94067556060176, "grad_norm": 0.042412642389535904, "learning_rate": 5.707720692591542e-05, "loss": 0.0014407865703105927, "step": 151280 }, { "epoch": 42.943514050525124, "grad_norm": 1.1052659749984741, "learning_rate": 5.707436843599205e-05, "loss": 0.006433963775634766, "step": 151290 }, { "epoch": 42.94635254044848, "grad_norm": 0.497271329164505, "learning_rate": 5.707152994606869e-05, "loss": 0.0021620284765958785, "step": 151300 }, { "epoch": 42.94919103037184, "grad_norm": 1.0877059698104858, "learning_rate": 5.706869145614533e-05, "loss": 0.0011028502136468886, "step": 151310 }, { "epoch": 42.952029520295206, "grad_norm": 0.01732019893825054, "learning_rate": 5.7065852966221976e-05, "loss": 0.0009060354903340339, "step": 151320 }, { "epoch": 42.95486801021856, "grad_norm": 0.38991209864616394, "learning_rate": 5.706301447629862e-05, "loss": 0.0012246167287230491, "step": 151330 }, { "epoch": 42.957706500141924, "grad_norm": 1.1046416759490967, "learning_rate": 5.706017598637525e-05, "loss": 0.001381961815059185, "step": 151340 }, { "epoch": 42.96054499006529, "grad_norm": 1.6200536489486694, "learning_rate": 5.7057337496451893e-05, "loss": 0.0009480997920036316, "step": 151350 }, { "epoch": 42.96338347998864, "grad_norm": 0.43610674142837524, "learning_rate": 5.705449900652853e-05, "loss": 0.0005201894789934158, "step": 151360 }, { "epoch": 42.966221969912006, "grad_norm": 2.9958713054656982, "learning_rate": 5.705166051660516e-05, "loss": 0.004076436161994934, "step": 151370 }, { "epoch": 42.96906045983537, "grad_norm": 0.07383344322443008, "learning_rate": 5.7048822026681804e-05, "loss": 0.004621308296918869, "step": 151380 }, { "epoch": 42.97189894975873, "grad_norm": 0.1340925693511963, "learning_rate": 5.704598353675845e-05, "loss": 0.011564357578754425, "step": 151390 }, { "epoch": 42.97473743968209, "grad_norm": 1.1192890405654907, "learning_rate": 5.704314504683509e-05, "loss": 0.028056687116622923, "step": 151400 }, { "epoch": 42.97757592960545, "grad_norm": 0.548682451248169, "learning_rate": 5.704030655691173e-05, "loss": 0.005934283137321472, "step": 151410 }, { "epoch": 42.98041441952881, "grad_norm": 0.28245776891708374, "learning_rate": 5.703746806698836e-05, "loss": 0.005049395561218262, "step": 151420 }, { "epoch": 42.98325290945217, "grad_norm": 0.7276083827018738, "learning_rate": 5.7034629577065004e-05, "loss": 0.004849177971482277, "step": 151430 }, { "epoch": 42.98609139937553, "grad_norm": 0.018039369955658913, "learning_rate": 5.703179108714164e-05, "loss": 0.0021641574800014495, "step": 151440 }, { "epoch": 42.988929889298895, "grad_norm": 0.6865327954292297, "learning_rate": 5.702895259721829e-05, "loss": 0.004317700490355491, "step": 151450 }, { "epoch": 42.99176837922225, "grad_norm": 0.07889662683010101, "learning_rate": 5.702611410729493e-05, "loss": 0.0008823204785585404, "step": 151460 }, { "epoch": 42.99460686914561, "grad_norm": 0.14053185284137726, "learning_rate": 5.702327561737156e-05, "loss": 0.0097741961479187, "step": 151470 }, { "epoch": 42.997445359068976, "grad_norm": 0.3924703299999237, "learning_rate": 5.70204371274482e-05, "loss": 0.016547949612140657, "step": 151480 }, { "epoch": 43.00028384899234, "grad_norm": 0.03356565162539482, "learning_rate": 5.701788248651717e-05, "loss": 0.005016471818089485, "step": 151490 }, { "epoch": 43.003122338915695, "grad_norm": 0.2132159173488617, "learning_rate": 5.701504399659382e-05, "loss": 0.00340183712542057, "step": 151500 }, { "epoch": 43.003122338915695, "eval_accuracy": 0.9760920709607681, "eval_loss": 0.08960431069135666, "eval_runtime": 31.7302, "eval_samples_per_second": 495.648, "eval_steps_per_second": 7.753, "step": 151500 }, { "epoch": 43.00596082883906, "grad_norm": 0.07106353342533112, "learning_rate": 5.701220550667046e-05, "loss": 0.00279498677700758, "step": 151510 }, { "epoch": 43.00879931876242, "grad_norm": 0.027225233614444733, "learning_rate": 5.7009367016747095e-05, "loss": 0.0008680166676640511, "step": 151520 }, { "epoch": 43.01163780868578, "grad_norm": 0.24606360495090485, "learning_rate": 5.700652852682373e-05, "loss": 0.0014991562813520432, "step": 151530 }, { "epoch": 43.01447629860914, "grad_norm": 0.9538046717643738, "learning_rate": 5.700369003690037e-05, "loss": 0.0037996582686901093, "step": 151540 }, { "epoch": 43.0173147885325, "grad_norm": 7.158541202545166, "learning_rate": 5.7000851546977005e-05, "loss": 0.004549169540405273, "step": 151550 }, { "epoch": 43.02015327845586, "grad_norm": 0.0938863456249237, "learning_rate": 5.699801305705365e-05, "loss": 0.0009634293615818024, "step": 151560 }, { "epoch": 43.02299176837922, "grad_norm": 0.6828452944755554, "learning_rate": 5.6995174567130295e-05, "loss": 0.0008728491142392158, "step": 151570 }, { "epoch": 43.025830258302584, "grad_norm": 0.28954392671585083, "learning_rate": 5.699233607720693e-05, "loss": 0.0006391836330294609, "step": 151580 }, { "epoch": 43.02866874822595, "grad_norm": 7.125875949859619, "learning_rate": 5.698949758728357e-05, "loss": 0.0023804975673556326, "step": 151590 }, { "epoch": 43.0315072381493, "grad_norm": 0.01594804972410202, "learning_rate": 5.6986659097360205e-05, "loss": 0.00042392611503601075, "step": 151600 }, { "epoch": 43.034345728072665, "grad_norm": 0.08081094175577164, "learning_rate": 5.698382060743685e-05, "loss": 0.00041200965642929077, "step": 151610 }, { "epoch": 43.03718421799603, "grad_norm": 4.560952663421631, "learning_rate": 5.698098211751348e-05, "loss": 0.00442599281668663, "step": 151620 }, { "epoch": 43.040022707919384, "grad_norm": 0.04551665857434273, "learning_rate": 5.6978143627590116e-05, "loss": 0.0030796565115451813, "step": 151630 }, { "epoch": 43.04286119784275, "grad_norm": 0.22345243394374847, "learning_rate": 5.697530513766677e-05, "loss": 0.000508817471563816, "step": 151640 }, { "epoch": 43.04569968776611, "grad_norm": 0.09498488903045654, "learning_rate": 5.6972466647743405e-05, "loss": 0.0006739828735589981, "step": 151650 }, { "epoch": 43.04853817768947, "grad_norm": 0.01621210016310215, "learning_rate": 5.696962815782004e-05, "loss": 0.0006063472479581833, "step": 151660 }, { "epoch": 43.05137666761283, "grad_norm": 0.10096614062786102, "learning_rate": 5.696678966789668e-05, "loss": 0.003948544710874557, "step": 151670 }, { "epoch": 43.05421515753619, "grad_norm": 0.7879394292831421, "learning_rate": 5.6963951177973316e-05, "loss": 0.008247603476047517, "step": 151680 }, { "epoch": 43.057053647459554, "grad_norm": 0.710813581943512, "learning_rate": 5.696111268804996e-05, "loss": 0.00279113557189703, "step": 151690 }, { "epoch": 43.05989213738291, "grad_norm": 3.666478395462036, "learning_rate": 5.6958274198126606e-05, "loss": 0.0014192681759595872, "step": 151700 }, { "epoch": 43.06273062730627, "grad_norm": 0.4814639985561371, "learning_rate": 5.695543570820324e-05, "loss": 0.00549580454826355, "step": 151710 }, { "epoch": 43.065569117229636, "grad_norm": 0.05331253260374069, "learning_rate": 5.695259721827988e-05, "loss": 0.006542760878801346, "step": 151720 }, { "epoch": 43.06840760715299, "grad_norm": 0.4315067231655121, "learning_rate": 5.6949758728356516e-05, "loss": 0.0015864398330450058, "step": 151730 }, { "epoch": 43.071246097076354, "grad_norm": 0.2586740255355835, "learning_rate": 5.694692023843316e-05, "loss": 0.0007991410791873931, "step": 151740 }, { "epoch": 43.07408458699972, "grad_norm": 0.6832228899002075, "learning_rate": 5.694408174850979e-05, "loss": 0.006490696966648102, "step": 151750 }, { "epoch": 43.07692307692308, "grad_norm": 0.3536789119243622, "learning_rate": 5.694124325858643e-05, "loss": 0.0008261259645223618, "step": 151760 }, { "epoch": 43.079761566846436, "grad_norm": 12.853556632995605, "learning_rate": 5.693840476866308e-05, "loss": 0.0036619093269109728, "step": 151770 }, { "epoch": 43.0826000567698, "grad_norm": 0.08694365620613098, "learning_rate": 5.6935566278739716e-05, "loss": 0.0014129746705293655, "step": 151780 }, { "epoch": 43.08543854669316, "grad_norm": 13.35361099243164, "learning_rate": 5.693272778881635e-05, "loss": 0.006362512707710266, "step": 151790 }, { "epoch": 43.08827703661652, "grad_norm": 0.07861967384815216, "learning_rate": 5.692988929889299e-05, "loss": 0.005175063014030456, "step": 151800 }, { "epoch": 43.09111552653988, "grad_norm": 0.13682618737220764, "learning_rate": 5.692705080896963e-05, "loss": 0.001588154025375843, "step": 151810 }, { "epoch": 43.09395401646324, "grad_norm": 0.07961110770702362, "learning_rate": 5.692421231904627e-05, "loss": 0.008250161260366439, "step": 151820 }, { "epoch": 43.0967925063866, "grad_norm": 0.10837347060441971, "learning_rate": 5.6921373829122916e-05, "loss": 0.0005296060815453529, "step": 151830 }, { "epoch": 43.09963099630996, "grad_norm": 0.0552312433719635, "learning_rate": 5.691853533919955e-05, "loss": 0.0008237535133957863, "step": 151840 }, { "epoch": 43.102469486233325, "grad_norm": 0.12459081411361694, "learning_rate": 5.691569684927619e-05, "loss": 0.0017899278551340102, "step": 151850 }, { "epoch": 43.10530797615669, "grad_norm": 0.14180979132652283, "learning_rate": 5.691285835935283e-05, "loss": 0.0005112074315547944, "step": 151860 }, { "epoch": 43.10814646608004, "grad_norm": 0.10183339565992355, "learning_rate": 5.691001986942947e-05, "loss": 0.00047392938286066054, "step": 151870 }, { "epoch": 43.110984956003406, "grad_norm": 0.03730219975113869, "learning_rate": 5.69071813795061e-05, "loss": 0.0017279006540775299, "step": 151880 }, { "epoch": 43.11382344592677, "grad_norm": 0.031032048165798187, "learning_rate": 5.690434288958274e-05, "loss": 0.00019947458058595658, "step": 151890 }, { "epoch": 43.116661935850125, "grad_norm": 0.07724631577730179, "learning_rate": 5.6901504399659386e-05, "loss": 0.0005313152447342873, "step": 151900 }, { "epoch": 43.11950042577349, "grad_norm": 0.5991203188896179, "learning_rate": 5.689866590973603e-05, "loss": 0.0011872129514813423, "step": 151910 }, { "epoch": 43.12233891569685, "grad_norm": 0.027738042175769806, "learning_rate": 5.689582741981266e-05, "loss": 0.005116463452577591, "step": 151920 }, { "epoch": 43.12517740562021, "grad_norm": 0.03408701345324516, "learning_rate": 5.68929889298893e-05, "loss": 0.0005747660994529724, "step": 151930 }, { "epoch": 43.12801589554357, "grad_norm": 4.866060733795166, "learning_rate": 5.689015043996594e-05, "loss": 0.0018263310194015503, "step": 151940 }, { "epoch": 43.13085438546693, "grad_norm": 0.172432541847229, "learning_rate": 5.688731195004258e-05, "loss": 0.0006231596693396568, "step": 151950 }, { "epoch": 43.133692875390295, "grad_norm": 0.8032639622688293, "learning_rate": 5.6884473460119214e-05, "loss": 0.001492762193083763, "step": 151960 }, { "epoch": 43.13653136531365, "grad_norm": 0.5606970191001892, "learning_rate": 5.688163497019586e-05, "loss": 0.0009769955649971962, "step": 151970 }, { "epoch": 43.139369855237014, "grad_norm": 0.03375739976763725, "learning_rate": 5.68787964802725e-05, "loss": 0.0031879890710115434, "step": 151980 }, { "epoch": 43.14220834516038, "grad_norm": 0.23029004037380219, "learning_rate": 5.687595799034914e-05, "loss": 0.004812368005514145, "step": 151990 }, { "epoch": 43.14504683508373, "grad_norm": 0.1062697172164917, "learning_rate": 5.687311950042577e-05, "loss": 0.0015996862202882767, "step": 152000 }, { "epoch": 43.14504683508373, "eval_accuracy": 0.972849240160234, "eval_loss": 0.09538374096155167, "eval_runtime": 32.0064, "eval_samples_per_second": 491.371, "eval_steps_per_second": 7.686, "step": 152000 }, { "epoch": 43.147885325007096, "grad_norm": 0.08808532357215881, "learning_rate": 5.6870281010502414e-05, "loss": 0.0010589540004730224, "step": 152010 }, { "epoch": 43.15072381493046, "grad_norm": 0.03220302239060402, "learning_rate": 5.686744252057905e-05, "loss": 0.0013726312667131425, "step": 152020 }, { "epoch": 43.15356230485382, "grad_norm": 0.15309451520442963, "learning_rate": 5.6864604030655696e-05, "loss": 0.0019029887393116952, "step": 152030 }, { "epoch": 43.15640079477718, "grad_norm": 0.40568867325782776, "learning_rate": 5.686176554073234e-05, "loss": 0.002128172852098942, "step": 152040 }, { "epoch": 43.15923928470054, "grad_norm": 0.2462402731180191, "learning_rate": 5.685892705080897e-05, "loss": 0.003699745610356331, "step": 152050 }, { "epoch": 43.1620777746239, "grad_norm": 0.0401785671710968, "learning_rate": 5.6856088560885614e-05, "loss": 0.0003518974408507347, "step": 152060 }, { "epoch": 43.16491626454726, "grad_norm": 1.8910232782363892, "learning_rate": 5.685325007096225e-05, "loss": 0.0012026021257042884, "step": 152070 }, { "epoch": 43.16775475447062, "grad_norm": 0.08961507678031921, "learning_rate": 5.685041158103889e-05, "loss": 0.0006802882999181747, "step": 152080 }, { "epoch": 43.170593244393984, "grad_norm": 2.1218137741088867, "learning_rate": 5.6847573091115524e-05, "loss": 0.0014594115316867828, "step": 152090 }, { "epoch": 43.17343173431734, "grad_norm": 0.34050852060317993, "learning_rate": 5.684473460119217e-05, "loss": 0.0009594541043043137, "step": 152100 }, { "epoch": 43.1762702242407, "grad_norm": 0.06501282751560211, "learning_rate": 5.6841896111268814e-05, "loss": 0.002069770358502865, "step": 152110 }, { "epoch": 43.179108714164066, "grad_norm": 5.9305949211120605, "learning_rate": 5.683905762134545e-05, "loss": 0.002483154460787773, "step": 152120 }, { "epoch": 43.18194720408743, "grad_norm": 1.2521289587020874, "learning_rate": 5.683621913142208e-05, "loss": 0.018283867835998537, "step": 152130 }, { "epoch": 43.184785694010785, "grad_norm": 1.1412521600723267, "learning_rate": 5.6833380641498724e-05, "loss": 0.0013024747371673585, "step": 152140 }, { "epoch": 43.18762418393415, "grad_norm": 4.002958297729492, "learning_rate": 5.683054215157536e-05, "loss": 0.004960101097822189, "step": 152150 }, { "epoch": 43.19046267385751, "grad_norm": 6.770664215087891, "learning_rate": 5.6827703661652e-05, "loss": 0.003757569193840027, "step": 152160 }, { "epoch": 43.193301163780866, "grad_norm": 0.5399791598320007, "learning_rate": 5.682486517172865e-05, "loss": 0.002188009396195412, "step": 152170 }, { "epoch": 43.19613965370423, "grad_norm": 1.0973259210586548, "learning_rate": 5.682202668180528e-05, "loss": 0.0016393065452575684, "step": 152180 }, { "epoch": 43.19897814362759, "grad_norm": 0.5125619769096375, "learning_rate": 5.6819188191881925e-05, "loss": 0.0006142444908618927, "step": 152190 }, { "epoch": 43.20181663355095, "grad_norm": 0.2593333125114441, "learning_rate": 5.681634970195856e-05, "loss": 0.011070017516613007, "step": 152200 }, { "epoch": 43.20465512347431, "grad_norm": 0.46329739689826965, "learning_rate": 5.68135112120352e-05, "loss": 0.0012663915753364564, "step": 152210 }, { "epoch": 43.20749361339767, "grad_norm": 0.21926258504390717, "learning_rate": 5.6810672722111835e-05, "loss": 0.003219062089920044, "step": 152220 }, { "epoch": 43.210332103321036, "grad_norm": 0.053675517439842224, "learning_rate": 5.680783423218848e-05, "loss": 0.0050923660397529606, "step": 152230 }, { "epoch": 43.21317059324439, "grad_norm": 0.04515977203845978, "learning_rate": 5.6804995742265125e-05, "loss": 0.011871426552534103, "step": 152240 }, { "epoch": 43.216009083167755, "grad_norm": 0.016196435317397118, "learning_rate": 5.680215725234176e-05, "loss": 0.001237376034259796, "step": 152250 }, { "epoch": 43.21884757309112, "grad_norm": 0.21483221650123596, "learning_rate": 5.6799318762418394e-05, "loss": 0.003402319550514221, "step": 152260 }, { "epoch": 43.221686063014474, "grad_norm": 2.240077495574951, "learning_rate": 5.6796480272495035e-05, "loss": 0.0026287468150258063, "step": 152270 }, { "epoch": 43.22452455293784, "grad_norm": 0.21066312491893768, "learning_rate": 5.679364178257167e-05, "loss": 0.0008545840159058571, "step": 152280 }, { "epoch": 43.2273630428612, "grad_norm": 0.1933826506137848, "learning_rate": 5.679080329264831e-05, "loss": 0.0016031524166464806, "step": 152290 }, { "epoch": 43.230201532784555, "grad_norm": 0.7001194953918457, "learning_rate": 5.678796480272496e-05, "loss": 0.010772044211626053, "step": 152300 }, { "epoch": 43.23304002270792, "grad_norm": 0.11431702226400375, "learning_rate": 5.6785126312801594e-05, "loss": 0.014571861922740936, "step": 152310 }, { "epoch": 43.23587851263128, "grad_norm": 0.2644888460636139, "learning_rate": 5.6782287822878235e-05, "loss": 0.0034686725586652754, "step": 152320 }, { "epoch": 43.238717002554644, "grad_norm": 12.104090690612793, "learning_rate": 5.677944933295487e-05, "loss": 0.0033518128097057343, "step": 152330 }, { "epoch": 43.241555492478, "grad_norm": 0.16671255230903625, "learning_rate": 5.677661084303151e-05, "loss": 0.001333530992269516, "step": 152340 }, { "epoch": 43.24439398240136, "grad_norm": 0.08636177331209183, "learning_rate": 5.6773772353108146e-05, "loss": 0.004307601600885391, "step": 152350 }, { "epoch": 43.247232472324725, "grad_norm": 0.13912631571292877, "learning_rate": 5.677093386318478e-05, "loss": 0.0027288509532809257, "step": 152360 }, { "epoch": 43.25007096224808, "grad_norm": 14.268440246582031, "learning_rate": 5.676809537326143e-05, "loss": 0.00548018142580986, "step": 152370 }, { "epoch": 43.252909452171444, "grad_norm": 0.11489082127809525, "learning_rate": 5.676525688333807e-05, "loss": 0.0015092611312866211, "step": 152380 }, { "epoch": 43.25574794209481, "grad_norm": 0.13625004887580872, "learning_rate": 5.6762418393414705e-05, "loss": 0.004902688413858413, "step": 152390 }, { "epoch": 43.25858643201816, "grad_norm": 0.4013904631137848, "learning_rate": 5.6759579903491346e-05, "loss": 0.0045593492686748505, "step": 152400 }, { "epoch": 43.261424921941526, "grad_norm": 0.13857543468475342, "learning_rate": 5.675674141356798e-05, "loss": 0.0010303063318133354, "step": 152410 }, { "epoch": 43.26426341186489, "grad_norm": 0.029036447405815125, "learning_rate": 5.675390292364462e-05, "loss": 0.0005287723615765572, "step": 152420 }, { "epoch": 43.26710190178825, "grad_norm": 0.019244540482759476, "learning_rate": 5.675106443372127e-05, "loss": 0.0010225621983408928, "step": 152430 }, { "epoch": 43.26994039171161, "grad_norm": 0.07808752357959747, "learning_rate": 5.6748225943797905e-05, "loss": 0.0012760989367961884, "step": 152440 }, { "epoch": 43.27277888163497, "grad_norm": 1.2281979322433472, "learning_rate": 5.6745387453874546e-05, "loss": 0.005228409916162491, "step": 152450 }, { "epoch": 43.27561737155833, "grad_norm": 0.09017087519168854, "learning_rate": 5.674254896395118e-05, "loss": 0.006734376400709152, "step": 152460 }, { "epoch": 43.27845586148169, "grad_norm": 0.07635440677404404, "learning_rate": 5.6739710474027815e-05, "loss": 0.003857327625155449, "step": 152470 }, { "epoch": 43.28129435140505, "grad_norm": 0.16468802094459534, "learning_rate": 5.673687198410446e-05, "loss": 0.0010821793228387832, "step": 152480 }, { "epoch": 43.284132841328415, "grad_norm": 2.002392053604126, "learning_rate": 5.673403349418109e-05, "loss": 0.0011932019144296647, "step": 152490 }, { "epoch": 43.28697133125178, "grad_norm": 1.8941805362701416, "learning_rate": 5.673119500425774e-05, "loss": 0.005160294473171234, "step": 152500 }, { "epoch": 43.28697133125178, "eval_accuracy": 0.9726584854072614, "eval_loss": 0.10405123978853226, "eval_runtime": 31.9196, "eval_samples_per_second": 492.706, "eval_steps_per_second": 7.707, "step": 152500 }, { "epoch": 43.28980982117513, "grad_norm": 14.314725875854492, "learning_rate": 5.672835651433438e-05, "loss": 0.010491292178630828, "step": 152510 }, { "epoch": 43.292648311098496, "grad_norm": 0.1541241705417633, "learning_rate": 5.6725518024411015e-05, "loss": 0.01071455106139183, "step": 152520 }, { "epoch": 43.29548680102186, "grad_norm": 0.5924636721611023, "learning_rate": 5.672267953448766e-05, "loss": 0.011588039994239806, "step": 152530 }, { "epoch": 43.298325290945215, "grad_norm": 0.11296749860048294, "learning_rate": 5.671984104456429e-05, "loss": 0.0007361149415373802, "step": 152540 }, { "epoch": 43.30116378086858, "grad_norm": 0.07744500041007996, "learning_rate": 5.671700255464093e-05, "loss": 0.0013620154932141304, "step": 152550 }, { "epoch": 43.30400227079194, "grad_norm": 0.22644060850143433, "learning_rate": 5.671416406471757e-05, "loss": 0.002292870730161667, "step": 152560 }, { "epoch": 43.306840760715296, "grad_norm": 7.088444232940674, "learning_rate": 5.6711325574794215e-05, "loss": 0.004728332161903381, "step": 152570 }, { "epoch": 43.30967925063866, "grad_norm": 0.06695357710123062, "learning_rate": 5.670848708487086e-05, "loss": 0.0009051904082298279, "step": 152580 }, { "epoch": 43.31251774056202, "grad_norm": 19.764301300048828, "learning_rate": 5.670564859494749e-05, "loss": 0.012910376489162444, "step": 152590 }, { "epoch": 43.315356230485385, "grad_norm": 19.718048095703125, "learning_rate": 5.6702810105024126e-05, "loss": 0.010928825289011002, "step": 152600 }, { "epoch": 43.31819472040874, "grad_norm": 1.0078260898590088, "learning_rate": 5.669997161510077e-05, "loss": 0.007726716995239258, "step": 152610 }, { "epoch": 43.321033210332104, "grad_norm": 0.3209133744239807, "learning_rate": 5.66971331251774e-05, "loss": 0.015328006446361541, "step": 152620 }, { "epoch": 43.32387170025547, "grad_norm": 0.12341020256280899, "learning_rate": 5.669429463525405e-05, "loss": 0.0026687515899538996, "step": 152630 }, { "epoch": 43.32671019017882, "grad_norm": 1.8654800653457642, "learning_rate": 5.669145614533069e-05, "loss": 0.003351179510354996, "step": 152640 }, { "epoch": 43.329548680102185, "grad_norm": 0.21532008051872253, "learning_rate": 5.6688617655407326e-05, "loss": 0.0014159915968775748, "step": 152650 }, { "epoch": 43.33238717002555, "grad_norm": 0.03760688006877899, "learning_rate": 5.668577916548397e-05, "loss": 0.0011512348428368568, "step": 152660 }, { "epoch": 43.335225659948904, "grad_norm": 0.3844260573387146, "learning_rate": 5.66829406755606e-05, "loss": 0.0010544518008828163, "step": 152670 }, { "epoch": 43.33806414987227, "grad_norm": 0.2905004024505615, "learning_rate": 5.6680102185637243e-05, "loss": 0.003919313848018646, "step": 152680 }, { "epoch": 43.34090263979563, "grad_norm": 9.121471405029297, "learning_rate": 5.667726369571388e-05, "loss": 0.005170821771025657, "step": 152690 }, { "epoch": 43.34374112971899, "grad_norm": 6.7965240478515625, "learning_rate": 5.6674425205790526e-05, "loss": 0.002664828673005104, "step": 152700 }, { "epoch": 43.34657961964235, "grad_norm": 0.19793333113193512, "learning_rate": 5.667158671586717e-05, "loss": 0.0025952840223908424, "step": 152710 }, { "epoch": 43.34941810956571, "grad_norm": 0.07345113158226013, "learning_rate": 5.66687482259438e-05, "loss": 0.002797003835439682, "step": 152720 }, { "epoch": 43.352256599489074, "grad_norm": 1.87361741065979, "learning_rate": 5.666590973602044e-05, "loss": 0.002520333044230938, "step": 152730 }, { "epoch": 43.35509508941243, "grad_norm": 0.07221171259880066, "learning_rate": 5.666307124609708e-05, "loss": 0.0005156448110938072, "step": 152740 }, { "epoch": 43.35793357933579, "grad_norm": 0.06739336252212524, "learning_rate": 5.666023275617371e-05, "loss": 0.0004887999966740608, "step": 152750 }, { "epoch": 43.360772069259156, "grad_norm": 5.223864555358887, "learning_rate": 5.6657394266250354e-05, "loss": 0.002117645926773548, "step": 152760 }, { "epoch": 43.36361055918251, "grad_norm": 0.016317103058099747, "learning_rate": 5.6654555776327e-05, "loss": 0.0007661398500204087, "step": 152770 }, { "epoch": 43.366449049105874, "grad_norm": 13.086366653442383, "learning_rate": 5.665171728640364e-05, "loss": 0.004624612629413605, "step": 152780 }, { "epoch": 43.36928753902924, "grad_norm": 0.2688022255897522, "learning_rate": 5.664887879648028e-05, "loss": 0.004203634709119797, "step": 152790 }, { "epoch": 43.3721260289526, "grad_norm": 1.2943884134292603, "learning_rate": 5.664604030655691e-05, "loss": 0.0012246865779161453, "step": 152800 }, { "epoch": 43.374964518875956, "grad_norm": 0.2189081907272339, "learning_rate": 5.6643201816633554e-05, "loss": 0.001220511645078659, "step": 152810 }, { "epoch": 43.37780300879932, "grad_norm": 4.960872650146484, "learning_rate": 5.664036332671019e-05, "loss": 0.0021679319441318514, "step": 152820 }, { "epoch": 43.38064149872268, "grad_norm": 0.41459572315216064, "learning_rate": 5.663752483678684e-05, "loss": 0.001709306053817272, "step": 152830 }, { "epoch": 43.38347998864604, "grad_norm": 2.584622383117676, "learning_rate": 5.663468634686347e-05, "loss": 0.0014330632984638214, "step": 152840 }, { "epoch": 43.3863184785694, "grad_norm": 0.9256609082221985, "learning_rate": 5.663184785694011e-05, "loss": 0.0019030198454856873, "step": 152850 }, { "epoch": 43.38915696849276, "grad_norm": 0.9857291579246521, "learning_rate": 5.662900936701675e-05, "loss": 0.002566688321530819, "step": 152860 }, { "epoch": 43.391995458416126, "grad_norm": 0.2584203779697418, "learning_rate": 5.662617087709339e-05, "loss": 0.00041969213634729385, "step": 152870 }, { "epoch": 43.39483394833948, "grad_norm": 1.7201082706451416, "learning_rate": 5.6623332387170024e-05, "loss": 0.0028254957869648933, "step": 152880 }, { "epoch": 43.397672438262845, "grad_norm": 0.7246617674827576, "learning_rate": 5.6620493897246665e-05, "loss": 0.0013693271204829216, "step": 152890 }, { "epoch": 43.40051092818621, "grad_norm": 0.24480748176574707, "learning_rate": 5.661765540732331e-05, "loss": 0.002864978089928627, "step": 152900 }, { "epoch": 43.40334941810956, "grad_norm": 0.11099433898925781, "learning_rate": 5.661481691739995e-05, "loss": 0.0009727595373988151, "step": 152910 }, { "epoch": 43.406187908032926, "grad_norm": 0.009493552148342133, "learning_rate": 5.661197842747659e-05, "loss": 0.0004898803308606148, "step": 152920 }, { "epoch": 43.40902639795629, "grad_norm": 0.7696323394775391, "learning_rate": 5.6609139937553224e-05, "loss": 0.0018105385825037956, "step": 152930 }, { "epoch": 43.411864887879645, "grad_norm": 10.798267364501953, "learning_rate": 5.660630144762986e-05, "loss": 0.007064898312091827, "step": 152940 }, { "epoch": 43.41470337780301, "grad_norm": 1.795678973197937, "learning_rate": 5.66034629577065e-05, "loss": 0.007675459980964661, "step": 152950 }, { "epoch": 43.41754186772637, "grad_norm": 0.4045662581920624, "learning_rate": 5.6600624467783134e-05, "loss": 0.0015947656705975533, "step": 152960 }, { "epoch": 43.420380357649734, "grad_norm": 0.3066219091415405, "learning_rate": 5.659778597785978e-05, "loss": 0.002019953913986683, "step": 152970 }, { "epoch": 43.42321884757309, "grad_norm": 0.12634728848934174, "learning_rate": 5.6594947487936424e-05, "loss": 0.008224523067474366, "step": 152980 }, { "epoch": 43.42605733749645, "grad_norm": 1.460869312286377, "learning_rate": 5.659210899801306e-05, "loss": 0.0020893769338726997, "step": 152990 }, { "epoch": 43.428895827419815, "grad_norm": 3.644944667816162, "learning_rate": 5.65892705080897e-05, "loss": 0.004289470613002777, "step": 153000 }, { "epoch": 43.428895827419815, "eval_accuracy": 0.9748203726076174, "eval_loss": 0.09331263601779938, "eval_runtime": 32.6677, "eval_samples_per_second": 481.423, "eval_steps_per_second": 7.53, "step": 153000 }, { "epoch": 43.43173431734317, "grad_norm": 1.3801361322402954, "learning_rate": 5.6586432018166334e-05, "loss": 0.0017812473699450493, "step": 153010 }, { "epoch": 43.434572807266534, "grad_norm": 0.2670861482620239, "learning_rate": 5.6583593528242976e-05, "loss": 0.0013030862435698509, "step": 153020 }, { "epoch": 43.4374112971899, "grad_norm": 0.09452912956476212, "learning_rate": 5.6580755038319624e-05, "loss": 0.0018481139093637466, "step": 153030 }, { "epoch": 43.44024978711325, "grad_norm": 0.10628576576709747, "learning_rate": 5.657791654839626e-05, "loss": 0.0008358459919691086, "step": 153040 }, { "epoch": 43.443088277036615, "grad_norm": 2.2888851165771484, "learning_rate": 5.65750780584729e-05, "loss": 0.00723518431186676, "step": 153050 }, { "epoch": 43.44592676695998, "grad_norm": 2.4254162311553955, "learning_rate": 5.6572239568549534e-05, "loss": 0.0029633793979883193, "step": 153060 }, { "epoch": 43.44876525688334, "grad_norm": 2.268907070159912, "learning_rate": 5.656940107862617e-05, "loss": 0.0010126948356628418, "step": 153070 }, { "epoch": 43.4516037468067, "grad_norm": 0.8506951332092285, "learning_rate": 5.656656258870281e-05, "loss": 0.001731904223561287, "step": 153080 }, { "epoch": 43.45444223673006, "grad_norm": 0.07425431907176971, "learning_rate": 5.6563724098779445e-05, "loss": 0.006238695606589318, "step": 153090 }, { "epoch": 43.45728072665342, "grad_norm": 0.38330450654029846, "learning_rate": 5.656088560885609e-05, "loss": 0.00559648871421814, "step": 153100 }, { "epoch": 43.46011921657678, "grad_norm": 0.09684891253709793, "learning_rate": 5.6558047118932735e-05, "loss": 0.007665575295686722, "step": 153110 }, { "epoch": 43.46295770650014, "grad_norm": 0.097519151866436, "learning_rate": 5.655520862900937e-05, "loss": 0.0023943042382597924, "step": 153120 }, { "epoch": 43.465796196423504, "grad_norm": 0.19495947659015656, "learning_rate": 5.655237013908601e-05, "loss": 0.0033308058977127076, "step": 153130 }, { "epoch": 43.46863468634686, "grad_norm": 0.25330281257629395, "learning_rate": 5.6549531649162645e-05, "loss": 0.003457040339708328, "step": 153140 }, { "epoch": 43.47147317627022, "grad_norm": 0.231252521276474, "learning_rate": 5.6546693159239286e-05, "loss": 0.0021714739501476287, "step": 153150 }, { "epoch": 43.474311666193586, "grad_norm": 0.7858866453170776, "learning_rate": 5.6543854669315935e-05, "loss": 0.0011362943798303605, "step": 153160 }, { "epoch": 43.47715015611695, "grad_norm": 3.0450685024261475, "learning_rate": 5.654101617939257e-05, "loss": 0.0025546543300151827, "step": 153170 }, { "epoch": 43.479988646040304, "grad_norm": 0.08516174554824829, "learning_rate": 5.653817768946921e-05, "loss": 0.0009009996429085732, "step": 153180 }, { "epoch": 43.48282713596367, "grad_norm": 0.07210567593574524, "learning_rate": 5.6535339199545845e-05, "loss": 0.0018630864098668099, "step": 153190 }, { "epoch": 43.48566562588703, "grad_norm": 0.8571279048919678, "learning_rate": 5.653250070962248e-05, "loss": 0.002955697290599346, "step": 153200 }, { "epoch": 43.488504115810386, "grad_norm": 0.061951376497745514, "learning_rate": 5.652966221969912e-05, "loss": 0.004855797439813614, "step": 153210 }, { "epoch": 43.49134260573375, "grad_norm": 1.0942081212997437, "learning_rate": 5.6526823729775756e-05, "loss": 0.0029168806970119475, "step": 153220 }, { "epoch": 43.49418109565711, "grad_norm": 0.6955099105834961, "learning_rate": 5.6523985239852404e-05, "loss": 0.0016587171703577042, "step": 153230 }, { "epoch": 43.497019585580475, "grad_norm": 0.7128551602363586, "learning_rate": 5.6521146749929045e-05, "loss": 0.006736001372337342, "step": 153240 }, { "epoch": 43.49985807550383, "grad_norm": 10.458624839782715, "learning_rate": 5.651830826000568e-05, "loss": 0.009339608252048492, "step": 153250 }, { "epoch": 43.50269656542719, "grad_norm": 1.3465211391448975, "learning_rate": 5.651546977008232e-05, "loss": 0.002453538216650486, "step": 153260 }, { "epoch": 43.505535055350556, "grad_norm": 0.5278250575065613, "learning_rate": 5.6512631280158956e-05, "loss": 0.008037395775318146, "step": 153270 }, { "epoch": 43.50837354527391, "grad_norm": 0.08596598356962204, "learning_rate": 5.65097927902356e-05, "loss": 0.0030168814584612845, "step": 153280 }, { "epoch": 43.511212035197275, "grad_norm": 0.8708239793777466, "learning_rate": 5.650695430031223e-05, "loss": 0.004456581920385361, "step": 153290 }, { "epoch": 43.51405052512064, "grad_norm": 13.819242477416992, "learning_rate": 5.650411581038888e-05, "loss": 0.005208434164524078, "step": 153300 }, { "epoch": 43.51688901504399, "grad_norm": 0.02657460607588291, "learning_rate": 5.6501277320465515e-05, "loss": 0.0012954723089933396, "step": 153310 }, { "epoch": 43.519727504967356, "grad_norm": 0.7239187955856323, "learning_rate": 5.6498438830542156e-05, "loss": 0.00208011195063591, "step": 153320 }, { "epoch": 43.52256599489072, "grad_norm": 0.051288191229104996, "learning_rate": 5.649560034061879e-05, "loss": 0.0009571239352226257, "step": 153330 }, { "epoch": 43.52540448481408, "grad_norm": 0.3386920392513275, "learning_rate": 5.649276185069543e-05, "loss": 0.0017803581431508064, "step": 153340 }, { "epoch": 43.52824297473744, "grad_norm": 0.7638600468635559, "learning_rate": 5.6489923360772067e-05, "loss": 0.0008437344804406166, "step": 153350 }, { "epoch": 43.5310814646608, "grad_norm": 0.07631951570510864, "learning_rate": 5.6487084870848715e-05, "loss": 0.0015773527324199677, "step": 153360 }, { "epoch": 43.533919954584164, "grad_norm": 6.410444736480713, "learning_rate": 5.6484246380925356e-05, "loss": 0.006498938798904419, "step": 153370 }, { "epoch": 43.53675844450752, "grad_norm": 0.14903263747692108, "learning_rate": 5.648140789100199e-05, "loss": 0.004223138839006424, "step": 153380 }, { "epoch": 43.53959693443088, "grad_norm": 0.006421033293008804, "learning_rate": 5.647856940107863e-05, "loss": 0.0031513508409261705, "step": 153390 }, { "epoch": 43.542435424354245, "grad_norm": 0.07998734712600708, "learning_rate": 5.6475730911155267e-05, "loss": 0.007575667649507523, "step": 153400 }, { "epoch": 43.5452739142776, "grad_norm": 0.10729268193244934, "learning_rate": 5.64728924212319e-05, "loss": 0.0045166205614805225, "step": 153410 }, { "epoch": 43.548112404200964, "grad_norm": 6.214387893676758, "learning_rate": 5.647005393130854e-05, "loss": 0.0028106285259127616, "step": 153420 }, { "epoch": 43.55095089412433, "grad_norm": 0.10489952564239502, "learning_rate": 5.646721544138519e-05, "loss": 0.0031630709767341615, "step": 153430 }, { "epoch": 43.55378938404769, "grad_norm": 0.19084492325782776, "learning_rate": 5.6464376951461825e-05, "loss": 0.00226014107465744, "step": 153440 }, { "epoch": 43.556627873971046, "grad_norm": 1.1417911052703857, "learning_rate": 5.646153846153847e-05, "loss": 0.0030605683103203775, "step": 153450 }, { "epoch": 43.55946636389441, "grad_norm": 0.014101510867476463, "learning_rate": 5.64586999716151e-05, "loss": 0.0016963986679911613, "step": 153460 }, { "epoch": 43.56230485381777, "grad_norm": 0.8174677491188049, "learning_rate": 5.645586148169174e-05, "loss": 0.0024310821667313577, "step": 153470 }, { "epoch": 43.56514334374113, "grad_norm": 13.392679214477539, "learning_rate": 5.645302299176838e-05, "loss": 0.004158905148506165, "step": 153480 }, { "epoch": 43.56798183366449, "grad_norm": 1.612534999847412, "learning_rate": 5.645018450184502e-05, "loss": 0.0016244685277342797, "step": 153490 }, { "epoch": 43.57082032358785, "grad_norm": 0.1790313720703125, "learning_rate": 5.644734601192167e-05, "loss": 0.0010216221213340758, "step": 153500 }, { "epoch": 43.57082032358785, "eval_accuracy": 0.9717682965600559, "eval_loss": 0.09897135943174362, "eval_runtime": 32.1903, "eval_samples_per_second": 488.563, "eval_steps_per_second": 7.642, "step": 153500 }, { "epoch": 43.57365881351121, "grad_norm": 0.21555952727794647, "learning_rate": 5.64445075219983e-05, "loss": 0.003413374722003937, "step": 153510 }, { "epoch": 43.57649730343457, "grad_norm": 1.1163606643676758, "learning_rate": 5.644166903207494e-05, "loss": 0.0015559513121843338, "step": 153520 }, { "epoch": 43.579335793357934, "grad_norm": 1.9895554780960083, "learning_rate": 5.643883054215158e-05, "loss": 0.001264207437634468, "step": 153530 }, { "epoch": 43.5821742832813, "grad_norm": 0.2352142333984375, "learning_rate": 5.643599205222821e-05, "loss": 0.005587928369641304, "step": 153540 }, { "epoch": 43.58501277320465, "grad_norm": 0.08359673619270325, "learning_rate": 5.643315356230485e-05, "loss": 0.002614370733499527, "step": 153550 }, { "epoch": 43.587851263128016, "grad_norm": 0.04170574992895126, "learning_rate": 5.64303150723815e-05, "loss": 0.0072138309478759766, "step": 153560 }, { "epoch": 43.59068975305138, "grad_norm": 0.3711927533149719, "learning_rate": 5.6427476582458136e-05, "loss": 0.0020316669717431067, "step": 153570 }, { "epoch": 43.593528242974735, "grad_norm": 5.076942443847656, "learning_rate": 5.642463809253478e-05, "loss": 0.011276464164257049, "step": 153580 }, { "epoch": 43.5963667328981, "grad_norm": 1.1341822147369385, "learning_rate": 5.642179960261141e-05, "loss": 0.002077527344226837, "step": 153590 }, { "epoch": 43.59920522282146, "grad_norm": 4.92354679107666, "learning_rate": 5.6418961112688053e-05, "loss": 0.011387376487255097, "step": 153600 }, { "epoch": 43.602043712744816, "grad_norm": 16.01750373840332, "learning_rate": 5.641612262276469e-05, "loss": 0.013058266043663025, "step": 153610 }, { "epoch": 43.60488220266818, "grad_norm": 0.6884289383888245, "learning_rate": 5.641328413284133e-05, "loss": 0.0031808063387870787, "step": 153620 }, { "epoch": 43.60772069259154, "grad_norm": 0.2355767786502838, "learning_rate": 5.641044564291798e-05, "loss": 0.0008702097460627556, "step": 153630 }, { "epoch": 43.610559182514905, "grad_norm": 0.3145964741706848, "learning_rate": 5.640760715299461e-05, "loss": 0.00044127143919467924, "step": 153640 }, { "epoch": 43.61339767243826, "grad_norm": 0.06981642544269562, "learning_rate": 5.6404768663071254e-05, "loss": 0.0018135149031877518, "step": 153650 }, { "epoch": 43.61623616236162, "grad_norm": 1.7476050853729248, "learning_rate": 5.640193017314789e-05, "loss": 0.0010967563837766647, "step": 153660 }, { "epoch": 43.619074652284986, "grad_norm": 0.2768338918685913, "learning_rate": 5.639909168322452e-05, "loss": 0.004553082585334778, "step": 153670 }, { "epoch": 43.62191314220834, "grad_norm": 0.20510859787464142, "learning_rate": 5.6396253193301164e-05, "loss": 0.0032284677028656004, "step": 153680 }, { "epoch": 43.624751632131705, "grad_norm": 0.8298428058624268, "learning_rate": 5.63934147033778e-05, "loss": 0.0019706910476088523, "step": 153690 }, { "epoch": 43.62759012205507, "grad_norm": 0.14169205725193024, "learning_rate": 5.639057621345445e-05, "loss": 0.011586420238018036, "step": 153700 }, { "epoch": 43.63042861197843, "grad_norm": 0.23942674696445465, "learning_rate": 5.638773772353109e-05, "loss": 0.0013110648840665817, "step": 153710 }, { "epoch": 43.63326710190179, "grad_norm": 0.18888431787490845, "learning_rate": 5.6385183082600055e-05, "loss": 0.007014836370944977, "step": 153720 }, { "epoch": 43.63610559182515, "grad_norm": 0.21401743590831757, "learning_rate": 5.6382344592676696e-05, "loss": 0.0016837948933243752, "step": 153730 }, { "epoch": 43.63894408174851, "grad_norm": 0.6137626767158508, "learning_rate": 5.6379506102753344e-05, "loss": 0.0006144683808088303, "step": 153740 }, { "epoch": 43.64178257167187, "grad_norm": 0.12881550192832947, "learning_rate": 5.637666761282998e-05, "loss": 0.0006083633750677108, "step": 153750 }, { "epoch": 43.64462106159523, "grad_norm": 0.20826640725135803, "learning_rate": 5.637382912290662e-05, "loss": 0.0009685803204774856, "step": 153760 }, { "epoch": 43.647459551518594, "grad_norm": 2.3518834114074707, "learning_rate": 5.6370990632983255e-05, "loss": 0.0010279864072799683, "step": 153770 }, { "epoch": 43.65029804144195, "grad_norm": 0.7419127821922302, "learning_rate": 5.6368152143059896e-05, "loss": 0.0006368599832057952, "step": 153780 }, { "epoch": 43.65313653136531, "grad_norm": 0.05684041604399681, "learning_rate": 5.636531365313653e-05, "loss": 0.002681879699230194, "step": 153790 }, { "epoch": 43.655975021288675, "grad_norm": 0.5694453120231628, "learning_rate": 5.6362475163213165e-05, "loss": 0.002405887469649315, "step": 153800 }, { "epoch": 43.65881351121204, "grad_norm": 0.1537306308746338, "learning_rate": 5.635963667328982e-05, "loss": 0.0014763515442609786, "step": 153810 }, { "epoch": 43.661652001135394, "grad_norm": 0.6849051713943481, "learning_rate": 5.6356798183366455e-05, "loss": 0.0009526051580905914, "step": 153820 }, { "epoch": 43.66449049105876, "grad_norm": 0.29667145013809204, "learning_rate": 5.635395969344309e-05, "loss": 0.0013875419273972512, "step": 153830 }, { "epoch": 43.66732898098212, "grad_norm": 0.021146979182958603, "learning_rate": 5.635112120351973e-05, "loss": 0.0011237954720854759, "step": 153840 }, { "epoch": 43.670167470905476, "grad_norm": 0.029883261770009995, "learning_rate": 5.6348282713596365e-05, "loss": 0.0004882743582129478, "step": 153850 }, { "epoch": 43.67300596082884, "grad_norm": 0.11274713277816772, "learning_rate": 5.634544422367301e-05, "loss": 0.0009260876104235649, "step": 153860 }, { "epoch": 43.6758444507522, "grad_norm": 0.20485463738441467, "learning_rate": 5.634260573374964e-05, "loss": 0.0005394136533141136, "step": 153870 }, { "epoch": 43.67868294067556, "grad_norm": 0.05315103754401207, "learning_rate": 5.633976724382629e-05, "loss": 0.005065117031335831, "step": 153880 }, { "epoch": 43.68152143059892, "grad_norm": 0.09237052500247955, "learning_rate": 5.633692875390293e-05, "loss": 0.0011650104075670241, "step": 153890 }, { "epoch": 43.68435992052228, "grad_norm": 0.3734491467475891, "learning_rate": 5.6334090263979565e-05, "loss": 0.011818775534629821, "step": 153900 }, { "epoch": 43.687198410445646, "grad_norm": 0.3581596612930298, "learning_rate": 5.633125177405621e-05, "loss": 0.001146983541548252, "step": 153910 }, { "epoch": 43.690036900369, "grad_norm": 0.2726362943649292, "learning_rate": 5.632841328413284e-05, "loss": 0.0009339885786175728, "step": 153920 }, { "epoch": 43.692875390292365, "grad_norm": 0.025112181901931763, "learning_rate": 5.6325574794209476e-05, "loss": 0.0005887394770979881, "step": 153930 }, { "epoch": 43.69571388021573, "grad_norm": 0.21182525157928467, "learning_rate": 5.632273630428613e-05, "loss": 0.0014316171407699585, "step": 153940 }, { "epoch": 43.69855237013908, "grad_norm": 0.1607232242822647, "learning_rate": 5.6319897814362766e-05, "loss": 0.0008581899106502533, "step": 153950 }, { "epoch": 43.701390860062446, "grad_norm": 0.07222142070531845, "learning_rate": 5.63170593244394e-05, "loss": 0.0032442010939121245, "step": 153960 }, { "epoch": 43.70422934998581, "grad_norm": 0.8958426117897034, "learning_rate": 5.631422083451604e-05, "loss": 0.0066508248448371885, "step": 153970 }, { "epoch": 43.70706783990917, "grad_norm": 0.46692803502082825, "learning_rate": 5.6311382344592676e-05, "loss": 0.0005263220518827438, "step": 153980 }, { "epoch": 43.70990632983253, "grad_norm": 1.412104845046997, "learning_rate": 5.630854385466932e-05, "loss": 0.0006815085187554359, "step": 153990 }, { "epoch": 43.71274481975589, "grad_norm": 0.26946479082107544, "learning_rate": 5.630570536474595e-05, "loss": 0.0027316099032759665, "step": 154000 }, { "epoch": 43.71274481975589, "eval_accuracy": 0.9765371653843709, "eval_loss": 0.07975849509239197, "eval_runtime": 31.7448, "eval_samples_per_second": 495.42, "eval_steps_per_second": 7.749, "step": 154000 }, { "epoch": 43.71558330967925, "grad_norm": 0.5882258415222168, "learning_rate": 5.63028668748226e-05, "loss": 0.001388123817741871, "step": 154010 }, { "epoch": 43.71842179960261, "grad_norm": 2.0372314453125, "learning_rate": 5.630002838489924e-05, "loss": 0.0013981403782963753, "step": 154020 }, { "epoch": 43.72126028952597, "grad_norm": 0.13697589933872223, "learning_rate": 5.6297189894975876e-05, "loss": 0.0010839227586984634, "step": 154030 }, { "epoch": 43.724098779449335, "grad_norm": 0.03263700753450394, "learning_rate": 5.629435140505252e-05, "loss": 0.0033743176609277725, "step": 154040 }, { "epoch": 43.72693726937269, "grad_norm": 0.1429240107536316, "learning_rate": 5.629151291512915e-05, "loss": 0.0039602693170309065, "step": 154050 }, { "epoch": 43.729775759296054, "grad_norm": 0.05665097013115883, "learning_rate": 5.628867442520579e-05, "loss": 0.0010849833488464355, "step": 154060 }, { "epoch": 43.73261424921942, "grad_norm": 2.1330153942108154, "learning_rate": 5.628583593528243e-05, "loss": 0.00393885001540184, "step": 154070 }, { "epoch": 43.73545273914278, "grad_norm": 2.0124130249023438, "learning_rate": 5.6282997445359076e-05, "loss": 0.001319502666592598, "step": 154080 }, { "epoch": 43.738291229066135, "grad_norm": 0.789679765701294, "learning_rate": 5.628015895543571e-05, "loss": 0.0027764173224568366, "step": 154090 }, { "epoch": 43.7411297189895, "grad_norm": 0.1983095407485962, "learning_rate": 5.627732046551235e-05, "loss": 0.0013423211872577666, "step": 154100 }, { "epoch": 43.74396820891286, "grad_norm": 0.13133090734481812, "learning_rate": 5.627448197558899e-05, "loss": 0.0008127989247441291, "step": 154110 }, { "epoch": 43.74680669883622, "grad_norm": 0.7549835443496704, "learning_rate": 5.627164348566563e-05, "loss": 0.0022030359134078024, "step": 154120 }, { "epoch": 43.74964518875958, "grad_norm": 0.08254050463438034, "learning_rate": 5.626880499574226e-05, "loss": 0.0007023654878139495, "step": 154130 }, { "epoch": 43.75248367868294, "grad_norm": 6.490147590637207, "learning_rate": 5.626596650581891e-05, "loss": 0.008414805680513383, "step": 154140 }, { "epoch": 43.7553221686063, "grad_norm": 0.9044446349143982, "learning_rate": 5.626312801589555e-05, "loss": 0.0013672027736902237, "step": 154150 }, { "epoch": 43.75816065852966, "grad_norm": 0.01440686360001564, "learning_rate": 5.626028952597219e-05, "loss": 0.0005390513688325882, "step": 154160 }, { "epoch": 43.760999148453024, "grad_norm": 0.3232276737689972, "learning_rate": 5.625745103604882e-05, "loss": 0.005749291926622391, "step": 154170 }, { "epoch": 43.76383763837639, "grad_norm": 0.21527157723903656, "learning_rate": 5.625461254612546e-05, "loss": 0.0009574376046657563, "step": 154180 }, { "epoch": 43.76667612829974, "grad_norm": 0.14485757052898407, "learning_rate": 5.62517740562021e-05, "loss": 0.005866488814353943, "step": 154190 }, { "epoch": 43.769514618223106, "grad_norm": 0.6496561169624329, "learning_rate": 5.624893556627874e-05, "loss": 0.0015949683263897895, "step": 154200 }, { "epoch": 43.77235310814647, "grad_norm": 0.9769965410232544, "learning_rate": 5.624609707635539e-05, "loss": 0.00897739827632904, "step": 154210 }, { "epoch": 43.775191598069824, "grad_norm": 0.56364506483078, "learning_rate": 5.624325858643202e-05, "loss": 0.009181302785873414, "step": 154220 }, { "epoch": 43.77803008799319, "grad_norm": 0.22408020496368408, "learning_rate": 5.624042009650866e-05, "loss": 0.006026215106248856, "step": 154230 }, { "epoch": 43.78086857791655, "grad_norm": 0.4522612392902374, "learning_rate": 5.62375816065853e-05, "loss": 0.0012129290029406548, "step": 154240 }, { "epoch": 43.783707067839906, "grad_norm": 2.799694538116455, "learning_rate": 5.623474311666194e-05, "loss": 0.0012379903346300125, "step": 154250 }, { "epoch": 43.78654555776327, "grad_norm": 0.813376247882843, "learning_rate": 5.6231904626738574e-05, "loss": 0.00205894373357296, "step": 154260 }, { "epoch": 43.78938404768663, "grad_norm": 0.1333845555782318, "learning_rate": 5.622906613681521e-05, "loss": 0.002887822687625885, "step": 154270 }, { "epoch": 43.792222537609995, "grad_norm": 3.3997066020965576, "learning_rate": 5.622622764689186e-05, "loss": 0.005990870296955109, "step": 154280 }, { "epoch": 43.79506102753335, "grad_norm": 6.343020439147949, "learning_rate": 5.62233891569685e-05, "loss": 0.004266654327511788, "step": 154290 }, { "epoch": 43.79789951745671, "grad_norm": 0.46669575572013855, "learning_rate": 5.622055066704513e-05, "loss": 0.0039650309830904, "step": 154300 }, { "epoch": 43.800738007380076, "grad_norm": 14.155376434326172, "learning_rate": 5.6217712177121774e-05, "loss": 0.005905056744813919, "step": 154310 }, { "epoch": 43.80357649730343, "grad_norm": 6.257410049438477, "learning_rate": 5.621487368719841e-05, "loss": 0.007551933079957962, "step": 154320 }, { "epoch": 43.806414987226795, "grad_norm": 2.8909084796905518, "learning_rate": 5.621203519727505e-05, "loss": 0.001943877898156643, "step": 154330 }, { "epoch": 43.80925347715016, "grad_norm": 0.03649016469717026, "learning_rate": 5.62091967073517e-05, "loss": 0.002396298572421074, "step": 154340 }, { "epoch": 43.81209196707351, "grad_norm": 0.11190072447061539, "learning_rate": 5.620635821742833e-05, "loss": 0.0014627480879426003, "step": 154350 }, { "epoch": 43.814930456996876, "grad_norm": 0.03898775577545166, "learning_rate": 5.6203519727504974e-05, "loss": 0.0144993856549263, "step": 154360 }, { "epoch": 43.81776894692024, "grad_norm": 1.8075590133666992, "learning_rate": 5.620068123758161e-05, "loss": 0.0010585809126496316, "step": 154370 }, { "epoch": 43.8206074368436, "grad_norm": 0.26776123046875, "learning_rate": 5.619784274765825e-05, "loss": 0.002392314560711384, "step": 154380 }, { "epoch": 43.82344592676696, "grad_norm": 0.06966307759284973, "learning_rate": 5.6195004257734884e-05, "loss": 0.003548109531402588, "step": 154390 }, { "epoch": 43.82628441669032, "grad_norm": 0.15655361115932465, "learning_rate": 5.619216576781152e-05, "loss": 0.0030846957117319105, "step": 154400 }, { "epoch": 43.829122906613684, "grad_norm": 4.437291145324707, "learning_rate": 5.6189327277888174e-05, "loss": 0.002964920736849308, "step": 154410 }, { "epoch": 43.83196139653704, "grad_norm": 0.5085890889167786, "learning_rate": 5.618648878796481e-05, "loss": 0.0007456650957465172, "step": 154420 }, { "epoch": 43.8347998864604, "grad_norm": 0.10757845640182495, "learning_rate": 5.618365029804144e-05, "loss": 0.01442403644323349, "step": 154430 }, { "epoch": 43.837638376383765, "grad_norm": 0.28071504831314087, "learning_rate": 5.6180811808118085e-05, "loss": 0.001076718233525753, "step": 154440 }, { "epoch": 43.84047686630713, "grad_norm": 0.07889541983604431, "learning_rate": 5.617797331819472e-05, "loss": 0.003766489028930664, "step": 154450 }, { "epoch": 43.843315356230484, "grad_norm": 0.015603361651301384, "learning_rate": 5.617513482827136e-05, "loss": 0.0013997703790664673, "step": 154460 }, { "epoch": 43.84615384615385, "grad_norm": 0.1579984724521637, "learning_rate": 5.6172296338347995e-05, "loss": 0.0014079922810196876, "step": 154470 }, { "epoch": 43.84899233607721, "grad_norm": 1.1978918313980103, "learning_rate": 5.616945784842464e-05, "loss": 0.0014616191387176515, "step": 154480 }, { "epoch": 43.851830826000565, "grad_norm": 12.10133171081543, "learning_rate": 5.6166619358501285e-05, "loss": 0.007242640107870102, "step": 154490 }, { "epoch": 43.85466931592393, "grad_norm": 1.0381139516830444, "learning_rate": 5.616378086857792e-05, "loss": 0.003041612543165684, "step": 154500 }, { "epoch": 43.85466931592393, "eval_accuracy": 0.972849240160234, "eval_loss": 0.0968237966299057, "eval_runtime": 31.9377, "eval_samples_per_second": 492.427, "eval_steps_per_second": 7.702, "step": 154500 }, { "epoch": 43.85750780584729, "grad_norm": 1.2096731662750244, "learning_rate": 5.616094237865456e-05, "loss": 0.0045863516628742215, "step": 154510 }, { "epoch": 43.86034629577065, "grad_norm": 0.10764015465974808, "learning_rate": 5.6158103888731195e-05, "loss": 0.018301483988761903, "step": 154520 }, { "epoch": 43.86318478569401, "grad_norm": 0.039228178560733795, "learning_rate": 5.615526539880783e-05, "loss": 0.005936878174543381, "step": 154530 }, { "epoch": 43.86602327561737, "grad_norm": 0.17235271632671356, "learning_rate": 5.615242690888448e-05, "loss": 0.0011363167315721512, "step": 154540 }, { "epoch": 43.868861765540736, "grad_norm": 0.08166135102510452, "learning_rate": 5.614958841896112e-05, "loss": 0.002928302809596062, "step": 154550 }, { "epoch": 43.87170025546409, "grad_norm": 0.02972782775759697, "learning_rate": 5.6146749929037754e-05, "loss": 0.0018457956612110138, "step": 154560 }, { "epoch": 43.874538745387454, "grad_norm": 1.2956883907318115, "learning_rate": 5.6143911439114395e-05, "loss": 0.013591650128364562, "step": 154570 }, { "epoch": 43.87737723531082, "grad_norm": 0.15672563016414642, "learning_rate": 5.614107294919103e-05, "loss": 0.0035447567701339723, "step": 154580 }, { "epoch": 43.88021572523417, "grad_norm": 2.1472482681274414, "learning_rate": 5.613823445926767e-05, "loss": 0.0016180604696273803, "step": 154590 }, { "epoch": 43.883054215157536, "grad_norm": 0.06237005814909935, "learning_rate": 5.6135395969344306e-05, "loss": 0.001287197135388851, "step": 154600 }, { "epoch": 43.8858927050809, "grad_norm": 0.21725216507911682, "learning_rate": 5.6132557479420954e-05, "loss": 0.0022531339898705484, "step": 154610 }, { "epoch": 43.888731195004254, "grad_norm": 0.05493052676320076, "learning_rate": 5.6129718989497595e-05, "loss": 0.0009204939007759095, "step": 154620 }, { "epoch": 43.89156968492762, "grad_norm": 0.5109702348709106, "learning_rate": 5.612688049957423e-05, "loss": 0.0012773189693689347, "step": 154630 }, { "epoch": 43.89440817485098, "grad_norm": 0.14308279752731323, "learning_rate": 5.6124042009650865e-05, "loss": 0.0013164093717932702, "step": 154640 }, { "epoch": 43.89724666477434, "grad_norm": 0.05201289430260658, "learning_rate": 5.6121203519727506e-05, "loss": 0.0006537100300192833, "step": 154650 }, { "epoch": 43.9000851546977, "grad_norm": 0.057875387370586395, "learning_rate": 5.611836502980414e-05, "loss": 0.0017512915655970573, "step": 154660 }, { "epoch": 43.90292364462106, "grad_norm": 0.06067872792482376, "learning_rate": 5.611552653988078e-05, "loss": 0.002324228733778, "step": 154670 }, { "epoch": 43.905762134544425, "grad_norm": 0.03390556946396828, "learning_rate": 5.611268804995743e-05, "loss": 0.001713894121348858, "step": 154680 }, { "epoch": 43.90860062446778, "grad_norm": 6.078786373138428, "learning_rate": 5.6109849560034065e-05, "loss": 0.01019090786576271, "step": 154690 }, { "epoch": 43.91143911439114, "grad_norm": 0.533894419670105, "learning_rate": 5.6107011070110706e-05, "loss": 0.0009850585833191873, "step": 154700 }, { "epoch": 43.914277604314506, "grad_norm": 0.22418056428432465, "learning_rate": 5.610417258018734e-05, "loss": 0.01278441995382309, "step": 154710 }, { "epoch": 43.91711609423786, "grad_norm": 0.1395626664161682, "learning_rate": 5.610133409026398e-05, "loss": 0.005101402848958969, "step": 154720 }, { "epoch": 43.919954584161225, "grad_norm": 0.25007742643356323, "learning_rate": 5.609849560034062e-05, "loss": 0.004803206771612167, "step": 154730 }, { "epoch": 43.92279307408459, "grad_norm": 0.612004280090332, "learning_rate": 5.6095657110417265e-05, "loss": 0.0010332951322197914, "step": 154740 }, { "epoch": 43.92563156400795, "grad_norm": 0.7500231862068176, "learning_rate": 5.6092818620493906e-05, "loss": 0.0009252235293388366, "step": 154750 }, { "epoch": 43.928470053931306, "grad_norm": 2.3525748252868652, "learning_rate": 5.608998013057054e-05, "loss": 0.0020056005567312242, "step": 154760 }, { "epoch": 43.93130854385467, "grad_norm": 0.9214540719985962, "learning_rate": 5.6087141640647175e-05, "loss": 0.0026182567700743673, "step": 154770 }, { "epoch": 43.93414703377803, "grad_norm": 0.10013450682163239, "learning_rate": 5.608430315072382e-05, "loss": 0.0005221609026193619, "step": 154780 }, { "epoch": 43.93698552370139, "grad_norm": 1.0548710823059082, "learning_rate": 5.608146466080045e-05, "loss": 0.003946314379572868, "step": 154790 }, { "epoch": 43.93982401362475, "grad_norm": 3.3023335933685303, "learning_rate": 5.607862617087709e-05, "loss": 0.002786969766020775, "step": 154800 }, { "epoch": 43.942662503548114, "grad_norm": 0.07353068888187408, "learning_rate": 5.607578768095374e-05, "loss": 0.0038492873311042784, "step": 154810 }, { "epoch": 43.94550099347148, "grad_norm": 0.05260717496275902, "learning_rate": 5.6072949191030375e-05, "loss": 0.007866273820400237, "step": 154820 }, { "epoch": 43.94833948339483, "grad_norm": 0.03648190572857857, "learning_rate": 5.607011070110702e-05, "loss": 0.0012519409880042075, "step": 154830 }, { "epoch": 43.951177973318195, "grad_norm": 0.10745439678430557, "learning_rate": 5.606727221118365e-05, "loss": 0.00225951187312603, "step": 154840 }, { "epoch": 43.95401646324156, "grad_norm": 1.1740463972091675, "learning_rate": 5.606443372126029e-05, "loss": 0.0034063123166561127, "step": 154850 }, { "epoch": 43.956854953164914, "grad_norm": 0.44358083605766296, "learning_rate": 5.606159523133693e-05, "loss": 0.0015564899891614913, "step": 154860 }, { "epoch": 43.95969344308828, "grad_norm": 0.10875990241765976, "learning_rate": 5.6058756741413576e-05, "loss": 0.001439131610095501, "step": 154870 }, { "epoch": 43.96253193301164, "grad_norm": 0.03178190067410469, "learning_rate": 5.605591825149022e-05, "loss": 0.007920160144567489, "step": 154880 }, { "epoch": 43.965370422934996, "grad_norm": 0.21149788796901703, "learning_rate": 5.605307976156685e-05, "loss": 0.0020446011796593664, "step": 154890 }, { "epoch": 43.96820891285836, "grad_norm": 16.05747413635254, "learning_rate": 5.6050241271643486e-05, "loss": 0.0057833768427371975, "step": 154900 }, { "epoch": 43.97104740278172, "grad_norm": 0.8648762702941895, "learning_rate": 5.604740278172013e-05, "loss": 0.0012465499341487885, "step": 154910 }, { "epoch": 43.973885892705084, "grad_norm": 3.130760669708252, "learning_rate": 5.604456429179676e-05, "loss": 0.004151615127921104, "step": 154920 }, { "epoch": 43.97672438262844, "grad_norm": 0.10990035533905029, "learning_rate": 5.6041725801873403e-05, "loss": 0.0015458039939403533, "step": 154930 }, { "epoch": 43.9795628725518, "grad_norm": 2.220709800720215, "learning_rate": 5.603888731195005e-05, "loss": 0.0024961942806839945, "step": 154940 }, { "epoch": 43.982401362475166, "grad_norm": 0.50340735912323, "learning_rate": 5.6036048822026686e-05, "loss": 0.0042699694633483885, "step": 154950 }, { "epoch": 43.98523985239852, "grad_norm": 0.25851163268089294, "learning_rate": 5.603321033210333e-05, "loss": 0.0031494498252868652, "step": 154960 }, { "epoch": 43.988078342321884, "grad_norm": 0.03720707446336746, "learning_rate": 5.603037184217996e-05, "loss": 0.0015873134136199952, "step": 154970 }, { "epoch": 43.99091683224525, "grad_norm": 0.629663348197937, "learning_rate": 5.6027533352256604e-05, "loss": 0.0012056361883878708, "step": 154980 }, { "epoch": 43.9937553221686, "grad_norm": 0.4239121377468109, "learning_rate": 5.602469486233324e-05, "loss": 0.004947818070650101, "step": 154990 }, { "epoch": 43.996593812091966, "grad_norm": 1.2359132766723633, "learning_rate": 5.602185637240987e-05, "loss": 0.001002364605665207, "step": 155000 }, { "epoch": 43.996593812091966, "eval_accuracy": 0.9745660329369873, "eval_loss": 0.0955958217382431, "eval_runtime": 32.062, "eval_samples_per_second": 490.519, "eval_steps_per_second": 7.673, "step": 155000 }, { "epoch": 43.99943230201533, "grad_norm": 0.12081151455640793, "learning_rate": 5.601901788248652e-05, "loss": 0.0013408733531832695, "step": 155010 }, { "epoch": 44.00227079193869, "grad_norm": 4.688199043273926, "learning_rate": 5.601617939256316e-05, "loss": 0.0015946285799145698, "step": 155020 }, { "epoch": 44.00510928186205, "grad_norm": 1.5189108848571777, "learning_rate": 5.60133409026398e-05, "loss": 0.002266383357346058, "step": 155030 }, { "epoch": 44.00794777178541, "grad_norm": 0.7865810394287109, "learning_rate": 5.601050241271644e-05, "loss": 0.002522528916597366, "step": 155040 }, { "epoch": 44.01078626170877, "grad_norm": 1.2058391571044922, "learning_rate": 5.600766392279307e-05, "loss": 0.002017972059547901, "step": 155050 }, { "epoch": 44.01362475163213, "grad_norm": 0.1725837141275406, "learning_rate": 5.6004825432869714e-05, "loss": 0.0013281796127557755, "step": 155060 }, { "epoch": 44.01646324155549, "grad_norm": 0.21224147081375122, "learning_rate": 5.600198694294636e-05, "loss": 0.0020997019484639166, "step": 155070 }, { "epoch": 44.019301731478855, "grad_norm": 0.27076128125190735, "learning_rate": 5.5999148453023e-05, "loss": 0.0013346156105399133, "step": 155080 }, { "epoch": 44.02214022140221, "grad_norm": 0.06251348555088043, "learning_rate": 5.599630996309964e-05, "loss": 0.0029946783557534217, "step": 155090 }, { "epoch": 44.02497871132557, "grad_norm": 0.010691402480006218, "learning_rate": 5.599347147317627e-05, "loss": 0.00544789507985115, "step": 155100 }, { "epoch": 44.027817201248936, "grad_norm": 0.4243180453777313, "learning_rate": 5.599063298325291e-05, "loss": 0.004329235851764679, "step": 155110 }, { "epoch": 44.0306556911723, "grad_norm": 0.1557484269142151, "learning_rate": 5.598779449332955e-05, "loss": 0.0009197456762194634, "step": 155120 }, { "epoch": 44.033494181095655, "grad_norm": 0.1867402046918869, "learning_rate": 5.5984956003406184e-05, "loss": 0.004693562164902687, "step": 155130 }, { "epoch": 44.03633267101902, "grad_norm": 0.08925186097621918, "learning_rate": 5.598211751348283e-05, "loss": 0.0023587899282574653, "step": 155140 }, { "epoch": 44.03917116094238, "grad_norm": 0.6825480461120605, "learning_rate": 5.597927902355947e-05, "loss": 0.00494980663061142, "step": 155150 }, { "epoch": 44.04200965086574, "grad_norm": 0.06050508841872215, "learning_rate": 5.597644053363611e-05, "loss": 0.005510495975613594, "step": 155160 }, { "epoch": 44.0448481407891, "grad_norm": 0.09389100223779678, "learning_rate": 5.597360204371275e-05, "loss": 0.0005796948447823524, "step": 155170 }, { "epoch": 44.04768663071246, "grad_norm": 0.13872447609901428, "learning_rate": 5.5970763553789384e-05, "loss": 0.001945292204618454, "step": 155180 }, { "epoch": 44.050525120635825, "grad_norm": 1.585614800453186, "learning_rate": 5.5967925063866025e-05, "loss": 0.0033866841346025466, "step": 155190 }, { "epoch": 44.05336361055918, "grad_norm": 2.0271971225738525, "learning_rate": 5.596508657394266e-05, "loss": 0.0005810413509607315, "step": 155200 }, { "epoch": 44.056202100482544, "grad_norm": 0.6688646078109741, "learning_rate": 5.596224808401931e-05, "loss": 0.004142041504383087, "step": 155210 }, { "epoch": 44.05904059040591, "grad_norm": 1.002747893333435, "learning_rate": 5.595940959409595e-05, "loss": 0.002308500185608864, "step": 155220 }, { "epoch": 44.06187908032926, "grad_norm": 0.08227542787790298, "learning_rate": 5.5956571104172584e-05, "loss": 0.0006013959646224976, "step": 155230 }, { "epoch": 44.064717570252625, "grad_norm": 0.007276277057826519, "learning_rate": 5.595373261424922e-05, "loss": 0.0010032646358013153, "step": 155240 }, { "epoch": 44.06755606017599, "grad_norm": 0.045482609421014786, "learning_rate": 5.595089412432586e-05, "loss": 0.0028629707172513007, "step": 155250 }, { "epoch": 44.070394550099344, "grad_norm": 0.08305247128009796, "learning_rate": 5.5948055634402494e-05, "loss": 0.0004930403083562851, "step": 155260 }, { "epoch": 44.07323304002271, "grad_norm": 0.13856832683086395, "learning_rate": 5.594521714447914e-05, "loss": 0.003337107226252556, "step": 155270 }, { "epoch": 44.07607152994607, "grad_norm": 0.07452182471752167, "learning_rate": 5.5942378654555784e-05, "loss": 0.0006238771602511406, "step": 155280 }, { "epoch": 44.07891001986943, "grad_norm": 2.890653610229492, "learning_rate": 5.593954016463242e-05, "loss": 0.0013073312118649482, "step": 155290 }, { "epoch": 44.08174850979279, "grad_norm": 0.14537155628204346, "learning_rate": 5.593670167470906e-05, "loss": 0.0005009999498724937, "step": 155300 }, { "epoch": 44.08458699971615, "grad_norm": 0.14274157583713531, "learning_rate": 5.5933863184785694e-05, "loss": 0.000645207054913044, "step": 155310 }, { "epoch": 44.087425489639514, "grad_norm": 0.028981607407331467, "learning_rate": 5.5931024694862336e-05, "loss": 0.012816362082958221, "step": 155320 }, { "epoch": 44.09026397956287, "grad_norm": 0.29338452219963074, "learning_rate": 5.592818620493897e-05, "loss": 0.0023719411343336104, "step": 155330 }, { "epoch": 44.09310246948623, "grad_norm": 0.1496899127960205, "learning_rate": 5.592534771501562e-05, "loss": 0.00653311014175415, "step": 155340 }, { "epoch": 44.095940959409596, "grad_norm": 1.0653738975524902, "learning_rate": 5.592250922509226e-05, "loss": 0.0006035493686795234, "step": 155350 }, { "epoch": 44.09877944933295, "grad_norm": 5.344523906707764, "learning_rate": 5.5919670735168894e-05, "loss": 0.0025301782414317133, "step": 155360 }, { "epoch": 44.101617939256315, "grad_norm": 0.9273077845573425, "learning_rate": 5.591683224524553e-05, "loss": 0.0004893086850643158, "step": 155370 }, { "epoch": 44.10445642917968, "grad_norm": 0.018016008660197258, "learning_rate": 5.591399375532217e-05, "loss": 0.0004981115460395813, "step": 155380 }, { "epoch": 44.10729491910304, "grad_norm": 0.21669256687164307, "learning_rate": 5.5911155265398805e-05, "loss": 0.0007338380441069603, "step": 155390 }, { "epoch": 44.110133409026396, "grad_norm": 0.019063038751482964, "learning_rate": 5.5908316775475446e-05, "loss": 0.0010421304032206535, "step": 155400 }, { "epoch": 44.11297189894976, "grad_norm": 0.17975348234176636, "learning_rate": 5.5905478285552095e-05, "loss": 0.0004953864961862564, "step": 155410 }, { "epoch": 44.11581038887312, "grad_norm": 0.8710076808929443, "learning_rate": 5.590263979562873e-05, "loss": 0.0012292493134737014, "step": 155420 }, { "epoch": 44.11864887879648, "grad_norm": 0.6368151903152466, "learning_rate": 5.589980130570537e-05, "loss": 0.004834149032831192, "step": 155430 }, { "epoch": 44.12148736871984, "grad_norm": 16.330114364624023, "learning_rate": 5.5896962815782005e-05, "loss": 0.017424367368221283, "step": 155440 }, { "epoch": 44.1243258586432, "grad_norm": 2.5074987411499023, "learning_rate": 5.5894124325858647e-05, "loss": 0.0013069190084934235, "step": 155450 }, { "epoch": 44.12716434856656, "grad_norm": 0.04622608423233032, "learning_rate": 5.589128583593528e-05, "loss": 0.0004179947078227997, "step": 155460 }, { "epoch": 44.13000283848992, "grad_norm": 0.9279323220252991, "learning_rate": 5.588844734601193e-05, "loss": 0.0008724264800548554, "step": 155470 }, { "epoch": 44.132841328413285, "grad_norm": 1.57437002658844, "learning_rate": 5.5885608856088564e-05, "loss": 0.001136513240635395, "step": 155480 }, { "epoch": 44.13567981833665, "grad_norm": 0.744023859500885, "learning_rate": 5.5882770366165205e-05, "loss": 0.0014183659106492997, "step": 155490 }, { "epoch": 44.138518308260004, "grad_norm": 0.02020115777850151, "learning_rate": 5.587993187624184e-05, "loss": 0.0006978582590818405, "step": 155500 }, { "epoch": 44.138518308260004, "eval_accuracy": 0.9761556558784257, "eval_loss": 0.08622970432043076, "eval_runtime": 31.8332, "eval_samples_per_second": 494.044, "eval_steps_per_second": 7.728, "step": 155500 }, { "epoch": 44.14135679818337, "grad_norm": 0.6633445620536804, "learning_rate": 5.587709338631848e-05, "loss": 0.0008507579565048218, "step": 155510 }, { "epoch": 44.14419528810673, "grad_norm": 0.07336318492889404, "learning_rate": 5.5874254896395116e-05, "loss": 0.005176513642072678, "step": 155520 }, { "epoch": 44.147033778030085, "grad_norm": 5.161157608032227, "learning_rate": 5.587141640647176e-05, "loss": 0.00277931559830904, "step": 155530 }, { "epoch": 44.14987226795345, "grad_norm": 6.645428657531738, "learning_rate": 5.5868577916548405e-05, "loss": 0.00671527236700058, "step": 155540 }, { "epoch": 44.15271075787681, "grad_norm": 0.3229260742664337, "learning_rate": 5.586573942662504e-05, "loss": 0.0010661270469427108, "step": 155550 }, { "epoch": 44.15554924780017, "grad_norm": 3.415201425552368, "learning_rate": 5.586290093670168e-05, "loss": 0.0021433599293231966, "step": 155560 }, { "epoch": 44.15838773772353, "grad_norm": 0.0876779854297638, "learning_rate": 5.5860062446778316e-05, "loss": 0.012350483983755111, "step": 155570 }, { "epoch": 44.16122622764689, "grad_norm": 0.23315846920013428, "learning_rate": 5.585722395685495e-05, "loss": 0.0018002785742282867, "step": 155580 }, { "epoch": 44.164064717570255, "grad_norm": 0.07376910746097565, "learning_rate": 5.585438546693159e-05, "loss": 0.012228453159332275, "step": 155590 }, { "epoch": 44.16690320749361, "grad_norm": 0.08329633623361588, "learning_rate": 5.5851546977008227e-05, "loss": 0.000987526960670948, "step": 155600 }, { "epoch": 44.169741697416974, "grad_norm": 0.21942900121212006, "learning_rate": 5.5848708487084875e-05, "loss": 0.0008349103853106498, "step": 155610 }, { "epoch": 44.17258018734034, "grad_norm": 0.05533232167363167, "learning_rate": 5.5845869997161516e-05, "loss": 0.0007051410153508187, "step": 155620 }, { "epoch": 44.17541867726369, "grad_norm": 0.16942307353019714, "learning_rate": 5.584303150723815e-05, "loss": 0.006307969987392426, "step": 155630 }, { "epoch": 44.178257167187056, "grad_norm": 1.175797700881958, "learning_rate": 5.584019301731479e-05, "loss": 0.0035138830542564394, "step": 155640 }, { "epoch": 44.18109565711042, "grad_norm": 14.757887840270996, "learning_rate": 5.5837354527391427e-05, "loss": 0.022019030153751375, "step": 155650 }, { "epoch": 44.18393414703378, "grad_norm": 0.5485493540763855, "learning_rate": 5.583451603746807e-05, "loss": 0.015479469299316406, "step": 155660 }, { "epoch": 44.18677263695714, "grad_norm": 1.075817584991455, "learning_rate": 5.5831677547544716e-05, "loss": 0.023088744282722472, "step": 155670 }, { "epoch": 44.1896111268805, "grad_norm": 0.04194627329707146, "learning_rate": 5.582883905762135e-05, "loss": 0.001118011213839054, "step": 155680 }, { "epoch": 44.19244961680386, "grad_norm": 0.02505800686776638, "learning_rate": 5.582600056769799e-05, "loss": 0.0059205576777458194, "step": 155690 }, { "epoch": 44.19528810672722, "grad_norm": 0.3731585741043091, "learning_rate": 5.582316207777463e-05, "loss": 0.003644236922264099, "step": 155700 }, { "epoch": 44.19812659665058, "grad_norm": 0.05007988214492798, "learning_rate": 5.582032358785126e-05, "loss": 0.0004676518961787224, "step": 155710 }, { "epoch": 44.200965086573945, "grad_norm": 0.052320413291454315, "learning_rate": 5.58174850979279e-05, "loss": 0.0016772165894508363, "step": 155720 }, { "epoch": 44.2038035764973, "grad_norm": 0.13691850006580353, "learning_rate": 5.581464660800454e-05, "loss": 0.007365772128105163, "step": 155730 }, { "epoch": 44.20664206642066, "grad_norm": 0.23982998728752136, "learning_rate": 5.5811808118081185e-05, "loss": 0.004035079479217529, "step": 155740 }, { "epoch": 44.209480556344026, "grad_norm": Infinity, "learning_rate": 5.580896962815783e-05, "loss": 0.005990353226661682, "step": 155750 }, { "epoch": 44.21231904626739, "grad_norm": 1.0479826927185059, "learning_rate": 5.580641498722679e-05, "loss": 0.005594062805175781, "step": 155760 }, { "epoch": 44.215157536190745, "grad_norm": 0.11009394377470016, "learning_rate": 5.5803576497303435e-05, "loss": 0.004703034460544586, "step": 155770 }, { "epoch": 44.21799602611411, "grad_norm": 0.17580120265483856, "learning_rate": 5.580073800738007e-05, "loss": 0.0016249192878603936, "step": 155780 }, { "epoch": 44.22083451603747, "grad_norm": 0.027019785717129707, "learning_rate": 5.579789951745672e-05, "loss": 0.0014676047489047051, "step": 155790 }, { "epoch": 44.223673005960826, "grad_norm": 0.14550164341926575, "learning_rate": 5.579506102753336e-05, "loss": 0.014735449850559235, "step": 155800 }, { "epoch": 44.22651149588419, "grad_norm": 0.27819570899009705, "learning_rate": 5.579222253760999e-05, "loss": 0.0011222263798117637, "step": 155810 }, { "epoch": 44.22934998580755, "grad_norm": 0.05686723068356514, "learning_rate": 5.5789384047686635e-05, "loss": 0.0006251854822039605, "step": 155820 }, { "epoch": 44.23218847573091, "grad_norm": 1.6666109561920166, "learning_rate": 5.578654555776327e-05, "loss": 0.003666987642645836, "step": 155830 }, { "epoch": 44.23502696565427, "grad_norm": 0.08091799914836884, "learning_rate": 5.578370706783991e-05, "loss": 0.0009525692090392113, "step": 155840 }, { "epoch": 44.237865455577634, "grad_norm": 0.20077741146087646, "learning_rate": 5.578086857791656e-05, "loss": 0.0007513668388128281, "step": 155850 }, { "epoch": 44.240703945501, "grad_norm": 0.3532382845878601, "learning_rate": 5.5778030087993193e-05, "loss": 0.0012952215969562531, "step": 155860 }, { "epoch": 44.24354243542435, "grad_norm": 1.4566701650619507, "learning_rate": 5.5775191598069835e-05, "loss": 0.00212986133992672, "step": 155870 }, { "epoch": 44.246380925347715, "grad_norm": 2.030355453491211, "learning_rate": 5.577235310814647e-05, "loss": 0.008475768566131591, "step": 155880 }, { "epoch": 44.24921941527108, "grad_norm": 0.2468125969171524, "learning_rate": 5.5769514618223104e-05, "loss": 0.001193469762802124, "step": 155890 }, { "epoch": 44.252057905194434, "grad_norm": 5.4765849113464355, "learning_rate": 5.5766676128299745e-05, "loss": 0.006750188767910004, "step": 155900 }, { "epoch": 44.2548963951178, "grad_norm": 0.23432430624961853, "learning_rate": 5.576383763837638e-05, "loss": 0.001567850448191166, "step": 155910 }, { "epoch": 44.25773488504116, "grad_norm": 0.1899024248123169, "learning_rate": 5.576099914845303e-05, "loss": 0.00036251526325941085, "step": 155920 }, { "epoch": 44.260573374964515, "grad_norm": 0.19278600811958313, "learning_rate": 5.575816065852967e-05, "loss": 0.0036965854465961455, "step": 155930 }, { "epoch": 44.26341186488788, "grad_norm": 1.2772260904312134, "learning_rate": 5.5755322168606304e-05, "loss": 0.000999061018228531, "step": 155940 }, { "epoch": 44.26625035481124, "grad_norm": 1.1016117334365845, "learning_rate": 5.5752483678682945e-05, "loss": 0.00040059220045804975, "step": 155950 }, { "epoch": 44.269088844734604, "grad_norm": 0.21109764277935028, "learning_rate": 5.574964518875958e-05, "loss": 0.0005548624321818352, "step": 155960 }, { "epoch": 44.27192733465796, "grad_norm": 0.0556270033121109, "learning_rate": 5.574680669883622e-05, "loss": 0.0008237482979893684, "step": 155970 }, { "epoch": 44.27476582458132, "grad_norm": 0.17667454481124878, "learning_rate": 5.5743968208912856e-05, "loss": 0.003913568705320359, "step": 155980 }, { "epoch": 44.277604314504686, "grad_norm": 0.8863526582717896, "learning_rate": 5.5741129718989504e-05, "loss": 0.0010313203558325768, "step": 155990 }, { "epoch": 44.28044280442804, "grad_norm": 0.047872792929410934, "learning_rate": 5.573829122906614e-05, "loss": 0.007978184521198273, "step": 156000 }, { "epoch": 44.28044280442804, "eval_accuracy": 0.9725949004896038, "eval_loss": 0.09618312120437622, "eval_runtime": 32.0601, "eval_samples_per_second": 490.547, "eval_steps_per_second": 7.673, "step": 156000 }, { "epoch": 44.283281294351404, "grad_norm": 8.187911987304688, "learning_rate": 5.573545273914278e-05, "loss": 0.0035466670989990233, "step": 156010 }, { "epoch": 44.28611978427477, "grad_norm": 1.4802619218826294, "learning_rate": 5.5732614249219415e-05, "loss": 0.0011472728103399277, "step": 156020 }, { "epoch": 44.28895827419813, "grad_norm": 6.284488201141357, "learning_rate": 5.5729775759296056e-05, "loss": 0.003243030607700348, "step": 156030 }, { "epoch": 44.291796764121486, "grad_norm": 0.009590772911906242, "learning_rate": 5.572693726937269e-05, "loss": 0.0037116315215826035, "step": 156040 }, { "epoch": 44.29463525404485, "grad_norm": 0.03841324895620346, "learning_rate": 5.572409877944934e-05, "loss": 0.001790476031601429, "step": 156050 }, { "epoch": 44.29747374396821, "grad_norm": 0.12985964119434357, "learning_rate": 5.572126028952598e-05, "loss": 0.0009689955040812492, "step": 156060 }, { "epoch": 44.30031223389157, "grad_norm": 0.1811363846063614, "learning_rate": 5.5718421799602615e-05, "loss": 0.0054326742887496945, "step": 156070 }, { "epoch": 44.30315072381493, "grad_norm": 14.500101089477539, "learning_rate": 5.5715583309679256e-05, "loss": 0.009616748988628387, "step": 156080 }, { "epoch": 44.30598921373829, "grad_norm": 19.557910919189453, "learning_rate": 5.571274481975589e-05, "loss": 0.014311686158180237, "step": 156090 }, { "epoch": 44.30882770366165, "grad_norm": 0.08027077466249466, "learning_rate": 5.5709906329832525e-05, "loss": 0.0009311344474554062, "step": 156100 }, { "epoch": 44.31166619358501, "grad_norm": 3.8744192123413086, "learning_rate": 5.570706783990917e-05, "loss": 0.002939853072166443, "step": 156110 }, { "epoch": 44.314504683508375, "grad_norm": 0.10265488922595978, "learning_rate": 5.5704229349985815e-05, "loss": 0.0017543647438287735, "step": 156120 }, { "epoch": 44.31734317343174, "grad_norm": 0.7896320223808289, "learning_rate": 5.570139086006245e-05, "loss": 0.0042994610965251924, "step": 156130 }, { "epoch": 44.32018166335509, "grad_norm": 1.4950884580612183, "learning_rate": 5.569855237013909e-05, "loss": 0.005641095712780953, "step": 156140 }, { "epoch": 44.323020153278456, "grad_norm": 0.0156754981726408, "learning_rate": 5.5695713880215725e-05, "loss": 0.0016691450029611588, "step": 156150 }, { "epoch": 44.32585864320182, "grad_norm": 0.4329633116722107, "learning_rate": 5.569287539029237e-05, "loss": 0.0005211511626839638, "step": 156160 }, { "epoch": 44.328697133125175, "grad_norm": 0.2618117928504944, "learning_rate": 5.5690036900369e-05, "loss": 0.0018415918573737144, "step": 156170 }, { "epoch": 44.33153562304854, "grad_norm": 0.21483588218688965, "learning_rate": 5.568719841044564e-05, "loss": 0.00474206693470478, "step": 156180 }, { "epoch": 44.3343741129719, "grad_norm": 0.14301232993602753, "learning_rate": 5.568435992052229e-05, "loss": 0.0010020313784480095, "step": 156190 }, { "epoch": 44.33721260289526, "grad_norm": 0.21054522693157196, "learning_rate": 5.5681521430598926e-05, "loss": 0.0062213890254497525, "step": 156200 }, { "epoch": 44.34005109281862, "grad_norm": 0.11314497888088226, "learning_rate": 5.567868294067557e-05, "loss": 0.0023398233577609064, "step": 156210 }, { "epoch": 44.34288958274198, "grad_norm": 1.2806332111358643, "learning_rate": 5.56758444507522e-05, "loss": 0.004950839653611183, "step": 156220 }, { "epoch": 44.345728072665345, "grad_norm": 1.388614296913147, "learning_rate": 5.5673005960828836e-05, "loss": 0.002655931003391743, "step": 156230 }, { "epoch": 44.3485665625887, "grad_norm": 0.05348653346300125, "learning_rate": 5.567016747090548e-05, "loss": 0.0009570807218551636, "step": 156240 }, { "epoch": 44.351405052512064, "grad_norm": 0.28925183415412903, "learning_rate": 5.5667328980982126e-05, "loss": 0.004495839029550553, "step": 156250 }, { "epoch": 44.35424354243543, "grad_norm": 0.5233582258224487, "learning_rate": 5.566449049105876e-05, "loss": 0.001182178221642971, "step": 156260 }, { "epoch": 44.35708203235878, "grad_norm": 0.5457421541213989, "learning_rate": 5.56616520011354e-05, "loss": 0.000948018953204155, "step": 156270 }, { "epoch": 44.359920522282145, "grad_norm": 0.1991112381219864, "learning_rate": 5.5658813511212036e-05, "loss": 0.006093402951955795, "step": 156280 }, { "epoch": 44.36275901220551, "grad_norm": 0.13211938738822937, "learning_rate": 5.565597502128868e-05, "loss": 0.0007587602362036705, "step": 156290 }, { "epoch": 44.365597502128864, "grad_norm": 0.014945046044886112, "learning_rate": 5.565313653136531e-05, "loss": 0.0016382332891225816, "step": 156300 }, { "epoch": 44.36843599205223, "grad_norm": 0.2198319137096405, "learning_rate": 5.5650298041441954e-05, "loss": 0.0027062106877565385, "step": 156310 }, { "epoch": 44.37127448197559, "grad_norm": 0.14761118590831757, "learning_rate": 5.56474595515186e-05, "loss": 0.0017314694821834564, "step": 156320 }, { "epoch": 44.37411297189895, "grad_norm": 0.3220134675502777, "learning_rate": 5.5644621061595236e-05, "loss": 0.0005900824442505836, "step": 156330 }, { "epoch": 44.37695146182231, "grad_norm": 2.4303300380706787, "learning_rate": 5.564178257167188e-05, "loss": 0.0010822070762515068, "step": 156340 }, { "epoch": 44.37978995174567, "grad_norm": 0.6298781037330627, "learning_rate": 5.563894408174851e-05, "loss": 0.0008630633354187011, "step": 156350 }, { "epoch": 44.382628441669034, "grad_norm": 0.04808414354920387, "learning_rate": 5.563610559182515e-05, "loss": 0.011258452385663986, "step": 156360 }, { "epoch": 44.38546693159239, "grad_norm": 0.06292856484651566, "learning_rate": 5.563326710190179e-05, "loss": 0.0007125638425350189, "step": 156370 }, { "epoch": 44.38830542151575, "grad_norm": 0.07413753122091293, "learning_rate": 5.563042861197842e-05, "loss": 0.0014533979818224906, "step": 156380 }, { "epoch": 44.391143911439116, "grad_norm": 0.13524796068668365, "learning_rate": 5.562759012205507e-05, "loss": 0.0012958969920873642, "step": 156390 }, { "epoch": 44.39398240136248, "grad_norm": 0.15299531817436218, "learning_rate": 5.562475163213171e-05, "loss": 0.0003364142030477524, "step": 156400 }, { "epoch": 44.396820891285834, "grad_norm": 0.29562321305274963, "learning_rate": 5.562191314220835e-05, "loss": 0.0009649790823459625, "step": 156410 }, { "epoch": 44.3996593812092, "grad_norm": 1.9424642324447632, "learning_rate": 5.561907465228499e-05, "loss": 0.0019111409783363342, "step": 156420 }, { "epoch": 44.40249787113256, "grad_norm": 0.030079824849963188, "learning_rate": 5.561623616236162e-05, "loss": 0.003973577171564102, "step": 156430 }, { "epoch": 44.405336361055916, "grad_norm": 1.194277286529541, "learning_rate": 5.5613397672438264e-05, "loss": 0.0012745341286063195, "step": 156440 }, { "epoch": 44.40817485097928, "grad_norm": 0.27833881974220276, "learning_rate": 5.561055918251491e-05, "loss": 0.002846476808190346, "step": 156450 }, { "epoch": 44.41101334090264, "grad_norm": 0.03315252438187599, "learning_rate": 5.560772069259155e-05, "loss": 0.004304178059101105, "step": 156460 }, { "epoch": 44.413851830826, "grad_norm": 0.44952377676963806, "learning_rate": 5.560488220266818e-05, "loss": 0.0010804397985339164, "step": 156470 }, { "epoch": 44.41669032074936, "grad_norm": 7.029206275939941, "learning_rate": 5.560204371274482e-05, "loss": 0.004529815167188644, "step": 156480 }, { "epoch": 44.41952881067272, "grad_norm": 0.6543444991111755, "learning_rate": 5.559920522282146e-05, "loss": 0.000869227759540081, "step": 156490 }, { "epoch": 44.422367300596086, "grad_norm": 0.3735924959182739, "learning_rate": 5.55963667328981e-05, "loss": 0.0007621526718139648, "step": 156500 }, { "epoch": 44.422367300596086, "eval_accuracy": 0.9753926368665352, "eval_loss": 0.08934874832630157, "eval_runtime": 32.9707, "eval_samples_per_second": 476.999, "eval_steps_per_second": 7.461, "step": 156500 }, { "epoch": 44.42520579051944, "grad_norm": 0.16405533254146576, "learning_rate": 5.5593528242974734e-05, "loss": 0.003479709103703499, "step": 156510 }, { "epoch": 44.428044280442805, "grad_norm": 0.0796213299036026, "learning_rate": 5.559068975305138e-05, "loss": 0.008253187686204911, "step": 156520 }, { "epoch": 44.43088277036617, "grad_norm": 19.184030532836914, "learning_rate": 5.558785126312802e-05, "loss": 0.00849815011024475, "step": 156530 }, { "epoch": 44.43372126028952, "grad_norm": 0.02729126811027527, "learning_rate": 5.558501277320466e-05, "loss": 0.005532644689083099, "step": 156540 }, { "epoch": 44.436559750212886, "grad_norm": 0.08558712154626846, "learning_rate": 5.55821742832813e-05, "loss": 0.003780781850218773, "step": 156550 }, { "epoch": 44.43939824013625, "grad_norm": 0.0852358415722847, "learning_rate": 5.5579335793357934e-05, "loss": 0.0030274834483861924, "step": 156560 }, { "epoch": 44.442236730059605, "grad_norm": 0.032969292253255844, "learning_rate": 5.557649730343457e-05, "loss": 0.003079797141253948, "step": 156570 }, { "epoch": 44.44507521998297, "grad_norm": 0.05022619664669037, "learning_rate": 5.557365881351122e-05, "loss": 0.001191606931388378, "step": 156580 }, { "epoch": 44.44791370990633, "grad_norm": 0.053763579577207565, "learning_rate": 5.557082032358786e-05, "loss": 0.0009511610493063927, "step": 156590 }, { "epoch": 44.450752199829694, "grad_norm": 0.8627061247825623, "learning_rate": 5.556798183366449e-05, "loss": 0.003018593601882458, "step": 156600 }, { "epoch": 44.45359068975305, "grad_norm": 4.448215961456299, "learning_rate": 5.5565143343741134e-05, "loss": 0.008427640050649643, "step": 156610 }, { "epoch": 44.45642917967641, "grad_norm": 1.170464277267456, "learning_rate": 5.556230485381777e-05, "loss": 0.004199491068720818, "step": 156620 }, { "epoch": 44.459267669599775, "grad_norm": 0.0877622589468956, "learning_rate": 5.555946636389441e-05, "loss": 0.0013260142877697945, "step": 156630 }, { "epoch": 44.46210615952313, "grad_norm": 0.39314037561416626, "learning_rate": 5.5556627873971044e-05, "loss": 0.004947011917829513, "step": 156640 }, { "epoch": 44.464944649446494, "grad_norm": 0.22980397939682007, "learning_rate": 5.555378938404769e-05, "loss": 0.005471432954072953, "step": 156650 }, { "epoch": 44.46778313936986, "grad_norm": 0.2401466816663742, "learning_rate": 5.5550950894124334e-05, "loss": 0.002054726146161556, "step": 156660 }, { "epoch": 44.47062162929321, "grad_norm": 0.058545272797346115, "learning_rate": 5.554811240420097e-05, "loss": 0.0003682799637317657, "step": 156670 }, { "epoch": 44.473460119216575, "grad_norm": 0.18302831053733826, "learning_rate": 5.554527391427761e-05, "loss": 0.0005778366699814797, "step": 156680 }, { "epoch": 44.47629860913994, "grad_norm": 0.5530268549919128, "learning_rate": 5.5542435424354245e-05, "loss": 0.0011473154649138451, "step": 156690 }, { "epoch": 44.4791370990633, "grad_norm": 0.11177114397287369, "learning_rate": 5.553959693443088e-05, "loss": 0.0008618583902716637, "step": 156700 }, { "epoch": 44.48197558898666, "grad_norm": 2.068586587905884, "learning_rate": 5.553675844450752e-05, "loss": 0.0035226158797740936, "step": 156710 }, { "epoch": 44.48481407891002, "grad_norm": 16.873788833618164, "learning_rate": 5.553391995458417e-05, "loss": 0.012060369551181793, "step": 156720 }, { "epoch": 44.48765256883338, "grad_norm": 6.657575607299805, "learning_rate": 5.55310814646608e-05, "loss": 0.0025987731292843818, "step": 156730 }, { "epoch": 44.49049105875674, "grad_norm": 0.3594381511211395, "learning_rate": 5.5528242974737445e-05, "loss": 0.004024265706539154, "step": 156740 }, { "epoch": 44.4933295486801, "grad_norm": 1.5028173923492432, "learning_rate": 5.552540448481408e-05, "loss": 0.0045437000691890715, "step": 156750 }, { "epoch": 44.496168038603464, "grad_norm": 5.458350658416748, "learning_rate": 5.552256599489072e-05, "loss": 0.002493935637176037, "step": 156760 }, { "epoch": 44.49900652852683, "grad_norm": 0.19793866574764252, "learning_rate": 5.5519727504967355e-05, "loss": 0.009273440390825272, "step": 156770 }, { "epoch": 44.50184501845018, "grad_norm": 4.402702331542969, "learning_rate": 5.5516889015044e-05, "loss": 0.005135349929332733, "step": 156780 }, { "epoch": 44.504683508373546, "grad_norm": 2.735562324523926, "learning_rate": 5.5514050525120645e-05, "loss": 0.004181587696075439, "step": 156790 }, { "epoch": 44.50752199829691, "grad_norm": 0.13431775569915771, "learning_rate": 5.551121203519728e-05, "loss": 0.0009526653215289116, "step": 156800 }, { "epoch": 44.510360488220265, "grad_norm": 0.040909476578235626, "learning_rate": 5.5508373545273914e-05, "loss": 0.002434661239385605, "step": 156810 }, { "epoch": 44.51319897814363, "grad_norm": 1.6300687789916992, "learning_rate": 5.5505535055350555e-05, "loss": 0.008047007769346238, "step": 156820 }, { "epoch": 44.51603746806699, "grad_norm": 1.2510794401168823, "learning_rate": 5.550269656542719e-05, "loss": 0.0012539243325591087, "step": 156830 }, { "epoch": 44.518875957990346, "grad_norm": 2.2361247539520264, "learning_rate": 5.549985807550383e-05, "loss": 0.0011626319959759713, "step": 156840 }, { "epoch": 44.52171444791371, "grad_norm": 1.9043679237365723, "learning_rate": 5.549701958558048e-05, "loss": 0.0007231362164020538, "step": 156850 }, { "epoch": 44.52455293783707, "grad_norm": 0.19966337084770203, "learning_rate": 5.5494181095657114e-05, "loss": 0.0008175697177648545, "step": 156860 }, { "epoch": 44.527391427760435, "grad_norm": 0.4779660999774933, "learning_rate": 5.5491342605733755e-05, "loss": 0.0009424742311239243, "step": 156870 }, { "epoch": 44.53022991768379, "grad_norm": 0.47506165504455566, "learning_rate": 5.548850411581039e-05, "loss": 0.00657263770699501, "step": 156880 }, { "epoch": 44.53306840760715, "grad_norm": 0.19432635605335236, "learning_rate": 5.548566562588703e-05, "loss": 0.003304903954267502, "step": 156890 }, { "epoch": 44.535906897530516, "grad_norm": 17.39341926574707, "learning_rate": 5.5482827135963666e-05, "loss": 0.003694375976920128, "step": 156900 }, { "epoch": 44.53874538745387, "grad_norm": 0.04237337037920952, "learning_rate": 5.547998864604031e-05, "loss": 0.0011420505121350288, "step": 156910 }, { "epoch": 44.541583877377235, "grad_norm": 0.039938390254974365, "learning_rate": 5.5477150156116955e-05, "loss": 0.001709469221532345, "step": 156920 }, { "epoch": 44.5444223673006, "grad_norm": 0.02060115337371826, "learning_rate": 5.547431166619359e-05, "loss": 0.0006421186029911041, "step": 156930 }, { "epoch": 44.547260857223954, "grad_norm": 0.027185415849089622, "learning_rate": 5.5471473176270225e-05, "loss": 0.0010054798796772957, "step": 156940 }, { "epoch": 44.55009934714732, "grad_norm": 0.09638462215662003, "learning_rate": 5.5468634686346866e-05, "loss": 0.005101707205176354, "step": 156950 }, { "epoch": 44.55293783707068, "grad_norm": 0.16471844911575317, "learning_rate": 5.54657961964235e-05, "loss": 0.0036167684942483903, "step": 156960 }, { "epoch": 44.55577632699404, "grad_norm": 0.6293802261352539, "learning_rate": 5.546295770650014e-05, "loss": 0.010815633088350296, "step": 156970 }, { "epoch": 44.5586148169174, "grad_norm": 0.2345420867204666, "learning_rate": 5.546011921657679e-05, "loss": 0.001975184865295887, "step": 156980 }, { "epoch": 44.56145330684076, "grad_norm": 0.11501927673816681, "learning_rate": 5.5457280726653425e-05, "loss": 0.004590024799108505, "step": 156990 }, { "epoch": 44.564291796764124, "grad_norm": 0.22351092100143433, "learning_rate": 5.5454442236730066e-05, "loss": 0.004032894968986511, "step": 157000 }, { "epoch": 44.564291796764124, "eval_accuracy": 0.9736758440897819, "eval_loss": 0.09944281727075577, "eval_runtime": 32.2713, "eval_samples_per_second": 487.337, "eval_steps_per_second": 7.623, "step": 157000 }, { "epoch": 44.56713028668748, "grad_norm": 0.3598601818084717, "learning_rate": 5.54516037468067e-05, "loss": 0.0010118138045072556, "step": 157010 }, { "epoch": 44.56996877661084, "grad_norm": 0.0883999690413475, "learning_rate": 5.544876525688334e-05, "loss": 0.0013120384886860848, "step": 157020 }, { "epoch": 44.572807266534205, "grad_norm": 0.11062635481357574, "learning_rate": 5.544592676695998e-05, "loss": 0.0014809662476181983, "step": 157030 }, { "epoch": 44.57564575645756, "grad_norm": 4.825936794281006, "learning_rate": 5.544308827703661e-05, "loss": 0.0015304675325751305, "step": 157040 }, { "epoch": 44.578484246380924, "grad_norm": 1.0261945724487305, "learning_rate": 5.5440249787113266e-05, "loss": 0.001288410648703575, "step": 157050 }, { "epoch": 44.58132273630429, "grad_norm": 1.421907663345337, "learning_rate": 5.54374112971899e-05, "loss": 0.0020915113389492037, "step": 157060 }, { "epoch": 44.58416122622765, "grad_norm": 0.6488006711006165, "learning_rate": 5.5434572807266535e-05, "loss": 0.0036552704870700834, "step": 157070 }, { "epoch": 44.586999716151006, "grad_norm": 0.19210556149482727, "learning_rate": 5.543173431734318e-05, "loss": 0.0008521279320120812, "step": 157080 }, { "epoch": 44.58983820607437, "grad_norm": 0.8077877163887024, "learning_rate": 5.542889582741981e-05, "loss": 0.0020171890035271646, "step": 157090 }, { "epoch": 44.59267669599773, "grad_norm": 0.21412065625190735, "learning_rate": 5.542605733749645e-05, "loss": 0.0013764137402176857, "step": 157100 }, { "epoch": 44.59551518592109, "grad_norm": 3.524261236190796, "learning_rate": 5.542321884757309e-05, "loss": 0.006814248114824295, "step": 157110 }, { "epoch": 44.59835367584445, "grad_norm": 0.1553005874156952, "learning_rate": 5.5420380357649736e-05, "loss": 0.001669175736606121, "step": 157120 }, { "epoch": 44.60119216576781, "grad_norm": 0.27482202649116516, "learning_rate": 5.541754186772638e-05, "loss": 0.003913236036896706, "step": 157130 }, { "epoch": 44.604030655691176, "grad_norm": 0.02628505788743496, "learning_rate": 5.541470337780301e-05, "loss": 0.006351328641176224, "step": 157140 }, { "epoch": 44.60686914561453, "grad_norm": 0.0274568572640419, "learning_rate": 5.541186488787965e-05, "loss": 0.005317531526088715, "step": 157150 }, { "epoch": 44.609707635537895, "grad_norm": 0.053229253739118576, "learning_rate": 5.540902639795629e-05, "loss": 0.002090226300060749, "step": 157160 }, { "epoch": 44.61254612546126, "grad_norm": 7.986204147338867, "learning_rate": 5.540618790803292e-05, "loss": 0.0033102646470069886, "step": 157170 }, { "epoch": 44.61538461538461, "grad_norm": 0.46482500433921814, "learning_rate": 5.540334941810957e-05, "loss": 0.009142915904521941, "step": 157180 }, { "epoch": 44.618223105307976, "grad_norm": 2.049476146697998, "learning_rate": 5.540051092818621e-05, "loss": 0.0022250888869166374, "step": 157190 }, { "epoch": 44.62106159523134, "grad_norm": 1.8629913330078125, "learning_rate": 5.5397672438262846e-05, "loss": 0.0019881924614310264, "step": 157200 }, { "epoch": 44.623900085154695, "grad_norm": 0.06500549614429474, "learning_rate": 5.539483394833949e-05, "loss": 0.002598787099123001, "step": 157210 }, { "epoch": 44.62673857507806, "grad_norm": 0.8193596005439758, "learning_rate": 5.539199545841612e-05, "loss": 0.012364324927330018, "step": 157220 }, { "epoch": 44.62957706500142, "grad_norm": 0.020166484639048576, "learning_rate": 5.5389156968492764e-05, "loss": 0.003661905229091644, "step": 157230 }, { "epoch": 44.63241555492478, "grad_norm": 2.322923183441162, "learning_rate": 5.53863184785694e-05, "loss": 0.0022890416905283926, "step": 157240 }, { "epoch": 44.63525404484814, "grad_norm": 0.14427955448627472, "learning_rate": 5.5383479988646046e-05, "loss": 0.004511619359254837, "step": 157250 }, { "epoch": 44.6380925347715, "grad_norm": 0.12158554047346115, "learning_rate": 5.538064149872269e-05, "loss": 0.00046363212168216706, "step": 157260 }, { "epoch": 44.640931024694865, "grad_norm": 0.6672930121421814, "learning_rate": 5.537780300879932e-05, "loss": 0.004486248642206192, "step": 157270 }, { "epoch": 44.64376951461822, "grad_norm": 0.4299502968788147, "learning_rate": 5.537496451887596e-05, "loss": 0.00474189892411232, "step": 157280 }, { "epoch": 44.646608004541584, "grad_norm": 0.1382371485233307, "learning_rate": 5.53721260289526e-05, "loss": 0.001899905689060688, "step": 157290 }, { "epoch": 44.64944649446495, "grad_norm": 0.447896808385849, "learning_rate": 5.536928753902923e-05, "loss": 0.010579816997051239, "step": 157300 }, { "epoch": 44.6522849843883, "grad_norm": 0.5387517213821411, "learning_rate": 5.5366449049105874e-05, "loss": 0.0007550682872533798, "step": 157310 }, { "epoch": 44.655123474311665, "grad_norm": 1.9794776439666748, "learning_rate": 5.536361055918252e-05, "loss": 0.0013253185898065567, "step": 157320 }, { "epoch": 44.65796196423503, "grad_norm": 0.6754081845283508, "learning_rate": 5.536077206925916e-05, "loss": 0.0031715922057628633, "step": 157330 }, { "epoch": 44.66080045415839, "grad_norm": 0.13467875123023987, "learning_rate": 5.53579335793358e-05, "loss": 0.0006900811567902565, "step": 157340 }, { "epoch": 44.66363894408175, "grad_norm": 0.02628202736377716, "learning_rate": 5.535509508941243e-05, "loss": 0.003623931109905243, "step": 157350 }, { "epoch": 44.66647743400511, "grad_norm": 0.08537986874580383, "learning_rate": 5.5352256599489074e-05, "loss": 0.0027131034061312676, "step": 157360 }, { "epoch": 44.66931592392847, "grad_norm": 0.10028999298810959, "learning_rate": 5.534941810956571e-05, "loss": 0.0012365680187940598, "step": 157370 }, { "epoch": 44.67215441385183, "grad_norm": 0.043753731995821, "learning_rate": 5.534657961964236e-05, "loss": 0.0054683715105056764, "step": 157380 }, { "epoch": 44.67499290377519, "grad_norm": 13.11633586883545, "learning_rate": 5.5343741129719e-05, "loss": 0.00984930396080017, "step": 157390 }, { "epoch": 44.677831393698554, "grad_norm": 2.39262318611145, "learning_rate": 5.534090263979563e-05, "loss": 0.0043279752135276794, "step": 157400 }, { "epoch": 44.68066988362191, "grad_norm": 5.638765335083008, "learning_rate": 5.533806414987227e-05, "loss": 0.015101422369480134, "step": 157410 }, { "epoch": 44.68350837354527, "grad_norm": 1.4931830167770386, "learning_rate": 5.533522565994891e-05, "loss": 0.0011003555729985238, "step": 157420 }, { "epoch": 44.686346863468636, "grad_norm": 1.04764986038208, "learning_rate": 5.5332387170025544e-05, "loss": 0.0030232537537813187, "step": 157430 }, { "epoch": 44.689185353392, "grad_norm": 0.7580337524414062, "learning_rate": 5.5329548680102185e-05, "loss": 0.0013986112549901008, "step": 157440 }, { "epoch": 44.692023843315354, "grad_norm": 0.152394637465477, "learning_rate": 5.532671019017883e-05, "loss": 0.003720354661345482, "step": 157450 }, { "epoch": 44.69486233323872, "grad_norm": 2.5823490619659424, "learning_rate": 5.532387170025547e-05, "loss": 0.008514960110187531, "step": 157460 }, { "epoch": 44.69770082316208, "grad_norm": 0.0679573267698288, "learning_rate": 5.532103321033211e-05, "loss": 0.0012488575652241708, "step": 157470 }, { "epoch": 44.700539313085436, "grad_norm": 0.09576544165611267, "learning_rate": 5.5318194720408744e-05, "loss": 0.0026581190526485444, "step": 157480 }, { "epoch": 44.7033778030088, "grad_norm": 0.04460605978965759, "learning_rate": 5.5315356230485385e-05, "loss": 0.0015082472935318946, "step": 157490 }, { "epoch": 44.70621629293216, "grad_norm": 0.1593327820301056, "learning_rate": 5.531251774056202e-05, "loss": 0.0024950157850980757, "step": 157500 }, { "epoch": 44.70621629293216, "eval_accuracy": 0.972976409995549, "eval_loss": 0.10400718450546265, "eval_runtime": 32.28, "eval_samples_per_second": 487.206, "eval_steps_per_second": 7.621, "step": 157500 }, { "epoch": 44.70905478285552, "grad_norm": 0.07861676812171936, "learning_rate": 5.5309679250638654e-05, "loss": 0.004844729602336883, "step": 157510 }, { "epoch": 44.71189327277888, "grad_norm": 0.11340316385030746, "learning_rate": 5.530684076071531e-05, "loss": 0.001508037932217121, "step": 157520 }, { "epoch": 44.71473176270224, "grad_norm": 1.2561005353927612, "learning_rate": 5.5304002270791944e-05, "loss": 0.0034207697957754136, "step": 157530 }, { "epoch": 44.717570252625606, "grad_norm": 1.3330212831497192, "learning_rate": 5.530116378086858e-05, "loss": 0.0008071361109614373, "step": 157540 }, { "epoch": 44.72040874254896, "grad_norm": 0.05329032614827156, "learning_rate": 5.529832529094522e-05, "loss": 0.00037527326494455335, "step": 157550 }, { "epoch": 44.723247232472325, "grad_norm": 0.04077674821019173, "learning_rate": 5.5295486801021854e-05, "loss": 0.0013462277129292488, "step": 157560 }, { "epoch": 44.72608572239569, "grad_norm": 0.18929848074913025, "learning_rate": 5.5292648311098496e-05, "loss": 0.002169429138302803, "step": 157570 }, { "epoch": 44.72892421231904, "grad_norm": 0.19470995664596558, "learning_rate": 5.5289809821175144e-05, "loss": 0.001561649888753891, "step": 157580 }, { "epoch": 44.731762702242406, "grad_norm": 0.5087531805038452, "learning_rate": 5.528697133125178e-05, "loss": 0.0035420402884483336, "step": 157590 }, { "epoch": 44.73460119216577, "grad_norm": 10.44636344909668, "learning_rate": 5.528413284132842e-05, "loss": 0.0034440584480762483, "step": 157600 }, { "epoch": 44.73743968208913, "grad_norm": 0.058147307485342026, "learning_rate": 5.5281294351405054e-05, "loss": 0.0006294047459959983, "step": 157610 }, { "epoch": 44.74027817201249, "grad_norm": 0.9640230536460876, "learning_rate": 5.5278455861481696e-05, "loss": 0.0008123567327857018, "step": 157620 }, { "epoch": 44.74311666193585, "grad_norm": 0.33940911293029785, "learning_rate": 5.527561737155833e-05, "loss": 0.001241137646138668, "step": 157630 }, { "epoch": 44.745955151859214, "grad_norm": 0.0839107409119606, "learning_rate": 5.5272778881634965e-05, "loss": 0.007611919939517975, "step": 157640 }, { "epoch": 44.74879364178257, "grad_norm": 0.48125582933425903, "learning_rate": 5.526994039171161e-05, "loss": 0.0010891607031226157, "step": 157650 }, { "epoch": 44.75163213170593, "grad_norm": 0.027461860328912735, "learning_rate": 5.5267101901788255e-05, "loss": 0.0008801359683275223, "step": 157660 }, { "epoch": 44.754470621629295, "grad_norm": 0.06395852565765381, "learning_rate": 5.526426341186489e-05, "loss": 0.00028124842792749406, "step": 157670 }, { "epoch": 44.75730911155265, "grad_norm": 0.03503318876028061, "learning_rate": 5.526142492194153e-05, "loss": 0.0009743746370077134, "step": 157680 }, { "epoch": 44.760147601476014, "grad_norm": 0.5716536045074463, "learning_rate": 5.5258586432018165e-05, "loss": 0.0009931830689311027, "step": 157690 }, { "epoch": 44.76298609139938, "grad_norm": 0.03745920583605766, "learning_rate": 5.5255747942094807e-05, "loss": 0.0009652700275182724, "step": 157700 }, { "epoch": 44.76582458132274, "grad_norm": 0.2622953951358795, "learning_rate": 5.5252909452171455e-05, "loss": 0.00228087417781353, "step": 157710 }, { "epoch": 44.768663071246095, "grad_norm": 0.055658817291259766, "learning_rate": 5.525007096224809e-05, "loss": 0.0042261935770511625, "step": 157720 }, { "epoch": 44.77150156116946, "grad_norm": 0.04795948415994644, "learning_rate": 5.524723247232473e-05, "loss": 0.0022229550406336783, "step": 157730 }, { "epoch": 44.77434005109282, "grad_norm": 0.05885598063468933, "learning_rate": 5.5244393982401365e-05, "loss": 0.0038751911371946333, "step": 157740 }, { "epoch": 44.77717854101618, "grad_norm": 0.2620353400707245, "learning_rate": 5.5241555492478e-05, "loss": 0.00743177980184555, "step": 157750 }, { "epoch": 44.78001703093954, "grad_norm": 0.30183443427085876, "learning_rate": 5.523871700255464e-05, "loss": 0.0017947282642126084, "step": 157760 }, { "epoch": 44.7828555208629, "grad_norm": 0.8360267877578735, "learning_rate": 5.5235878512631276e-05, "loss": 0.004555998742580414, "step": 157770 }, { "epoch": 44.78569401078626, "grad_norm": 1.1649465560913086, "learning_rate": 5.5233040022707924e-05, "loss": 0.006046444922685623, "step": 157780 }, { "epoch": 44.78853250070962, "grad_norm": 10.252805709838867, "learning_rate": 5.5230201532784565e-05, "loss": 0.0029329689219594, "step": 157790 }, { "epoch": 44.791370990632984, "grad_norm": 2.4858438968658447, "learning_rate": 5.52273630428612e-05, "loss": 0.0016216130927205086, "step": 157800 }, { "epoch": 44.79420948055635, "grad_norm": 0.12734635174274445, "learning_rate": 5.522452455293784e-05, "loss": 0.01193375512957573, "step": 157810 }, { "epoch": 44.7970479704797, "grad_norm": 0.10532432794570923, "learning_rate": 5.5221686063014476e-05, "loss": 0.0007083123549818992, "step": 157820 }, { "epoch": 44.799886460403066, "grad_norm": 0.0994689092040062, "learning_rate": 5.521884757309112e-05, "loss": 0.0012152742594480515, "step": 157830 }, { "epoch": 44.80272495032643, "grad_norm": 0.02299094758927822, "learning_rate": 5.521600908316775e-05, "loss": 0.0008349131792783737, "step": 157840 }, { "epoch": 44.805563440249784, "grad_norm": 0.214923694729805, "learning_rate": 5.52131705932444e-05, "loss": 0.0034560278058052064, "step": 157850 }, { "epoch": 44.80840193017315, "grad_norm": 1.6340503692626953, "learning_rate": 5.521033210332104e-05, "loss": 0.005717422813177109, "step": 157860 }, { "epoch": 44.81124042009651, "grad_norm": 0.25019407272338867, "learning_rate": 5.5207493613397676e-05, "loss": 0.0007282810285687447, "step": 157870 }, { "epoch": 44.814078910019866, "grad_norm": 12.4509859085083, "learning_rate": 5.520465512347431e-05, "loss": 0.0034079603850841523, "step": 157880 }, { "epoch": 44.81691739994323, "grad_norm": 4.388509750366211, "learning_rate": 5.520181663355095e-05, "loss": 0.005642292648553848, "step": 157890 }, { "epoch": 44.81975588986659, "grad_norm": 0.4250636696815491, "learning_rate": 5.5198978143627587e-05, "loss": 0.006513231992721557, "step": 157900 }, { "epoch": 44.822594379789955, "grad_norm": 2.885422468185425, "learning_rate": 5.5196139653704235e-05, "loss": 0.017210948467254638, "step": 157910 }, { "epoch": 44.82543286971331, "grad_norm": 0.007075225003063679, "learning_rate": 5.5193301163780876e-05, "loss": 0.007407717406749725, "step": 157920 }, { "epoch": 44.82827135963667, "grad_norm": 15.851521492004395, "learning_rate": 5.519046267385751e-05, "loss": 0.007784406840801239, "step": 157930 }, { "epoch": 44.831109849560036, "grad_norm": 0.10416088253259659, "learning_rate": 5.518762418393415e-05, "loss": 0.0037881582975387573, "step": 157940 }, { "epoch": 44.83394833948339, "grad_norm": 0.9782705903053284, "learning_rate": 5.518478569401079e-05, "loss": 0.0033412180840969087, "step": 157950 }, { "epoch": 44.836786829406755, "grad_norm": 3.805529832839966, "learning_rate": 5.518194720408743e-05, "loss": 0.002774055115878582, "step": 157960 }, { "epoch": 44.83962531933012, "grad_norm": 10.517626762390137, "learning_rate": 5.517910871416406e-05, "loss": 0.0051384449005126955, "step": 157970 }, { "epoch": 44.84246380925348, "grad_norm": 0.4152916371822357, "learning_rate": 5.517627022424071e-05, "loss": 0.000879182480275631, "step": 157980 }, { "epoch": 44.845302299176836, "grad_norm": 0.04031709209084511, "learning_rate": 5.517343173431735e-05, "loss": 0.010970105230808259, "step": 157990 }, { "epoch": 44.8481407891002, "grad_norm": 1.6094872951507568, "learning_rate": 5.517059324439399e-05, "loss": 0.0008959716185927391, "step": 158000 }, { "epoch": 44.8481407891002, "eval_accuracy": 0.9748203726076174, "eval_loss": 0.08915136009454727, "eval_runtime": 32.0232, "eval_samples_per_second": 491.112, "eval_steps_per_second": 7.682, "step": 158000 }, { "epoch": 44.85097927902356, "grad_norm": 0.3079792261123657, "learning_rate": 5.516775475447062e-05, "loss": 0.00936158150434494, "step": 158010 }, { "epoch": 44.85381776894692, "grad_norm": 0.08267982304096222, "learning_rate": 5.516491626454726e-05, "loss": 0.0007191915065050125, "step": 158020 }, { "epoch": 44.85665625887028, "grad_norm": 1.1622956991195679, "learning_rate": 5.51620777746239e-05, "loss": 0.0011630786582827568, "step": 158030 }, { "epoch": 44.859494748793644, "grad_norm": 0.25272417068481445, "learning_rate": 5.515923928470054e-05, "loss": 0.002562100812792778, "step": 158040 }, { "epoch": 44.862333238717, "grad_norm": 0.599229633808136, "learning_rate": 5.515640079477719e-05, "loss": 0.0006453372538089752, "step": 158050 }, { "epoch": 44.86517172864036, "grad_norm": 0.8068385720252991, "learning_rate": 5.515356230485382e-05, "loss": 0.004246723651885986, "step": 158060 }, { "epoch": 44.868010218563725, "grad_norm": 1.126819133758545, "learning_rate": 5.515072381493046e-05, "loss": 0.000901707261800766, "step": 158070 }, { "epoch": 44.87084870848709, "grad_norm": 0.11892633140087128, "learning_rate": 5.51478853250071e-05, "loss": 0.001006169244647026, "step": 158080 }, { "epoch": 44.873687198410444, "grad_norm": 0.028892138972878456, "learning_rate": 5.514504683508374e-05, "loss": 0.0023022569715976716, "step": 158090 }, { "epoch": 44.87652568833381, "grad_norm": 0.3775464594364166, "learning_rate": 5.5142208345160373e-05, "loss": 0.004564998298883438, "step": 158100 }, { "epoch": 44.87936417825717, "grad_norm": 0.09537631273269653, "learning_rate": 5.513936985523702e-05, "loss": 0.0006770772859454155, "step": 158110 }, { "epoch": 44.882202668180526, "grad_norm": 0.13109567761421204, "learning_rate": 5.5136531365313656e-05, "loss": 0.0017221098765730857, "step": 158120 }, { "epoch": 44.88504115810389, "grad_norm": 0.069832943379879, "learning_rate": 5.51336928753903e-05, "loss": 0.0010123152285814285, "step": 158130 }, { "epoch": 44.88787964802725, "grad_norm": 0.07793183624744415, "learning_rate": 5.513085438546693e-05, "loss": 0.000627504475414753, "step": 158140 }, { "epoch": 44.89071813795061, "grad_norm": 1.6821398735046387, "learning_rate": 5.5128015895543574e-05, "loss": 0.0018203791230916976, "step": 158150 }, { "epoch": 44.89355662787397, "grad_norm": 0.17002630233764648, "learning_rate": 5.512517740562021e-05, "loss": 0.0011295968666672706, "step": 158160 }, { "epoch": 44.89639511779733, "grad_norm": 0.1596800684928894, "learning_rate": 5.512233891569685e-05, "loss": 0.00046041086316108706, "step": 158170 }, { "epoch": 44.899233607720696, "grad_norm": 0.07032977044582367, "learning_rate": 5.51195004257735e-05, "loss": 0.0019820904359221458, "step": 158180 }, { "epoch": 44.90207209764405, "grad_norm": 0.061592888087034225, "learning_rate": 5.511666193585013e-05, "loss": 0.0005488898605108261, "step": 158190 }, { "epoch": 44.904910587567414, "grad_norm": 0.08529050648212433, "learning_rate": 5.5113823445926774e-05, "loss": 0.0008605325594544411, "step": 158200 }, { "epoch": 44.90774907749078, "grad_norm": 0.082225501537323, "learning_rate": 5.511098495600341e-05, "loss": 0.0005826020613312721, "step": 158210 }, { "epoch": 44.91058756741413, "grad_norm": 0.011607063002884388, "learning_rate": 5.510814646608004e-05, "loss": 0.0008266100659966469, "step": 158220 }, { "epoch": 44.913426057337496, "grad_norm": 5.987351894378662, "learning_rate": 5.5105307976156684e-05, "loss": 0.001955020986497402, "step": 158230 }, { "epoch": 44.91626454726086, "grad_norm": 0.046347372233867645, "learning_rate": 5.510246948623332e-05, "loss": 0.0015226827934384346, "step": 158240 }, { "epoch": 44.919103037184215, "grad_norm": 0.2993282377719879, "learning_rate": 5.509963099630997e-05, "loss": 0.0010629979893565177, "step": 158250 }, { "epoch": 44.92194152710758, "grad_norm": 0.07779286801815033, "learning_rate": 5.509679250638661e-05, "loss": 0.0006899459287524224, "step": 158260 }, { "epoch": 44.92478001703094, "grad_norm": 0.19331727921962738, "learning_rate": 5.509395401646324e-05, "loss": 0.0005407802760601044, "step": 158270 }, { "epoch": 44.9276185069543, "grad_norm": 0.16289174556732178, "learning_rate": 5.5091115526539884e-05, "loss": 0.0012634962797164917, "step": 158280 }, { "epoch": 44.93045699687766, "grad_norm": 0.16641496121883392, "learning_rate": 5.508827703661652e-05, "loss": 0.0025248169898986817, "step": 158290 }, { "epoch": 44.93329548680102, "grad_norm": 0.0467965193092823, "learning_rate": 5.508543854669316e-05, "loss": 0.0012122685089707374, "step": 158300 }, { "epoch": 44.936133976724385, "grad_norm": 0.1455937623977661, "learning_rate": 5.508260005676981e-05, "loss": 0.001590898260474205, "step": 158310 }, { "epoch": 44.93897246664774, "grad_norm": 2.0370683670043945, "learning_rate": 5.507976156684644e-05, "loss": 0.004656960070133209, "step": 158320 }, { "epoch": 44.9418109565711, "grad_norm": 2.585007667541504, "learning_rate": 5.5076923076923084e-05, "loss": 0.00512169897556305, "step": 158330 }, { "epoch": 44.944649446494466, "grad_norm": 0.9029721021652222, "learning_rate": 5.507408458699972e-05, "loss": 0.0007221681997179985, "step": 158340 }, { "epoch": 44.94748793641783, "grad_norm": 0.4834648370742798, "learning_rate": 5.5071246097076354e-05, "loss": 0.0022313324734568596, "step": 158350 }, { "epoch": 44.950326426341185, "grad_norm": 0.16262923181056976, "learning_rate": 5.5068407607152995e-05, "loss": 0.003935231268405915, "step": 158360 }, { "epoch": 44.95316491626455, "grad_norm": 2.9060633182525635, "learning_rate": 5.506556911722963e-05, "loss": 0.005235027521848679, "step": 158370 }, { "epoch": 44.95600340618791, "grad_norm": 0.5851252675056458, "learning_rate": 5.506273062730628e-05, "loss": 0.0008652579039335251, "step": 158380 }, { "epoch": 44.95884189611127, "grad_norm": 0.10988026857376099, "learning_rate": 5.505989213738292e-05, "loss": 0.007165488600730896, "step": 158390 }, { "epoch": 44.96168038603463, "grad_norm": 0.080855593085289, "learning_rate": 5.5057053647459554e-05, "loss": 0.0013157708570361137, "step": 158400 }, { "epoch": 44.96451887595799, "grad_norm": 1.039370059967041, "learning_rate": 5.5054215157536195e-05, "loss": 0.015054267644882203, "step": 158410 }, { "epoch": 44.96735736588135, "grad_norm": 0.9792690277099609, "learning_rate": 5.505137666761283e-05, "loss": 0.007184583693742752, "step": 158420 }, { "epoch": 44.97019585580471, "grad_norm": 0.21361017227172852, "learning_rate": 5.504853817768947e-05, "loss": 0.007206868380308151, "step": 158430 }, { "epoch": 44.973034345728074, "grad_norm": 3.469264507293701, "learning_rate": 5.5045699687766106e-05, "loss": 0.004988017305731773, "step": 158440 }, { "epoch": 44.97587283565144, "grad_norm": 0.06772604584693909, "learning_rate": 5.5042861197842754e-05, "loss": 0.000697045587003231, "step": 158450 }, { "epoch": 44.97871132557479, "grad_norm": 0.10127173364162445, "learning_rate": 5.5040022707919395e-05, "loss": 0.0003275051712989807, "step": 158460 }, { "epoch": 44.981549815498155, "grad_norm": 0.19566664099693298, "learning_rate": 5.503718421799603e-05, "loss": 0.001727229543030262, "step": 158470 }, { "epoch": 44.98438830542152, "grad_norm": 0.12345828860998154, "learning_rate": 5.5034345728072664e-05, "loss": 0.0004245707765221596, "step": 158480 }, { "epoch": 44.987226795344874, "grad_norm": 0.045536983758211136, "learning_rate": 5.5031507238149306e-05, "loss": 0.0016574151813983917, "step": 158490 }, { "epoch": 44.99006528526824, "grad_norm": 1.9903802871704102, "learning_rate": 5.502866874822594e-05, "loss": 0.0023794559761881827, "step": 158500 }, { "epoch": 44.99006528526824, "eval_accuracy": 0.9745660329369873, "eval_loss": 0.09136876463890076, "eval_runtime": 31.8606, "eval_samples_per_second": 493.619, "eval_steps_per_second": 7.721, "step": 158500 }, { "epoch": 44.9929037751916, "grad_norm": 0.04272126778960228, "learning_rate": 5.502583025830259e-05, "loss": 0.0008862776681780815, "step": 158510 }, { "epoch": 44.995742265114956, "grad_norm": 0.8683083653450012, "learning_rate": 5.502299176837923e-05, "loss": 0.005330255255103111, "step": 158520 }, { "epoch": 44.99858075503832, "grad_norm": 0.060387711971998215, "learning_rate": 5.5020153278455864e-05, "loss": 0.001135408878326416, "step": 158530 }, { "epoch": 45.00141924496168, "grad_norm": 0.25611069798469543, "learning_rate": 5.5017314788532506e-05, "loss": 0.006997285783290863, "step": 158540 }, { "epoch": 45.004257734885044, "grad_norm": 0.023752298206090927, "learning_rate": 5.501447629860914e-05, "loss": 0.002735184133052826, "step": 158550 }, { "epoch": 45.0070962248084, "grad_norm": 0.012567800469696522, "learning_rate": 5.501163780868578e-05, "loss": 0.0006977530196309089, "step": 158560 }, { "epoch": 45.00993471473176, "grad_norm": 0.1428321897983551, "learning_rate": 5.5008799318762416e-05, "loss": 0.00831996276974678, "step": 158570 }, { "epoch": 45.012773204655126, "grad_norm": 0.35385504364967346, "learning_rate": 5.5005960828839065e-05, "loss": 0.002828647755086422, "step": 158580 }, { "epoch": 45.01561169457848, "grad_norm": 0.08775291591882706, "learning_rate": 5.50031223389157e-05, "loss": 0.0012270327657461166, "step": 158590 }, { "epoch": 45.018450184501845, "grad_norm": 0.11947706341743469, "learning_rate": 5.500028384899234e-05, "loss": 0.00974058136343956, "step": 158600 }, { "epoch": 45.02128867442521, "grad_norm": 5.909399032592773, "learning_rate": 5.4997445359068975e-05, "loss": 0.004210643097758293, "step": 158610 }, { "epoch": 45.02412716434856, "grad_norm": Infinity, "learning_rate": 5.4994606869145617e-05, "loss": 0.004809434339404106, "step": 158620 }, { "epoch": 45.026965654271926, "grad_norm": 1.364916443824768, "learning_rate": 5.4992052228214596e-05, "loss": 0.002781349606812, "step": 158630 }, { "epoch": 45.02980414419529, "grad_norm": 4.440274715423584, "learning_rate": 5.498921373829123e-05, "loss": 0.002674400433897972, "step": 158640 }, { "epoch": 45.03264263411865, "grad_norm": 1.0811306238174438, "learning_rate": 5.498637524836787e-05, "loss": 0.0009710287675261497, "step": 158650 }, { "epoch": 45.03548112404201, "grad_norm": 0.13827070593833923, "learning_rate": 5.498353675844451e-05, "loss": 0.0006848923861980438, "step": 158660 }, { "epoch": 45.03831961396537, "grad_norm": 0.16593563556671143, "learning_rate": 5.498069826852115e-05, "loss": 0.0021458635106682777, "step": 158670 }, { "epoch": 45.04115810388873, "grad_norm": 0.13837239146232605, "learning_rate": 5.497785977859778e-05, "loss": 0.0011216219514608382, "step": 158680 }, { "epoch": 45.04399659381209, "grad_norm": 0.26147106289863586, "learning_rate": 5.497502128867443e-05, "loss": 0.007856369018554688, "step": 158690 }, { "epoch": 45.04683508373545, "grad_norm": 0.3573833405971527, "learning_rate": 5.497218279875107e-05, "loss": 0.0011279525235295296, "step": 158700 }, { "epoch": 45.049673573658815, "grad_norm": 0.31289637088775635, "learning_rate": 5.496934430882771e-05, "loss": 0.002954649366438389, "step": 158710 }, { "epoch": 45.05251206358217, "grad_norm": 0.02787191979587078, "learning_rate": 5.496650581890435e-05, "loss": 0.0023475375026464464, "step": 158720 }, { "epoch": 45.055350553505534, "grad_norm": 9.228250503540039, "learning_rate": 5.496366732898098e-05, "loss": 0.006078017503023147, "step": 158730 }, { "epoch": 45.0581890434289, "grad_norm": 0.2058650553226471, "learning_rate": 5.496082883905762e-05, "loss": 0.0012133600190281867, "step": 158740 }, { "epoch": 45.06102753335226, "grad_norm": 0.02194630540907383, "learning_rate": 5.495799034913426e-05, "loss": 0.003739486634731293, "step": 158750 }, { "epoch": 45.063866023275615, "grad_norm": 3.892699956893921, "learning_rate": 5.495515185921091e-05, "loss": 0.002123177982866764, "step": 158760 }, { "epoch": 45.06670451319898, "grad_norm": 6.4801177978515625, "learning_rate": 5.495231336928754e-05, "loss": 0.001968359388411045, "step": 158770 }, { "epoch": 45.06954300312234, "grad_norm": 0.29151490330696106, "learning_rate": 5.494947487936418e-05, "loss": 0.0038392387330532076, "step": 158780 }, { "epoch": 45.0723814930457, "grad_norm": 0.03870563581585884, "learning_rate": 5.494663638944082e-05, "loss": 0.005506311729550362, "step": 158790 }, { "epoch": 45.07521998296906, "grad_norm": 0.06824429333209991, "learning_rate": 5.494379789951746e-05, "loss": 0.0007027361541986465, "step": 158800 }, { "epoch": 45.07805847289242, "grad_norm": 0.05247722193598747, "learning_rate": 5.4940959409594094e-05, "loss": 0.014284053444862365, "step": 158810 }, { "epoch": 45.080896962815785, "grad_norm": 12.008231163024902, "learning_rate": 5.4938120919670735e-05, "loss": 0.0030184928327798843, "step": 158820 }, { "epoch": 45.08373545273914, "grad_norm": 0.0152906384319067, "learning_rate": 5.493528242974738e-05, "loss": 0.0033500224351882936, "step": 158830 }, { "epoch": 45.086573942662504, "grad_norm": 0.6462904810905457, "learning_rate": 5.493244393982402e-05, "loss": 0.0014458335936069489, "step": 158840 }, { "epoch": 45.08941243258587, "grad_norm": 0.020498517900705338, "learning_rate": 5.492960544990066e-05, "loss": 0.005823376774787903, "step": 158850 }, { "epoch": 45.09225092250922, "grad_norm": 1.6525417566299438, "learning_rate": 5.4926766959977294e-05, "loss": 0.001875150017440319, "step": 158860 }, { "epoch": 45.095089412432586, "grad_norm": 15.990608215332031, "learning_rate": 5.492392847005393e-05, "loss": 0.013257858157157899, "step": 158870 }, { "epoch": 45.09792790235595, "grad_norm": 1.0381603240966797, "learning_rate": 5.492108998013057e-05, "loss": 0.0030856169760227203, "step": 158880 }, { "epoch": 45.100766392279304, "grad_norm": 14.545747756958008, "learning_rate": 5.491825149020722e-05, "loss": 0.010082698613405227, "step": 158890 }, { "epoch": 45.10360488220267, "grad_norm": 0.15826590359210968, "learning_rate": 5.491541300028385e-05, "loss": 0.000534714013338089, "step": 158900 }, { "epoch": 45.10644337212603, "grad_norm": 2.7404963970184326, "learning_rate": 5.4912574510360494e-05, "loss": 0.0007791947573423386, "step": 158910 }, { "epoch": 45.10928186204939, "grad_norm": 0.6924488544464111, "learning_rate": 5.490973602043713e-05, "loss": 0.0006656453013420105, "step": 158920 }, { "epoch": 45.11212035197275, "grad_norm": 0.05508305877447128, "learning_rate": 5.490689753051377e-05, "loss": 0.002227383479475975, "step": 158930 }, { "epoch": 45.11495884189611, "grad_norm": 0.014868048951029778, "learning_rate": 5.4904059040590405e-05, "loss": 0.0007092274725437164, "step": 158940 }, { "epoch": 45.117797331819474, "grad_norm": 0.14755873382091522, "learning_rate": 5.4901220550667046e-05, "loss": 0.0010996513068675996, "step": 158950 }, { "epoch": 45.12063582174283, "grad_norm": 0.08222337812185287, "learning_rate": 5.4898382060743694e-05, "loss": 0.001336578093469143, "step": 158960 }, { "epoch": 45.12347431166619, "grad_norm": 0.09603218734264374, "learning_rate": 5.489554357082033e-05, "loss": 0.000968967005610466, "step": 158970 }, { "epoch": 45.126312801589556, "grad_norm": 7.601822376251221, "learning_rate": 5.489270508089697e-05, "loss": 0.001595294661819935, "step": 158980 }, { "epoch": 45.12915129151291, "grad_norm": 0.008883538655936718, "learning_rate": 5.4889866590973605e-05, "loss": 0.0028997644782066344, "step": 158990 }, { "epoch": 45.131989781436275, "grad_norm": 0.2879927158355713, "learning_rate": 5.488702810105024e-05, "loss": 0.0030341632664203644, "step": 159000 }, { "epoch": 45.131989781436275, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.09030680358409882, "eval_runtime": 32.3828, "eval_samples_per_second": 485.66, "eval_steps_per_second": 7.597, "step": 159000 }, { "epoch": 45.13482827135964, "grad_norm": 7.417815685272217, "learning_rate": 5.488418961112688e-05, "loss": 0.0016166044399142266, "step": 159010 }, { "epoch": 45.137666761283, "grad_norm": 4.801940441131592, "learning_rate": 5.4881351121203515e-05, "loss": 0.0023504666984081267, "step": 159020 }, { "epoch": 45.140505251206356, "grad_norm": 1.0077598094940186, "learning_rate": 5.487851263128016e-05, "loss": 0.000833679549396038, "step": 159030 }, { "epoch": 45.14334374112972, "grad_norm": 1.24357271194458, "learning_rate": 5.4875674141356805e-05, "loss": 0.001734432578086853, "step": 159040 }, { "epoch": 45.14618223105308, "grad_norm": 0.19335401058197021, "learning_rate": 5.487283565143344e-05, "loss": 0.007602711021900177, "step": 159050 }, { "epoch": 45.14902072097644, "grad_norm": 0.04252135381102562, "learning_rate": 5.486999716151008e-05, "loss": 0.0013842761516571044, "step": 159060 }, { "epoch": 45.1518592108998, "grad_norm": 0.5231683850288391, "learning_rate": 5.4867158671586715e-05, "loss": 0.003377676010131836, "step": 159070 }, { "epoch": 45.154697700823164, "grad_norm": 0.04143055900931358, "learning_rate": 5.486432018166336e-05, "loss": 0.0005700143054127693, "step": 159080 }, { "epoch": 45.15753619074652, "grad_norm": 0.0542815737426281, "learning_rate": 5.4861481691740005e-05, "loss": 0.00034311339259147645, "step": 159090 }, { "epoch": 45.16037468066988, "grad_norm": 0.03018852323293686, "learning_rate": 5.485864320181664e-05, "loss": 0.0005938779562711715, "step": 159100 }, { "epoch": 45.163213170593245, "grad_norm": 0.543493390083313, "learning_rate": 5.4855804711893274e-05, "loss": 0.0012732425704598427, "step": 159110 }, { "epoch": 45.16605166051661, "grad_norm": 2.836855173110962, "learning_rate": 5.4852966221969915e-05, "loss": 0.0009161999449133873, "step": 159120 }, { "epoch": 45.168890150439964, "grad_norm": 0.06751839816570282, "learning_rate": 5.485012773204655e-05, "loss": 0.0008341988548636436, "step": 159130 }, { "epoch": 45.17172864036333, "grad_norm": 0.06446419656276703, "learning_rate": 5.484728924212319e-05, "loss": 0.0013551948592066764, "step": 159140 }, { "epoch": 45.17456713028669, "grad_norm": 0.15203124284744263, "learning_rate": 5.4844450752199826e-05, "loss": 0.0006399227306246758, "step": 159150 }, { "epoch": 45.177405620210045, "grad_norm": 0.24372035264968872, "learning_rate": 5.4841612262276474e-05, "loss": 0.0010714277625083924, "step": 159160 }, { "epoch": 45.18024411013341, "grad_norm": 2.453103542327881, "learning_rate": 5.4838773772353115e-05, "loss": 0.0009769123047590257, "step": 159170 }, { "epoch": 45.18308260005677, "grad_norm": 6.146474361419678, "learning_rate": 5.483593528242975e-05, "loss": 0.008214885741472245, "step": 159180 }, { "epoch": 45.185921089980134, "grad_norm": 2.4516446590423584, "learning_rate": 5.483309679250639e-05, "loss": 0.0070764578878879545, "step": 159190 }, { "epoch": 45.18875957990349, "grad_norm": 0.14871080219745636, "learning_rate": 5.4830258302583026e-05, "loss": 0.022754067182540895, "step": 159200 }, { "epoch": 45.19159806982685, "grad_norm": 0.13743652403354645, "learning_rate": 5.482741981265966e-05, "loss": 0.003486846014857292, "step": 159210 }, { "epoch": 45.194436559750216, "grad_norm": 0.303010493516922, "learning_rate": 5.48245813227363e-05, "loss": 0.0032066166400909423, "step": 159220 }, { "epoch": 45.19727504967357, "grad_norm": 0.9396266937255859, "learning_rate": 5.482174283281295e-05, "loss": 0.003225202113389969, "step": 159230 }, { "epoch": 45.200113539596934, "grad_norm": 0.1093941256403923, "learning_rate": 5.4818904342889585e-05, "loss": 0.0003883266821503639, "step": 159240 }, { "epoch": 45.2029520295203, "grad_norm": 0.205396369099617, "learning_rate": 5.4816065852966226e-05, "loss": 0.0008455120027065277, "step": 159250 }, { "epoch": 45.20579051944365, "grad_norm": 0.9547229409217834, "learning_rate": 5.481322736304286e-05, "loss": 0.0006294973194599151, "step": 159260 }, { "epoch": 45.208629009367016, "grad_norm": 0.44635310769081116, "learning_rate": 5.48103888731195e-05, "loss": 0.000569654256105423, "step": 159270 }, { "epoch": 45.21146749929038, "grad_norm": 0.17590799927711487, "learning_rate": 5.480755038319614e-05, "loss": 0.00045420415699481964, "step": 159280 }, { "epoch": 45.21430598921374, "grad_norm": 0.22263941168785095, "learning_rate": 5.4804711893272785e-05, "loss": 0.0020836981013417242, "step": 159290 }, { "epoch": 45.2171444791371, "grad_norm": 0.06841296702623367, "learning_rate": 5.4801873403349426e-05, "loss": 0.0012127941474318505, "step": 159300 }, { "epoch": 45.21998296906046, "grad_norm": 0.029327819123864174, "learning_rate": 5.479903491342606e-05, "loss": 0.00021926909685134887, "step": 159310 }, { "epoch": 45.22282145898382, "grad_norm": 0.014533000998198986, "learning_rate": 5.47961964235027e-05, "loss": 0.0007028359919786453, "step": 159320 }, { "epoch": 45.22565994890718, "grad_norm": 0.15540358424186707, "learning_rate": 5.479335793357934e-05, "loss": 0.0007843375205993652, "step": 159330 }, { "epoch": 45.22849843883054, "grad_norm": 0.6212250590324402, "learning_rate": 5.479051944365597e-05, "loss": 0.0020840223878622055, "step": 159340 }, { "epoch": 45.231336928753905, "grad_norm": 0.7989044189453125, "learning_rate": 5.478768095373261e-05, "loss": 0.000829460471868515, "step": 159350 }, { "epoch": 45.23417541867726, "grad_norm": 0.35492047667503357, "learning_rate": 5.478484246380926e-05, "loss": 0.0034455753862857818, "step": 159360 }, { "epoch": 45.23701390860062, "grad_norm": 0.13733632862567902, "learning_rate": 5.4782003973885896e-05, "loss": 0.000853135995566845, "step": 159370 }, { "epoch": 45.239852398523986, "grad_norm": 8.629515647888184, "learning_rate": 5.477916548396254e-05, "loss": 0.007530410587787628, "step": 159380 }, { "epoch": 45.24269088844735, "grad_norm": 0.2308046519756317, "learning_rate": 5.477632699403917e-05, "loss": 0.005640815943479538, "step": 159390 }, { "epoch": 45.245529378370705, "grad_norm": 0.09870049357414246, "learning_rate": 5.477348850411581e-05, "loss": 0.004666077718138695, "step": 159400 }, { "epoch": 45.24836786829407, "grad_norm": 0.05703624337911606, "learning_rate": 5.477065001419245e-05, "loss": 0.0029806135222315787, "step": 159410 }, { "epoch": 45.25120635821743, "grad_norm": 0.07162238657474518, "learning_rate": 5.4767811524269096e-05, "loss": 0.004788489639759063, "step": 159420 }, { "epoch": 45.254044848140786, "grad_norm": 5.795346736907959, "learning_rate": 5.476497303434574e-05, "loss": 0.0038419514894485474, "step": 159430 }, { "epoch": 45.25688333806415, "grad_norm": 4.59437370300293, "learning_rate": 5.476213454442237e-05, "loss": 0.0024315565824508667, "step": 159440 }, { "epoch": 45.25972182798751, "grad_norm": 0.03767161816358566, "learning_rate": 5.475929605449901e-05, "loss": 0.0021321330219507217, "step": 159450 }, { "epoch": 45.26256031791087, "grad_norm": 0.23922036588191986, "learning_rate": 5.475645756457565e-05, "loss": 0.0019891796633601188, "step": 159460 }, { "epoch": 45.26539880783423, "grad_norm": 0.021695628762245178, "learning_rate": 5.475361907465228e-05, "loss": 0.002495933882892132, "step": 159470 }, { "epoch": 45.268237297757594, "grad_norm": 8.530279159545898, "learning_rate": 5.4750780584728924e-05, "loss": 0.0025252345949411392, "step": 159480 }, { "epoch": 45.27107578768096, "grad_norm": 0.1892513483762741, "learning_rate": 5.474794209480557e-05, "loss": 0.004962759092450142, "step": 159490 }, { "epoch": 45.27391427760431, "grad_norm": 0.023996317759156227, "learning_rate": 5.4745103604882206e-05, "loss": 0.002082580886781216, "step": 159500 }, { "epoch": 45.27391427760431, "eval_accuracy": 0.9755833916195078, "eval_loss": 0.09203548729419708, "eval_runtime": 32.1218, "eval_samples_per_second": 489.604, "eval_steps_per_second": 7.658, "step": 159500 }, { "epoch": 45.276752767527675, "grad_norm": 0.027179552242159843, "learning_rate": 5.474226511495885e-05, "loss": 0.0035132870078086855, "step": 159510 }, { "epoch": 45.27959125745104, "grad_norm": 0.893366277217865, "learning_rate": 5.473942662503548e-05, "loss": 0.0012043889611959458, "step": 159520 }, { "epoch": 45.282429747374394, "grad_norm": 0.8222613334655762, "learning_rate": 5.4736588135112124e-05, "loss": 0.003142710030078888, "step": 159530 }, { "epoch": 45.28526823729776, "grad_norm": 0.9906347990036011, "learning_rate": 5.473374964518876e-05, "loss": 0.001996843330562115, "step": 159540 }, { "epoch": 45.28810672722112, "grad_norm": 0.6114441156387329, "learning_rate": 5.47309111552654e-05, "loss": 0.0016777245327830315, "step": 159550 }, { "epoch": 45.29094521714448, "grad_norm": 0.4522089660167694, "learning_rate": 5.472807266534205e-05, "loss": 0.003216322511434555, "step": 159560 }, { "epoch": 45.29378370706784, "grad_norm": 0.4906410276889801, "learning_rate": 5.472523417541868e-05, "loss": 0.0005746565759181977, "step": 159570 }, { "epoch": 45.2966221969912, "grad_norm": 0.15105196833610535, "learning_rate": 5.472239568549532e-05, "loss": 0.0016492562368512153, "step": 159580 }, { "epoch": 45.299460686914564, "grad_norm": 0.11974382400512695, "learning_rate": 5.471955719557196e-05, "loss": 0.0005740797147154808, "step": 159590 }, { "epoch": 45.30229917683792, "grad_norm": 0.6734367609024048, "learning_rate": 5.471671870564859e-05, "loss": 0.002903760597109795, "step": 159600 }, { "epoch": 45.30513766676128, "grad_norm": 8.564242362976074, "learning_rate": 5.4713880215725234e-05, "loss": 0.007018131017684936, "step": 159610 }, { "epoch": 45.307976156684646, "grad_norm": 0.6227974891662598, "learning_rate": 5.471104172580188e-05, "loss": 0.0009871799498796462, "step": 159620 }, { "epoch": 45.310814646608, "grad_norm": 0.04845915362238884, "learning_rate": 5.470820323587852e-05, "loss": 0.0020764444023370745, "step": 159630 }, { "epoch": 45.313653136531364, "grad_norm": 11.776887893676758, "learning_rate": 5.470536474595516e-05, "loss": 0.005326366052031517, "step": 159640 }, { "epoch": 45.31649162645473, "grad_norm": 0.05242522433400154, "learning_rate": 5.470252625603179e-05, "loss": 0.0018631642684340476, "step": 159650 }, { "epoch": 45.31933011637809, "grad_norm": 1.9630978107452393, "learning_rate": 5.4699687766108434e-05, "loss": 0.0009176427498459816, "step": 159660 }, { "epoch": 45.322168606301446, "grad_norm": 0.03910171985626221, "learning_rate": 5.469684927618507e-05, "loss": 0.0008452320471405983, "step": 159670 }, { "epoch": 45.32500709622481, "grad_norm": 0.15117180347442627, "learning_rate": 5.4694010786261704e-05, "loss": 0.008197706192731857, "step": 159680 }, { "epoch": 45.32784558614817, "grad_norm": 4.397069931030273, "learning_rate": 5.469117229633836e-05, "loss": 0.0032413676381111145, "step": 159690 }, { "epoch": 45.33068407607153, "grad_norm": 0.5860365629196167, "learning_rate": 5.468833380641499e-05, "loss": 0.0007270824164152146, "step": 159700 }, { "epoch": 45.33352256599489, "grad_norm": 0.6500213742256165, "learning_rate": 5.468549531649163e-05, "loss": 0.008513080328702927, "step": 159710 }, { "epoch": 45.33636105591825, "grad_norm": 1.2633423805236816, "learning_rate": 5.468265682656827e-05, "loss": 0.0035127781331539152, "step": 159720 }, { "epoch": 45.33919954584161, "grad_norm": 0.019626816734671593, "learning_rate": 5.4679818336644904e-05, "loss": 0.0005236783996224404, "step": 159730 }, { "epoch": 45.34203803576497, "grad_norm": 0.6326139569282532, "learning_rate": 5.4676979846721545e-05, "loss": 0.0007827391847968102, "step": 159740 }, { "epoch": 45.344876525688335, "grad_norm": 0.0590711310505867, "learning_rate": 5.467414135679818e-05, "loss": 0.0004486672580242157, "step": 159750 }, { "epoch": 45.3477150156117, "grad_norm": 0.17331865429878235, "learning_rate": 5.467130286687483e-05, "loss": 0.0013717750087380409, "step": 159760 }, { "epoch": 45.35055350553505, "grad_norm": 0.06820020079612732, "learning_rate": 5.466846437695147e-05, "loss": 0.00045934971421957016, "step": 159770 }, { "epoch": 45.353391995458416, "grad_norm": 0.39945074915885925, "learning_rate": 5.4665625887028104e-05, "loss": 0.0003818601369857788, "step": 159780 }, { "epoch": 45.35623048538178, "grad_norm": 0.12620282173156738, "learning_rate": 5.4662787397104745e-05, "loss": 0.001192593015730381, "step": 159790 }, { "epoch": 45.359068975305135, "grad_norm": 0.43111440539360046, "learning_rate": 5.465994890718138e-05, "loss": 0.00087503083050251, "step": 159800 }, { "epoch": 45.3619074652285, "grad_norm": 0.24580995738506317, "learning_rate": 5.4657110417258014e-05, "loss": 0.0008986582979559898, "step": 159810 }, { "epoch": 45.36474595515186, "grad_norm": 0.8788180351257324, "learning_rate": 5.465427192733467e-05, "loss": 0.000782516784965992, "step": 159820 }, { "epoch": 45.36758444507522, "grad_norm": 1.376896619796753, "learning_rate": 5.4651433437411304e-05, "loss": 0.004692263901233673, "step": 159830 }, { "epoch": 45.37042293499858, "grad_norm": 1.479958176612854, "learning_rate": 5.464859494748794e-05, "loss": 0.0067971058189868925, "step": 159840 }, { "epoch": 45.37326142492194, "grad_norm": 0.37699586153030396, "learning_rate": 5.464575645756458e-05, "loss": 0.007466115802526474, "step": 159850 }, { "epoch": 45.376099914845305, "grad_norm": 0.29239821434020996, "learning_rate": 5.4642917967641214e-05, "loss": 0.0010301649570465087, "step": 159860 }, { "epoch": 45.37893840476866, "grad_norm": 0.08531893789768219, "learning_rate": 5.4640079477717856e-05, "loss": 0.006689010560512543, "step": 159870 }, { "epoch": 45.381776894692024, "grad_norm": 0.42887499928474426, "learning_rate": 5.463724098779449e-05, "loss": 0.0028180574998259546, "step": 159880 }, { "epoch": 45.38461538461539, "grad_norm": 0.036703355610370636, "learning_rate": 5.463440249787114e-05, "loss": 0.0038086168467998506, "step": 159890 }, { "epoch": 45.38745387453874, "grad_norm": 0.047074250876903534, "learning_rate": 5.463156400794778e-05, "loss": 0.0005731752142310142, "step": 159900 }, { "epoch": 45.390292364462105, "grad_norm": 0.028384489938616753, "learning_rate": 5.4628725518024415e-05, "loss": 0.0017218556255102157, "step": 159910 }, { "epoch": 45.39313085438547, "grad_norm": 0.010840391740202904, "learning_rate": 5.4625887028101056e-05, "loss": 0.0014307018369436263, "step": 159920 }, { "epoch": 45.395969344308824, "grad_norm": 3.2325572967529297, "learning_rate": 5.462304853817769e-05, "loss": 0.0027173059061169624, "step": 159930 }, { "epoch": 45.39880783423219, "grad_norm": 0.4755809009075165, "learning_rate": 5.4620210048254325e-05, "loss": 0.0003724571317434311, "step": 159940 }, { "epoch": 45.40164632415555, "grad_norm": 0.0686555951833725, "learning_rate": 5.4617371558330967e-05, "loss": 0.001898372545838356, "step": 159950 }, { "epoch": 45.40448481407891, "grad_norm": 0.38064396381378174, "learning_rate": 5.4614533068407615e-05, "loss": 0.007128436118364334, "step": 159960 }, { "epoch": 45.40732330400227, "grad_norm": 13.691572189331055, "learning_rate": 5.461169457848425e-05, "loss": 0.01318945586681366, "step": 159970 }, { "epoch": 45.41016179392563, "grad_norm": 0.013193273916840553, "learning_rate": 5.460885608856089e-05, "loss": 0.0010002166032791137, "step": 159980 }, { "epoch": 45.413000283848994, "grad_norm": 0.37461698055267334, "learning_rate": 5.4606017598637525e-05, "loss": 0.0003531694412231445, "step": 159990 }, { "epoch": 45.41583877377235, "grad_norm": 0.11720555275678635, "learning_rate": 5.460317910871417e-05, "loss": 0.0019867658615112306, "step": 160000 }, { "epoch": 45.41583877377235, "eval_accuracy": 0.974883957525275, "eval_loss": 0.0900978073477745, "eval_runtime": 32.6263, "eval_samples_per_second": 482.035, "eval_steps_per_second": 7.54, "step": 160000 }, { "epoch": 45.41867726369571, "grad_norm": 4.958388805389404, "learning_rate": 5.46003406187908e-05, "loss": 0.0009833529591560363, "step": 160010 }, { "epoch": 45.421515753619076, "grad_norm": 0.06956835836172104, "learning_rate": 5.459750212886745e-05, "loss": 0.0013391496613621712, "step": 160020 }, { "epoch": 45.42435424354244, "grad_norm": 0.026643194258213043, "learning_rate": 5.459466363894409e-05, "loss": 0.0011911196634173393, "step": 160030 }, { "epoch": 45.427192733465795, "grad_norm": 0.2273787409067154, "learning_rate": 5.4591825149020725e-05, "loss": 0.0006981289014220237, "step": 160040 }, { "epoch": 45.43003122338916, "grad_norm": 14.426826477050781, "learning_rate": 5.458898665909736e-05, "loss": 0.0055053897202014925, "step": 160050 }, { "epoch": 45.43286971331252, "grad_norm": 0.431980699300766, "learning_rate": 5.4586148169174e-05, "loss": 0.0007277240976691246, "step": 160060 }, { "epoch": 45.435708203235876, "grad_norm": 1.786981463432312, "learning_rate": 5.4583309679250636e-05, "loss": 0.0007163923233747483, "step": 160070 }, { "epoch": 45.43854669315924, "grad_norm": 0.17560170590877533, "learning_rate": 5.458047118932728e-05, "loss": 0.00046657361090183256, "step": 160080 }, { "epoch": 45.4413851830826, "grad_norm": 0.023844320327043533, "learning_rate": 5.4577632699403925e-05, "loss": 0.0002781439572572708, "step": 160090 }, { "epoch": 45.44422367300596, "grad_norm": 0.010763479396700859, "learning_rate": 5.457479420948056e-05, "loss": 0.000533713772892952, "step": 160100 }, { "epoch": 45.44706216292932, "grad_norm": 0.025526750832796097, "learning_rate": 5.45719557195572e-05, "loss": 0.0005674446001648903, "step": 160110 }, { "epoch": 45.44990065285268, "grad_norm": 6.410455226898193, "learning_rate": 5.4569117229633836e-05, "loss": 0.0033612500876188276, "step": 160120 }, { "epoch": 45.452739142776046, "grad_norm": 0.08697986602783203, "learning_rate": 5.456627873971048e-05, "loss": 0.003753416985273361, "step": 160130 }, { "epoch": 45.4555776326994, "grad_norm": 7.346334457397461, "learning_rate": 5.456344024978711e-05, "loss": 0.0026715217158198357, "step": 160140 }, { "epoch": 45.458416122622765, "grad_norm": 0.8403939604759216, "learning_rate": 5.4560601759863747e-05, "loss": 0.0032804422080516815, "step": 160150 }, { "epoch": 45.46125461254613, "grad_norm": 0.1929531842470169, "learning_rate": 5.45577632699404e-05, "loss": 0.0012107642367482185, "step": 160160 }, { "epoch": 45.464093102469484, "grad_norm": 1.530828833580017, "learning_rate": 5.4554924780017036e-05, "loss": 0.0019105054438114167, "step": 160170 }, { "epoch": 45.46693159239285, "grad_norm": 1.5098488330841064, "learning_rate": 5.455208629009367e-05, "loss": 0.002033945731818676, "step": 160180 }, { "epoch": 45.46977008231621, "grad_norm": 16.700885772705078, "learning_rate": 5.454924780017031e-05, "loss": 0.006096748635172844, "step": 160190 }, { "epoch": 45.472608572239565, "grad_norm": 0.8432513475418091, "learning_rate": 5.454640931024695e-05, "loss": 0.005097533762454987, "step": 160200 }, { "epoch": 45.47544706216293, "grad_norm": 0.8789168000221252, "learning_rate": 5.454357082032359e-05, "loss": 0.005188009887933731, "step": 160210 }, { "epoch": 45.47828555208629, "grad_norm": 0.4936543405056, "learning_rate": 5.4540732330400236e-05, "loss": 0.0010072994977235795, "step": 160220 }, { "epoch": 45.481124042009654, "grad_norm": 0.048039380460977554, "learning_rate": 5.453789384047687e-05, "loss": 0.004002508521080017, "step": 160230 }, { "epoch": 45.48396253193301, "grad_norm": 5.892132759094238, "learning_rate": 5.453505535055351e-05, "loss": 0.005026974529027939, "step": 160240 }, { "epoch": 45.48680102185637, "grad_norm": 0.048962634056806564, "learning_rate": 5.453221686063015e-05, "loss": 0.0012001559138298036, "step": 160250 }, { "epoch": 45.489639511779735, "grad_norm": 0.15728425979614258, "learning_rate": 5.452937837070679e-05, "loss": 0.0015126109123229981, "step": 160260 }, { "epoch": 45.49247800170309, "grad_norm": 0.11251980811357498, "learning_rate": 5.452653988078342e-05, "loss": 0.0026271834969520567, "step": 160270 }, { "epoch": 45.495316491626454, "grad_norm": 1.234209656715393, "learning_rate": 5.452370139086006e-05, "loss": 0.0013548396527767181, "step": 160280 }, { "epoch": 45.49815498154982, "grad_norm": 0.22638452053070068, "learning_rate": 5.4520862900936706e-05, "loss": 0.0005816129967570304, "step": 160290 }, { "epoch": 45.50099347147318, "grad_norm": 4.075749397277832, "learning_rate": 5.451802441101335e-05, "loss": 0.0029568765312433243, "step": 160300 }, { "epoch": 45.503831961396536, "grad_norm": 0.01485262718051672, "learning_rate": 5.451518592108998e-05, "loss": 0.0004826672375202179, "step": 160310 }, { "epoch": 45.5066704513199, "grad_norm": 0.04394745081663132, "learning_rate": 5.451234743116662e-05, "loss": 0.0036881044507026672, "step": 160320 }, { "epoch": 45.50950894124326, "grad_norm": 0.07913976162672043, "learning_rate": 5.450950894124326e-05, "loss": 0.003663523495197296, "step": 160330 }, { "epoch": 45.51234743116662, "grad_norm": 0.2938038408756256, "learning_rate": 5.45066704513199e-05, "loss": 0.0007372915744781495, "step": 160340 }, { "epoch": 45.51518592108998, "grad_norm": 0.2071990668773651, "learning_rate": 5.4503831961396533e-05, "loss": 0.004440349712967873, "step": 160350 }, { "epoch": 45.51802441101334, "grad_norm": 1.4186187982559204, "learning_rate": 5.450099347147318e-05, "loss": 0.00868437960743904, "step": 160360 }, { "epoch": 45.5208629009367, "grad_norm": 0.02248881570994854, "learning_rate": 5.449815498154982e-05, "loss": 0.001902194507420063, "step": 160370 }, { "epoch": 45.52370139086006, "grad_norm": 0.536666750907898, "learning_rate": 5.449531649162646e-05, "loss": 0.0017030799761414527, "step": 160380 }, { "epoch": 45.526539880783425, "grad_norm": 0.7999932169914246, "learning_rate": 5.44924780017031e-05, "loss": 0.008185773342847823, "step": 160390 }, { "epoch": 45.52937837070679, "grad_norm": 0.053994931280612946, "learning_rate": 5.4489639511779734e-05, "loss": 0.004301007837057114, "step": 160400 }, { "epoch": 45.53221686063014, "grad_norm": 0.11849386990070343, "learning_rate": 5.448680102185637e-05, "loss": 0.007906029373407364, "step": 160410 }, { "epoch": 45.535055350553506, "grad_norm": 0.08292537182569504, "learning_rate": 5.4483962531933016e-05, "loss": 0.013807235658168793, "step": 160420 }, { "epoch": 45.53789384047687, "grad_norm": 0.18553201854228973, "learning_rate": 5.448112404200966e-05, "loss": 0.0031265586614608763, "step": 160430 }, { "epoch": 45.540732330400225, "grad_norm": 7.04033899307251, "learning_rate": 5.447828555208629e-05, "loss": 0.010655637830495834, "step": 160440 }, { "epoch": 45.54357082032359, "grad_norm": 0.03732825815677643, "learning_rate": 5.4475447062162934e-05, "loss": 0.004703976958990097, "step": 160450 }, { "epoch": 45.54640931024695, "grad_norm": 0.04775502160191536, "learning_rate": 5.447260857223957e-05, "loss": 0.012050092220306396, "step": 160460 }, { "epoch": 45.549247800170306, "grad_norm": 0.3837871849536896, "learning_rate": 5.446977008231621e-05, "loss": 0.004815547168254853, "step": 160470 }, { "epoch": 45.55208629009367, "grad_norm": 0.1937353014945984, "learning_rate": 5.4466931592392844e-05, "loss": 0.001340416818857193, "step": 160480 }, { "epoch": 45.55492478001703, "grad_norm": 0.09393924474716187, "learning_rate": 5.446409310246949e-05, "loss": 0.004818068444728851, "step": 160490 }, { "epoch": 45.557763269940395, "grad_norm": 0.2552574574947357, "learning_rate": 5.4461254612546134e-05, "loss": 0.003326507657766342, "step": 160500 }, { "epoch": 45.557763269940395, "eval_accuracy": 0.9755833916195078, "eval_loss": 0.08832064270973206, "eval_runtime": 32.4717, "eval_samples_per_second": 484.329, "eval_steps_per_second": 7.576, "step": 160500 }, { "epoch": 45.56060175986375, "grad_norm": 0.15256895124912262, "learning_rate": 5.445841612262277e-05, "loss": 0.004682324081659317, "step": 160510 }, { "epoch": 45.563440249787114, "grad_norm": 1.1362718343734741, "learning_rate": 5.44555776326994e-05, "loss": 0.006822779029607773, "step": 160520 }, { "epoch": 45.56627873971048, "grad_norm": 0.19589383900165558, "learning_rate": 5.4452739142776044e-05, "loss": 0.003481447696685791, "step": 160530 }, { "epoch": 45.56911722963383, "grad_norm": 2.7340099811553955, "learning_rate": 5.444990065285268e-05, "loss": 0.0034966353327035905, "step": 160540 }, { "epoch": 45.571955719557195, "grad_norm": 0.1126856580376625, "learning_rate": 5.444706216292932e-05, "loss": 0.012968908250331878, "step": 160550 }, { "epoch": 45.57479420948056, "grad_norm": 0.2477794587612152, "learning_rate": 5.444422367300597e-05, "loss": 0.0015185700729489326, "step": 160560 }, { "epoch": 45.577632699403914, "grad_norm": 0.16882750391960144, "learning_rate": 5.44413851830826e-05, "loss": 0.004273098334670067, "step": 160570 }, { "epoch": 45.58047118932728, "grad_norm": 0.14559344947338104, "learning_rate": 5.4438546693159244e-05, "loss": 0.0012252137064933776, "step": 160580 }, { "epoch": 45.58330967925064, "grad_norm": 0.043425749987363815, "learning_rate": 5.443570820323588e-05, "loss": 0.0019860824570059775, "step": 160590 }, { "epoch": 45.586148169174, "grad_norm": 0.03831684961915016, "learning_rate": 5.443286971331252e-05, "loss": 0.0009444382041692734, "step": 160600 }, { "epoch": 45.58898665909736, "grad_norm": 0.7685577869415283, "learning_rate": 5.4430031223389155e-05, "loss": 0.01020767241716385, "step": 160610 }, { "epoch": 45.59182514902072, "grad_norm": 0.2840322256088257, "learning_rate": 5.44271927334658e-05, "loss": 0.0070201151072978975, "step": 160620 }, { "epoch": 45.594663638944084, "grad_norm": 2.4745922088623047, "learning_rate": 5.4424354243542444e-05, "loss": 0.003092491067945957, "step": 160630 }, { "epoch": 45.59750212886744, "grad_norm": 5.53104829788208, "learning_rate": 5.442151575361908e-05, "loss": 0.011387206614017487, "step": 160640 }, { "epoch": 45.6003406187908, "grad_norm": 0.024465475231409073, "learning_rate": 5.4418677263695714e-05, "loss": 0.0013364035636186599, "step": 160650 }, { "epoch": 45.603179108714166, "grad_norm": 0.01895463839173317, "learning_rate": 5.4415838773772355e-05, "loss": 0.0007305957376956939, "step": 160660 }, { "epoch": 45.60601759863752, "grad_norm": 0.32076385617256165, "learning_rate": 5.441300028384899e-05, "loss": 0.002248106338083744, "step": 160670 }, { "epoch": 45.608856088560884, "grad_norm": 3.1270592212677, "learning_rate": 5.441016179392563e-05, "loss": 0.0029866548255085947, "step": 160680 }, { "epoch": 45.61169457848425, "grad_norm": 0.11881868541240692, "learning_rate": 5.440732330400228e-05, "loss": 0.006914189457893372, "step": 160690 }, { "epoch": 45.61453306840761, "grad_norm": 0.057857874780893326, "learning_rate": 5.4404484814078914e-05, "loss": 0.004651518911123276, "step": 160700 }, { "epoch": 45.617371558330966, "grad_norm": 0.28851884603500366, "learning_rate": 5.4401646324155555e-05, "loss": 0.017529812455177308, "step": 160710 }, { "epoch": 45.62021004825433, "grad_norm": 0.1574196070432663, "learning_rate": 5.439880783423219e-05, "loss": 0.002354937419295311, "step": 160720 }, { "epoch": 45.62304853817769, "grad_norm": 1.5710221529006958, "learning_rate": 5.439596934430883e-05, "loss": 0.0017747893929481507, "step": 160730 }, { "epoch": 45.62588702810105, "grad_norm": 0.05999365076422691, "learning_rate": 5.4393130854385466e-05, "loss": 0.003612013906240463, "step": 160740 }, { "epoch": 45.62872551802441, "grad_norm": 0.3694084584712982, "learning_rate": 5.4390292364462114e-05, "loss": 0.0051765207201242445, "step": 160750 }, { "epoch": 45.63156400794777, "grad_norm": 1.15484619140625, "learning_rate": 5.438745387453875e-05, "loss": 0.0035917483270168304, "step": 160760 }, { "epoch": 45.634402497871136, "grad_norm": 0.17776988446712494, "learning_rate": 5.438461538461539e-05, "loss": 0.0012994332239031792, "step": 160770 }, { "epoch": 45.63724098779449, "grad_norm": 0.09033899754285812, "learning_rate": 5.4381776894692024e-05, "loss": 0.0009247826412320137, "step": 160780 }, { "epoch": 45.640079477717855, "grad_norm": 0.47340404987335205, "learning_rate": 5.4378938404768666e-05, "loss": 0.0009023766964673996, "step": 160790 }, { "epoch": 45.64291796764122, "grad_norm": 0.4041638970375061, "learning_rate": 5.43760999148453e-05, "loss": 0.0017378553748130798, "step": 160800 }, { "epoch": 45.64575645756457, "grad_norm": 14.27238655090332, "learning_rate": 5.437326142492194e-05, "loss": 0.0035444900393486025, "step": 160810 }, { "epoch": 45.648594947487936, "grad_norm": 3.5852174758911133, "learning_rate": 5.437042293499859e-05, "loss": 0.0025195956230163576, "step": 160820 }, { "epoch": 45.6514334374113, "grad_norm": 0.16901333630084991, "learning_rate": 5.4367584445075225e-05, "loss": 0.006023672223091125, "step": 160830 }, { "epoch": 45.654271927334655, "grad_norm": 0.060844410210847855, "learning_rate": 5.4364745955151866e-05, "loss": 0.006948278099298477, "step": 160840 }, { "epoch": 45.65711041725802, "grad_norm": 0.11191053688526154, "learning_rate": 5.43619074652285e-05, "loss": 0.0009334005415439605, "step": 160850 }, { "epoch": 45.65994890718138, "grad_norm": 1.9554001092910767, "learning_rate": 5.435906897530514e-05, "loss": 0.0008851522579789162, "step": 160860 }, { "epoch": 45.66278739710474, "grad_norm": 0.06833608448505402, "learning_rate": 5.4356230485381777e-05, "loss": 0.005166731029748917, "step": 160870 }, { "epoch": 45.6656258870281, "grad_norm": 2.002039670944214, "learning_rate": 5.435339199545841e-05, "loss": 0.0012178577482700348, "step": 160880 }, { "epoch": 45.66846437695146, "grad_norm": 0.5515984892845154, "learning_rate": 5.435055350553506e-05, "loss": 0.0012314150109887124, "step": 160890 }, { "epoch": 45.671302866874825, "grad_norm": 1.4120569229125977, "learning_rate": 5.43477150156117e-05, "loss": 0.0019286371767520904, "step": 160900 }, { "epoch": 45.67414135679818, "grad_norm": 1.3146798610687256, "learning_rate": 5.4344876525688335e-05, "loss": 0.001442255638539791, "step": 160910 }, { "epoch": 45.676979846721544, "grad_norm": 11.858302116394043, "learning_rate": 5.4342038035764977e-05, "loss": 0.002761480025947094, "step": 160920 }, { "epoch": 45.67981833664491, "grad_norm": 0.33902010321617126, "learning_rate": 5.433919954584161e-05, "loss": 0.0006219649687409401, "step": 160930 }, { "epoch": 45.68265682656826, "grad_norm": 0.08200369030237198, "learning_rate": 5.433636105591825e-05, "loss": 0.0021788103505969046, "step": 160940 }, { "epoch": 45.685495316491625, "grad_norm": 0.8130740523338318, "learning_rate": 5.43335225659949e-05, "loss": 0.0027270039543509485, "step": 160950 }, { "epoch": 45.68833380641499, "grad_norm": 0.34162166714668274, "learning_rate": 5.4330684076071535e-05, "loss": 0.0019761096686124803, "step": 160960 }, { "epoch": 45.69117229633835, "grad_norm": 0.047219403088092804, "learning_rate": 5.432784558614818e-05, "loss": 0.004996160045266151, "step": 160970 }, { "epoch": 45.69401078626171, "grad_norm": 0.2030765861272812, "learning_rate": 5.432500709622481e-05, "loss": 0.0004779621958732605, "step": 160980 }, { "epoch": 45.69684927618507, "grad_norm": 0.08220353722572327, "learning_rate": 5.4322168606301446e-05, "loss": 0.001334654539823532, "step": 160990 }, { "epoch": 45.69968776610843, "grad_norm": 0.1498045176267624, "learning_rate": 5.431933011637809e-05, "loss": 0.006664988398551941, "step": 161000 }, { "epoch": 45.69968776610843, "eval_accuracy": 0.9773637693139188, "eval_loss": 0.08327616751194, "eval_runtime": 32.7296, "eval_samples_per_second": 480.513, "eval_steps_per_second": 7.516, "step": 161000 }, { "epoch": 45.70252625603179, "grad_norm": 8.07216739654541, "learning_rate": 5.431649162645472e-05, "loss": 0.0029264120385050774, "step": 161010 }, { "epoch": 45.70536474595515, "grad_norm": 10.56723690032959, "learning_rate": 5.431365313653137e-05, "loss": 0.020343662798404695, "step": 161020 }, { "epoch": 45.708203235878514, "grad_norm": 0.24377812445163727, "learning_rate": 5.431081464660801e-05, "loss": 0.002720019780099392, "step": 161030 }, { "epoch": 45.71104172580187, "grad_norm": 0.38839972019195557, "learning_rate": 5.4307976156684646e-05, "loss": 0.004410373046994209, "step": 161040 }, { "epoch": 45.71388021572523, "grad_norm": 11.797942161560059, "learning_rate": 5.430513766676129e-05, "loss": 0.012972970306873322, "step": 161050 }, { "epoch": 45.716718705648596, "grad_norm": 0.23124346137046814, "learning_rate": 5.430229917683792e-05, "loss": 0.008590377867221832, "step": 161060 }, { "epoch": 45.71955719557196, "grad_norm": 0.02862338349223137, "learning_rate": 5.429946068691456e-05, "loss": 0.0035096026957035066, "step": 161070 }, { "epoch": 45.722395685495314, "grad_norm": 0.1332978457212448, "learning_rate": 5.42966221969912e-05, "loss": 0.008227867633104324, "step": 161080 }, { "epoch": 45.72523417541868, "grad_norm": 0.09351450204849243, "learning_rate": 5.4293783707067846e-05, "loss": 0.003157670050859451, "step": 161090 }, { "epoch": 45.72807266534204, "grad_norm": 0.04391924664378166, "learning_rate": 5.429094521714449e-05, "loss": 0.002350090630352497, "step": 161100 }, { "epoch": 45.730911155265396, "grad_norm": 0.7595681548118591, "learning_rate": 5.428810672722112e-05, "loss": 0.000853271409869194, "step": 161110 }, { "epoch": 45.73374964518876, "grad_norm": 0.016838688403367996, "learning_rate": 5.428526823729776e-05, "loss": 0.0007471306249499321, "step": 161120 }, { "epoch": 45.73658813511212, "grad_norm": 0.715798556804657, "learning_rate": 5.42824297473744e-05, "loss": 0.0015186576172709465, "step": 161130 }, { "epoch": 45.739426625035485, "grad_norm": 0.015432768501341343, "learning_rate": 5.427959125745103e-05, "loss": 0.0006268056109547615, "step": 161140 }, { "epoch": 45.74226511495884, "grad_norm": 5.377730369567871, "learning_rate": 5.427675276752768e-05, "loss": 0.00495859682559967, "step": 161150 }, { "epoch": 45.7451036048822, "grad_norm": 0.10866178572177887, "learning_rate": 5.427391427760432e-05, "loss": 0.0012252561748027802, "step": 161160 }, { "epoch": 45.747942094805566, "grad_norm": 0.13667306303977966, "learning_rate": 5.427107578768096e-05, "loss": 0.0005437683314085007, "step": 161170 }, { "epoch": 45.75078058472892, "grad_norm": 0.04786505177617073, "learning_rate": 5.42682372977576e-05, "loss": 0.005026413500308991, "step": 161180 }, { "epoch": 45.753619074652285, "grad_norm": 0.40971362590789795, "learning_rate": 5.426539880783423e-05, "loss": 0.0017123386263847352, "step": 161190 }, { "epoch": 45.75645756457565, "grad_norm": 0.037988048046827316, "learning_rate": 5.4262560317910874e-05, "loss": 0.0012234851717948913, "step": 161200 }, { "epoch": 45.759296054499, "grad_norm": 0.4397001564502716, "learning_rate": 5.425972182798751e-05, "loss": 0.001748490147292614, "step": 161210 }, { "epoch": 45.762134544422366, "grad_norm": 0.042097996920347214, "learning_rate": 5.425688333806416e-05, "loss": 0.0038283582776784896, "step": 161220 }, { "epoch": 45.76497303434573, "grad_norm": 0.7592105865478516, "learning_rate": 5.425404484814079e-05, "loss": 0.0008447635918855667, "step": 161230 }, { "epoch": 45.76781152426909, "grad_norm": 0.36097195744514465, "learning_rate": 5.425120635821743e-05, "loss": 0.001230390928685665, "step": 161240 }, { "epoch": 45.77065001419245, "grad_norm": 2.0582427978515625, "learning_rate": 5.424836786829407e-05, "loss": 0.0010907815769314766, "step": 161250 }, { "epoch": 45.77348850411581, "grad_norm": 11.278580665588379, "learning_rate": 5.424552937837071e-05, "loss": 0.004879453778266906, "step": 161260 }, { "epoch": 45.776326994039174, "grad_norm": 0.3125346004962921, "learning_rate": 5.4242690888447343e-05, "loss": 0.00045435577630996703, "step": 161270 }, { "epoch": 45.77916548396253, "grad_norm": 0.5384764075279236, "learning_rate": 5.4239852398523985e-05, "loss": 0.0020776621997356413, "step": 161280 }, { "epoch": 45.78200397388589, "grad_norm": 0.1224488690495491, "learning_rate": 5.423701390860063e-05, "loss": 0.001577579416334629, "step": 161290 }, { "epoch": 45.784842463809255, "grad_norm": 0.08446390181779861, "learning_rate": 5.423417541867727e-05, "loss": 0.000662391446530819, "step": 161300 }, { "epoch": 45.78768095373261, "grad_norm": 1.3894742727279663, "learning_rate": 5.423133692875391e-05, "loss": 0.010084594786167144, "step": 161310 }, { "epoch": 45.790519443655974, "grad_norm": 0.3065516948699951, "learning_rate": 5.4228498438830543e-05, "loss": 0.0021734993904829027, "step": 161320 }, { "epoch": 45.79335793357934, "grad_norm": 0.25830909609794617, "learning_rate": 5.4225659948907185e-05, "loss": 0.0012144390493631363, "step": 161330 }, { "epoch": 45.7961964235027, "grad_norm": 0.23656773567199707, "learning_rate": 5.422282145898382e-05, "loss": 0.0013937612995505333, "step": 161340 }, { "epoch": 45.799034913426055, "grad_norm": 0.17942753434181213, "learning_rate": 5.421998296906047e-05, "loss": 0.002073255926370621, "step": 161350 }, { "epoch": 45.80187340334942, "grad_norm": 0.07943885028362274, "learning_rate": 5.42171444791371e-05, "loss": 0.0021154118701815607, "step": 161360 }, { "epoch": 45.80471189327278, "grad_norm": 0.5212387442588806, "learning_rate": 5.4214305989213744e-05, "loss": 0.004924054071307182, "step": 161370 }, { "epoch": 45.80755038319614, "grad_norm": 0.026930060237646103, "learning_rate": 5.421146749929038e-05, "loss": 0.0008995577692985535, "step": 161380 }, { "epoch": 45.8103888731195, "grad_norm": 1.582486867904663, "learning_rate": 5.420862900936702e-05, "loss": 0.000835118442773819, "step": 161390 }, { "epoch": 45.81322736304286, "grad_norm": 7.061641693115234, "learning_rate": 5.4205790519443654e-05, "loss": 0.004739048331975937, "step": 161400 }, { "epoch": 45.81606585296622, "grad_norm": 0.0528242252767086, "learning_rate": 5.4202952029520296e-05, "loss": 0.00348777137696743, "step": 161410 }, { "epoch": 45.81890434288958, "grad_norm": 0.4669622778892517, "learning_rate": 5.4200113539596944e-05, "loss": 0.0009508721530437469, "step": 161420 }, { "epoch": 45.821742832812944, "grad_norm": 0.10133606195449829, "learning_rate": 5.419727504967358e-05, "loss": 0.0009340867400169373, "step": 161430 }, { "epoch": 45.82458132273631, "grad_norm": 0.10479370504617691, "learning_rate": 5.419443655975022e-05, "loss": 0.00103929340839386, "step": 161440 }, { "epoch": 45.82741981265966, "grad_norm": 0.4021565020084381, "learning_rate": 5.4191598069826854e-05, "loss": 0.0011530278250575065, "step": 161450 }, { "epoch": 45.830258302583026, "grad_norm": 0.2476356327533722, "learning_rate": 5.418875957990349e-05, "loss": 0.0013172492384910584, "step": 161460 }, { "epoch": 45.83309679250639, "grad_norm": 0.10578019171953201, "learning_rate": 5.418592108998013e-05, "loss": 0.0006564496085047722, "step": 161470 }, { "epoch": 45.835935282429745, "grad_norm": 2.070984125137329, "learning_rate": 5.4183082600056765e-05, "loss": 0.0017098024487495421, "step": 161480 }, { "epoch": 45.83877377235311, "grad_norm": 0.710365891456604, "learning_rate": 5.418024411013341e-05, "loss": 0.0031216679140925407, "step": 161490 }, { "epoch": 45.84161226227647, "grad_norm": 0.02391446754336357, "learning_rate": 5.4177405620210054e-05, "loss": 0.0007541656494140625, "step": 161500 }, { "epoch": 45.84161226227647, "eval_accuracy": 0.9771730145609462, "eval_loss": 0.08208510279655457, "eval_runtime": 32.4833, "eval_samples_per_second": 484.157, "eval_steps_per_second": 7.573, "step": 161500 }, { "epoch": 45.84445075219983, "grad_norm": 0.12032294273376465, "learning_rate": 5.417456713028669e-05, "loss": 0.0010130520910024642, "step": 161510 }, { "epoch": 45.84728924212319, "grad_norm": 0.037861891090869904, "learning_rate": 5.417172864036333e-05, "loss": 0.0032112617045640945, "step": 161520 }, { "epoch": 45.85012773204655, "grad_norm": 0.6204045414924622, "learning_rate": 5.4168890150439965e-05, "loss": 0.0014088433235883713, "step": 161530 }, { "epoch": 45.852966221969915, "grad_norm": 0.11994437873363495, "learning_rate": 5.4166051660516606e-05, "loss": 0.005461747571825981, "step": 161540 }, { "epoch": 45.85580471189327, "grad_norm": 0.0325290709733963, "learning_rate": 5.4163213170593254e-05, "loss": 0.004705595597624779, "step": 161550 }, { "epoch": 45.85864320181663, "grad_norm": 0.04252807796001434, "learning_rate": 5.416037468066989e-05, "loss": 0.0005035419017076492, "step": 161560 }, { "epoch": 45.861481691739996, "grad_norm": 0.22419458627700806, "learning_rate": 5.415753619074653e-05, "loss": 0.000624791719019413, "step": 161570 }, { "epoch": 45.86432018166335, "grad_norm": 0.1020413190126419, "learning_rate": 5.4154697700823165e-05, "loss": 0.000715498998761177, "step": 161580 }, { "epoch": 45.867158671586715, "grad_norm": 0.032956041395664215, "learning_rate": 5.41518592108998e-05, "loss": 0.002926127426326275, "step": 161590 }, { "epoch": 45.86999716151008, "grad_norm": 4.767463684082031, "learning_rate": 5.414902072097644e-05, "loss": 0.0011207493022084235, "step": 161600 }, { "epoch": 45.87283565143344, "grad_norm": 1.5675450563430786, "learning_rate": 5.4146182231053076e-05, "loss": 0.005903331190347671, "step": 161610 }, { "epoch": 45.8756741413568, "grad_norm": 0.2801133394241333, "learning_rate": 5.4143343741129724e-05, "loss": 0.007808262109756469, "step": 161620 }, { "epoch": 45.87851263128016, "grad_norm": 11.746882438659668, "learning_rate": 5.4140505251206365e-05, "loss": 0.007883566617965698, "step": 161630 }, { "epoch": 45.88135112120352, "grad_norm": 2.616649866104126, "learning_rate": 5.4137666761283e-05, "loss": 0.0033796489238739013, "step": 161640 }, { "epoch": 45.88418961112688, "grad_norm": 0.5332644581794739, "learning_rate": 5.413482827135964e-05, "loss": 0.003921709582209587, "step": 161650 }, { "epoch": 45.88702810105024, "grad_norm": 0.054670125246047974, "learning_rate": 5.4131989781436276e-05, "loss": 0.0034120339900255204, "step": 161660 }, { "epoch": 45.889866590973604, "grad_norm": 0.0413186177611351, "learning_rate": 5.412915129151292e-05, "loss": 0.005305983126163483, "step": 161670 }, { "epoch": 45.89270508089696, "grad_norm": 0.08059436827898026, "learning_rate": 5.412631280158955e-05, "loss": 0.0027314046397805213, "step": 161680 }, { "epoch": 45.89554357082032, "grad_norm": 0.3735695779323578, "learning_rate": 5.41234743116662e-05, "loss": 0.0011419204995036126, "step": 161690 }, { "epoch": 45.898382060743685, "grad_norm": 2.8354036808013916, "learning_rate": 5.4120635821742834e-05, "loss": 0.004120286554098129, "step": 161700 }, { "epoch": 45.90122055066705, "grad_norm": 1.4753535985946655, "learning_rate": 5.4117797331819476e-05, "loss": 0.0028513403609395026, "step": 161710 }, { "epoch": 45.904059040590404, "grad_norm": 0.14362750947475433, "learning_rate": 5.411495884189611e-05, "loss": 0.0010727571323513984, "step": 161720 }, { "epoch": 45.90689753051377, "grad_norm": 0.09154773503541946, "learning_rate": 5.411212035197275e-05, "loss": 0.0012993168085813523, "step": 161730 }, { "epoch": 45.90973602043713, "grad_norm": 0.23596924543380737, "learning_rate": 5.4109281862049386e-05, "loss": 0.0007534706965088844, "step": 161740 }, { "epoch": 45.912574510360486, "grad_norm": 3.5349955558776855, "learning_rate": 5.4106443372126035e-05, "loss": 0.0033326372504234314, "step": 161750 }, { "epoch": 45.91541300028385, "grad_norm": 1.8685142993927002, "learning_rate": 5.4103604882202676e-05, "loss": 0.006194271147251129, "step": 161760 }, { "epoch": 45.91825149020721, "grad_norm": 0.045612432062625885, "learning_rate": 5.410076639227931e-05, "loss": 0.0038339927792549132, "step": 161770 }, { "epoch": 45.92108998013057, "grad_norm": 2.440669059753418, "learning_rate": 5.409792790235595e-05, "loss": 0.014538729190826416, "step": 161780 }, { "epoch": 45.92392847005393, "grad_norm": 0.712125301361084, "learning_rate": 5.4095089412432586e-05, "loss": 0.0017345525324344636, "step": 161790 }, { "epoch": 45.92676695997729, "grad_norm": 1.2181425094604492, "learning_rate": 5.409225092250922e-05, "loss": 0.0027525674551725386, "step": 161800 }, { "epoch": 45.929605449900656, "grad_norm": 0.42354992032051086, "learning_rate": 5.408941243258586e-05, "loss": 0.0016804732382297515, "step": 161810 }, { "epoch": 45.93244393982401, "grad_norm": 0.5312386155128479, "learning_rate": 5.408657394266251e-05, "loss": 0.0012051908299326897, "step": 161820 }, { "epoch": 45.935282429747375, "grad_norm": 1.04977548122406, "learning_rate": 5.4083735452739145e-05, "loss": 0.0024868519976735115, "step": 161830 }, { "epoch": 45.93812091967074, "grad_norm": 0.11321166902780533, "learning_rate": 5.4080896962815787e-05, "loss": 0.0007630808278918267, "step": 161840 }, { "epoch": 45.94095940959409, "grad_norm": 0.3283163905143738, "learning_rate": 5.407805847289242e-05, "loss": 0.0074503995478153225, "step": 161850 }, { "epoch": 45.943797899517456, "grad_norm": 0.03881268948316574, "learning_rate": 5.407521998296906e-05, "loss": 0.006832519173622131, "step": 161860 }, { "epoch": 45.94663638944082, "grad_norm": 0.03810558840632439, "learning_rate": 5.40723814930457e-05, "loss": 0.010600626468658447, "step": 161870 }, { "epoch": 45.949474879364175, "grad_norm": 0.20908771455287933, "learning_rate": 5.406954300312234e-05, "loss": 0.00047017689794301985, "step": 161880 }, { "epoch": 45.95231336928754, "grad_norm": 0.23305509984493256, "learning_rate": 5.406670451319899e-05, "loss": 0.0033332813531160355, "step": 161890 }, { "epoch": 45.9551518592109, "grad_norm": 0.9041756391525269, "learning_rate": 5.406386602327562e-05, "loss": 0.002411099337041378, "step": 161900 }, { "epoch": 45.95799034913426, "grad_norm": 0.3472764194011688, "learning_rate": 5.406102753335226e-05, "loss": 0.0038720719516277314, "step": 161910 }, { "epoch": 45.96082883905762, "grad_norm": 1.885081171989441, "learning_rate": 5.40581890434289e-05, "loss": 0.0008585911244153976, "step": 161920 }, { "epoch": 45.96366732898098, "grad_norm": 4.354450702667236, "learning_rate": 5.405535055350553e-05, "loss": 0.0013964829966425895, "step": 161930 }, { "epoch": 45.966505818904345, "grad_norm": 1.279211163520813, "learning_rate": 5.405251206358217e-05, "loss": 0.012046570330858231, "step": 161940 }, { "epoch": 45.9693443088277, "grad_norm": 0.2382349967956543, "learning_rate": 5.404967357365882e-05, "loss": 0.0011683886870741845, "step": 161950 }, { "epoch": 45.972182798751064, "grad_norm": 1.485707402229309, "learning_rate": 5.4046835083735456e-05, "loss": 0.003632603958249092, "step": 161960 }, { "epoch": 45.97502128867443, "grad_norm": 0.26036351919174194, "learning_rate": 5.40439965938121e-05, "loss": 0.00047646872699260714, "step": 161970 }, { "epoch": 45.97785977859779, "grad_norm": 0.13081778585910797, "learning_rate": 5.404115810388873e-05, "loss": 0.005368375778198242, "step": 161980 }, { "epoch": 45.980698268521145, "grad_norm": 3.8794097900390625, "learning_rate": 5.403831961396537e-05, "loss": 0.0022597135975956917, "step": 161990 }, { "epoch": 45.98353675844451, "grad_norm": 0.036129508167505264, "learning_rate": 5.403548112404201e-05, "loss": 0.001491733081638813, "step": 162000 }, { "epoch": 45.98353675844451, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.08672595024108887, "eval_runtime": 32.7045, "eval_samples_per_second": 480.882, "eval_steps_per_second": 7.522, "step": 162000 }, { "epoch": 45.98637524836787, "grad_norm": 0.560282826423645, "learning_rate": 5.403264263411865e-05, "loss": 0.0043994951993227005, "step": 162010 }, { "epoch": 45.98921373829123, "grad_norm": 0.32815879583358765, "learning_rate": 5.40298041441953e-05, "loss": 0.0065226837992668155, "step": 162020 }, { "epoch": 45.99205222821459, "grad_norm": 0.04081922397017479, "learning_rate": 5.402696565427193e-05, "loss": 0.0009791692718863487, "step": 162030 }, { "epoch": 45.99489071813795, "grad_norm": 0.38631582260131836, "learning_rate": 5.4024127164348573e-05, "loss": 0.0017499243840575218, "step": 162040 }, { "epoch": 45.99772920806131, "grad_norm": 0.8775612711906433, "learning_rate": 5.402128867442521e-05, "loss": 0.0029018744826316833, "step": 162050 }, { "epoch": 46.00056769798467, "grad_norm": 0.0812491625547409, "learning_rate": 5.401845018450184e-05, "loss": 0.0011390838772058487, "step": 162060 }, { "epoch": 46.003406187908034, "grad_norm": 0.040921665728092194, "learning_rate": 5.4015611694578484e-05, "loss": 0.0009308991953730583, "step": 162070 }, { "epoch": 46.0062446778314, "grad_norm": 0.036484867334365845, "learning_rate": 5.401277320465513e-05, "loss": 0.0006519539281725883, "step": 162080 }, { "epoch": 46.00908316775475, "grad_norm": 0.6142832040786743, "learning_rate": 5.400993471473177e-05, "loss": 0.0008300293236970901, "step": 162090 }, { "epoch": 46.011921657678116, "grad_norm": 0.04029611498117447, "learning_rate": 5.400709622480841e-05, "loss": 0.001254924200475216, "step": 162100 }, { "epoch": 46.01476014760148, "grad_norm": 0.027345988899469376, "learning_rate": 5.400425773488504e-05, "loss": 0.0015933027490973473, "step": 162110 }, { "epoch": 46.017598637524834, "grad_norm": 1.4587512016296387, "learning_rate": 5.4001419244961684e-05, "loss": 0.0022168003022670748, "step": 162120 }, { "epoch": 46.0204371274482, "grad_norm": 0.22474893927574158, "learning_rate": 5.399858075503832e-05, "loss": 0.00043445434421300887, "step": 162130 }, { "epoch": 46.02327561737156, "grad_norm": 0.04749462381005287, "learning_rate": 5.399574226511496e-05, "loss": 0.0011852780357003213, "step": 162140 }, { "epoch": 46.026114107294916, "grad_norm": 0.07627009600400925, "learning_rate": 5.399290377519161e-05, "loss": 0.0017149774357676506, "step": 162150 }, { "epoch": 46.02895259721828, "grad_norm": 3.3753859996795654, "learning_rate": 5.399006528526824e-05, "loss": 0.0013398149982094764, "step": 162160 }, { "epoch": 46.03179108714164, "grad_norm": 0.3647576868534088, "learning_rate": 5.398722679534488e-05, "loss": 0.00043796300888061525, "step": 162170 }, { "epoch": 46.034629577065004, "grad_norm": 0.06689789146184921, "learning_rate": 5.398438830542152e-05, "loss": 0.0007028438150882721, "step": 162180 }, { "epoch": 46.03746806698836, "grad_norm": 4.7765421867370605, "learning_rate": 5.398154981549815e-05, "loss": 0.001455855555832386, "step": 162190 }, { "epoch": 46.04030655691172, "grad_norm": 1.0843403339385986, "learning_rate": 5.3978711325574795e-05, "loss": 0.0006022730842232705, "step": 162200 }, { "epoch": 46.043145046835086, "grad_norm": 0.044870927929878235, "learning_rate": 5.397587283565143e-05, "loss": 0.0016254452988505364, "step": 162210 }, { "epoch": 46.04598353675844, "grad_norm": 0.7910535931587219, "learning_rate": 5.397303434572808e-05, "loss": 0.0007785676047205925, "step": 162220 }, { "epoch": 46.048822026681805, "grad_norm": 0.4219972491264343, "learning_rate": 5.397019585580472e-05, "loss": 0.0022998496890068052, "step": 162230 }, { "epoch": 46.05166051660517, "grad_norm": 0.7925904989242554, "learning_rate": 5.3967357365881353e-05, "loss": 0.001314728520810604, "step": 162240 }, { "epoch": 46.05449900652852, "grad_norm": 0.12839733064174652, "learning_rate": 5.3964518875957995e-05, "loss": 0.0023147203028202057, "step": 162250 }, { "epoch": 46.057337496451886, "grad_norm": 0.07025919109582901, "learning_rate": 5.396168038603463e-05, "loss": 0.0011056412011384964, "step": 162260 }, { "epoch": 46.06017598637525, "grad_norm": 2.206904888153076, "learning_rate": 5.3958841896111264e-05, "loss": 0.0029628485441207885, "step": 162270 }, { "epoch": 46.06301447629861, "grad_norm": 0.4191048741340637, "learning_rate": 5.395600340618792e-05, "loss": 0.0017273362725973129, "step": 162280 }, { "epoch": 46.06585296622197, "grad_norm": 0.10081643611192703, "learning_rate": 5.3953164916264554e-05, "loss": 0.0011167537420988082, "step": 162290 }, { "epoch": 46.06869145614533, "grad_norm": 0.32089418172836304, "learning_rate": 5.395032642634119e-05, "loss": 0.00539734959602356, "step": 162300 }, { "epoch": 46.071529946068694, "grad_norm": 1.1832629442214966, "learning_rate": 5.394748793641783e-05, "loss": 0.002499717473983765, "step": 162310 }, { "epoch": 46.07436843599205, "grad_norm": 0.19669432938098907, "learning_rate": 5.3944649446494464e-05, "loss": 0.002529860846698284, "step": 162320 }, { "epoch": 46.07720692591541, "grad_norm": 0.034073006361722946, "learning_rate": 5.3941810956571106e-05, "loss": 0.0012178221717476846, "step": 162330 }, { "epoch": 46.080045415838775, "grad_norm": 0.5585459470748901, "learning_rate": 5.393897246664774e-05, "loss": 0.0019234787672758103, "step": 162340 }, { "epoch": 46.08288390576214, "grad_norm": 0.05181216448545456, "learning_rate": 5.393613397672439e-05, "loss": 0.0014020778238773347, "step": 162350 }, { "epoch": 46.085722395685494, "grad_norm": 0.17448733747005463, "learning_rate": 5.393329548680103e-05, "loss": 0.006489067524671555, "step": 162360 }, { "epoch": 46.08856088560886, "grad_norm": 0.07429579645395279, "learning_rate": 5.3930456996877664e-05, "loss": 0.000692986324429512, "step": 162370 }, { "epoch": 46.09139937553222, "grad_norm": 3.005459785461426, "learning_rate": 5.3927618506954306e-05, "loss": 0.002151169814169407, "step": 162380 }, { "epoch": 46.094237865455575, "grad_norm": 1.0353405475616455, "learning_rate": 5.392478001703094e-05, "loss": 0.005647667497396469, "step": 162390 }, { "epoch": 46.09707635537894, "grad_norm": 0.02238048054277897, "learning_rate": 5.3921941527107575e-05, "loss": 0.0006479542702436447, "step": 162400 }, { "epoch": 46.0999148453023, "grad_norm": 0.2964235544204712, "learning_rate": 5.3919103037184216e-05, "loss": 0.0009610552340745925, "step": 162410 }, { "epoch": 46.10275333522566, "grad_norm": 0.017714111134409904, "learning_rate": 5.3916264547260864e-05, "loss": 0.0003964593634009361, "step": 162420 }, { "epoch": 46.10559182514902, "grad_norm": 0.3485843539237976, "learning_rate": 5.39134260573375e-05, "loss": 0.0004403118044137955, "step": 162430 }, { "epoch": 46.10843031507238, "grad_norm": 0.2335071712732315, "learning_rate": 5.391058756741414e-05, "loss": 0.0017133953049778938, "step": 162440 }, { "epoch": 46.111268804995746, "grad_norm": 0.13199400901794434, "learning_rate": 5.3907749077490775e-05, "loss": 0.0008099187165498733, "step": 162450 }, { "epoch": 46.1141072949191, "grad_norm": 0.02528182603418827, "learning_rate": 5.3904910587567416e-05, "loss": 0.008167947828769683, "step": 162460 }, { "epoch": 46.116945784842464, "grad_norm": 2.1413111686706543, "learning_rate": 5.390207209764405e-05, "loss": 0.002173840068280697, "step": 162470 }, { "epoch": 46.11978427476583, "grad_norm": 22.540597915649414, "learning_rate": 5.38992336077207e-05, "loss": 0.005780110508203507, "step": 162480 }, { "epoch": 46.12262276468918, "grad_norm": 0.03292245417833328, "learning_rate": 5.389639511779734e-05, "loss": 0.0012766674160957336, "step": 162490 }, { "epoch": 46.125461254612546, "grad_norm": 0.025272870436310768, "learning_rate": 5.3893556627873975e-05, "loss": 0.005342083424329758, "step": 162500 }, { "epoch": 46.125461254612546, "eval_accuracy": 0.9769822598079736, "eval_loss": 0.08267591893672943, "eval_runtime": 31.8567, "eval_samples_per_second": 493.679, "eval_steps_per_second": 7.722, "step": 162500 }, { "epoch": 46.12829974453591, "grad_norm": 0.362273633480072, "learning_rate": 5.3890718137950616e-05, "loss": 0.0008373919874429703, "step": 162510 }, { "epoch": 46.131138234459264, "grad_norm": 0.03289893642067909, "learning_rate": 5.388787964802725e-05, "loss": 0.0003962647169828415, "step": 162520 }, { "epoch": 46.13397672438263, "grad_norm": 0.1362258642911911, "learning_rate": 5.3885041158103886e-05, "loss": 0.000758872926235199, "step": 162530 }, { "epoch": 46.13681521430599, "grad_norm": 0.08325715363025665, "learning_rate": 5.388220266818053e-05, "loss": 0.0005447294563055039, "step": 162540 }, { "epoch": 46.13965370422935, "grad_norm": 0.0715063065290451, "learning_rate": 5.3879364178257175e-05, "loss": 0.002281661704182625, "step": 162550 }, { "epoch": 46.14249219415271, "grad_norm": 4.7066426277160645, "learning_rate": 5.387652568833381e-05, "loss": 0.0012405378744006156, "step": 162560 }, { "epoch": 46.14533068407607, "grad_norm": 0.3484973609447479, "learning_rate": 5.387368719841045e-05, "loss": 0.0006002500653266907, "step": 162570 }, { "epoch": 46.148169173999435, "grad_norm": 0.054399099200963974, "learning_rate": 5.3870848708487086e-05, "loss": 0.0005867704749107361, "step": 162580 }, { "epoch": 46.15100766392279, "grad_norm": 0.3551778197288513, "learning_rate": 5.386801021856373e-05, "loss": 0.0009035732597112656, "step": 162590 }, { "epoch": 46.15384615384615, "grad_norm": 0.3016417324542999, "learning_rate": 5.386517172864036e-05, "loss": 0.004234226047992706, "step": 162600 }, { "epoch": 46.156684643769516, "grad_norm": 0.09140706807374954, "learning_rate": 5.3862333238717e-05, "loss": 0.0005207752808928489, "step": 162610 }, { "epoch": 46.15952313369287, "grad_norm": 0.04676120728254318, "learning_rate": 5.385949474879365e-05, "loss": 0.0007847731932997704, "step": 162620 }, { "epoch": 46.162361623616235, "grad_norm": 4.9255475997924805, "learning_rate": 5.3856656258870286e-05, "loss": 0.002993122488260269, "step": 162630 }, { "epoch": 46.1652001135396, "grad_norm": 2.1443607807159424, "learning_rate": 5.385381776894692e-05, "loss": 0.001250469870865345, "step": 162640 }, { "epoch": 46.16803860346296, "grad_norm": 0.23584608733654022, "learning_rate": 5.3851263128015894e-05, "loss": 0.011904342472553254, "step": 162650 }, { "epoch": 46.170877093386316, "grad_norm": 0.05330846086144447, "learning_rate": 5.384842463809254e-05, "loss": 0.0007619190961122513, "step": 162660 }, { "epoch": 46.17371558330968, "grad_norm": 0.07755720615386963, "learning_rate": 5.384558614816918e-05, "loss": 0.0005170943215489388, "step": 162670 }, { "epoch": 46.17655407323304, "grad_norm": 0.013385762460529804, "learning_rate": 5.384274765824582e-05, "loss": 0.001262364722788334, "step": 162680 }, { "epoch": 46.1793925631564, "grad_norm": 1.475870966911316, "learning_rate": 5.383990916832245e-05, "loss": 0.001679704524576664, "step": 162690 }, { "epoch": 46.18223105307976, "grad_norm": 0.09247991442680359, "learning_rate": 5.3837070678399094e-05, "loss": 0.0008517038077116013, "step": 162700 }, { "epoch": 46.185069543003124, "grad_norm": 0.25815045833587646, "learning_rate": 5.383423218847573e-05, "loss": 0.0023378446698188783, "step": 162710 }, { "epoch": 46.18790803292649, "grad_norm": 0.04754171147942543, "learning_rate": 5.383139369855237e-05, "loss": 0.0009436378255486489, "step": 162720 }, { "epoch": 46.19074652284984, "grad_norm": 0.05192510038614273, "learning_rate": 5.382855520862902e-05, "loss": 0.0006022121757268906, "step": 162730 }, { "epoch": 46.193585012773205, "grad_norm": 0.11065036803483963, "learning_rate": 5.382571671870565e-05, "loss": 0.0002622058615088463, "step": 162740 }, { "epoch": 46.19642350269657, "grad_norm": 0.021045805886387825, "learning_rate": 5.3822878228782294e-05, "loss": 0.00031739603728055956, "step": 162750 }, { "epoch": 46.199261992619924, "grad_norm": 0.44834938645362854, "learning_rate": 5.382003973885893e-05, "loss": 0.0005587384104728699, "step": 162760 }, { "epoch": 46.20210048254329, "grad_norm": 0.05369019880890846, "learning_rate": 5.381720124893557e-05, "loss": 0.0004134180024266243, "step": 162770 }, { "epoch": 46.20493897246665, "grad_norm": 0.19729112088680267, "learning_rate": 5.3814362759012204e-05, "loss": 0.0016839876770973206, "step": 162780 }, { "epoch": 46.207777462390005, "grad_norm": 0.15583227574825287, "learning_rate": 5.381152426908884e-05, "loss": 0.0017824217677116394, "step": 162790 }, { "epoch": 46.21061595231337, "grad_norm": 0.10630655288696289, "learning_rate": 5.3808685779165494e-05, "loss": 0.001859956979751587, "step": 162800 }, { "epoch": 46.21345444223673, "grad_norm": 0.13975103199481964, "learning_rate": 5.380584728924213e-05, "loss": 0.0002676393836736679, "step": 162810 }, { "epoch": 46.216292932160094, "grad_norm": 3.1000845432281494, "learning_rate": 5.380300879931876e-05, "loss": 0.002632913738489151, "step": 162820 }, { "epoch": 46.21913142208345, "grad_norm": 0.0686105266213417, "learning_rate": 5.3800170309395404e-05, "loss": 0.011178734898567199, "step": 162830 }, { "epoch": 46.22196991200681, "grad_norm": 0.09672816097736359, "learning_rate": 5.379733181947204e-05, "loss": 0.00491722822189331, "step": 162840 }, { "epoch": 46.224808401930176, "grad_norm": 0.9965134263038635, "learning_rate": 5.379449332954868e-05, "loss": 0.0023222699761390685, "step": 162850 }, { "epoch": 46.22764689185353, "grad_norm": 0.04138294234871864, "learning_rate": 5.379165483962533e-05, "loss": 0.0013759551569819451, "step": 162860 }, { "epoch": 46.230485381776894, "grad_norm": 1.2666488885879517, "learning_rate": 5.378881634970196e-05, "loss": 0.0005918083712458611, "step": 162870 }, { "epoch": 46.23332387170026, "grad_norm": 0.2824135720729828, "learning_rate": 5.3785977859778604e-05, "loss": 0.005900303274393082, "step": 162880 }, { "epoch": 46.23616236162361, "grad_norm": 1.0564559698104858, "learning_rate": 5.378342321884757e-05, "loss": 0.015002001821994782, "step": 162890 }, { "epoch": 46.239000851546976, "grad_norm": 0.09444060176610947, "learning_rate": 5.378058472892421e-05, "loss": 0.015026721358299255, "step": 162900 }, { "epoch": 46.24183934147034, "grad_norm": 1.216881275177002, "learning_rate": 5.377774623900086e-05, "loss": 0.0018469871953129768, "step": 162910 }, { "epoch": 46.2446778313937, "grad_norm": 0.06251490861177444, "learning_rate": 5.3774907749077495e-05, "loss": 0.0011286428198218346, "step": 162920 }, { "epoch": 46.24751632131706, "grad_norm": 7.733246803283691, "learning_rate": 5.3772069259154136e-05, "loss": 0.0019750483334064484, "step": 162930 }, { "epoch": 46.25035481124042, "grad_norm": 0.9402521848678589, "learning_rate": 5.376923076923077e-05, "loss": 0.0015726329758763314, "step": 162940 }, { "epoch": 46.25319330116378, "grad_norm": 0.464796245098114, "learning_rate": 5.376639227930741e-05, "loss": 0.0016161747276782989, "step": 162950 }, { "epoch": 46.25603179108714, "grad_norm": 2.556934356689453, "learning_rate": 5.376355378938405e-05, "loss": 0.0008942753076553345, "step": 162960 }, { "epoch": 46.2588702810105, "grad_norm": 2.0480096340179443, "learning_rate": 5.376071529946068e-05, "loss": 0.0023515654727816583, "step": 162970 }, { "epoch": 46.261708770933865, "grad_norm": 0.5556602478027344, "learning_rate": 5.375787680953733e-05, "loss": 0.0008019698783755302, "step": 162980 }, { "epoch": 46.26454726085722, "grad_norm": 0.1041545420885086, "learning_rate": 5.375503831961397e-05, "loss": 0.002244109660387039, "step": 162990 }, { "epoch": 46.26738575078058, "grad_norm": 0.537129819393158, "learning_rate": 5.3752199829690606e-05, "loss": 0.0029065165668725967, "step": 163000 }, { "epoch": 46.26738575078058, "eval_accuracy": 0.9783811279964393, "eval_loss": 0.07968826591968536, "eval_runtime": 31.8078, "eval_samples_per_second": 494.438, "eval_steps_per_second": 7.734, "step": 163000 }, { "epoch": 46.270224240703946, "grad_norm": 0.03095131181180477, "learning_rate": 5.374936133976725e-05, "loss": 0.0015403581783175468, "step": 163010 }, { "epoch": 46.27306273062731, "grad_norm": 0.07068592309951782, "learning_rate": 5.374652284984388e-05, "loss": 0.006108860298991203, "step": 163020 }, { "epoch": 46.275901220550665, "grad_norm": 0.10401742160320282, "learning_rate": 5.374368435992052e-05, "loss": 0.000548681989312172, "step": 163030 }, { "epoch": 46.27873971047403, "grad_norm": 0.027929771691560745, "learning_rate": 5.374084586999717e-05, "loss": 0.0009438948705792427, "step": 163040 }, { "epoch": 46.28157820039739, "grad_norm": 0.021428057923913002, "learning_rate": 5.3738007380073806e-05, "loss": 0.0013277532532811164, "step": 163050 }, { "epoch": 46.28441669032075, "grad_norm": 1.715158462524414, "learning_rate": 5.373516889015045e-05, "loss": 0.0009073581546545029, "step": 163060 }, { "epoch": 46.28725518024411, "grad_norm": 0.10850457847118378, "learning_rate": 5.373233040022708e-05, "loss": 0.0006232853978872299, "step": 163070 }, { "epoch": 46.29009367016747, "grad_norm": 0.15604525804519653, "learning_rate": 5.372949191030372e-05, "loss": 0.0010053314268589019, "step": 163080 }, { "epoch": 46.29293216009083, "grad_norm": 0.043750420212745667, "learning_rate": 5.372665342038036e-05, "loss": 0.0021579721942543983, "step": 163090 }, { "epoch": 46.29577065001419, "grad_norm": 0.018516860902309418, "learning_rate": 5.372381493045699e-05, "loss": 0.0006486643105745316, "step": 163100 }, { "epoch": 46.298609139937554, "grad_norm": 0.03702171519398689, "learning_rate": 5.372097644053364e-05, "loss": 0.0017677929252386093, "step": 163110 }, { "epoch": 46.30144762986092, "grad_norm": 0.5449747443199158, "learning_rate": 5.371813795061028e-05, "loss": 0.009665670990943908, "step": 163120 }, { "epoch": 46.30428611978427, "grad_norm": 1.1285896301269531, "learning_rate": 5.3715299460686916e-05, "loss": 0.00674062967300415, "step": 163130 }, { "epoch": 46.307124609707635, "grad_norm": 3.202997922897339, "learning_rate": 5.371246097076356e-05, "loss": 0.0023164553567767143, "step": 163140 }, { "epoch": 46.309963099631, "grad_norm": 0.017277976498007774, "learning_rate": 5.370962248084019e-05, "loss": 0.0006163859739899636, "step": 163150 }, { "epoch": 46.312801589554354, "grad_norm": 0.290584921836853, "learning_rate": 5.3706783990916834e-05, "loss": 0.00032588671892881395, "step": 163160 }, { "epoch": 46.31564007947772, "grad_norm": 0.07470045983791351, "learning_rate": 5.370394550099347e-05, "loss": 0.0010370852425694466, "step": 163170 }, { "epoch": 46.31847856940108, "grad_norm": 2.802698850631714, "learning_rate": 5.3701107011070117e-05, "loss": 0.0010930132120847702, "step": 163180 }, { "epoch": 46.32131705932444, "grad_norm": 0.18260355293750763, "learning_rate": 5.369826852114676e-05, "loss": 0.0005731813609600068, "step": 163190 }, { "epoch": 46.3241555492478, "grad_norm": 0.029815582558512688, "learning_rate": 5.369543003122339e-05, "loss": 0.0006133759394288063, "step": 163200 }, { "epoch": 46.32699403917116, "grad_norm": 0.18213213980197906, "learning_rate": 5.369259154130003e-05, "loss": 0.004297978058457374, "step": 163210 }, { "epoch": 46.329832529094524, "grad_norm": 0.08083415031433105, "learning_rate": 5.368975305137667e-05, "loss": 0.00445735901594162, "step": 163220 }, { "epoch": 46.33267101901788, "grad_norm": 0.04082489758729935, "learning_rate": 5.36869145614533e-05, "loss": 0.0039620637893676754, "step": 163230 }, { "epoch": 46.33550950894124, "grad_norm": 0.44436439871788025, "learning_rate": 5.368407607152995e-05, "loss": 0.002559477835893631, "step": 163240 }, { "epoch": 46.338347998864606, "grad_norm": 0.08053095638751984, "learning_rate": 5.368123758160659e-05, "loss": 0.0014695804566144943, "step": 163250 }, { "epoch": 46.34118648878796, "grad_norm": 1.4194035530090332, "learning_rate": 5.367839909168323e-05, "loss": 0.0059247203171253204, "step": 163260 }, { "epoch": 46.344024978711325, "grad_norm": 0.31859317421913147, "learning_rate": 5.367556060175987e-05, "loss": 0.006473459303379059, "step": 163270 }, { "epoch": 46.34686346863469, "grad_norm": 0.11668837070465088, "learning_rate": 5.36727221118365e-05, "loss": 0.0009562671184539795, "step": 163280 }, { "epoch": 46.34970195855805, "grad_norm": 0.509774386882782, "learning_rate": 5.3669883621913145e-05, "loss": 0.0011636849492788314, "step": 163290 }, { "epoch": 46.352540448481406, "grad_norm": 0.010050732642412186, "learning_rate": 5.366704513198978e-05, "loss": 0.0008292518556118012, "step": 163300 }, { "epoch": 46.35537893840477, "grad_norm": 0.3488582372665405, "learning_rate": 5.366420664206643e-05, "loss": 0.0004210282117128372, "step": 163310 }, { "epoch": 46.35821742832813, "grad_norm": 0.06465422362089157, "learning_rate": 5.366136815214307e-05, "loss": 0.004685015231370926, "step": 163320 }, { "epoch": 46.36105591825149, "grad_norm": 19.52214813232422, "learning_rate": 5.36585296622197e-05, "loss": 0.01181247979402542, "step": 163330 }, { "epoch": 46.36389440817485, "grad_norm": 0.03908555209636688, "learning_rate": 5.365569117229634e-05, "loss": 0.00047593656927347183, "step": 163340 }, { "epoch": 46.36673289809821, "grad_norm": 0.3469253480434418, "learning_rate": 5.365285268237298e-05, "loss": 0.003521581366658211, "step": 163350 }, { "epoch": 46.36957138802157, "grad_norm": 0.30431291460990906, "learning_rate": 5.3650014192449614e-05, "loss": 0.0016184480860829352, "step": 163360 }, { "epoch": 46.37240987794493, "grad_norm": 0.15744605660438538, "learning_rate": 5.3647175702526255e-05, "loss": 0.001879645511507988, "step": 163370 }, { "epoch": 46.375248367868295, "grad_norm": 0.34713560342788696, "learning_rate": 5.36443372126029e-05, "loss": 0.0005812043324112892, "step": 163380 }, { "epoch": 46.37808685779166, "grad_norm": 11.435036659240723, "learning_rate": 5.364149872267954e-05, "loss": 0.003355357050895691, "step": 163390 }, { "epoch": 46.380925347715014, "grad_norm": 0.15355592966079712, "learning_rate": 5.363866023275618e-05, "loss": 0.006638573855161667, "step": 163400 }, { "epoch": 46.38376383763838, "grad_norm": 0.030175425112247467, "learning_rate": 5.3635821742832814e-05, "loss": 0.0026629120111465454, "step": 163410 }, { "epoch": 46.38660232756174, "grad_norm": 0.12945377826690674, "learning_rate": 5.3632983252909455e-05, "loss": 0.011742984503507614, "step": 163420 }, { "epoch": 46.389440817485095, "grad_norm": 19.04966163635254, "learning_rate": 5.363014476298609e-05, "loss": 0.004785959422588348, "step": 163430 }, { "epoch": 46.39227930740846, "grad_norm": 0.2328379899263382, "learning_rate": 5.362730627306274e-05, "loss": 0.0027263203635811807, "step": 163440 }, { "epoch": 46.39511779733182, "grad_norm": 0.23741237819194794, "learning_rate": 5.362446778313937e-05, "loss": 0.00951758623123169, "step": 163450 }, { "epoch": 46.39795628725518, "grad_norm": 0.42345044016838074, "learning_rate": 5.3621629293216014e-05, "loss": 0.003709961473941803, "step": 163460 }, { "epoch": 46.40079477717854, "grad_norm": 2.548549175262451, "learning_rate": 5.361879080329265e-05, "loss": 0.0042207270860672, "step": 163470 }, { "epoch": 46.4036332671019, "grad_norm": 0.3839523196220398, "learning_rate": 5.361595231336929e-05, "loss": 0.014558973908424377, "step": 163480 }, { "epoch": 46.406471757025265, "grad_norm": 10.432478904724121, "learning_rate": 5.3613113823445925e-05, "loss": 0.014939486980438232, "step": 163490 }, { "epoch": 46.40931024694862, "grad_norm": 15.05435848236084, "learning_rate": 5.3610275333522566e-05, "loss": 0.01800643652677536, "step": 163500 }, { "epoch": 46.40931024694862, "eval_accuracy": 0.9713867870541107, "eval_loss": 0.11181638389825821, "eval_runtime": 32.6356, "eval_samples_per_second": 481.897, "eval_steps_per_second": 7.538, "step": 163500 }, { "epoch": 46.412148736871984, "grad_norm": 0.3410666286945343, "learning_rate": 5.3607436843599214e-05, "loss": 0.0025023983791470526, "step": 163510 }, { "epoch": 46.41498722679535, "grad_norm": 0.027016954496502876, "learning_rate": 5.360459835367585e-05, "loss": 0.006730303168296814, "step": 163520 }, { "epoch": 46.4178257167187, "grad_norm": 0.08643855899572372, "learning_rate": 5.360175986375249e-05, "loss": 0.005042828619480133, "step": 163530 }, { "epoch": 46.420664206642066, "grad_norm": 2.9451241493225098, "learning_rate": 5.3598921373829125e-05, "loss": 0.006969895213842392, "step": 163540 }, { "epoch": 46.42350269656543, "grad_norm": 1.2815172672271729, "learning_rate": 5.3596082883905766e-05, "loss": 0.0017025817185640335, "step": 163550 }, { "epoch": 46.42634118648879, "grad_norm": 0.1149652898311615, "learning_rate": 5.35932443939824e-05, "loss": 0.0007763613015413284, "step": 163560 }, { "epoch": 46.42917967641215, "grad_norm": 0.11142764985561371, "learning_rate": 5.3590405904059035e-05, "loss": 0.0011886388063430786, "step": 163570 }, { "epoch": 46.43201816633551, "grad_norm": 5.675490379333496, "learning_rate": 5.3587567414135683e-05, "loss": 0.006672661006450653, "step": 163580 }, { "epoch": 46.43485665625887, "grad_norm": 4.5757575035095215, "learning_rate": 5.3584728924212325e-05, "loss": 0.009459036588668823, "step": 163590 }, { "epoch": 46.43769514618223, "grad_norm": 14.7116117477417, "learning_rate": 5.358189043428896e-05, "loss": 0.013829952478408814, "step": 163600 }, { "epoch": 46.44053363610559, "grad_norm": 8.242639541625977, "learning_rate": 5.35790519443656e-05, "loss": 0.003092874214053154, "step": 163610 }, { "epoch": 46.443372126028954, "grad_norm": 0.279328316450119, "learning_rate": 5.3576213454442235e-05, "loss": 0.00081463772803545, "step": 163620 }, { "epoch": 46.44621061595231, "grad_norm": 1.7517021894454956, "learning_rate": 5.357337496451888e-05, "loss": 0.003731635957956314, "step": 163630 }, { "epoch": 46.44904910587567, "grad_norm": 0.1343434602022171, "learning_rate": 5.3570536474595525e-05, "loss": 0.001091160997748375, "step": 163640 }, { "epoch": 46.451887595799036, "grad_norm": 0.10400504618883133, "learning_rate": 5.356769798467216e-05, "loss": 0.0029919516295194627, "step": 163650 }, { "epoch": 46.4547260857224, "grad_norm": 0.03137136250734329, "learning_rate": 5.35648594947488e-05, "loss": 0.005298639088869095, "step": 163660 }, { "epoch": 46.457564575645755, "grad_norm": 0.14458556473255157, "learning_rate": 5.3562021004825435e-05, "loss": 0.0039064474403858185, "step": 163670 }, { "epoch": 46.46040306556912, "grad_norm": 0.31425708532333374, "learning_rate": 5.355918251490207e-05, "loss": 0.004829246550798416, "step": 163680 }, { "epoch": 46.46324155549248, "grad_norm": 0.7644392848014832, "learning_rate": 5.355634402497871e-05, "loss": 0.003656395524740219, "step": 163690 }, { "epoch": 46.466080045415836, "grad_norm": 0.842171847820282, "learning_rate": 5.3553505535055346e-05, "loss": 0.015391390025615691, "step": 163700 }, { "epoch": 46.4689185353392, "grad_norm": 5.345112323760986, "learning_rate": 5.3550667045131994e-05, "loss": 0.009842721372842788, "step": 163710 }, { "epoch": 46.47175702526256, "grad_norm": 0.7716372609138489, "learning_rate": 5.3547828555208636e-05, "loss": 0.002174505777657032, "step": 163720 }, { "epoch": 46.47459551518592, "grad_norm": 2.6899378299713135, "learning_rate": 5.354499006528527e-05, "loss": 0.012052251398563385, "step": 163730 }, { "epoch": 46.47743400510928, "grad_norm": 0.8180320858955383, "learning_rate": 5.354215157536191e-05, "loss": 0.0018224529922008514, "step": 163740 }, { "epoch": 46.480272495032644, "grad_norm": 0.09394227713346481, "learning_rate": 5.3539313085438546e-05, "loss": 0.0018266221508383752, "step": 163750 }, { "epoch": 46.48311098495601, "grad_norm": 0.5276504158973694, "learning_rate": 5.353647459551519e-05, "loss": 0.0030203510075807573, "step": 163760 }, { "epoch": 46.48594947487936, "grad_norm": 0.046393219381570816, "learning_rate": 5.353363610559182e-05, "loss": 0.0018759462982416153, "step": 163770 }, { "epoch": 46.488787964802725, "grad_norm": 0.24885918200016022, "learning_rate": 5.353079761566847e-05, "loss": 0.004050688445568084, "step": 163780 }, { "epoch": 46.49162645472609, "grad_norm": 0.9437452554702759, "learning_rate": 5.352795912574511e-05, "loss": 0.0016717275604605675, "step": 163790 }, { "epoch": 46.494464944649444, "grad_norm": 0.5857776403427124, "learning_rate": 5.3525120635821746e-05, "loss": 0.0010287323966622353, "step": 163800 }, { "epoch": 46.49730343457281, "grad_norm": 0.07450392097234726, "learning_rate": 5.352228214589838e-05, "loss": 0.017902839183807372, "step": 163810 }, { "epoch": 46.50014192449617, "grad_norm": 4.893764972686768, "learning_rate": 5.351944365597502e-05, "loss": 0.002689189091324806, "step": 163820 }, { "epoch": 46.502980414419525, "grad_norm": 0.6912021040916443, "learning_rate": 5.351660516605166e-05, "loss": 0.0005746757611632347, "step": 163830 }, { "epoch": 46.50581890434289, "grad_norm": 2.7230052947998047, "learning_rate": 5.3513766676128305e-05, "loss": 0.0014847451820969582, "step": 163840 }, { "epoch": 46.50865739426625, "grad_norm": 0.22354038059711456, "learning_rate": 5.3510928186204946e-05, "loss": 0.0010698890313506127, "step": 163850 }, { "epoch": 46.511495884189614, "grad_norm": 4.0255513191223145, "learning_rate": 5.350808969628158e-05, "loss": 0.0024413086473941805, "step": 163860 }, { "epoch": 46.51433437411297, "grad_norm": 0.04058212414383888, "learning_rate": 5.350525120635822e-05, "loss": 0.004021137580275535, "step": 163870 }, { "epoch": 46.51717286403633, "grad_norm": 0.2017292082309723, "learning_rate": 5.350241271643486e-05, "loss": 0.0007887685671448708, "step": 163880 }, { "epoch": 46.520011353959696, "grad_norm": 0.8076528310775757, "learning_rate": 5.34995742265115e-05, "loss": 0.001407521776854992, "step": 163890 }, { "epoch": 46.52284984388305, "grad_norm": 0.5083214044570923, "learning_rate": 5.349673573658813e-05, "loss": 0.0022427033632993697, "step": 163900 }, { "epoch": 46.525688333806414, "grad_norm": 1.9543944597244263, "learning_rate": 5.349389724666478e-05, "loss": 0.0015397829934954642, "step": 163910 }, { "epoch": 46.52852682372978, "grad_norm": 0.08514902740716934, "learning_rate": 5.3491058756741416e-05, "loss": 0.0005365850403904915, "step": 163920 }, { "epoch": 46.53136531365314, "grad_norm": 0.2308827042579651, "learning_rate": 5.348822026681806e-05, "loss": 0.00125583503395319, "step": 163930 }, { "epoch": 46.534203803576496, "grad_norm": 0.056419603526592255, "learning_rate": 5.348538177689469e-05, "loss": 0.003990138694643974, "step": 163940 }, { "epoch": 46.53704229349986, "grad_norm": 0.689139187335968, "learning_rate": 5.348254328697133e-05, "loss": 0.0010517885908484458, "step": 163950 }, { "epoch": 46.53988078342322, "grad_norm": 8.854610443115234, "learning_rate": 5.347970479704797e-05, "loss": 0.0038975954055786135, "step": 163960 }, { "epoch": 46.54271927334658, "grad_norm": 0.07472768425941467, "learning_rate": 5.347686630712461e-05, "loss": 0.0012585034593939782, "step": 163970 }, { "epoch": 46.54555776326994, "grad_norm": 0.1801338791847229, "learning_rate": 5.347402781720126e-05, "loss": 0.01476655900478363, "step": 163980 }, { "epoch": 46.5483962531933, "grad_norm": 0.3647598922252655, "learning_rate": 5.347118932727789e-05, "loss": 0.0034795574843883514, "step": 163990 }, { "epoch": 46.55123474311666, "grad_norm": 20.6460018157959, "learning_rate": 5.346835083735453e-05, "loss": 0.0075932078063488005, "step": 164000 }, { "epoch": 46.55123474311666, "eval_accuracy": 0.9720862211483436, "eval_loss": 0.09905950725078583, "eval_runtime": 33.0849, "eval_samples_per_second": 475.353, "eval_steps_per_second": 7.435, "step": 164000 }, { "epoch": 46.55407323304002, "grad_norm": 15.626503944396973, "learning_rate": 5.346551234743117e-05, "loss": 0.004081713408231736, "step": 164010 }, { "epoch": 46.556911722963385, "grad_norm": 0.44443973898887634, "learning_rate": 5.34626738575078e-05, "loss": 0.002146083302795887, "step": 164020 }, { "epoch": 46.55975021288675, "grad_norm": 0.24075619876384735, "learning_rate": 5.3459835367584444e-05, "loss": 0.004924489557743073, "step": 164030 }, { "epoch": 46.5625887028101, "grad_norm": 0.043185025453567505, "learning_rate": 5.345699687766109e-05, "loss": 0.00041001234203577044, "step": 164040 }, { "epoch": 46.565427192733466, "grad_norm": 0.2908719778060913, "learning_rate": 5.3454158387737726e-05, "loss": 0.0038782693445682526, "step": 164050 }, { "epoch": 46.56826568265683, "grad_norm": 0.32294660806655884, "learning_rate": 5.345131989781437e-05, "loss": 0.0010839059948921203, "step": 164060 }, { "epoch": 46.571104172580185, "grad_norm": 1.0365335941314697, "learning_rate": 5.3448481407891e-05, "loss": 0.0026176033541560175, "step": 164070 }, { "epoch": 46.57394266250355, "grad_norm": 18.542984008789062, "learning_rate": 5.3445642917967644e-05, "loss": 0.018523530662059785, "step": 164080 }, { "epoch": 46.57678115242691, "grad_norm": 0.12884201109409332, "learning_rate": 5.344280442804428e-05, "loss": 0.0008348416537046433, "step": 164090 }, { "epoch": 46.579619642350266, "grad_norm": 0.21558894217014313, "learning_rate": 5.343996593812092e-05, "loss": 0.0011558257043361663, "step": 164100 }, { "epoch": 46.58245813227363, "grad_norm": 1.712548017501831, "learning_rate": 5.343712744819757e-05, "loss": 0.0010267585515975953, "step": 164110 }, { "epoch": 46.58529662219699, "grad_norm": 1.4514437913894653, "learning_rate": 5.34342889582742e-05, "loss": 0.003362574800848961, "step": 164120 }, { "epoch": 46.588135112120355, "grad_norm": 3.3931257724761963, "learning_rate": 5.3431450468350844e-05, "loss": 0.0011229487136006355, "step": 164130 }, { "epoch": 46.59097360204371, "grad_norm": 0.011247451417148113, "learning_rate": 5.342861197842748e-05, "loss": 0.0004738014191389084, "step": 164140 }, { "epoch": 46.593812091967074, "grad_norm": 0.6923418045043945, "learning_rate": 5.342577348850411e-05, "loss": 0.002503395453095436, "step": 164150 }, { "epoch": 46.59665058189044, "grad_norm": 0.5976182818412781, "learning_rate": 5.3422934998580754e-05, "loss": 0.0006764981895685196, "step": 164160 }, { "epoch": 46.59948907181379, "grad_norm": 1.8433115482330322, "learning_rate": 5.34200965086574e-05, "loss": 0.0014811884611845016, "step": 164170 }, { "epoch": 46.602327561737155, "grad_norm": 0.07827173918485641, "learning_rate": 5.341725801873404e-05, "loss": 0.0009638853371143341, "step": 164180 }, { "epoch": 46.60516605166052, "grad_norm": 1.1586744785308838, "learning_rate": 5.341441952881068e-05, "loss": 0.0013540873304009437, "step": 164190 }, { "epoch": 46.608004541583874, "grad_norm": 10.735965728759766, "learning_rate": 5.341158103888731e-05, "loss": 0.0033691756427288054, "step": 164200 }, { "epoch": 46.61084303150724, "grad_norm": 0.15648287534713745, "learning_rate": 5.3408742548963955e-05, "loss": 0.004374609887599945, "step": 164210 }, { "epoch": 46.6136815214306, "grad_norm": 0.17912697792053223, "learning_rate": 5.340590405904059e-05, "loss": 0.005459198355674743, "step": 164220 }, { "epoch": 46.61652001135396, "grad_norm": 1.06454598903656, "learning_rate": 5.340306556911723e-05, "loss": 0.000906112976372242, "step": 164230 }, { "epoch": 46.61935850127732, "grad_norm": 0.04322906956076622, "learning_rate": 5.340022707919388e-05, "loss": 0.005678822100162506, "step": 164240 }, { "epoch": 46.62219699120068, "grad_norm": 4.784919738769531, "learning_rate": 5.339738858927051e-05, "loss": 0.0025967231020331384, "step": 164250 }, { "epoch": 46.625035481124044, "grad_norm": 0.6090016961097717, "learning_rate": 5.3394550099347155e-05, "loss": 0.0023083219304680823, "step": 164260 }, { "epoch": 46.6278739710474, "grad_norm": 0.07309899479150772, "learning_rate": 5.339171160942379e-05, "loss": 0.0012869982048869133, "step": 164270 }, { "epoch": 46.63071246097076, "grad_norm": 13.747698783874512, "learning_rate": 5.3388873119500424e-05, "loss": 0.003963427990674973, "step": 164280 }, { "epoch": 46.633550950894126, "grad_norm": 0.43263405561447144, "learning_rate": 5.3386034629577065e-05, "loss": 0.000798116996884346, "step": 164290 }, { "epoch": 46.63638944081748, "grad_norm": 0.04291701316833496, "learning_rate": 5.33831961396537e-05, "loss": 0.002957405708730221, "step": 164300 }, { "epoch": 46.639227930740844, "grad_norm": 0.3192860782146454, "learning_rate": 5.338035764973035e-05, "loss": 0.0012529529631137848, "step": 164310 }, { "epoch": 46.64206642066421, "grad_norm": 4.044543743133545, "learning_rate": 5.337751915980699e-05, "loss": 0.008240612596273423, "step": 164320 }, { "epoch": 46.64490491058757, "grad_norm": 0.056108467280864716, "learning_rate": 5.3374680669883624e-05, "loss": 0.009401299059391022, "step": 164330 }, { "epoch": 46.647743400510926, "grad_norm": 0.03932775557041168, "learning_rate": 5.3371842179960265e-05, "loss": 0.0019448256120085717, "step": 164340 }, { "epoch": 46.65058189043429, "grad_norm": 5.225566864013672, "learning_rate": 5.33690036900369e-05, "loss": 0.0057066213339567184, "step": 164350 }, { "epoch": 46.65342038035765, "grad_norm": 0.18549293279647827, "learning_rate": 5.336616520011354e-05, "loss": 0.0005222808569669724, "step": 164360 }, { "epoch": 46.65625887028101, "grad_norm": 3.60960054397583, "learning_rate": 5.336332671019019e-05, "loss": 0.004709808900952339, "step": 164370 }, { "epoch": 46.65909736020437, "grad_norm": 0.08255498856306076, "learning_rate": 5.3360488220266824e-05, "loss": 0.0035531751811504362, "step": 164380 }, { "epoch": 46.66193585012773, "grad_norm": 0.0329025499522686, "learning_rate": 5.335764973034346e-05, "loss": 0.002903095260262489, "step": 164390 }, { "epoch": 46.664774340051096, "grad_norm": 0.4546467959880829, "learning_rate": 5.33548112404201e-05, "loss": 0.0012167034670710563, "step": 164400 }, { "epoch": 46.66761282997445, "grad_norm": 0.17145900428295135, "learning_rate": 5.3351972750496735e-05, "loss": 0.0014900915324687959, "step": 164410 }, { "epoch": 46.670451319897815, "grad_norm": 0.04574727639555931, "learning_rate": 5.3349134260573376e-05, "loss": 0.0003438059240579605, "step": 164420 }, { "epoch": 46.67328980982118, "grad_norm": 0.017299553379416466, "learning_rate": 5.334629577065001e-05, "loss": 0.00023807957768440247, "step": 164430 }, { "epoch": 46.67612829974453, "grad_norm": 0.0297741387039423, "learning_rate": 5.334345728072666e-05, "loss": 0.0004010375589132309, "step": 164440 }, { "epoch": 46.678966789667896, "grad_norm": 0.10284103453159332, "learning_rate": 5.33406187908033e-05, "loss": 0.0018596142530441285, "step": 164450 }, { "epoch": 46.68180527959126, "grad_norm": 0.10028381645679474, "learning_rate": 5.3337780300879935e-05, "loss": 0.0006998443976044654, "step": 164460 }, { "epoch": 46.684643769514615, "grad_norm": 0.1154487356543541, "learning_rate": 5.3334941810956576e-05, "loss": 0.000403880700469017, "step": 164470 }, { "epoch": 46.68748225943798, "grad_norm": 0.13004150986671448, "learning_rate": 5.333210332103321e-05, "loss": 0.001231623813509941, "step": 164480 }, { "epoch": 46.69032074936134, "grad_norm": 0.22551992535591125, "learning_rate": 5.3329264831109845e-05, "loss": 0.0038978207856416704, "step": 164490 }, { "epoch": 46.693159239284704, "grad_norm": 0.005688790697604418, "learning_rate": 5.332642634118649e-05, "loss": 0.0018117938190698624, "step": 164500 }, { "epoch": 46.693159239284704, "eval_accuracy": 0.9773001843962612, "eval_loss": 0.08091847598552704, "eval_runtime": 32.637, "eval_samples_per_second": 481.877, "eval_steps_per_second": 7.537, "step": 164500 }, { "epoch": 46.69599772920806, "grad_norm": 0.1395488828420639, "learning_rate": 5.3323587851263135e-05, "loss": 0.0003485584631562233, "step": 164510 }, { "epoch": 46.69883621913142, "grad_norm": 0.15540945529937744, "learning_rate": 5.332074936133977e-05, "loss": 0.000623115710914135, "step": 164520 }, { "epoch": 46.701674709054785, "grad_norm": 0.6340265274047852, "learning_rate": 5.331791087141641e-05, "loss": 0.0016589660197496414, "step": 164530 }, { "epoch": 46.70451319897814, "grad_norm": 0.0849124863743782, "learning_rate": 5.3315072381493045e-05, "loss": 0.0036720722913742066, "step": 164540 }, { "epoch": 46.707351688901504, "grad_norm": 0.024355188012123108, "learning_rate": 5.331223389156969e-05, "loss": 0.000666770339012146, "step": 164550 }, { "epoch": 46.71019017882487, "grad_norm": 0.18630103766918182, "learning_rate": 5.330939540164632e-05, "loss": 0.0010414257645606994, "step": 164560 }, { "epoch": 46.71302866874822, "grad_norm": 0.016069777309894562, "learning_rate": 5.330655691172297e-05, "loss": 0.0012613140046596528, "step": 164570 }, { "epoch": 46.715867158671585, "grad_norm": 0.33277541399002075, "learning_rate": 5.330371842179961e-05, "loss": 0.0005870601162314415, "step": 164580 }, { "epoch": 46.71870564859495, "grad_norm": 0.2891557812690735, "learning_rate": 5.3300879931876245e-05, "loss": 0.0014350913465023042, "step": 164590 }, { "epoch": 46.72154413851831, "grad_norm": 0.150354266166687, "learning_rate": 5.329804144195289e-05, "loss": 0.0062258407473564144, "step": 164600 }, { "epoch": 46.72438262844167, "grad_norm": 0.043504498898983, "learning_rate": 5.329520295202952e-05, "loss": 0.0022360702976584436, "step": 164610 }, { "epoch": 46.72722111836503, "grad_norm": 0.6093674898147583, "learning_rate": 5.3292364462106156e-05, "loss": 0.0012466447427868844, "step": 164620 }, { "epoch": 46.73005960828839, "grad_norm": 1.6744266748428345, "learning_rate": 5.32895259721828e-05, "loss": 0.0014390576630830764, "step": 164630 }, { "epoch": 46.73289809821175, "grad_norm": 6.413366794586182, "learning_rate": 5.3286687482259446e-05, "loss": 0.012248972058296203, "step": 164640 }, { "epoch": 46.73573658813511, "grad_norm": 0.3209405839443207, "learning_rate": 5.328384899233608e-05, "loss": 0.0032893434166908265, "step": 164650 }, { "epoch": 46.738575078058474, "grad_norm": 0.20171353220939636, "learning_rate": 5.328101050241272e-05, "loss": 0.004455301910638809, "step": 164660 }, { "epoch": 46.74141356798184, "grad_norm": 0.2415858954191208, "learning_rate": 5.3278172012489356e-05, "loss": 0.002097412571310997, "step": 164670 }, { "epoch": 46.74425205790519, "grad_norm": 4.924857139587402, "learning_rate": 5.3275333522566e-05, "loss": 0.00393880233168602, "step": 164680 }, { "epoch": 46.747090547828556, "grad_norm": 0.23658417165279388, "learning_rate": 5.327249503264263e-05, "loss": 0.0024287857115268706, "step": 164690 }, { "epoch": 46.74992903775192, "grad_norm": 0.33361124992370605, "learning_rate": 5.3269656542719273e-05, "loss": 0.0012161394581198693, "step": 164700 }, { "epoch": 46.752767527675275, "grad_norm": 0.022411111742258072, "learning_rate": 5.326681805279592e-05, "loss": 0.0011892318725585938, "step": 164710 }, { "epoch": 46.75560601759864, "grad_norm": 0.10893318802118301, "learning_rate": 5.3263979562872556e-05, "loss": 0.007594121247529983, "step": 164720 }, { "epoch": 46.758444507522, "grad_norm": 0.04642578586935997, "learning_rate": 5.32611410729492e-05, "loss": 0.000713401846587658, "step": 164730 }, { "epoch": 46.761282997445356, "grad_norm": 0.07465706765651703, "learning_rate": 5.325830258302583e-05, "loss": 0.002715184725821018, "step": 164740 }, { "epoch": 46.76412148736872, "grad_norm": 0.10463391989469528, "learning_rate": 5.325546409310247e-05, "loss": 0.0007320273667573929, "step": 164750 }, { "epoch": 46.76695997729208, "grad_norm": 0.07132776826620102, "learning_rate": 5.325262560317911e-05, "loss": 0.0007017970085144043, "step": 164760 }, { "epoch": 46.769798467215445, "grad_norm": 0.8641230463981628, "learning_rate": 5.3249787113255756e-05, "loss": 0.0004623936489224434, "step": 164770 }, { "epoch": 46.7726369571388, "grad_norm": 13.238468170166016, "learning_rate": 5.324694862333239e-05, "loss": 0.002437843568623066, "step": 164780 }, { "epoch": 46.77547544706216, "grad_norm": 0.04031866788864136, "learning_rate": 5.324411013340903e-05, "loss": 0.0026259573176503183, "step": 164790 }, { "epoch": 46.778313936985526, "grad_norm": 0.23712824285030365, "learning_rate": 5.324127164348567e-05, "loss": 0.001870042458176613, "step": 164800 }, { "epoch": 46.78115242690888, "grad_norm": 0.018666600808501244, "learning_rate": 5.323843315356231e-05, "loss": 0.0005648083984851837, "step": 164810 }, { "epoch": 46.783990916832245, "grad_norm": 0.12453712522983551, "learning_rate": 5.323559466363894e-05, "loss": 0.011680512875318527, "step": 164820 }, { "epoch": 46.78682940675561, "grad_norm": 0.39277002215385437, "learning_rate": 5.3232756173715584e-05, "loss": 0.0030176300555467606, "step": 164830 }, { "epoch": 46.789667896678964, "grad_norm": 0.21137875318527222, "learning_rate": 5.322991768379223e-05, "loss": 0.0030567942187190054, "step": 164840 }, { "epoch": 46.79250638660233, "grad_norm": 0.8106091022491455, "learning_rate": 5.322707919386887e-05, "loss": 0.00247596874833107, "step": 164850 }, { "epoch": 46.79534487652569, "grad_norm": 4.175459384918213, "learning_rate": 5.32242407039455e-05, "loss": 0.008488701283931732, "step": 164860 }, { "epoch": 46.79818336644905, "grad_norm": 0.5828638076782227, "learning_rate": 5.322140221402214e-05, "loss": 0.001956317760050297, "step": 164870 }, { "epoch": 46.80102185637241, "grad_norm": 0.05157916992902756, "learning_rate": 5.321856372409878e-05, "loss": 0.0005281424149870872, "step": 164880 }, { "epoch": 46.80386034629577, "grad_norm": 0.06228640303015709, "learning_rate": 5.321572523417542e-05, "loss": 0.0008106857538223266, "step": 164890 }, { "epoch": 46.806698836219134, "grad_norm": 0.13045260310173035, "learning_rate": 5.3212886744252054e-05, "loss": 0.0008294342085719108, "step": 164900 }, { "epoch": 46.80953732614249, "grad_norm": 0.013395009562373161, "learning_rate": 5.32100482543287e-05, "loss": 0.005664100125432015, "step": 164910 }, { "epoch": 46.81237581606585, "grad_norm": 0.06896400451660156, "learning_rate": 5.320720976440534e-05, "loss": 0.0004715407267212868, "step": 164920 }, { "epoch": 46.815214305989215, "grad_norm": 0.8006630539894104, "learning_rate": 5.320437127448198e-05, "loss": 0.004637760668992996, "step": 164930 }, { "epoch": 46.81805279591257, "grad_norm": 0.1953216791152954, "learning_rate": 5.320153278455862e-05, "loss": 0.0015667589381337167, "step": 164940 }, { "epoch": 46.820891285835934, "grad_norm": 0.45883411169052124, "learning_rate": 5.3198694294635254e-05, "loss": 0.0017401978373527526, "step": 164950 }, { "epoch": 46.8237297757593, "grad_norm": 0.18919387459754944, "learning_rate": 5.319585580471189e-05, "loss": 0.0006857763975858689, "step": 164960 }, { "epoch": 46.82656826568266, "grad_norm": 0.7087239027023315, "learning_rate": 5.319301731478854e-05, "loss": 0.0009487476199865341, "step": 164970 }, { "epoch": 46.829406755606016, "grad_norm": 0.10814406722784042, "learning_rate": 5.319017882486518e-05, "loss": 0.00575784370303154, "step": 164980 }, { "epoch": 46.83224524552938, "grad_norm": 0.016473762691020966, "learning_rate": 5.318734033494181e-05, "loss": 0.002390122599899769, "step": 164990 }, { "epoch": 46.83508373545274, "grad_norm": 0.04235539957880974, "learning_rate": 5.3184501845018454e-05, "loss": 0.00070070531219244, "step": 165000 }, { "epoch": 46.83508373545274, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.08768175542354584, "eval_runtime": 32.5308, "eval_samples_per_second": 483.45, "eval_steps_per_second": 7.562, "step": 165000 }, { "epoch": 46.8379222253761, "grad_norm": 0.05907853692770004, "learning_rate": 5.318166335509509e-05, "loss": 0.0013150973245501518, "step": 165010 }, { "epoch": 46.84076071529946, "grad_norm": 0.06244242563843727, "learning_rate": 5.317882486517173e-05, "loss": 0.00194683987647295, "step": 165020 }, { "epoch": 46.84359920522282, "grad_norm": 0.042970847338438034, "learning_rate": 5.3175986375248364e-05, "loss": 0.0011794626712799071, "step": 165030 }, { "epoch": 46.84643769514618, "grad_norm": 0.12341023236513138, "learning_rate": 5.317314788532501e-05, "loss": 0.0035207368433475494, "step": 165040 }, { "epoch": 46.84927618506954, "grad_norm": 1.291841745376587, "learning_rate": 5.3170309395401654e-05, "loss": 0.0012865677475929261, "step": 165050 }, { "epoch": 46.852114674992904, "grad_norm": 6.701313495635986, "learning_rate": 5.316747090547829e-05, "loss": 0.005393410101532936, "step": 165060 }, { "epoch": 46.85495316491627, "grad_norm": 0.21338629722595215, "learning_rate": 5.316463241555493e-05, "loss": 0.0015699679031968116, "step": 165070 }, { "epoch": 46.85779165483962, "grad_norm": 0.08192148059606552, "learning_rate": 5.3161793925631564e-05, "loss": 0.0016354961320757867, "step": 165080 }, { "epoch": 46.860630144762986, "grad_norm": 0.051396988332271576, "learning_rate": 5.31589554357082e-05, "loss": 0.0030400993302464483, "step": 165090 }, { "epoch": 46.86346863468635, "grad_norm": 0.16242475807666779, "learning_rate": 5.315611694578484e-05, "loss": 0.0008845651522278785, "step": 165100 }, { "epoch": 46.866307124609705, "grad_norm": 0.41115468740463257, "learning_rate": 5.315327845586149e-05, "loss": 0.0019312728196382522, "step": 165110 }, { "epoch": 46.86914561453307, "grad_norm": 0.9477577209472656, "learning_rate": 5.315043996593812e-05, "loss": 0.0011264583095908164, "step": 165120 }, { "epoch": 46.87198410445643, "grad_norm": 0.026373257860541344, "learning_rate": 5.3147601476014764e-05, "loss": 0.0011991256847977637, "step": 165130 }, { "epoch": 46.87482259437979, "grad_norm": 16.66499900817871, "learning_rate": 5.31447629860914e-05, "loss": 0.007589232921600342, "step": 165140 }, { "epoch": 46.87766108430315, "grad_norm": 0.41995543241500854, "learning_rate": 5.314192449616804e-05, "loss": 0.001023968867957592, "step": 165150 }, { "epoch": 46.88049957422651, "grad_norm": 0.7740252017974854, "learning_rate": 5.3139086006244675e-05, "loss": 0.00249041635543108, "step": 165160 }, { "epoch": 46.883338064149875, "grad_norm": 0.07213396579027176, "learning_rate": 5.313624751632132e-05, "loss": 0.000631587952375412, "step": 165170 }, { "epoch": 46.88617655407323, "grad_norm": 0.14581839740276337, "learning_rate": 5.3133409026397965e-05, "loss": 0.0009664466604590416, "step": 165180 }, { "epoch": 46.889015043996594, "grad_norm": 9.587495803833008, "learning_rate": 5.31305705364746e-05, "loss": 0.0036456815898418425, "step": 165190 }, { "epoch": 46.89185353391996, "grad_norm": 1.0758684873580933, "learning_rate": 5.312773204655124e-05, "loss": 0.003959966450929641, "step": 165200 }, { "epoch": 46.89469202384331, "grad_norm": 0.6814945340156555, "learning_rate": 5.3124893556627875e-05, "loss": 0.0008657170459628105, "step": 165210 }, { "epoch": 46.897530513766675, "grad_norm": 0.02936691604554653, "learning_rate": 5.312205506670451e-05, "loss": 0.0010898422449827194, "step": 165220 }, { "epoch": 46.90036900369004, "grad_norm": 0.2673785388469696, "learning_rate": 5.311921657678115e-05, "loss": 0.0010003086179494857, "step": 165230 }, { "epoch": 46.9032074936134, "grad_norm": 0.7853323221206665, "learning_rate": 5.31163780868578e-05, "loss": 0.0013068223372101784, "step": 165240 }, { "epoch": 46.90604598353676, "grad_norm": 0.18180415034294128, "learning_rate": 5.3113539596934434e-05, "loss": 0.0009606335312128067, "step": 165250 }, { "epoch": 46.90888447346012, "grad_norm": 0.07592189311981201, "learning_rate": 5.3110701107011075e-05, "loss": 0.003295877203345299, "step": 165260 }, { "epoch": 46.91172296338348, "grad_norm": 0.07576707005500793, "learning_rate": 5.310786261708771e-05, "loss": 0.0006053812801837921, "step": 165270 }, { "epoch": 46.91456145330684, "grad_norm": 0.4915252923965454, "learning_rate": 5.310502412716435e-05, "loss": 0.00052510816603899, "step": 165280 }, { "epoch": 46.9173999432302, "grad_norm": 0.28922924399375916, "learning_rate": 5.3102185637240986e-05, "loss": 0.000751427561044693, "step": 165290 }, { "epoch": 46.920238433153564, "grad_norm": 0.04297211393713951, "learning_rate": 5.309934714731763e-05, "loss": 0.0085616834461689, "step": 165300 }, { "epoch": 46.92307692307692, "grad_norm": 2.4428343772888184, "learning_rate": 5.3096508657394275e-05, "loss": 0.007534244656562805, "step": 165310 }, { "epoch": 46.92591541300028, "grad_norm": 9.895331382751465, "learning_rate": 5.309367016747091e-05, "loss": 0.0017750348895788192, "step": 165320 }, { "epoch": 46.928753902923646, "grad_norm": 18.46493911743164, "learning_rate": 5.3090831677547545e-05, "loss": 0.01760469377040863, "step": 165330 }, { "epoch": 46.93159239284701, "grad_norm": 0.45224106311798096, "learning_rate": 5.3087993187624186e-05, "loss": 0.0005628550425171852, "step": 165340 }, { "epoch": 46.934430882770364, "grad_norm": 16.781593322753906, "learning_rate": 5.308515469770082e-05, "loss": 0.006248362362384796, "step": 165350 }, { "epoch": 46.93726937269373, "grad_norm": 0.406368225812912, "learning_rate": 5.308231620777746e-05, "loss": 0.008037784695625305, "step": 165360 }, { "epoch": 46.94010786261709, "grad_norm": 0.043875083327293396, "learning_rate": 5.307947771785411e-05, "loss": 0.0033710118383169173, "step": 165370 }, { "epoch": 46.942946352540446, "grad_norm": 4.744696617126465, "learning_rate": 5.3076639227930745e-05, "loss": 0.00232350118458271, "step": 165380 }, { "epoch": 46.94578484246381, "grad_norm": 0.13025854527950287, "learning_rate": 5.3073800738007386e-05, "loss": 0.005455337464809418, "step": 165390 }, { "epoch": 46.94862333238717, "grad_norm": 21.67111587524414, "learning_rate": 5.307096224808402e-05, "loss": 0.0073852457106113436, "step": 165400 }, { "epoch": 46.95146182231053, "grad_norm": 0.08563057333230972, "learning_rate": 5.306812375816066e-05, "loss": 0.0005698682740330696, "step": 165410 }, { "epoch": 46.95430031223389, "grad_norm": 12.591188430786133, "learning_rate": 5.3065285268237297e-05, "loss": 0.005222213640809059, "step": 165420 }, { "epoch": 46.95713880215725, "grad_norm": 0.058262646198272705, "learning_rate": 5.306244677831393e-05, "loss": 0.0011304263025522232, "step": 165430 }, { "epoch": 46.959977292080616, "grad_norm": 0.03133535757660866, "learning_rate": 5.3059608288390586e-05, "loss": 0.014902885258197784, "step": 165440 }, { "epoch": 46.96281578200397, "grad_norm": 2.620091438293457, "learning_rate": 5.305676979846722e-05, "loss": 0.001906036213040352, "step": 165450 }, { "epoch": 46.965654271927335, "grad_norm": 0.6794065833091736, "learning_rate": 5.3053931308543855e-05, "loss": 0.007320637255907059, "step": 165460 }, { "epoch": 46.9684927618507, "grad_norm": 9.20463752746582, "learning_rate": 5.30510928186205e-05, "loss": 0.005141761898994446, "step": 165470 }, { "epoch": 46.97133125177405, "grad_norm": 11.017587661743164, "learning_rate": 5.304825432869713e-05, "loss": 0.0037460081279277802, "step": 165480 }, { "epoch": 46.974169741697416, "grad_norm": 2.173290491104126, "learning_rate": 5.304541583877377e-05, "loss": 0.0010919300839304924, "step": 165490 }, { "epoch": 46.97700823162078, "grad_norm": 0.10547366738319397, "learning_rate": 5.304257734885042e-05, "loss": 0.0012940242886543274, "step": 165500 }, { "epoch": 46.97700823162078, "eval_accuracy": 0.9753926368665352, "eval_loss": 0.08706554025411606, "eval_runtime": 32.7415, "eval_samples_per_second": 480.339, "eval_steps_per_second": 7.513, "step": 165500 }, { "epoch": 46.97984672154414, "grad_norm": 0.35043612122535706, "learning_rate": 5.3039738858927055e-05, "loss": 0.0014933021739125252, "step": 165510 }, { "epoch": 46.9826852114675, "grad_norm": 0.022090183570981026, "learning_rate": 5.30369003690037e-05, "loss": 0.0019676128402352335, "step": 165520 }, { "epoch": 46.98552370139086, "grad_norm": 0.173744797706604, "learning_rate": 5.303406187908033e-05, "loss": 0.005157839506864548, "step": 165530 }, { "epoch": 46.98836219131422, "grad_norm": 0.06979185342788696, "learning_rate": 5.303122338915697e-05, "loss": 0.01020594835281372, "step": 165540 }, { "epoch": 46.99120068123758, "grad_norm": 0.5719161629676819, "learning_rate": 5.302838489923361e-05, "loss": 0.0029745982959866525, "step": 165550 }, { "epoch": 46.99403917116094, "grad_norm": 0.026198362931609154, "learning_rate": 5.302554640931024e-05, "loss": 0.0036046609282493593, "step": 165560 }, { "epoch": 46.996877661084305, "grad_norm": 0.1037336215376854, "learning_rate": 5.30227079193869e-05, "loss": 0.0013667086139321327, "step": 165570 }, { "epoch": 46.99971615100766, "grad_norm": 0.05398159846663475, "learning_rate": 5.301986942946353e-05, "loss": 0.0020139461383223535, "step": 165580 }, { "epoch": 47.002554640931024, "grad_norm": 0.7803266048431396, "learning_rate": 5.3017030939540166e-05, "loss": 0.000467654038220644, "step": 165590 }, { "epoch": 47.00539313085439, "grad_norm": 0.07606060802936554, "learning_rate": 5.301419244961681e-05, "loss": 0.00870707631111145, "step": 165600 }, { "epoch": 47.00823162077775, "grad_norm": 0.29465097188949585, "learning_rate": 5.301135395969344e-05, "loss": 0.0005928380414843559, "step": 165610 }, { "epoch": 47.011070110701105, "grad_norm": 0.029950475320219994, "learning_rate": 5.3008515469770083e-05, "loss": 0.0055006884038448335, "step": 165620 }, { "epoch": 47.01390860062447, "grad_norm": 0.02078227698802948, "learning_rate": 5.300567697984672e-05, "loss": 0.0034839794039726256, "step": 165630 }, { "epoch": 47.01674709054783, "grad_norm": 0.1554749459028244, "learning_rate": 5.3002838489923366e-05, "loss": 0.0033458419144153594, "step": 165640 }, { "epoch": 47.01958558047119, "grad_norm": 0.24589680135250092, "learning_rate": 5.300000000000001e-05, "loss": 0.002386242151260376, "step": 165650 }, { "epoch": 47.02242407039455, "grad_norm": 0.03632420673966408, "learning_rate": 5.299716151007664e-05, "loss": 0.006929624080657959, "step": 165660 }, { "epoch": 47.02526256031791, "grad_norm": 0.04455699026584625, "learning_rate": 5.2994323020153284e-05, "loss": 0.004643542319536209, "step": 165670 }, { "epoch": 47.02810105024127, "grad_norm": 1.5836797952651978, "learning_rate": 5.299148453022992e-05, "loss": 0.000981689803302288, "step": 165680 }, { "epoch": 47.03093954016463, "grad_norm": 0.5580620169639587, "learning_rate": 5.298864604030655e-05, "loss": 0.0017681455239653588, "step": 165690 }, { "epoch": 47.033778030087994, "grad_norm": 10.556929588317871, "learning_rate": 5.29858075503832e-05, "loss": 0.008701587468385697, "step": 165700 }, { "epoch": 47.03661652001136, "grad_norm": 0.32172122597694397, "learning_rate": 5.298296906045984e-05, "loss": 0.00590515248477459, "step": 165710 }, { "epoch": 47.03945500993471, "grad_norm": 0.0861821398139, "learning_rate": 5.298013057053648e-05, "loss": 0.0009194945916533471, "step": 165720 }, { "epoch": 47.042293499858076, "grad_norm": 0.03437894582748413, "learning_rate": 5.297729208061312e-05, "loss": 0.010029523074626923, "step": 165730 }, { "epoch": 47.04513198978144, "grad_norm": 0.05261431261897087, "learning_rate": 5.297445359068975e-05, "loss": 0.0018523328006267547, "step": 165740 }, { "epoch": 47.047970479704794, "grad_norm": 0.28681281208992004, "learning_rate": 5.2971615100766394e-05, "loss": 0.0004867391660809517, "step": 165750 }, { "epoch": 47.05080896962816, "grad_norm": 1.0440691709518433, "learning_rate": 5.296877661084303e-05, "loss": 0.0010393798351287843, "step": 165760 }, { "epoch": 47.05364745955152, "grad_norm": 0.6126929521560669, "learning_rate": 5.296593812091968e-05, "loss": 0.0012228472158312797, "step": 165770 }, { "epoch": 47.056485949474876, "grad_norm": 2.4870898723602295, "learning_rate": 5.296309963099632e-05, "loss": 0.0006401024758815766, "step": 165780 }, { "epoch": 47.05932443939824, "grad_norm": 0.44979774951934814, "learning_rate": 5.296026114107295e-05, "loss": 0.0011499036103487016, "step": 165790 }, { "epoch": 47.0621629293216, "grad_norm": 2.9465341567993164, "learning_rate": 5.295742265114959e-05, "loss": 0.002703012339770794, "step": 165800 }, { "epoch": 47.065001419244965, "grad_norm": 0.06846540421247482, "learning_rate": 5.295458416122623e-05, "loss": 0.0028541328385472296, "step": 165810 }, { "epoch": 47.06783990916832, "grad_norm": 7.641231060028076, "learning_rate": 5.2951745671302863e-05, "loss": 0.012261182069778442, "step": 165820 }, { "epoch": 47.07067839909168, "grad_norm": 0.24076054990291595, "learning_rate": 5.2948907181379505e-05, "loss": 0.0006500562652945518, "step": 165830 }, { "epoch": 47.073516889015046, "grad_norm": 0.2534443736076355, "learning_rate": 5.294606869145615e-05, "loss": 0.00388994887471199, "step": 165840 }, { "epoch": 47.0763553789384, "grad_norm": 0.02348289266228676, "learning_rate": 5.294323020153279e-05, "loss": 0.0010604171082377433, "step": 165850 }, { "epoch": 47.079193868861765, "grad_norm": 0.045930467545986176, "learning_rate": 5.294039171160943e-05, "loss": 0.003213449195027351, "step": 165860 }, { "epoch": 47.08203235878513, "grad_norm": 0.15712912380695343, "learning_rate": 5.2937553221686064e-05, "loss": 0.0009224269539117813, "step": 165870 }, { "epoch": 47.08487084870849, "grad_norm": 0.08416629582643509, "learning_rate": 5.2934714731762705e-05, "loss": 0.0004641583189368248, "step": 165880 }, { "epoch": 47.087709338631846, "grad_norm": 0.3400707244873047, "learning_rate": 5.293187624183934e-05, "loss": 0.0010037094354629517, "step": 165890 }, { "epoch": 47.09054782855521, "grad_norm": 0.4553772509098053, "learning_rate": 5.292903775191599e-05, "loss": 0.006027178093791008, "step": 165900 }, { "epoch": 47.09338631847857, "grad_norm": 0.09822464734315872, "learning_rate": 5.292619926199263e-05, "loss": 0.00034078452736139295, "step": 165910 }, { "epoch": 47.09622480840193, "grad_norm": 0.2871064841747284, "learning_rate": 5.2923360772069264e-05, "loss": 0.0007214108482003212, "step": 165920 }, { "epoch": 47.09906329832529, "grad_norm": 0.04511956125497818, "learning_rate": 5.29205222821459e-05, "loss": 0.000470147468149662, "step": 165930 }, { "epoch": 47.101901788248654, "grad_norm": 0.02885298989713192, "learning_rate": 5.291768379222254e-05, "loss": 0.00041520241647958757, "step": 165940 }, { "epoch": 47.10474027817201, "grad_norm": 0.011977088637650013, "learning_rate": 5.2914845302299174e-05, "loss": 0.0011746549978852272, "step": 165950 }, { "epoch": 47.10757876809537, "grad_norm": 0.4777931272983551, "learning_rate": 5.2912006812375816e-05, "loss": 0.0006107177585363388, "step": 165960 }, { "epoch": 47.110417258018735, "grad_norm": 0.0485212542116642, "learning_rate": 5.2909168322452464e-05, "loss": 0.0011476477608084678, "step": 165970 }, { "epoch": 47.1132557479421, "grad_norm": 0.09256909787654877, "learning_rate": 5.29063298325291e-05, "loss": 0.0006046649068593979, "step": 165980 }, { "epoch": 47.116094237865454, "grad_norm": 0.009375392459332943, "learning_rate": 5.290349134260574e-05, "loss": 0.00045146569609642027, "step": 165990 }, { "epoch": 47.11893272778882, "grad_norm": 0.0645822212100029, "learning_rate": 5.2900652852682374e-05, "loss": 0.004844057559967041, "step": 166000 }, { "epoch": 47.11893272778882, "eval_accuracy": 0.9774273542315762, "eval_loss": 0.0815049335360527, "eval_runtime": 32.477, "eval_samples_per_second": 484.251, "eval_steps_per_second": 7.575, "step": 166000 }, { "epoch": 47.12177121771218, "grad_norm": 0.05515605956315994, "learning_rate": 5.2897814362759016e-05, "loss": 0.0005520680919289589, "step": 166010 }, { "epoch": 47.124609707635535, "grad_norm": 0.1961761862039566, "learning_rate": 5.289497587283565e-05, "loss": 0.0013574207201600074, "step": 166020 }, { "epoch": 47.1274481975589, "grad_norm": 0.08087031543254852, "learning_rate": 5.2892137382912285e-05, "loss": 0.0004737488925457001, "step": 166030 }, { "epoch": 47.13028668748226, "grad_norm": 0.045700203627347946, "learning_rate": 5.288929889298894e-05, "loss": 0.0015544636175036431, "step": 166040 }, { "epoch": 47.13312517740562, "grad_norm": 0.02284938097000122, "learning_rate": 5.2886460403065574e-05, "loss": 0.0004111355170607567, "step": 166050 }, { "epoch": 47.13596366732898, "grad_norm": 0.009912445209920406, "learning_rate": 5.288362191314221e-05, "loss": 0.000979447178542614, "step": 166060 }, { "epoch": 47.13880215725234, "grad_norm": 0.11229164153337479, "learning_rate": 5.288078342321885e-05, "loss": 0.002558627538383007, "step": 166070 }, { "epoch": 47.141640647175706, "grad_norm": 0.1649119108915329, "learning_rate": 5.2877944933295485e-05, "loss": 0.0022402111440896987, "step": 166080 }, { "epoch": 47.14447913709906, "grad_norm": 0.4111264944076538, "learning_rate": 5.2875106443372126e-05, "loss": 0.0012208811938762664, "step": 166090 }, { "epoch": 47.147317627022424, "grad_norm": 0.7173179984092712, "learning_rate": 5.2872267953448775e-05, "loss": 0.0033980872482061385, "step": 166100 }, { "epoch": 47.15015611694579, "grad_norm": 0.014923032373189926, "learning_rate": 5.286942946352541e-05, "loss": 0.0033872973173856733, "step": 166110 }, { "epoch": 47.15299460686914, "grad_norm": 0.07523858547210693, "learning_rate": 5.286659097360205e-05, "loss": 0.0019164461642503738, "step": 166120 }, { "epoch": 47.155833096792506, "grad_norm": 0.6826074719429016, "learning_rate": 5.2863752483678685e-05, "loss": 0.0015787174925208092, "step": 166130 }, { "epoch": 47.15867158671587, "grad_norm": 11.463093757629395, "learning_rate": 5.2860913993755326e-05, "loss": 0.00977133959531784, "step": 166140 }, { "epoch": 47.161510076639225, "grad_norm": 0.1495233178138733, "learning_rate": 5.285807550383196e-05, "loss": 0.014733101427555084, "step": 166150 }, { "epoch": 47.16434856656259, "grad_norm": 0.1507890671491623, "learning_rate": 5.2855237013908596e-05, "loss": 0.007516796886920929, "step": 166160 }, { "epoch": 47.16718705648595, "grad_norm": 3.0286548137664795, "learning_rate": 5.2852398523985244e-05, "loss": 0.0013044342398643495, "step": 166170 }, { "epoch": 47.17002554640931, "grad_norm": 0.7042717337608337, "learning_rate": 5.2849560034061885e-05, "loss": 0.0006618889048695565, "step": 166180 }, { "epoch": 47.17286403633267, "grad_norm": 0.12900876998901367, "learning_rate": 5.284672154413852e-05, "loss": 0.0012743659317493439, "step": 166190 }, { "epoch": 47.17570252625603, "grad_norm": 2.6079959869384766, "learning_rate": 5.284388305421516e-05, "loss": 0.0010534578934311867, "step": 166200 }, { "epoch": 47.178541016179395, "grad_norm": 0.03617927432060242, "learning_rate": 5.2841044564291796e-05, "loss": 0.011403732001781464, "step": 166210 }, { "epoch": 47.18137950610275, "grad_norm": 3.6006979942321777, "learning_rate": 5.283820607436844e-05, "loss": 0.004512877762317657, "step": 166220 }, { "epoch": 47.18421799602611, "grad_norm": 1.7622861862182617, "learning_rate": 5.283536758444507e-05, "loss": 0.00175212100148201, "step": 166230 }, { "epoch": 47.187056485949476, "grad_norm": 1.1863501071929932, "learning_rate": 5.283252909452172e-05, "loss": 0.009258320927619934, "step": 166240 }, { "epoch": 47.18989497587283, "grad_norm": 0.2306436002254486, "learning_rate": 5.282969060459836e-05, "loss": 0.0019975405186414717, "step": 166250 }, { "epoch": 47.192733465796195, "grad_norm": 0.9713077545166016, "learning_rate": 5.2826852114674996e-05, "loss": 0.0034524496644735337, "step": 166260 }, { "epoch": 47.19557195571956, "grad_norm": 0.21769149601459503, "learning_rate": 5.282401362475163e-05, "loss": 0.000939839705824852, "step": 166270 }, { "epoch": 47.19841044564292, "grad_norm": 3.201869249343872, "learning_rate": 5.282117513482827e-05, "loss": 0.002042139321565628, "step": 166280 }, { "epoch": 47.20124893556628, "grad_norm": 0.0459790974855423, "learning_rate": 5.2818336644904906e-05, "loss": 0.008180337399244309, "step": 166290 }, { "epoch": 47.20408742548964, "grad_norm": 0.24088405072689056, "learning_rate": 5.2815498154981555e-05, "loss": 0.001287614554166794, "step": 166300 }, { "epoch": 47.206925915413, "grad_norm": 0.018882904201745987, "learning_rate": 5.2812659665058196e-05, "loss": 0.002986639179289341, "step": 166310 }, { "epoch": 47.20976440533636, "grad_norm": 0.08961990475654602, "learning_rate": 5.280982117513483e-05, "loss": 0.0024691296741366385, "step": 166320 }, { "epoch": 47.21260289525972, "grad_norm": 0.33202579617500305, "learning_rate": 5.280698268521147e-05, "loss": 0.00163177028298378, "step": 166330 }, { "epoch": 47.215441385183084, "grad_norm": 0.057728614658117294, "learning_rate": 5.2804144195288107e-05, "loss": 0.0005844833329319953, "step": 166340 }, { "epoch": 47.21827987510645, "grad_norm": 0.17069363594055176, "learning_rate": 5.280130570536475e-05, "loss": 0.0024190155789256098, "step": 166350 }, { "epoch": 47.2211183650298, "grad_norm": 0.613150417804718, "learning_rate": 5.279846721544138e-05, "loss": 0.002414032816886902, "step": 166360 }, { "epoch": 47.223956854953165, "grad_norm": 0.16664165258407593, "learning_rate": 5.279562872551803e-05, "loss": 0.002566816471517086, "step": 166370 }, { "epoch": 47.22679534487653, "grad_norm": 0.11754713207483292, "learning_rate": 5.279279023559467e-05, "loss": 0.007681527733802795, "step": 166380 }, { "epoch": 47.229633834799884, "grad_norm": 7.869407653808594, "learning_rate": 5.278995174567131e-05, "loss": 0.002509535290300846, "step": 166390 }, { "epoch": 47.23247232472325, "grad_norm": 0.03814363107085228, "learning_rate": 5.278711325574794e-05, "loss": 0.0014111274853348733, "step": 166400 }, { "epoch": 47.23531081464661, "grad_norm": 0.32036903500556946, "learning_rate": 5.278427476582458e-05, "loss": 0.0025612499564886095, "step": 166410 }, { "epoch": 47.238149304569966, "grad_norm": 1.2629543542861938, "learning_rate": 5.278143627590122e-05, "loss": 0.002174685336649418, "step": 166420 }, { "epoch": 47.24098779449333, "grad_norm": 7.14158296585083, "learning_rate": 5.277859778597786e-05, "loss": 0.0019754907116293907, "step": 166430 }, { "epoch": 47.24382628441669, "grad_norm": 0.7406134605407715, "learning_rate": 5.277575929605451e-05, "loss": 0.0006751446053385735, "step": 166440 }, { "epoch": 47.246664774340054, "grad_norm": 0.05451953411102295, "learning_rate": 5.277292080613114e-05, "loss": 0.005029942467808723, "step": 166450 }, { "epoch": 47.24950326426341, "grad_norm": 0.5551103949546814, "learning_rate": 5.277008231620778e-05, "loss": 0.0017206134274601937, "step": 166460 }, { "epoch": 47.25234175418677, "grad_norm": 0.025522135198116302, "learning_rate": 5.276724382628442e-05, "loss": 0.0007320387288928032, "step": 166470 }, { "epoch": 47.255180244110136, "grad_norm": 1.0864540338516235, "learning_rate": 5.276440533636106e-05, "loss": 0.0006345083937048912, "step": 166480 }, { "epoch": 47.25801873403349, "grad_norm": 1.630776286125183, "learning_rate": 5.276156684643769e-05, "loss": 0.0022073639556765556, "step": 166490 }, { "epoch": 47.260857223956855, "grad_norm": 0.1615860015153885, "learning_rate": 5.275872835651434e-05, "loss": 0.001271134801208973, "step": 166500 }, { "epoch": 47.260857223956855, "eval_accuracy": 0.9731035798308642, "eval_loss": 0.09798971563577652, "eval_runtime": 32.3479, "eval_samples_per_second": 486.184, "eval_steps_per_second": 7.605, "step": 166500 }, { "epoch": 47.26369571388022, "grad_norm": 6.387979507446289, "learning_rate": 5.275588986659098e-05, "loss": 0.0245142787694931, "step": 166510 }, { "epoch": 47.26653420380357, "grad_norm": 1.0076404809951782, "learning_rate": 5.275305137666762e-05, "loss": 0.0019852887839078904, "step": 166520 }, { "epoch": 47.269372693726936, "grad_norm": 0.1098475456237793, "learning_rate": 5.275021288674425e-05, "loss": 0.0047672510147094725, "step": 166530 }, { "epoch": 47.2722111836503, "grad_norm": 0.6762949824333191, "learning_rate": 5.2747374396820893e-05, "loss": 0.0014756165444850921, "step": 166540 }, { "epoch": 47.27504967357366, "grad_norm": 0.10568742454051971, "learning_rate": 5.274453590689753e-05, "loss": 0.007734911143779754, "step": 166550 }, { "epoch": 47.27788816349702, "grad_norm": 0.2346913367509842, "learning_rate": 5.274169741697417e-05, "loss": 0.001791946031153202, "step": 166560 }, { "epoch": 47.28072665342038, "grad_norm": 0.1376802921295166, "learning_rate": 5.273885892705082e-05, "loss": 0.003552095964550972, "step": 166570 }, { "epoch": 47.28356514334374, "grad_norm": 0.4562109410762787, "learning_rate": 5.273602043712745e-05, "loss": 0.0028885962441563607, "step": 166580 }, { "epoch": 47.2864036332671, "grad_norm": 2.786576509475708, "learning_rate": 5.2733181947204093e-05, "loss": 0.0011640481650829315, "step": 166590 }, { "epoch": 47.28924212319046, "grad_norm": 0.5501305460929871, "learning_rate": 5.273034345728073e-05, "loss": 0.0067717239260673525, "step": 166600 }, { "epoch": 47.292080613113825, "grad_norm": 0.04443579912185669, "learning_rate": 5.272750496735737e-05, "loss": 0.0014348624274134636, "step": 166610 }, { "epoch": 47.29491910303718, "grad_norm": 0.6530584096908569, "learning_rate": 5.2724666477434004e-05, "loss": 0.00040276926010847093, "step": 166620 }, { "epoch": 47.297757592960544, "grad_norm": 2.501415729522705, "learning_rate": 5.272182798751064e-05, "loss": 0.0007356952875852585, "step": 166630 }, { "epoch": 47.30059608288391, "grad_norm": 7.387036323547363, "learning_rate": 5.271898949758729e-05, "loss": 0.0021438641473650933, "step": 166640 }, { "epoch": 47.30343457280727, "grad_norm": 0.08627983927726746, "learning_rate": 5.271615100766393e-05, "loss": 0.0011327279731631278, "step": 166650 }, { "epoch": 47.306273062730625, "grad_norm": 6.514647006988525, "learning_rate": 5.271331251774056e-05, "loss": 0.0016768177971243857, "step": 166660 }, { "epoch": 47.30911155265399, "grad_norm": 0.4373135566711426, "learning_rate": 5.2710474027817204e-05, "loss": 0.0006712866947054863, "step": 166670 }, { "epoch": 47.31195004257735, "grad_norm": 0.023332899436354637, "learning_rate": 5.270763553789384e-05, "loss": 0.002923675626516342, "step": 166680 }, { "epoch": 47.31478853250071, "grad_norm": 1.7862659692764282, "learning_rate": 5.270479704797048e-05, "loss": 0.010661233961582185, "step": 166690 }, { "epoch": 47.31762702242407, "grad_norm": 0.8820787668228149, "learning_rate": 5.270195855804713e-05, "loss": 0.0009172948077321053, "step": 166700 }, { "epoch": 47.32046551234743, "grad_norm": 0.9722907543182373, "learning_rate": 5.269912006812376e-05, "loss": 0.0009654708206653595, "step": 166710 }, { "epoch": 47.323304002270795, "grad_norm": 4.111182689666748, "learning_rate": 5.2696281578200404e-05, "loss": 0.00997893065214157, "step": 166720 }, { "epoch": 47.32614249219415, "grad_norm": 2.895232915878296, "learning_rate": 5.269344308827704e-05, "loss": 0.0013338617980480194, "step": 166730 }, { "epoch": 47.328980982117514, "grad_norm": 0.9905418753623962, "learning_rate": 5.2690604598353673e-05, "loss": 0.002423496171832085, "step": 166740 }, { "epoch": 47.33181947204088, "grad_norm": 0.2844701409339905, "learning_rate": 5.2687766108430315e-05, "loss": 0.0046717777848243715, "step": 166750 }, { "epoch": 47.33465796196423, "grad_norm": 0.034229766577482224, "learning_rate": 5.268492761850695e-05, "loss": 0.000941377878189087, "step": 166760 }, { "epoch": 47.337496451887596, "grad_norm": 0.16480103135108948, "learning_rate": 5.26820891285836e-05, "loss": 0.007043737918138504, "step": 166770 }, { "epoch": 47.34033494181096, "grad_norm": 0.052474845200777054, "learning_rate": 5.267925063866024e-05, "loss": 0.002284706011414528, "step": 166780 }, { "epoch": 47.343173431734314, "grad_norm": 0.022756602615118027, "learning_rate": 5.2676412148736874e-05, "loss": 0.00041964594274759295, "step": 166790 }, { "epoch": 47.34601192165768, "grad_norm": 0.08518175035715103, "learning_rate": 5.2673573658813515e-05, "loss": 0.00022263973951339722, "step": 166800 }, { "epoch": 47.34885041158104, "grad_norm": 0.68224036693573, "learning_rate": 5.267073516889015e-05, "loss": 0.005037920176982879, "step": 166810 }, { "epoch": 47.3516889015044, "grad_norm": 0.0067217121832072735, "learning_rate": 5.266789667896679e-05, "loss": 0.0004578862339258194, "step": 166820 }, { "epoch": 47.35452739142776, "grad_norm": 0.016836734488606453, "learning_rate": 5.266505818904344e-05, "loss": 0.0023566238582134248, "step": 166830 }, { "epoch": 47.35736588135112, "grad_norm": 0.4242056608200073, "learning_rate": 5.2662219699120074e-05, "loss": 0.01018211543560028, "step": 166840 }, { "epoch": 47.360204371274484, "grad_norm": 0.4246620237827301, "learning_rate": 5.2659381209196715e-05, "loss": 0.0006241114810109138, "step": 166850 }, { "epoch": 47.36304286119784, "grad_norm": 0.5629755258560181, "learning_rate": 5.265654271927335e-05, "loss": 0.0011145751923322678, "step": 166860 }, { "epoch": 47.3658813511212, "grad_norm": 0.17603833973407745, "learning_rate": 5.2653704229349984e-05, "loss": 0.0015360020101070405, "step": 166870 }, { "epoch": 47.368719841044566, "grad_norm": 0.12736016511917114, "learning_rate": 5.2650865739426626e-05, "loss": 0.000812043622136116, "step": 166880 }, { "epoch": 47.37155833096792, "grad_norm": 16.477394104003906, "learning_rate": 5.264802724950326e-05, "loss": 0.004146634414792061, "step": 166890 }, { "epoch": 47.374396820891285, "grad_norm": 0.057493966072797775, "learning_rate": 5.264518875957991e-05, "loss": 0.000408392958343029, "step": 166900 }, { "epoch": 47.37723531081465, "grad_norm": 0.21680188179016113, "learning_rate": 5.264235026965655e-05, "loss": 0.0022554801777005195, "step": 166910 }, { "epoch": 47.38007380073801, "grad_norm": 0.2876989543437958, "learning_rate": 5.2639511779733184e-05, "loss": 0.0005332782864570617, "step": 166920 }, { "epoch": 47.382912290661366, "grad_norm": 1.8345699310302734, "learning_rate": 5.2636673289809826e-05, "loss": 0.0006250036880373955, "step": 166930 }, { "epoch": 47.38575078058473, "grad_norm": 0.32941848039627075, "learning_rate": 5.263411864887879e-05, "loss": 0.0045709840953350065, "step": 166940 }, { "epoch": 47.38858927050809, "grad_norm": 1.108206868171692, "learning_rate": 5.263128015895544e-05, "loss": 0.002474643290042877, "step": 166950 }, { "epoch": 47.39142776043145, "grad_norm": 0.026148110628128052, "learning_rate": 5.262844166903208e-05, "loss": 0.0009128781035542488, "step": 166960 }, { "epoch": 47.39426625035481, "grad_norm": 1.4909217357635498, "learning_rate": 5.2625603179108716e-05, "loss": 0.0008707690984010696, "step": 166970 }, { "epoch": 47.39710474027817, "grad_norm": 0.10078208148479462, "learning_rate": 5.262276468918536e-05, "loss": 0.0038687162101268767, "step": 166980 }, { "epoch": 47.39994323020153, "grad_norm": 0.08496785163879395, "learning_rate": 5.261992619926199e-05, "loss": 0.003842758387327194, "step": 166990 }, { "epoch": 47.40278172012489, "grad_norm": 0.09671993553638458, "learning_rate": 5.2617087709338634e-05, "loss": 0.0009970778599381446, "step": 167000 }, { "epoch": 47.40278172012489, "eval_accuracy": 0.9780632034081516, "eval_loss": 0.07823197543621063, "eval_runtime": 32.3913, "eval_samples_per_second": 485.531, "eval_steps_per_second": 7.595, "step": 167000 }, { "epoch": 47.405620210048255, "grad_norm": 0.7634956240653992, "learning_rate": 5.261424921941527e-05, "loss": 0.0007111424580216408, "step": 167010 }, { "epoch": 47.40845869997162, "grad_norm": 3.878443956375122, "learning_rate": 5.2611410729491916e-05, "loss": 0.003624328598380089, "step": 167020 }, { "epoch": 47.411297189894974, "grad_norm": 0.12696479260921478, "learning_rate": 5.260857223956856e-05, "loss": 0.003395608067512512, "step": 167030 }, { "epoch": 47.41413567981834, "grad_norm": 0.2264775037765503, "learning_rate": 5.260573374964519e-05, "loss": 0.001472318172454834, "step": 167040 }, { "epoch": 47.4169741697417, "grad_norm": 0.4618097245693207, "learning_rate": 5.260289525972183e-05, "loss": 0.0015398543328046799, "step": 167050 }, { "epoch": 47.419812659665055, "grad_norm": 0.7450781464576721, "learning_rate": 5.260005676979847e-05, "loss": 0.001631312258541584, "step": 167060 }, { "epoch": 47.42265114958842, "grad_norm": 0.10932065546512604, "learning_rate": 5.25972182798751e-05, "loss": 0.0033196743577718736, "step": 167070 }, { "epoch": 47.42548963951178, "grad_norm": 0.1365431249141693, "learning_rate": 5.259437978995175e-05, "loss": 0.0022184979170560837, "step": 167080 }, { "epoch": 47.428328129435144, "grad_norm": 0.14188408851623535, "learning_rate": 5.259154130002839e-05, "loss": 0.0033716864883899687, "step": 167090 }, { "epoch": 47.4311666193585, "grad_norm": 0.07446951419115067, "learning_rate": 5.258870281010503e-05, "loss": 0.0005040997639298439, "step": 167100 }, { "epoch": 47.43400510928186, "grad_norm": 0.15657943487167358, "learning_rate": 5.258586432018167e-05, "loss": 0.0006972922012209893, "step": 167110 }, { "epoch": 47.436843599205226, "grad_norm": 0.07956122606992722, "learning_rate": 5.25830258302583e-05, "loss": 0.0031499117612838745, "step": 167120 }, { "epoch": 47.43968208912858, "grad_norm": 0.5864604711532593, "learning_rate": 5.2580187340334944e-05, "loss": 0.0006696803495287896, "step": 167130 }, { "epoch": 47.442520579051944, "grad_norm": 0.05199510604143143, "learning_rate": 5.257734885041158e-05, "loss": 0.00045557301491498946, "step": 167140 }, { "epoch": 47.44535906897531, "grad_norm": 1.0746148824691772, "learning_rate": 5.257451036048823e-05, "loss": 0.0006883697584271431, "step": 167150 }, { "epoch": 47.44819755889866, "grad_norm": 0.007694663945585489, "learning_rate": 5.257167187056486e-05, "loss": 0.00048616491258144376, "step": 167160 }, { "epoch": 47.451036048822026, "grad_norm": 0.2054125964641571, "learning_rate": 5.25688333806415e-05, "loss": 0.00030087288469076156, "step": 167170 }, { "epoch": 47.45387453874539, "grad_norm": 0.23879587650299072, "learning_rate": 5.256599489071814e-05, "loss": 0.0003641139715909958, "step": 167180 }, { "epoch": 47.45671302866875, "grad_norm": 0.10503577440977097, "learning_rate": 5.256315640079478e-05, "loss": 0.001046133041381836, "step": 167190 }, { "epoch": 47.45955151859211, "grad_norm": 0.2110137939453125, "learning_rate": 5.2560317910871414e-05, "loss": 0.0010547686368227005, "step": 167200 }, { "epoch": 47.46239000851547, "grad_norm": 0.7565997242927551, "learning_rate": 5.255747942094806e-05, "loss": 0.0018064182251691819, "step": 167210 }, { "epoch": 47.46522849843883, "grad_norm": 0.048939298838377, "learning_rate": 5.25546409310247e-05, "loss": 0.0009963680058717728, "step": 167220 }, { "epoch": 47.46806698836219, "grad_norm": 0.12362924218177795, "learning_rate": 5.255180244110134e-05, "loss": 0.0016524802893400192, "step": 167230 }, { "epoch": 47.47090547828555, "grad_norm": 0.05248033255338669, "learning_rate": 5.254896395117798e-05, "loss": 0.0037679560482501984, "step": 167240 }, { "epoch": 47.473743968208915, "grad_norm": 0.34650927782058716, "learning_rate": 5.2546125461254614e-05, "loss": 0.00043793749064207076, "step": 167250 }, { "epoch": 47.47658245813227, "grad_norm": 0.7219845056533813, "learning_rate": 5.254328697133125e-05, "loss": 0.0063042744994163515, "step": 167260 }, { "epoch": 47.47942094805563, "grad_norm": 0.0650295838713646, "learning_rate": 5.254044848140789e-05, "loss": 0.0011741900816559792, "step": 167270 }, { "epoch": 47.482259437978996, "grad_norm": 0.21481147408485413, "learning_rate": 5.253760999148454e-05, "loss": 0.001363087072968483, "step": 167280 }, { "epoch": 47.48509792790236, "grad_norm": 0.19698317348957062, "learning_rate": 5.253477150156117e-05, "loss": 0.005024828761816025, "step": 167290 }, { "epoch": 47.487936417825715, "grad_norm": 0.31931695342063904, "learning_rate": 5.2531933011637814e-05, "loss": 0.0033177703619003294, "step": 167300 }, { "epoch": 47.49077490774908, "grad_norm": 2.6678307056427, "learning_rate": 5.252909452171445e-05, "loss": 0.005327405780553818, "step": 167310 }, { "epoch": 47.49361339767244, "grad_norm": 3.221245288848877, "learning_rate": 5.252625603179109e-05, "loss": 0.002242329344153404, "step": 167320 }, { "epoch": 47.496451887595796, "grad_norm": 0.07731877267360687, "learning_rate": 5.2523417541867724e-05, "loss": 0.0011165473610162736, "step": 167330 }, { "epoch": 47.49929037751916, "grad_norm": 0.1471671760082245, "learning_rate": 5.2520579051944366e-05, "loss": 0.0007773425430059433, "step": 167340 }, { "epoch": 47.50212886744252, "grad_norm": 0.31566593050956726, "learning_rate": 5.2517740562021014e-05, "loss": 0.001333497278392315, "step": 167350 }, { "epoch": 47.50496735736588, "grad_norm": 0.20246922969818115, "learning_rate": 5.251490207209765e-05, "loss": 0.0033161580562591554, "step": 167360 }, { "epoch": 47.50780584728924, "grad_norm": 0.1592196822166443, "learning_rate": 5.251206358217429e-05, "loss": 0.003666132688522339, "step": 167370 }, { "epoch": 47.510644337212604, "grad_norm": 0.045315347611904144, "learning_rate": 5.2509225092250924e-05, "loss": 0.009665761888027192, "step": 167380 }, { "epoch": 47.51348282713597, "grad_norm": 0.028198417276144028, "learning_rate": 5.250638660232756e-05, "loss": 0.0010032622143626214, "step": 167390 }, { "epoch": 47.51632131705932, "grad_norm": 18.67765235900879, "learning_rate": 5.25035481124042e-05, "loss": 0.009872537106275558, "step": 167400 }, { "epoch": 47.519159806982685, "grad_norm": 0.10111254453659058, "learning_rate": 5.250070962248085e-05, "loss": 0.0007732365280389786, "step": 167410 }, { "epoch": 47.52199829690605, "grad_norm": 1.6557422876358032, "learning_rate": 5.249787113255748e-05, "loss": 0.0020810745656490324, "step": 167420 }, { "epoch": 47.524836786829404, "grad_norm": 0.1193055659532547, "learning_rate": 5.2495032642634125e-05, "loss": 0.0017474636435508728, "step": 167430 }, { "epoch": 47.52767527675277, "grad_norm": 0.15565578639507294, "learning_rate": 5.249219415271076e-05, "loss": 0.0027186179533600805, "step": 167440 }, { "epoch": 47.53051376667613, "grad_norm": 0.8586099147796631, "learning_rate": 5.24893556627874e-05, "loss": 0.0034389063715934754, "step": 167450 }, { "epoch": 47.533352256599485, "grad_norm": 3.602588415145874, "learning_rate": 5.2486517172864035e-05, "loss": 0.004334207624197006, "step": 167460 }, { "epoch": 47.53619074652285, "grad_norm": 0.6245102882385254, "learning_rate": 5.2483678682940677e-05, "loss": 0.0024789247661828996, "step": 167470 }, { "epoch": 47.53902923644621, "grad_norm": 0.03193337097764015, "learning_rate": 5.2480840193017325e-05, "loss": 0.0022315897047519686, "step": 167480 }, { "epoch": 47.541867726369574, "grad_norm": 0.7624863386154175, "learning_rate": 5.247800170309396e-05, "loss": 0.00043298304080963135, "step": 167490 }, { "epoch": 47.54470621629293, "grad_norm": 0.08291812986135483, "learning_rate": 5.2475163213170594e-05, "loss": 0.007259470224380493, "step": 167500 }, { "epoch": 47.54470621629293, "eval_accuracy": 0.9761556558784257, "eval_loss": 0.08670477569103241, "eval_runtime": 32.1733, "eval_samples_per_second": 488.822, "eval_steps_per_second": 7.646, "step": 167500 }, { "epoch": 47.54754470621629, "grad_norm": 0.23990261554718018, "learning_rate": 5.2472324723247235e-05, "loss": 0.0005029913038015366, "step": 167510 }, { "epoch": 47.550383196139656, "grad_norm": 0.04974672198295593, "learning_rate": 5.246948623332387e-05, "loss": 0.0020073408260941504, "step": 167520 }, { "epoch": 47.55322168606301, "grad_norm": 0.14446726441383362, "learning_rate": 5.246664774340051e-05, "loss": 0.003113732486963272, "step": 167530 }, { "epoch": 47.556060175986374, "grad_norm": 0.1731187403202057, "learning_rate": 5.2463809253477146e-05, "loss": 0.0013014884665608407, "step": 167540 }, { "epoch": 47.55889866590974, "grad_norm": 0.9669291377067566, "learning_rate": 5.2460970763553794e-05, "loss": 0.0069547370076179504, "step": 167550 }, { "epoch": 47.5617371558331, "grad_norm": 0.2419898808002472, "learning_rate": 5.2458132273630435e-05, "loss": 0.0009422214701771736, "step": 167560 }, { "epoch": 47.564575645756456, "grad_norm": 0.01126257423311472, "learning_rate": 5.245529378370707e-05, "loss": 0.0006084395572543144, "step": 167570 }, { "epoch": 47.56741413567982, "grad_norm": 0.09846566617488861, "learning_rate": 5.245245529378371e-05, "loss": 0.012254256010055541, "step": 167580 }, { "epoch": 47.57025262560318, "grad_norm": 0.3159501850605011, "learning_rate": 5.2449616803860346e-05, "loss": 0.00090787373483181, "step": 167590 }, { "epoch": 47.57309111552654, "grad_norm": 10.154810905456543, "learning_rate": 5.244677831393699e-05, "loss": 0.0022215677425265314, "step": 167600 }, { "epoch": 47.5759296054499, "grad_norm": 1.2654898166656494, "learning_rate": 5.2443939824013635e-05, "loss": 0.0015854045748710632, "step": 167610 }, { "epoch": 47.57876809537326, "grad_norm": 1.8486334085464478, "learning_rate": 5.244110133409027e-05, "loss": 0.006145539879798889, "step": 167620 }, { "epoch": 47.58160658529662, "grad_norm": 3.197897434234619, "learning_rate": 5.2438262844166905e-05, "loss": 0.0019372975453734397, "step": 167630 }, { "epoch": 47.58444507521998, "grad_norm": 0.05604947358369827, "learning_rate": 5.2435424354243546e-05, "loss": 0.0009148495271801948, "step": 167640 }, { "epoch": 47.587283565143345, "grad_norm": 0.025449348613619804, "learning_rate": 5.243258586432018e-05, "loss": 0.0012341614812612534, "step": 167650 }, { "epoch": 47.59012205506671, "grad_norm": 0.0940377339720726, "learning_rate": 5.242974737439682e-05, "loss": 0.001336769200861454, "step": 167660 }, { "epoch": 47.59296054499006, "grad_norm": 0.39495372772216797, "learning_rate": 5.2426908884473457e-05, "loss": 0.0012563586235046387, "step": 167670 }, { "epoch": 47.595799034913426, "grad_norm": 0.5860205888748169, "learning_rate": 5.2424070394550105e-05, "loss": 0.0007381979376077652, "step": 167680 }, { "epoch": 47.59863752483679, "grad_norm": 0.046888090670108795, "learning_rate": 5.2421231904626746e-05, "loss": 0.0012173319235444068, "step": 167690 }, { "epoch": 47.601476014760145, "grad_norm": 0.02788430266082287, "learning_rate": 5.241839341470338e-05, "loss": 0.0017235185950994492, "step": 167700 }, { "epoch": 47.60431450468351, "grad_norm": 0.16087184846401215, "learning_rate": 5.241555492478002e-05, "loss": 0.013849596679210662, "step": 167710 }, { "epoch": 47.60715299460687, "grad_norm": 0.045793380588293076, "learning_rate": 5.241271643485666e-05, "loss": 0.011382525414228439, "step": 167720 }, { "epoch": 47.60999148453023, "grad_norm": 16.612462997436523, "learning_rate": 5.240987794493329e-05, "loss": 0.015231046080589294, "step": 167730 }, { "epoch": 47.61282997445359, "grad_norm": 0.14068502187728882, "learning_rate": 5.240703945500993e-05, "loss": 0.0008405828848481178, "step": 167740 }, { "epoch": 47.61566846437695, "grad_norm": 6.106640338897705, "learning_rate": 5.240420096508658e-05, "loss": 0.0014650883153080941, "step": 167750 }, { "epoch": 47.618506954300315, "grad_norm": 0.624684751033783, "learning_rate": 5.2401362475163215e-05, "loss": 0.0007203679531812667, "step": 167760 }, { "epoch": 47.62134544422367, "grad_norm": 0.10901641100645065, "learning_rate": 5.239852398523986e-05, "loss": 0.0009631568565964699, "step": 167770 }, { "epoch": 47.624183934147034, "grad_norm": 7.982468605041504, "learning_rate": 5.239568549531649e-05, "loss": 0.003823201358318329, "step": 167780 }, { "epoch": 47.6270224240704, "grad_norm": 0.06653553992509842, "learning_rate": 5.239284700539313e-05, "loss": 0.017026133835315704, "step": 167790 }, { "epoch": 47.62986091399375, "grad_norm": 0.10966093838214874, "learning_rate": 5.239000851546977e-05, "loss": 0.0029721369966864584, "step": 167800 }, { "epoch": 47.632699403917115, "grad_norm": 0.02199809066951275, "learning_rate": 5.2387170025546416e-05, "loss": 0.01081976741552353, "step": 167810 }, { "epoch": 47.63553789384048, "grad_norm": 2.0907764434814453, "learning_rate": 5.238433153562306e-05, "loss": 0.005662202462553978, "step": 167820 }, { "epoch": 47.63837638376384, "grad_norm": 3.259350538253784, "learning_rate": 5.238149304569969e-05, "loss": 0.01005442589521408, "step": 167830 }, { "epoch": 47.6412148736872, "grad_norm": 0.15806244313716888, "learning_rate": 5.237865455577633e-05, "loss": 0.0011510159820318222, "step": 167840 }, { "epoch": 47.64405336361056, "grad_norm": 0.5399385690689087, "learning_rate": 5.237581606585297e-05, "loss": 0.0056302003562450405, "step": 167850 }, { "epoch": 47.64689185353392, "grad_norm": 1.4807854890823364, "learning_rate": 5.23729775759296e-05, "loss": 0.0017160283401608466, "step": 167860 }, { "epoch": 47.64973034345728, "grad_norm": 0.19436568021774292, "learning_rate": 5.2370139086006243e-05, "loss": 0.001097014546394348, "step": 167870 }, { "epoch": 47.65256883338064, "grad_norm": 0.1556328386068344, "learning_rate": 5.236730059608289e-05, "loss": 0.006869234889745712, "step": 167880 }, { "epoch": 47.655407323304004, "grad_norm": 1.1442910432815552, "learning_rate": 5.2364462106159526e-05, "loss": 0.0029334781691432, "step": 167890 }, { "epoch": 47.65824581322736, "grad_norm": 0.3498494625091553, "learning_rate": 5.236162361623617e-05, "loss": 0.0010056469589471817, "step": 167900 }, { "epoch": 47.66108430315072, "grad_norm": 0.03680235147476196, "learning_rate": 5.23587851263128e-05, "loss": 0.0005800355225801468, "step": 167910 }, { "epoch": 47.663922793074086, "grad_norm": 0.16412007808685303, "learning_rate": 5.2355946636389444e-05, "loss": 0.0003994446247816086, "step": 167920 }, { "epoch": 47.66676128299745, "grad_norm": 0.011415796354413033, "learning_rate": 5.235310814646608e-05, "loss": 0.0005628641694784165, "step": 167930 }, { "epoch": 47.669599772920805, "grad_norm": 0.21112807095050812, "learning_rate": 5.235026965654272e-05, "loss": 0.006454319506883621, "step": 167940 }, { "epoch": 47.67243826284417, "grad_norm": 0.6781848073005676, "learning_rate": 5.234743116661937e-05, "loss": 0.007819818705320359, "step": 167950 }, { "epoch": 47.67527675276753, "grad_norm": 1.879334807395935, "learning_rate": 5.2344592676696e-05, "loss": 0.0011822026222944259, "step": 167960 }, { "epoch": 47.678115242690886, "grad_norm": 0.05251937359571457, "learning_rate": 5.234175418677264e-05, "loss": 0.0031770091503858567, "step": 167970 }, { "epoch": 47.68095373261425, "grad_norm": 1.402992606163025, "learning_rate": 5.233891569684928e-05, "loss": 0.0007762029767036438, "step": 167980 }, { "epoch": 47.68379222253761, "grad_norm": 1.045709490776062, "learning_rate": 5.233607720692591e-05, "loss": 0.0005439357832074165, "step": 167990 }, { "epoch": 47.68663071246097, "grad_norm": 0.02092956192791462, "learning_rate": 5.2333238717002554e-05, "loss": 0.002361937612295151, "step": 168000 }, { "epoch": 47.68663071246097, "eval_accuracy": 0.9783175430787817, "eval_loss": 0.07708636671304703, "eval_runtime": 32.529, "eval_samples_per_second": 483.476, "eval_steps_per_second": 7.562, "step": 168000 }, { "epoch": 47.68946920238433, "grad_norm": 0.3399779200553894, "learning_rate": 5.23304002270792e-05, "loss": 0.00028940942138433456, "step": 168010 }, { "epoch": 47.69230769230769, "grad_norm": 0.020810747519135475, "learning_rate": 5.232756173715584e-05, "loss": 0.0006418092176318169, "step": 168020 }, { "epoch": 47.695146182231056, "grad_norm": 0.03103160485625267, "learning_rate": 5.232472324723248e-05, "loss": 0.000648314505815506, "step": 168030 }, { "epoch": 47.69798467215441, "grad_norm": 0.030476439744234085, "learning_rate": 5.232188475730911e-05, "loss": 0.00026870854198932646, "step": 168040 }, { "epoch": 47.700823162077775, "grad_norm": 5.814699649810791, "learning_rate": 5.2319046267385754e-05, "loss": 0.0016576172783970833, "step": 168050 }, { "epoch": 47.70366165200114, "grad_norm": 0.09303194284439087, "learning_rate": 5.231620777746239e-05, "loss": 0.0017091328278183936, "step": 168060 }, { "epoch": 47.706500141924494, "grad_norm": 12.274508476257324, "learning_rate": 5.231336928753903e-05, "loss": 0.004623696953058243, "step": 168070 }, { "epoch": 47.70933863184786, "grad_norm": 0.24436743557453156, "learning_rate": 5.231053079761568e-05, "loss": 0.004368770122528076, "step": 168080 }, { "epoch": 47.71217712177122, "grad_norm": 6.048383712768555, "learning_rate": 5.230769230769231e-05, "loss": 0.0047577288001775745, "step": 168090 }, { "epoch": 47.715015611694575, "grad_norm": 0.010169700719416142, "learning_rate": 5.230485381776895e-05, "loss": 0.006274067610502243, "step": 168100 }, { "epoch": 47.71785410161794, "grad_norm": 1.0150330066680908, "learning_rate": 5.230201532784559e-05, "loss": 0.0009244069457054138, "step": 168110 }, { "epoch": 47.7206925915413, "grad_norm": 0.14437630772590637, "learning_rate": 5.2299176837922224e-05, "loss": 0.0009477993473410607, "step": 168120 }, { "epoch": 47.723531081464664, "grad_norm": 11.315572738647461, "learning_rate": 5.229662219699121e-05, "loss": 0.01503133773803711, "step": 168130 }, { "epoch": 47.72636957138802, "grad_norm": 0.1693817675113678, "learning_rate": 5.2293783707067845e-05, "loss": 0.001133992150425911, "step": 168140 }, { "epoch": 47.72920806131138, "grad_norm": 16.55990219116211, "learning_rate": 5.229094521714448e-05, "loss": 0.00455244705080986, "step": 168150 }, { "epoch": 47.732046551234745, "grad_norm": 0.017984464764595032, "learning_rate": 5.228810672722112e-05, "loss": 0.0014146020635962485, "step": 168160 }, { "epoch": 47.7348850411581, "grad_norm": 0.028887253254652023, "learning_rate": 5.2285268237297755e-05, "loss": 0.0017999103292822838, "step": 168170 }, { "epoch": 47.737723531081464, "grad_norm": 0.09977608919143677, "learning_rate": 5.22824297473744e-05, "loss": 0.0006079711019992828, "step": 168180 }, { "epoch": 47.74056202100483, "grad_norm": 0.15819256007671356, "learning_rate": 5.2279591257451045e-05, "loss": 0.002408851683139801, "step": 168190 }, { "epoch": 47.74340051092818, "grad_norm": 0.05671108141541481, "learning_rate": 5.227675276752768e-05, "loss": 0.001308099552989006, "step": 168200 }, { "epoch": 47.746239000851546, "grad_norm": 0.2843754291534424, "learning_rate": 5.227391427760432e-05, "loss": 0.007566286623477936, "step": 168210 }, { "epoch": 47.74907749077491, "grad_norm": 0.20854462683200836, "learning_rate": 5.2271075787680956e-05, "loss": 0.0013257438316941262, "step": 168220 }, { "epoch": 47.75191598069827, "grad_norm": 0.7090956568717957, "learning_rate": 5.22682372977576e-05, "loss": 0.005258341878652572, "step": 168230 }, { "epoch": 47.75475447062163, "grad_norm": 0.012205834500491619, "learning_rate": 5.226539880783423e-05, "loss": 0.0010363118723034858, "step": 168240 }, { "epoch": 47.75759296054499, "grad_norm": 0.01964578591287136, "learning_rate": 5.2262560317910866e-05, "loss": 0.0006534801796078682, "step": 168250 }, { "epoch": 47.76043145046835, "grad_norm": 0.04396224021911621, "learning_rate": 5.225972182798752e-05, "loss": 0.0006730172783136368, "step": 168260 }, { "epoch": 47.76326994039171, "grad_norm": 2.2706873416900635, "learning_rate": 5.2256883338064156e-05, "loss": 0.000860372930765152, "step": 168270 }, { "epoch": 47.76610843031507, "grad_norm": 0.499252587556839, "learning_rate": 5.225404484814079e-05, "loss": 0.0005581287667155265, "step": 168280 }, { "epoch": 47.768946920238434, "grad_norm": 10.355218887329102, "learning_rate": 5.225120635821743e-05, "loss": 0.003390992432832718, "step": 168290 }, { "epoch": 47.7717854101618, "grad_norm": 0.3579513430595398, "learning_rate": 5.2248367868294066e-05, "loss": 0.002184603177011013, "step": 168300 }, { "epoch": 47.77462390008515, "grad_norm": 0.030845995992422104, "learning_rate": 5.224552937837071e-05, "loss": 0.0020654793828725816, "step": 168310 }, { "epoch": 47.777462390008516, "grad_norm": 0.2472856044769287, "learning_rate": 5.224269088844734e-05, "loss": 0.0015220526605844499, "step": 168320 }, { "epoch": 47.78030087993188, "grad_norm": 0.1597907692193985, "learning_rate": 5.223985239852399e-05, "loss": 0.005758265405893326, "step": 168330 }, { "epoch": 47.783139369855235, "grad_norm": 0.032328344881534576, "learning_rate": 5.223701390860063e-05, "loss": 0.005417917668819427, "step": 168340 }, { "epoch": 47.7859778597786, "grad_norm": 0.2601768374443054, "learning_rate": 5.2234175418677266e-05, "loss": 0.0005021145567297936, "step": 168350 }, { "epoch": 47.78881634970196, "grad_norm": 0.043230459094047546, "learning_rate": 5.223133692875391e-05, "loss": 0.00043454207479953766, "step": 168360 }, { "epoch": 47.791654839625316, "grad_norm": 0.06415538489818573, "learning_rate": 5.222849843883054e-05, "loss": 0.000359492190182209, "step": 168370 }, { "epoch": 47.79449332954868, "grad_norm": 0.13824020326137543, "learning_rate": 5.222565994890718e-05, "loss": 0.00033246222883462904, "step": 168380 }, { "epoch": 47.79733181947204, "grad_norm": 0.01076455321162939, "learning_rate": 5.2222821458983825e-05, "loss": 0.003440087288618088, "step": 168390 }, { "epoch": 47.800170309395405, "grad_norm": 0.4593620002269745, "learning_rate": 5.2219982969060466e-05, "loss": 0.000251116044819355, "step": 168400 }, { "epoch": 47.80300879931876, "grad_norm": 0.11444472521543503, "learning_rate": 5.22171444791371e-05, "loss": 0.0006476037204265594, "step": 168410 }, { "epoch": 47.805847289242124, "grad_norm": 0.0401892364025116, "learning_rate": 5.221430598921374e-05, "loss": 0.00023717451840639115, "step": 168420 }, { "epoch": 47.80868577916549, "grad_norm": 0.07347897440195084, "learning_rate": 5.221146749929038e-05, "loss": 0.0036431849002838135, "step": 168430 }, { "epoch": 47.81152426908884, "grad_norm": 2.086364507675171, "learning_rate": 5.220862900936702e-05, "loss": 0.0055583551526069645, "step": 168440 }, { "epoch": 47.814362759012205, "grad_norm": 0.22393475472927094, "learning_rate": 5.220579051944365e-05, "loss": 0.0014174820855259896, "step": 168450 }, { "epoch": 47.81720124893557, "grad_norm": 3.2646610736846924, "learning_rate": 5.22029520295203e-05, "loss": 0.003411164879798889, "step": 168460 }, { "epoch": 47.820039738858924, "grad_norm": 0.07020312547683716, "learning_rate": 5.220011353959694e-05, "loss": 0.000320008397102356, "step": 168470 }, { "epoch": 47.82287822878229, "grad_norm": 0.5748687982559204, "learning_rate": 5.219727504967358e-05, "loss": 0.002785246819257736, "step": 168480 }, { "epoch": 47.82571671870565, "grad_norm": 0.030405137687921524, "learning_rate": 5.219443655975021e-05, "loss": 0.002555319294333458, "step": 168490 }, { "epoch": 47.82855520862901, "grad_norm": 0.6916009187698364, "learning_rate": 5.219159806982685e-05, "loss": 0.0007593544200062752, "step": 168500 }, { "epoch": 47.82855520862901, "eval_accuracy": 0.9743116932663572, "eval_loss": 0.09616226702928543, "eval_runtime": 32.4671, "eval_samples_per_second": 484.398, "eval_steps_per_second": 7.577, "step": 168500 }, { "epoch": 47.83139369855237, "grad_norm": 0.19176864624023438, "learning_rate": 5.218875957990349e-05, "loss": 0.0015144472941756248, "step": 168510 }, { "epoch": 47.83423218847573, "grad_norm": 0.05753355100750923, "learning_rate": 5.218592108998013e-05, "loss": 0.004366567730903626, "step": 168520 }, { "epoch": 47.837070678399094, "grad_norm": 0.2718294858932495, "learning_rate": 5.218308260005678e-05, "loss": 0.0005094885826110839, "step": 168530 }, { "epoch": 47.83990916832245, "grad_norm": 0.16654092073440552, "learning_rate": 5.218024411013341e-05, "loss": 0.008084045350551605, "step": 168540 }, { "epoch": 47.84274765824581, "grad_norm": 1.7645411491394043, "learning_rate": 5.217740562021005e-05, "loss": 0.0013424735516309738, "step": 168550 }, { "epoch": 47.845586148169176, "grad_norm": 0.1354674994945526, "learning_rate": 5.217456713028669e-05, "loss": 0.0007955929264426232, "step": 168560 }, { "epoch": 47.84842463809253, "grad_norm": 0.44384124875068665, "learning_rate": 5.217172864036333e-05, "loss": 0.0030870558694005013, "step": 168570 }, { "epoch": 47.851263128015894, "grad_norm": 0.2225676029920578, "learning_rate": 5.2168890150439964e-05, "loss": 0.001619594544172287, "step": 168580 }, { "epoch": 47.85410161793926, "grad_norm": 0.09802650660276413, "learning_rate": 5.216605166051661e-05, "loss": 0.0011863468214869499, "step": 168590 }, { "epoch": 47.85694010786262, "grad_norm": 2.1131603717803955, "learning_rate": 5.216321317059325e-05, "loss": 0.005621275305747986, "step": 168600 }, { "epoch": 47.859778597785976, "grad_norm": 8.648219108581543, "learning_rate": 5.216037468066989e-05, "loss": 0.012919063866138458, "step": 168610 }, { "epoch": 47.86261708770934, "grad_norm": 0.16001147031784058, "learning_rate": 5.215753619074652e-05, "loss": 0.004187991470098495, "step": 168620 }, { "epoch": 47.8654555776327, "grad_norm": 0.6787115931510925, "learning_rate": 5.2154697700823164e-05, "loss": 0.007874543964862823, "step": 168630 }, { "epoch": 47.86829406755606, "grad_norm": 13.137056350708008, "learning_rate": 5.21518592108998e-05, "loss": 0.010380356013774872, "step": 168640 }, { "epoch": 47.87113255747942, "grad_norm": 0.4771811068058014, "learning_rate": 5.214902072097644e-05, "loss": 0.002875529415905476, "step": 168650 }, { "epoch": 47.87397104740278, "grad_norm": 0.2702544629573822, "learning_rate": 5.214618223105309e-05, "loss": 0.0008950099349021911, "step": 168660 }, { "epoch": 47.876809537326146, "grad_norm": 4.477654933929443, "learning_rate": 5.214334374112972e-05, "loss": 0.0025077000260353088, "step": 168670 }, { "epoch": 47.8796480272495, "grad_norm": 0.18927837908267975, "learning_rate": 5.2140505251206364e-05, "loss": 0.00776558592915535, "step": 168680 }, { "epoch": 47.882486517172865, "grad_norm": 0.42188459634780884, "learning_rate": 5.2137666761283e-05, "loss": 0.013357076048851012, "step": 168690 }, { "epoch": 47.88532500709623, "grad_norm": 0.09594525396823883, "learning_rate": 5.213482827135964e-05, "loss": 0.0006707321852445602, "step": 168700 }, { "epoch": 47.88816349701958, "grad_norm": 0.3196571469306946, "learning_rate": 5.2131989781436275e-05, "loss": 0.0007226757705211639, "step": 168710 }, { "epoch": 47.891001986942946, "grad_norm": 0.3872777819633484, "learning_rate": 5.212915129151291e-05, "loss": 0.0008358694612979889, "step": 168720 }, { "epoch": 47.89384047686631, "grad_norm": 0.20607343316078186, "learning_rate": 5.2126312801589564e-05, "loss": 0.0006414493545889854, "step": 168730 }, { "epoch": 47.896678966789665, "grad_norm": 1.1029314994812012, "learning_rate": 5.21234743116662e-05, "loss": 0.00043768975883722304, "step": 168740 }, { "epoch": 47.89951745671303, "grad_norm": 0.05939270183444023, "learning_rate": 5.212063582174283e-05, "loss": 0.001099812053143978, "step": 168750 }, { "epoch": 47.90235594663639, "grad_norm": 0.5977734327316284, "learning_rate": 5.2117797331819475e-05, "loss": 0.0009144533425569535, "step": 168760 }, { "epoch": 47.90519443655975, "grad_norm": 0.2970137894153595, "learning_rate": 5.211495884189611e-05, "loss": 0.0012066274881362914, "step": 168770 }, { "epoch": 47.90803292648311, "grad_norm": 0.028434520587325096, "learning_rate": 5.211212035197275e-05, "loss": 0.0005499390885233879, "step": 168780 }, { "epoch": 47.91087141640647, "grad_norm": 0.2620388865470886, "learning_rate": 5.21092818620494e-05, "loss": 0.0009521033614873886, "step": 168790 }, { "epoch": 47.913709906329835, "grad_norm": 0.12066516280174255, "learning_rate": 5.210644337212603e-05, "loss": 0.0028778892010450363, "step": 168800 }, { "epoch": 47.91654839625319, "grad_norm": 0.051235031336545944, "learning_rate": 5.2103604882202675e-05, "loss": 0.0011702662333846093, "step": 168810 }, { "epoch": 47.919386886176554, "grad_norm": 0.11312884837388992, "learning_rate": 5.210076639227931e-05, "loss": 0.01661757528781891, "step": 168820 }, { "epoch": 47.92222537609992, "grad_norm": 0.1583506315946579, "learning_rate": 5.209792790235595e-05, "loss": 0.0075116358697414395, "step": 168830 }, { "epoch": 47.92506386602327, "grad_norm": 0.16481782495975494, "learning_rate": 5.2095089412432585e-05, "loss": 0.0015135286375880241, "step": 168840 }, { "epoch": 47.927902355946635, "grad_norm": 0.33268028497695923, "learning_rate": 5.209225092250922e-05, "loss": 0.0020930180326104164, "step": 168850 }, { "epoch": 47.93074084587, "grad_norm": 0.0243054311722517, "learning_rate": 5.208941243258587e-05, "loss": 0.003306695073843002, "step": 168860 }, { "epoch": 47.93357933579336, "grad_norm": 3.1700685024261475, "learning_rate": 5.208657394266251e-05, "loss": 0.0020365085452795028, "step": 168870 }, { "epoch": 47.93641782571672, "grad_norm": 0.037230346351861954, "learning_rate": 5.2083735452739144e-05, "loss": 0.0010077066719532013, "step": 168880 }, { "epoch": 47.93925631564008, "grad_norm": 0.05465046688914299, "learning_rate": 5.2080896962815785e-05, "loss": 0.003841538354754448, "step": 168890 }, { "epoch": 47.94209480556344, "grad_norm": 1.094699501991272, "learning_rate": 5.207805847289242e-05, "loss": 0.0013385560363531114, "step": 168900 }, { "epoch": 47.9449332954868, "grad_norm": 0.8456916809082031, "learning_rate": 5.207521998296906e-05, "loss": 0.001687139645218849, "step": 168910 }, { "epoch": 47.94777178541016, "grad_norm": 0.7954090237617493, "learning_rate": 5.207238149304571e-05, "loss": 0.0013142066076397897, "step": 168920 }, { "epoch": 47.950610275333524, "grad_norm": 0.4194909930229187, "learning_rate": 5.2069543003122344e-05, "loss": 0.0054662637412548065, "step": 168930 }, { "epoch": 47.95344876525688, "grad_norm": 0.3298946022987366, "learning_rate": 5.2066704513198985e-05, "loss": 0.0028723591938614847, "step": 168940 }, { "epoch": 47.95628725518024, "grad_norm": 0.07811582088470459, "learning_rate": 5.206386602327562e-05, "loss": 0.0013351984322071075, "step": 168950 }, { "epoch": 47.959125745103606, "grad_norm": 0.049127377569675446, "learning_rate": 5.2061027533352255e-05, "loss": 0.003604147583246231, "step": 168960 }, { "epoch": 47.96196423502697, "grad_norm": 6.8462724685668945, "learning_rate": 5.2058189043428896e-05, "loss": 0.0033238433301448823, "step": 168970 }, { "epoch": 47.964802724950324, "grad_norm": 0.06216711923480034, "learning_rate": 5.205535055350553e-05, "loss": 0.0019249454140663146, "step": 168980 }, { "epoch": 47.96764121487369, "grad_norm": 0.15977944433689117, "learning_rate": 5.205251206358218e-05, "loss": 0.002648826874792576, "step": 168990 }, { "epoch": 47.97047970479705, "grad_norm": 0.7573155760765076, "learning_rate": 5.204967357365882e-05, "loss": 0.0034080419689416887, "step": 169000 }, { "epoch": 47.97047970479705, "eval_accuracy": 0.9743752781840147, "eval_loss": 0.09073596447706223, "eval_runtime": 32.4432, "eval_samples_per_second": 484.756, "eval_steps_per_second": 7.582, "step": 169000 }, { "epoch": 47.973318194720406, "grad_norm": 0.48568931221961975, "learning_rate": 5.2046835083735455e-05, "loss": 0.002006968855857849, "step": 169010 }, { "epoch": 47.97615668464377, "grad_norm": 0.05867546796798706, "learning_rate": 5.2043996593812096e-05, "loss": 0.0007272759452462196, "step": 169020 }, { "epoch": 47.97899517456713, "grad_norm": 0.030273746699094772, "learning_rate": 5.204115810388873e-05, "loss": 0.0006490414962172508, "step": 169030 }, { "epoch": 47.981833664490495, "grad_norm": 0.11249876767396927, "learning_rate": 5.203831961396537e-05, "loss": 0.0006806440651416779, "step": 169040 }, { "epoch": 47.98467215441385, "grad_norm": 0.08497555553913116, "learning_rate": 5.203548112404201e-05, "loss": 0.005417253822088242, "step": 169050 }, { "epoch": 47.98751064433721, "grad_norm": 0.8877143263816833, "learning_rate": 5.2032642634118655e-05, "loss": 0.0015332011505961418, "step": 169060 }, { "epoch": 47.990349134260576, "grad_norm": 0.28416627645492554, "learning_rate": 5.2029804144195296e-05, "loss": 0.004342702031135559, "step": 169070 }, { "epoch": 47.99318762418393, "grad_norm": 0.04870780184864998, "learning_rate": 5.202696565427193e-05, "loss": 0.0015967017039656638, "step": 169080 }, { "epoch": 47.996026114107295, "grad_norm": 0.4140115976333618, "learning_rate": 5.2024127164348565e-05, "loss": 0.0004907928407192231, "step": 169090 }, { "epoch": 47.99886460403066, "grad_norm": 1.8854641914367676, "learning_rate": 5.202128867442521e-05, "loss": 0.0015880800783634186, "step": 169100 }, { "epoch": 48.00170309395401, "grad_norm": 1.3292404413223267, "learning_rate": 5.201845018450184e-05, "loss": 0.008714805543422698, "step": 169110 }, { "epoch": 48.004541583877376, "grad_norm": 0.45640259981155396, "learning_rate": 5.201561169457849e-05, "loss": 0.0004358574748039246, "step": 169120 }, { "epoch": 48.00738007380074, "grad_norm": 0.20175772905349731, "learning_rate": 5.201277320465513e-05, "loss": 0.000773577019572258, "step": 169130 }, { "epoch": 48.0102185637241, "grad_norm": 0.16190128028392792, "learning_rate": 5.2009934714731766e-05, "loss": 0.0019964657723903654, "step": 169140 }, { "epoch": 48.01305705364746, "grad_norm": 0.015249173156917095, "learning_rate": 5.200709622480841e-05, "loss": 0.006442061811685562, "step": 169150 }, { "epoch": 48.01589554357082, "grad_norm": 0.9231210350990295, "learning_rate": 5.200425773488504e-05, "loss": 0.0018255233764648438, "step": 169160 }, { "epoch": 48.018734033494184, "grad_norm": 0.03372298181056976, "learning_rate": 5.200141924496168e-05, "loss": 0.0012127842754125595, "step": 169170 }, { "epoch": 48.02157252341754, "grad_norm": 1.0563827753067017, "learning_rate": 5.199858075503832e-05, "loss": 0.0007660938426852226, "step": 169180 }, { "epoch": 48.0244110133409, "grad_norm": 7.174070358276367, "learning_rate": 5.1995742265114966e-05, "loss": 0.003965583071112633, "step": 169190 }, { "epoch": 48.027249503264265, "grad_norm": 0.040483344346284866, "learning_rate": 5.199290377519161e-05, "loss": 0.002101879194378853, "step": 169200 }, { "epoch": 48.03008799318762, "grad_norm": 0.14513573050498962, "learning_rate": 5.199006528526824e-05, "loss": 0.0010218260809779166, "step": 169210 }, { "epoch": 48.032926483110984, "grad_norm": 0.08491690456867218, "learning_rate": 5.1987226795344876e-05, "loss": 0.0019350145012140274, "step": 169220 }, { "epoch": 48.03576497303435, "grad_norm": 0.5712893009185791, "learning_rate": 5.198438830542152e-05, "loss": 0.004747232049703598, "step": 169230 }, { "epoch": 48.03860346295771, "grad_norm": 0.2694304585456848, "learning_rate": 5.198154981549815e-05, "loss": 0.0013046547770500182, "step": 169240 }, { "epoch": 48.041441952881065, "grad_norm": 0.005628513637930155, "learning_rate": 5.1978711325574794e-05, "loss": 0.004674214869737625, "step": 169250 }, { "epoch": 48.04428044280443, "grad_norm": 0.03180970996618271, "learning_rate": 5.197587283565144e-05, "loss": 0.0007027646526694298, "step": 169260 }, { "epoch": 48.04711893272779, "grad_norm": 0.02389702759683132, "learning_rate": 5.1973034345728076e-05, "loss": 0.0015586500987410546, "step": 169270 }, { "epoch": 48.04995742265115, "grad_norm": 0.39796867966651917, "learning_rate": 5.197019585580472e-05, "loss": 0.0013189222663640976, "step": 169280 }, { "epoch": 48.05279591257451, "grad_norm": 0.365920752286911, "learning_rate": 5.196735736588135e-05, "loss": 0.001648235134780407, "step": 169290 }, { "epoch": 48.05563440249787, "grad_norm": 0.856377363204956, "learning_rate": 5.1964518875957994e-05, "loss": 0.0018580781295895576, "step": 169300 }, { "epoch": 48.05847289242123, "grad_norm": 0.05779247730970383, "learning_rate": 5.196168038603463e-05, "loss": 0.002620731666684151, "step": 169310 }, { "epoch": 48.06131138234459, "grad_norm": 0.1380361169576645, "learning_rate": 5.1958841896111276e-05, "loss": 0.0025946395471692085, "step": 169320 }, { "epoch": 48.064149872267954, "grad_norm": 15.447072982788086, "learning_rate": 5.195600340618791e-05, "loss": 0.004500969499349594, "step": 169330 }, { "epoch": 48.06698836219132, "grad_norm": 0.1305493265390396, "learning_rate": 5.195316491626455e-05, "loss": 0.0011416334658861161, "step": 169340 }, { "epoch": 48.06982685211467, "grad_norm": 0.08362076431512833, "learning_rate": 5.195032642634119e-05, "loss": 0.008530428260564804, "step": 169350 }, { "epoch": 48.072665342038036, "grad_norm": 0.3601750433444977, "learning_rate": 5.194748793641783e-05, "loss": 0.00043891221284866335, "step": 169360 }, { "epoch": 48.0755038319614, "grad_norm": 8.761248588562012, "learning_rate": 5.194464944649446e-05, "loss": 0.0077661886811256405, "step": 169370 }, { "epoch": 48.078342321884755, "grad_norm": 0.0535542257130146, "learning_rate": 5.1941810956571104e-05, "loss": 0.0008526010438799858, "step": 169380 }, { "epoch": 48.08118081180812, "grad_norm": 0.030601203441619873, "learning_rate": 5.193897246664775e-05, "loss": 0.0011587688699364663, "step": 169390 }, { "epoch": 48.08401930173148, "grad_norm": 0.3816034197807312, "learning_rate": 5.193613397672439e-05, "loss": 0.0017326164990663529, "step": 169400 }, { "epoch": 48.086857791654836, "grad_norm": 0.01729806885123253, "learning_rate": 5.193329548680103e-05, "loss": 0.0002913212403655052, "step": 169410 }, { "epoch": 48.0896962815782, "grad_norm": 0.443890243768692, "learning_rate": 5.193045699687766e-05, "loss": 0.0007170522585511208, "step": 169420 }, { "epoch": 48.09253477150156, "grad_norm": 0.17501603066921234, "learning_rate": 5.19276185069543e-05, "loss": 0.00034133903682231903, "step": 169430 }, { "epoch": 48.095373261424925, "grad_norm": 0.01665317267179489, "learning_rate": 5.192478001703094e-05, "loss": 0.001759827882051468, "step": 169440 }, { "epoch": 48.09821175134828, "grad_norm": 0.12499500811100006, "learning_rate": 5.1921941527107574e-05, "loss": 0.0005489757284522056, "step": 169450 }, { "epoch": 48.10105024127164, "grad_norm": 0.014039736241102219, "learning_rate": 5.191910303718422e-05, "loss": 0.0004667326807975769, "step": 169460 }, { "epoch": 48.103888731195006, "grad_norm": 0.06626798957586288, "learning_rate": 5.191626454726086e-05, "loss": 0.0006232718005776405, "step": 169470 }, { "epoch": 48.10672722111836, "grad_norm": 0.12584063410758972, "learning_rate": 5.19134260573375e-05, "loss": 0.0003408443182706833, "step": 169480 }, { "epoch": 48.109565711041725, "grad_norm": 0.44045889377593994, "learning_rate": 5.191058756741414e-05, "loss": 0.000580497831106186, "step": 169490 }, { "epoch": 48.11240420096509, "grad_norm": 0.012688093818724155, "learning_rate": 5.1907749077490774e-05, "loss": 0.0005011571571230889, "step": 169500 }, { "epoch": 48.11240420096509, "eval_accuracy": 0.9771730145609462, "eval_loss": 0.08051182329654694, "eval_runtime": 32.8314, "eval_samples_per_second": 479.023, "eval_steps_per_second": 7.493, "step": 169500 }, { "epoch": 48.11524269088845, "grad_norm": 0.13200466334819794, "learning_rate": 5.1904910587567415e-05, "loss": 0.0029042115435004233, "step": 169510 }, { "epoch": 48.11808118081181, "grad_norm": 0.28291431069374084, "learning_rate": 5.190207209764406e-05, "loss": 0.0011116934940218926, "step": 169520 }, { "epoch": 48.12091967073517, "grad_norm": 0.015270712785422802, "learning_rate": 5.18992336077207e-05, "loss": 0.00032479409128427505, "step": 169530 }, { "epoch": 48.12375816065853, "grad_norm": 0.08360501378774643, "learning_rate": 5.189639511779734e-05, "loss": 0.0005689503625035286, "step": 169540 }, { "epoch": 48.12659665058189, "grad_norm": 0.24155980348587036, "learning_rate": 5.1893556627873974e-05, "loss": 0.00032717250287532805, "step": 169550 }, { "epoch": 48.12943514050525, "grad_norm": 0.07552524656057358, "learning_rate": 5.189071813795061e-05, "loss": 0.00027983933687210084, "step": 169560 }, { "epoch": 48.132273630428614, "grad_norm": 0.009576866403222084, "learning_rate": 5.188787964802725e-05, "loss": 0.0002992328256368637, "step": 169570 }, { "epoch": 48.13511212035197, "grad_norm": 0.054039116948843, "learning_rate": 5.1885041158103884e-05, "loss": 0.0003409136086702347, "step": 169580 }, { "epoch": 48.13795061027533, "grad_norm": 0.020957382395863533, "learning_rate": 5.188220266818053e-05, "loss": 0.00027796104550361635, "step": 169590 }, { "epoch": 48.140789100198695, "grad_norm": 0.03670799359679222, "learning_rate": 5.1879364178257174e-05, "loss": 0.00024406313896179198, "step": 169600 }, { "epoch": 48.14362759012206, "grad_norm": 0.038834430277347565, "learning_rate": 5.187652568833381e-05, "loss": 0.0005086598917841912, "step": 169610 }, { "epoch": 48.146466080045414, "grad_norm": 0.052877455949783325, "learning_rate": 5.187368719841045e-05, "loss": 0.00035694371908903124, "step": 169620 }, { "epoch": 48.14930456996878, "grad_norm": 0.03896389901638031, "learning_rate": 5.1870848708487084e-05, "loss": 0.0002942768856883049, "step": 169630 }, { "epoch": 48.15214305989214, "grad_norm": 7.123621940612793, "learning_rate": 5.1868010218563726e-05, "loss": 0.0020161252468824387, "step": 169640 }, { "epoch": 48.154981549815496, "grad_norm": 0.04535391926765442, "learning_rate": 5.186517172864036e-05, "loss": 0.0004251953214406967, "step": 169650 }, { "epoch": 48.15782003973886, "grad_norm": 0.11210761964321136, "learning_rate": 5.186233323871701e-05, "loss": 0.0008195187896490097, "step": 169660 }, { "epoch": 48.16065852966222, "grad_norm": 0.04501883685588837, "learning_rate": 5.185949474879365e-05, "loss": 0.0003616882488131523, "step": 169670 }, { "epoch": 48.16349701958558, "grad_norm": 0.010544823482632637, "learning_rate": 5.1856656258870285e-05, "loss": 0.0002958007156848907, "step": 169680 }, { "epoch": 48.16633550950894, "grad_norm": 0.00409243069589138, "learning_rate": 5.185381776894692e-05, "loss": 0.00030428580939769747, "step": 169690 }, { "epoch": 48.1691739994323, "grad_norm": 0.16142898797988892, "learning_rate": 5.185097927902356e-05, "loss": 0.0026078324764966963, "step": 169700 }, { "epoch": 48.172012489355666, "grad_norm": 0.041417717933654785, "learning_rate": 5.1848140789100195e-05, "loss": 0.0023034652695059776, "step": 169710 }, { "epoch": 48.17485097927902, "grad_norm": 0.06607963889837265, "learning_rate": 5.184530229917684e-05, "loss": 0.005011995136737823, "step": 169720 }, { "epoch": 48.177689469202384, "grad_norm": 0.08289404213428497, "learning_rate": 5.1842463809253485e-05, "loss": 0.005770009011030197, "step": 169730 }, { "epoch": 48.18052795912575, "grad_norm": 0.23443347215652466, "learning_rate": 5.183962531933012e-05, "loss": 0.0032154083251953124, "step": 169740 }, { "epoch": 48.1833664490491, "grad_norm": 0.2570763826370239, "learning_rate": 5.183678682940676e-05, "loss": 0.0017732597887516021, "step": 169750 }, { "epoch": 48.186204938972466, "grad_norm": 0.041140682995319366, "learning_rate": 5.1833948339483395e-05, "loss": 0.00029384605586528777, "step": 169760 }, { "epoch": 48.18904342889583, "grad_norm": 0.030419262126088142, "learning_rate": 5.183110984956004e-05, "loss": 0.0003978312015533447, "step": 169770 }, { "epoch": 48.191881918819185, "grad_norm": 0.5438762307167053, "learning_rate": 5.182827135963667e-05, "loss": 0.0003368331119418144, "step": 169780 }, { "epoch": 48.19472040874255, "grad_norm": 0.03378736227750778, "learning_rate": 5.182543286971332e-05, "loss": 0.0005394203588366508, "step": 169790 }, { "epoch": 48.19755889866591, "grad_norm": 0.02825208380818367, "learning_rate": 5.1822594379789954e-05, "loss": 0.0002612173557281494, "step": 169800 }, { "epoch": 48.20039738858927, "grad_norm": 0.449723482131958, "learning_rate": 5.1819755889866595e-05, "loss": 0.0017918827012181281, "step": 169810 }, { "epoch": 48.20323587851263, "grad_norm": 6.63954496383667, "learning_rate": 5.181691739994323e-05, "loss": 0.002589108794927597, "step": 169820 }, { "epoch": 48.20607436843599, "grad_norm": 0.009792627766728401, "learning_rate": 5.181407891001987e-05, "loss": 0.0001951834186911583, "step": 169830 }, { "epoch": 48.208912858359355, "grad_norm": 2.8621339797973633, "learning_rate": 5.1811240420096506e-05, "loss": 0.0012693621218204498, "step": 169840 }, { "epoch": 48.21175134828271, "grad_norm": 0.06013490632176399, "learning_rate": 5.180840193017315e-05, "loss": 0.0004048362374305725, "step": 169850 }, { "epoch": 48.214589838206074, "grad_norm": 0.13003575801849365, "learning_rate": 5.1805563440249795e-05, "loss": 0.012642888724803925, "step": 169860 }, { "epoch": 48.21742832812944, "grad_norm": 0.6260200142860413, "learning_rate": 5.180272495032643e-05, "loss": 0.0004621027037501335, "step": 169870 }, { "epoch": 48.2202668180528, "grad_norm": 0.04754795879125595, "learning_rate": 5.179988646040307e-05, "loss": 0.0005402198061347008, "step": 169880 }, { "epoch": 48.223105307976155, "grad_norm": 0.06795411556959152, "learning_rate": 5.1797047970479706e-05, "loss": 0.00043850503861904145, "step": 169890 }, { "epoch": 48.22594379789952, "grad_norm": 0.007030848879367113, "learning_rate": 5.179420948055634e-05, "loss": 0.00211766492575407, "step": 169900 }, { "epoch": 48.22878228782288, "grad_norm": 0.4730381369590759, "learning_rate": 5.179137099063298e-05, "loss": 0.0004621380940079689, "step": 169910 }, { "epoch": 48.23162077774624, "grad_norm": 0.11463402956724167, "learning_rate": 5.178853250070963e-05, "loss": 0.001957203075289726, "step": 169920 }, { "epoch": 48.2344592676696, "grad_norm": 0.6039824485778809, "learning_rate": 5.1785694010786265e-05, "loss": 0.000527418963611126, "step": 169930 }, { "epoch": 48.23729775759296, "grad_norm": 0.08987002819776535, "learning_rate": 5.1782855520862906e-05, "loss": 0.001402302272617817, "step": 169940 }, { "epoch": 48.24013624751632, "grad_norm": 0.07461860775947571, "learning_rate": 5.178001703093954e-05, "loss": 0.0011251326650381088, "step": 169950 }, { "epoch": 48.24297473743968, "grad_norm": 0.23247699439525604, "learning_rate": 5.177717854101618e-05, "loss": 0.0009967634454369546, "step": 169960 }, { "epoch": 48.245813227363044, "grad_norm": 0.3495427668094635, "learning_rate": 5.177434005109282e-05, "loss": 0.0011664878576993943, "step": 169970 }, { "epoch": 48.24865171728641, "grad_norm": 0.23903661966323853, "learning_rate": 5.177150156116946e-05, "loss": 0.0020857520401477815, "step": 169980 }, { "epoch": 48.25149020720976, "grad_norm": 0.35860276222229004, "learning_rate": 5.1768663071246106e-05, "loss": 0.0015297045931220055, "step": 169990 }, { "epoch": 48.254328697133126, "grad_norm": 0.013609755784273148, "learning_rate": 5.176582458132274e-05, "loss": 0.0011699140071868897, "step": 170000 }, { "epoch": 48.254328697133126, "eval_accuracy": 0.9751382971959052, "eval_loss": 0.0916023850440979, "eval_runtime": 32.5129, "eval_samples_per_second": 483.716, "eval_steps_per_second": 7.566, "step": 170000 }, { "epoch": 48.25716718705649, "grad_norm": 0.37539881467819214, "learning_rate": 5.176298609139938e-05, "loss": 0.0008285157382488251, "step": 170010 }, { "epoch": 48.260005676979844, "grad_norm": 0.010953563265502453, "learning_rate": 5.176014760147602e-05, "loss": 0.0034502238035202027, "step": 170020 }, { "epoch": 48.26284416690321, "grad_norm": 6.054699897766113, "learning_rate": 5.175730911155265e-05, "loss": 0.0027752116322517393, "step": 170030 }, { "epoch": 48.26568265682657, "grad_norm": 0.24358613789081573, "learning_rate": 5.175447062162929e-05, "loss": 0.00709536075592041, "step": 170040 }, { "epoch": 48.268521146749926, "grad_norm": 0.036644309759140015, "learning_rate": 5.175163213170593e-05, "loss": 0.0018010647967457772, "step": 170050 }, { "epoch": 48.27135963667329, "grad_norm": 0.6279668211936951, "learning_rate": 5.1748793641782576e-05, "loss": 0.0055326387286186215, "step": 170060 }, { "epoch": 48.27419812659665, "grad_norm": 0.18827930092811584, "learning_rate": 5.174595515185922e-05, "loss": 0.0009545262902975083, "step": 170070 }, { "epoch": 48.277036616520014, "grad_norm": 0.16071078181266785, "learning_rate": 5.174311666193585e-05, "loss": 0.003045186214148998, "step": 170080 }, { "epoch": 48.27987510644337, "grad_norm": 0.052226122468709946, "learning_rate": 5.174027817201249e-05, "loss": 0.0010632585734128952, "step": 170090 }, { "epoch": 48.28271359636673, "grad_norm": 1.552156686782837, "learning_rate": 5.173743968208913e-05, "loss": 0.001186223328113556, "step": 170100 }, { "epoch": 48.285552086290096, "grad_norm": 0.4222797155380249, "learning_rate": 5.173460119216577e-05, "loss": 0.0063815474510192875, "step": 170110 }, { "epoch": 48.28839057621345, "grad_norm": 1.296903371810913, "learning_rate": 5.173176270224242e-05, "loss": 0.0023703442886471747, "step": 170120 }, { "epoch": 48.291229066136815, "grad_norm": 1.901888132095337, "learning_rate": 5.172892421231905e-05, "loss": 0.0011137500405311584, "step": 170130 }, { "epoch": 48.29406755606018, "grad_norm": 0.20559951663017273, "learning_rate": 5.172608572239569e-05, "loss": 0.002310704067349434, "step": 170140 }, { "epoch": 48.29690604598353, "grad_norm": 0.05210656672716141, "learning_rate": 5.172324723247233e-05, "loss": 0.0009772177785634995, "step": 170150 }, { "epoch": 48.299744535906896, "grad_norm": 0.1821916550397873, "learning_rate": 5.172040874254896e-05, "loss": 0.00040080733597278593, "step": 170160 }, { "epoch": 48.30258302583026, "grad_norm": 0.7238579988479614, "learning_rate": 5.1717570252625604e-05, "loss": 0.0011339543387293815, "step": 170170 }, { "epoch": 48.30542151575362, "grad_norm": 0.14310337603092194, "learning_rate": 5.171473176270224e-05, "loss": 0.0009576469659805298, "step": 170180 }, { "epoch": 48.30826000567698, "grad_norm": 0.038881540298461914, "learning_rate": 5.1711893272778886e-05, "loss": 0.0015356259420514107, "step": 170190 }, { "epoch": 48.31109849560034, "grad_norm": 4.312447547912598, "learning_rate": 5.170905478285553e-05, "loss": 0.0018091056495904922, "step": 170200 }, { "epoch": 48.3139369855237, "grad_norm": 0.06684769690036774, "learning_rate": 5.170621629293216e-05, "loss": 0.0010862916707992554, "step": 170210 }, { "epoch": 48.31677547544706, "grad_norm": 0.41808241605758667, "learning_rate": 5.1703377803008804e-05, "loss": 0.005118612200021744, "step": 170220 }, { "epoch": 48.31961396537042, "grad_norm": 4.225318431854248, "learning_rate": 5.170053931308544e-05, "loss": 0.001921781338751316, "step": 170230 }, { "epoch": 48.322452455293785, "grad_norm": 0.36002129316329956, "learning_rate": 5.169770082316208e-05, "loss": 0.004115990549325943, "step": 170240 }, { "epoch": 48.32529094521715, "grad_norm": 0.19535623490810394, "learning_rate": 5.169486233323873e-05, "loss": 0.007371518015861511, "step": 170250 }, { "epoch": 48.328129435140504, "grad_norm": 0.6150732636451721, "learning_rate": 5.169202384331536e-05, "loss": 0.009201030433177947, "step": 170260 }, { "epoch": 48.33096792506387, "grad_norm": 0.22400610148906708, "learning_rate": 5.1689185353392e-05, "loss": 0.0021664125844836233, "step": 170270 }, { "epoch": 48.33380641498723, "grad_norm": 0.9035622477531433, "learning_rate": 5.168634686346864e-05, "loss": 0.002152431383728981, "step": 170280 }, { "epoch": 48.336644904910585, "grad_norm": 6.398060321807861, "learning_rate": 5.168350837354527e-05, "loss": 0.0017887163907289506, "step": 170290 }, { "epoch": 48.33948339483395, "grad_norm": 0.05517261102795601, "learning_rate": 5.1680669883621914e-05, "loss": 0.006261145323514938, "step": 170300 }, { "epoch": 48.34232188475731, "grad_norm": 0.041457630693912506, "learning_rate": 5.167783139369855e-05, "loss": 0.0031805258244276048, "step": 170310 }, { "epoch": 48.34516037468067, "grad_norm": 0.07920007407665253, "learning_rate": 5.16749929037752e-05, "loss": 0.011030313372612, "step": 170320 }, { "epoch": 48.34799886460403, "grad_norm": 0.29872041940689087, "learning_rate": 5.167215441385184e-05, "loss": 0.014321193099021912, "step": 170330 }, { "epoch": 48.35083735452739, "grad_norm": 0.149387925863266, "learning_rate": 5.166931592392847e-05, "loss": 0.0015392230823636055, "step": 170340 }, { "epoch": 48.353675844450756, "grad_norm": 0.03618776053190231, "learning_rate": 5.1666477434005114e-05, "loss": 0.002508210577070713, "step": 170350 }, { "epoch": 48.35651433437411, "grad_norm": 0.7413767576217651, "learning_rate": 5.166363894408175e-05, "loss": 0.0013350460678339005, "step": 170360 }, { "epoch": 48.359352824297474, "grad_norm": 3.5192720890045166, "learning_rate": 5.1660800454158384e-05, "loss": 0.00800531953573227, "step": 170370 }, { "epoch": 48.36219131422084, "grad_norm": 0.2785535454750061, "learning_rate": 5.1657961964235025e-05, "loss": 0.0011600932106375695, "step": 170380 }, { "epoch": 48.36502980414419, "grad_norm": 0.5922547578811646, "learning_rate": 5.165512347431167e-05, "loss": 0.0019769491627812386, "step": 170390 }, { "epoch": 48.367868294067556, "grad_norm": 0.2807304561138153, "learning_rate": 5.165228498438831e-05, "loss": 0.0013354629278182983, "step": 170400 }, { "epoch": 48.37070678399092, "grad_norm": 0.09057430177927017, "learning_rate": 5.164944649446495e-05, "loss": 0.0003337142989039421, "step": 170410 }, { "epoch": 48.373545273914274, "grad_norm": 0.15213540196418762, "learning_rate": 5.1646608004541584e-05, "loss": 0.01547018438577652, "step": 170420 }, { "epoch": 48.37638376383764, "grad_norm": 0.008683275431394577, "learning_rate": 5.1643769514618225e-05, "loss": 0.0011819789186120033, "step": 170430 }, { "epoch": 48.379222253761, "grad_norm": 0.0502949059009552, "learning_rate": 5.164093102469486e-05, "loss": 0.0005090538412332535, "step": 170440 }, { "epoch": 48.38206074368436, "grad_norm": 0.6799803972244263, "learning_rate": 5.163809253477151e-05, "loss": 0.004370549321174621, "step": 170450 }, { "epoch": 48.38489923360772, "grad_norm": 11.3541898727417, "learning_rate": 5.163525404484815e-05, "loss": 0.004545418918132782, "step": 170460 }, { "epoch": 48.38773772353108, "grad_norm": 0.3891279995441437, "learning_rate": 5.1632415554924784e-05, "loss": 0.0007805190980434418, "step": 170470 }, { "epoch": 48.390576213454445, "grad_norm": 0.05284084379673004, "learning_rate": 5.1629577065001425e-05, "loss": 0.0009583435952663421, "step": 170480 }, { "epoch": 48.3934147033778, "grad_norm": 0.08011160045862198, "learning_rate": 5.162673857507806e-05, "loss": 0.00040725190192461016, "step": 170490 }, { "epoch": 48.39625319330116, "grad_norm": 0.02798384614288807, "learning_rate": 5.1623900085154694e-05, "loss": 0.0005644055083394051, "step": 170500 }, { "epoch": 48.39625319330116, "eval_accuracy": 0.9790805620906721, "eval_loss": 0.07664205133914948, "eval_runtime": 32.6579, "eval_samples_per_second": 481.567, "eval_steps_per_second": 7.533, "step": 170500 }, { "epoch": 48.399091683224526, "grad_norm": 0.08876006305217743, "learning_rate": 5.1621061595231336e-05, "loss": 0.000337301567196846, "step": 170510 }, { "epoch": 48.40193017314788, "grad_norm": 0.28436708450317383, "learning_rate": 5.1618223105307984e-05, "loss": 0.000609748438000679, "step": 170520 }, { "epoch": 48.404768663071245, "grad_norm": 0.18547315895557404, "learning_rate": 5.161538461538462e-05, "loss": 0.0004974557086825371, "step": 170530 }, { "epoch": 48.40760715299461, "grad_norm": 0.22587506473064423, "learning_rate": 5.161254612546126e-05, "loss": 0.0005632251501083374, "step": 170540 }, { "epoch": 48.41044564291797, "grad_norm": 0.07921502739191055, "learning_rate": 5.1609707635537894e-05, "loss": 0.00034118220210075376, "step": 170550 }, { "epoch": 48.413284132841326, "grad_norm": 0.2299228310585022, "learning_rate": 5.1606869145614536e-05, "loss": 0.003197053074836731, "step": 170560 }, { "epoch": 48.41612262276469, "grad_norm": 8.988582611083984, "learning_rate": 5.160403065569117e-05, "loss": 0.002694563381373882, "step": 170570 }, { "epoch": 48.41896111268805, "grad_norm": 15.185585975646973, "learning_rate": 5.160119216576781e-05, "loss": 0.006045353412628174, "step": 170580 }, { "epoch": 48.42179960261141, "grad_norm": 0.7228904962539673, "learning_rate": 5.159835367584446e-05, "loss": 0.004216354340314865, "step": 170590 }, { "epoch": 48.42463809253477, "grad_norm": 0.1329009234905243, "learning_rate": 5.1595515185921095e-05, "loss": 0.0031081357970833778, "step": 170600 }, { "epoch": 48.427476582458134, "grad_norm": 1.2691131830215454, "learning_rate": 5.1592676695997736e-05, "loss": 0.0004847286269068718, "step": 170610 }, { "epoch": 48.43031507238149, "grad_norm": 0.038418155163526535, "learning_rate": 5.158983820607437e-05, "loss": 0.0006129065528512001, "step": 170620 }, { "epoch": 48.43315356230485, "grad_norm": 6.66414737701416, "learning_rate": 5.1586999716151005e-05, "loss": 0.004517550393939018, "step": 170630 }, { "epoch": 48.435992052228215, "grad_norm": 0.13070984184741974, "learning_rate": 5.1584161226227646e-05, "loss": 0.006014755368232727, "step": 170640 }, { "epoch": 48.43883054215158, "grad_norm": 0.09078683704137802, "learning_rate": 5.1581322736304295e-05, "loss": 0.003291340544819832, "step": 170650 }, { "epoch": 48.441669032074934, "grad_norm": 1.7763590812683105, "learning_rate": 5.157848424638093e-05, "loss": 0.01226412206888199, "step": 170660 }, { "epoch": 48.4445075219983, "grad_norm": 0.47938188910484314, "learning_rate": 5.157564575645757e-05, "loss": 0.005168944969773293, "step": 170670 }, { "epoch": 48.44734601192166, "grad_norm": 0.19833579659461975, "learning_rate": 5.1572807266534205e-05, "loss": 0.001846395805478096, "step": 170680 }, { "epoch": 48.450184501845015, "grad_norm": 0.040801040828228, "learning_rate": 5.1569968776610847e-05, "loss": 0.002320140041410923, "step": 170690 }, { "epoch": 48.45302299176838, "grad_norm": 1.3466113805770874, "learning_rate": 5.156713028668748e-05, "loss": 0.0012026140466332436, "step": 170700 }, { "epoch": 48.45586148169174, "grad_norm": 0.04430330917239189, "learning_rate": 5.156429179676412e-05, "loss": 0.005154763162136078, "step": 170710 }, { "epoch": 48.458699971615104, "grad_norm": 0.022386042401194572, "learning_rate": 5.156145330684077e-05, "loss": 0.00400623008608818, "step": 170720 }, { "epoch": 48.46153846153846, "grad_norm": 0.5387179851531982, "learning_rate": 5.1558614816917405e-05, "loss": 0.0012103494256734848, "step": 170730 }, { "epoch": 48.46437695146182, "grad_norm": 0.08315584063529968, "learning_rate": 5.155577632699404e-05, "loss": 0.0011243775486946106, "step": 170740 }, { "epoch": 48.467215441385186, "grad_norm": 0.042945489287376404, "learning_rate": 5.155293783707068e-05, "loss": 0.0020023128017783165, "step": 170750 }, { "epoch": 48.47005393130854, "grad_norm": 2.7136924266815186, "learning_rate": 5.1550099347147316e-05, "loss": 0.0026473045349121095, "step": 170760 }, { "epoch": 48.472892421231904, "grad_norm": 7.623650074005127, "learning_rate": 5.154726085722396e-05, "loss": 0.0025663595646619798, "step": 170770 }, { "epoch": 48.47573091115527, "grad_norm": 0.38927021622657776, "learning_rate": 5.154442236730059e-05, "loss": 0.0039700761437416075, "step": 170780 }, { "epoch": 48.47856940107862, "grad_norm": 0.18257685005664825, "learning_rate": 5.154158387737724e-05, "loss": 0.0028970541432499887, "step": 170790 }, { "epoch": 48.481407891001986, "grad_norm": 0.0339609794318676, "learning_rate": 5.153874538745388e-05, "loss": 0.000954761914908886, "step": 170800 }, { "epoch": 48.48424638092535, "grad_norm": 0.017256328836083412, "learning_rate": 5.1535906897530516e-05, "loss": 0.0014236824586987495, "step": 170810 }, { "epoch": 48.48708487084871, "grad_norm": 2.4651925563812256, "learning_rate": 5.153306840760716e-05, "loss": 0.008707504719495773, "step": 170820 }, { "epoch": 48.48992336077207, "grad_norm": 0.0893767699599266, "learning_rate": 5.153022991768379e-05, "loss": 0.01208752691745758, "step": 170830 }, { "epoch": 48.49276185069543, "grad_norm": 0.3827891945838928, "learning_rate": 5.1527391427760427e-05, "loss": 0.009104026108980178, "step": 170840 }, { "epoch": 48.49560034061879, "grad_norm": 0.08280209451913834, "learning_rate": 5.152455293783708e-05, "loss": 0.0073622152209281925, "step": 170850 }, { "epoch": 48.49843883054215, "grad_norm": 0.225166454911232, "learning_rate": 5.1521714447913716e-05, "loss": 0.0005918486043810844, "step": 170860 }, { "epoch": 48.50127732046551, "grad_norm": 9.239640235900879, "learning_rate": 5.151887595799035e-05, "loss": 0.0026381196454167365, "step": 170870 }, { "epoch": 48.504115810388875, "grad_norm": 0.2799241244792938, "learning_rate": 5.151603746806699e-05, "loss": 0.001545260287821293, "step": 170880 }, { "epoch": 48.50695430031223, "grad_norm": 0.06842215359210968, "learning_rate": 5.151319897814363e-05, "loss": 0.012658520042896271, "step": 170890 }, { "epoch": 48.50979279023559, "grad_norm": 0.37595370411872864, "learning_rate": 5.151036048822027e-05, "loss": 0.001045878604054451, "step": 170900 }, { "epoch": 48.512631280158956, "grad_norm": 2.510371446609497, "learning_rate": 5.15075219982969e-05, "loss": 0.0049222812056541445, "step": 170910 }, { "epoch": 48.51546977008232, "grad_norm": 1.7015056610107422, "learning_rate": 5.150468350837355e-05, "loss": 0.002806585468351841, "step": 170920 }, { "epoch": 48.518308260005675, "grad_norm": 0.889167845249176, "learning_rate": 5.150184501845019e-05, "loss": 0.0012668151408433913, "step": 170930 }, { "epoch": 48.52114674992904, "grad_norm": 0.3165140151977539, "learning_rate": 5.149900652852683e-05, "loss": 0.0029624992981553077, "step": 170940 }, { "epoch": 48.5239852398524, "grad_norm": 11.839585304260254, "learning_rate": 5.149616803860347e-05, "loss": 0.00961458683013916, "step": 170950 }, { "epoch": 48.52682372977576, "grad_norm": 0.9960107803344727, "learning_rate": 5.14933295486801e-05, "loss": 0.004828505590558052, "step": 170960 }, { "epoch": 48.52966221969912, "grad_norm": 0.028587639331817627, "learning_rate": 5.149049105875674e-05, "loss": 0.0015985943377017976, "step": 170970 }, { "epoch": 48.53250070962248, "grad_norm": 0.1929071694612503, "learning_rate": 5.148765256883338e-05, "loss": 0.0021568873897194864, "step": 170980 }, { "epoch": 48.535339199545845, "grad_norm": 1.6953352689743042, "learning_rate": 5.148481407891003e-05, "loss": 0.0019011260941624642, "step": 170990 }, { "epoch": 48.5381776894692, "grad_norm": 0.13031187653541565, "learning_rate": 5.148197558898666e-05, "loss": 0.0009530404582619667, "step": 171000 }, { "epoch": 48.5381776894692, "eval_accuracy": 0.9766007503020283, "eval_loss": 0.08445987850427628, "eval_runtime": 32.589, "eval_samples_per_second": 482.586, "eval_steps_per_second": 7.549, "step": 171000 }, { "epoch": 48.541016179392564, "grad_norm": 0.17168587446212769, "learning_rate": 5.14791370990633e-05, "loss": 0.0004420937970280647, "step": 171010 }, { "epoch": 48.54385466931593, "grad_norm": 0.3086674213409424, "learning_rate": 5.147629860913994e-05, "loss": 0.0076403521001338955, "step": 171020 }, { "epoch": 48.54669315923928, "grad_norm": 3.1585206985473633, "learning_rate": 5.147346011921658e-05, "loss": 0.0017544565722346305, "step": 171030 }, { "epoch": 48.549531649162645, "grad_norm": 0.8581807613372803, "learning_rate": 5.147062162929321e-05, "loss": 0.0015395818278193473, "step": 171040 }, { "epoch": 48.55237013908601, "grad_norm": 0.3838559687137604, "learning_rate": 5.146778313936986e-05, "loss": 0.001527724601328373, "step": 171050 }, { "epoch": 48.555208629009364, "grad_norm": 1.5384438037872314, "learning_rate": 5.14649446494465e-05, "loss": 0.002065899595618248, "step": 171060 }, { "epoch": 48.55804711893273, "grad_norm": 0.1205187737941742, "learning_rate": 5.146210615952314e-05, "loss": 0.005208033323287964, "step": 171070 }, { "epoch": 48.56088560885609, "grad_norm": 0.2176227867603302, "learning_rate": 5.145926766959978e-05, "loss": 0.0022127697244286536, "step": 171080 }, { "epoch": 48.56372409877945, "grad_norm": 5.478058815002441, "learning_rate": 5.1456429179676413e-05, "loss": 0.0028652939945459366, "step": 171090 }, { "epoch": 48.56656258870281, "grad_norm": 0.1728636473417282, "learning_rate": 5.145359068975305e-05, "loss": 0.0022642796859145163, "step": 171100 }, { "epoch": 48.56940107862617, "grad_norm": 0.12026462703943253, "learning_rate": 5.145075219982969e-05, "loss": 0.001722855307161808, "step": 171110 }, { "epoch": 48.572239568549534, "grad_norm": 0.0746661052107811, "learning_rate": 5.144791370990634e-05, "loss": 0.00029281917959451676, "step": 171120 }, { "epoch": 48.57507805847289, "grad_norm": 3.2571768760681152, "learning_rate": 5.144507521998297e-05, "loss": 0.007282399386167526, "step": 171130 }, { "epoch": 48.57791654839625, "grad_norm": 0.14351454377174377, "learning_rate": 5.1442236730059614e-05, "loss": 0.0070213906466960905, "step": 171140 }, { "epoch": 48.580755038319616, "grad_norm": 14.091404914855957, "learning_rate": 5.143939824013625e-05, "loss": 0.0031679809093475343, "step": 171150 }, { "epoch": 48.58359352824297, "grad_norm": 6.517899036407471, "learning_rate": 5.143655975021289e-05, "loss": 0.004081464558839798, "step": 171160 }, { "epoch": 48.586432018166335, "grad_norm": 0.9132385849952698, "learning_rate": 5.1433721260289524e-05, "loss": 0.002209450490772724, "step": 171170 }, { "epoch": 48.5892705080897, "grad_norm": 0.5172933340072632, "learning_rate": 5.1430882770366166e-05, "loss": 0.005937594175338745, "step": 171180 }, { "epoch": 48.59210899801306, "grad_norm": 1.3507235050201416, "learning_rate": 5.1428044280442814e-05, "loss": 0.00198699701577425, "step": 171190 }, { "epoch": 48.594947487936416, "grad_norm": 0.1407298445701599, "learning_rate": 5.142520579051945e-05, "loss": 0.004060435295104981, "step": 171200 }, { "epoch": 48.59778597785978, "grad_norm": 10.12601375579834, "learning_rate": 5.142236730059608e-05, "loss": 0.004953330755233765, "step": 171210 }, { "epoch": 48.60062446778314, "grad_norm": 3.8021068572998047, "learning_rate": 5.1419528810672724e-05, "loss": 0.01492089033126831, "step": 171220 }, { "epoch": 48.6034629577065, "grad_norm": 0.21038328111171722, "learning_rate": 5.141669032074936e-05, "loss": 0.0019347172230482102, "step": 171230 }, { "epoch": 48.60630144762986, "grad_norm": 5.240849018096924, "learning_rate": 5.1413851830826e-05, "loss": 0.004002631455659866, "step": 171240 }, { "epoch": 48.60913993755322, "grad_norm": 0.09050235897302628, "learning_rate": 5.141101334090265e-05, "loss": 0.001175028830766678, "step": 171250 }, { "epoch": 48.61197842747658, "grad_norm": 0.30089071393013, "learning_rate": 5.140817485097928e-05, "loss": 0.00181279294192791, "step": 171260 }, { "epoch": 48.61481691739994, "grad_norm": 0.1463218331336975, "learning_rate": 5.1405336361055924e-05, "loss": 0.0015710178762674333, "step": 171270 }, { "epoch": 48.617655407323305, "grad_norm": 0.5719322562217712, "learning_rate": 5.140249787113256e-05, "loss": 0.0012796854600310326, "step": 171280 }, { "epoch": 48.62049389724667, "grad_norm": 0.16673468053340912, "learning_rate": 5.13996593812092e-05, "loss": 0.0007609622552990913, "step": 171290 }, { "epoch": 48.623332387170024, "grad_norm": 0.029619168490171432, "learning_rate": 5.1396820891285835e-05, "loss": 0.0006316835060715675, "step": 171300 }, { "epoch": 48.62617087709339, "grad_norm": 10.497674942016602, "learning_rate": 5.139398240136247e-05, "loss": 0.008860303461551667, "step": 171310 }, { "epoch": 48.62900936701675, "grad_norm": 0.23965345323085785, "learning_rate": 5.1391143911439124e-05, "loss": 0.00040855016559362414, "step": 171320 }, { "epoch": 48.631847856940105, "grad_norm": 0.04509411379694939, "learning_rate": 5.138830542151576e-05, "loss": 0.0014358246698975563, "step": 171330 }, { "epoch": 48.63468634686347, "grad_norm": 9.060681343078613, "learning_rate": 5.1385466931592394e-05, "loss": 0.00682150274515152, "step": 171340 }, { "epoch": 48.63752483678683, "grad_norm": 1.9924805164337158, "learning_rate": 5.1382628441669035e-05, "loss": 0.003715803474187851, "step": 171350 }, { "epoch": 48.64036332671019, "grad_norm": 1.9966981410980225, "learning_rate": 5.137978995174567e-05, "loss": 0.010807289183139801, "step": 171360 }, { "epoch": 48.64320181663355, "grad_norm": 0.692420482635498, "learning_rate": 5.137695146182231e-05, "loss": 0.0014898790046572684, "step": 171370 }, { "epoch": 48.64604030655691, "grad_norm": 0.02174954302608967, "learning_rate": 5.1374112971898946e-05, "loss": 0.002732396498322487, "step": 171380 }, { "epoch": 48.648878796480275, "grad_norm": 0.21062695980072021, "learning_rate": 5.1371274481975594e-05, "loss": 0.0020420346409082413, "step": 171390 }, { "epoch": 48.65171728640363, "grad_norm": 0.006979825906455517, "learning_rate": 5.1368435992052235e-05, "loss": 0.0032182361930608748, "step": 171400 }, { "epoch": 48.654555776326994, "grad_norm": 0.16062021255493164, "learning_rate": 5.136559750212887e-05, "loss": 0.0018060909584164619, "step": 171410 }, { "epoch": 48.65739426625036, "grad_norm": 0.06651894003152847, "learning_rate": 5.136275901220551e-05, "loss": 0.0008187636733055115, "step": 171420 }, { "epoch": 48.66023275617371, "grad_norm": 0.056935835629701614, "learning_rate": 5.1359920522282146e-05, "loss": 0.001069670170545578, "step": 171430 }, { "epoch": 48.663071246097076, "grad_norm": 0.05172022804617882, "learning_rate": 5.135708203235878e-05, "loss": 0.0004696100950241089, "step": 171440 }, { "epoch": 48.66590973602044, "grad_norm": 0.1213679164648056, "learning_rate": 5.135424354243543e-05, "loss": 0.001972397416830063, "step": 171450 }, { "epoch": 48.6687482259438, "grad_norm": 0.24569158256053925, "learning_rate": 5.135140505251207e-05, "loss": 0.0004940463230013847, "step": 171460 }, { "epoch": 48.67158671586716, "grad_norm": 0.036339204758405685, "learning_rate": 5.1348566562588704e-05, "loss": 0.0004220619797706604, "step": 171470 }, { "epoch": 48.67442520579052, "grad_norm": 0.05259789526462555, "learning_rate": 5.1345728072665346e-05, "loss": 0.0006614919751882553, "step": 171480 }, { "epoch": 48.67726369571388, "grad_norm": 0.07911424338817596, "learning_rate": 5.134288958274198e-05, "loss": 0.00023583080619573593, "step": 171490 }, { "epoch": 48.68010218563724, "grad_norm": 0.10449560731649399, "learning_rate": 5.134005109281862e-05, "loss": 0.0005123855546116829, "step": 171500 }, { "epoch": 48.68010218563724, "eval_accuracy": 0.977872448655179, "eval_loss": 0.0746883824467659, "eval_runtime": 32.4992, "eval_samples_per_second": 483.92, "eval_steps_per_second": 7.569, "step": 171500 }, { "epoch": 48.6829406755606, "grad_norm": 0.056315239518880844, "learning_rate": 5.1337212602895256e-05, "loss": 0.0010042695328593254, "step": 171510 }, { "epoch": 48.685779165483964, "grad_norm": 0.060811251401901245, "learning_rate": 5.1334374112971905e-05, "loss": 0.0005493801087141037, "step": 171520 }, { "epoch": 48.68861765540732, "grad_norm": 0.049916304647922516, "learning_rate": 5.1331535623048546e-05, "loss": 0.0009134382009506226, "step": 171530 }, { "epoch": 48.69145614533068, "grad_norm": 0.020978478714823723, "learning_rate": 5.132869713312518e-05, "loss": 0.00042226947844028475, "step": 171540 }, { "epoch": 48.694294635254046, "grad_norm": 1.654530644416809, "learning_rate": 5.132585864320182e-05, "loss": 0.0026064876466989515, "step": 171550 }, { "epoch": 48.69713312517741, "grad_norm": 0.2503092885017395, "learning_rate": 5.1323020153278456e-05, "loss": 0.0009323280304670334, "step": 171560 }, { "epoch": 48.699971615100765, "grad_norm": 0.09303180873394012, "learning_rate": 5.132018166335509e-05, "loss": 0.0003151062875986099, "step": 171570 }, { "epoch": 48.70281010502413, "grad_norm": 1.4312406778335571, "learning_rate": 5.131734317343174e-05, "loss": 0.0016324160620570182, "step": 171580 }, { "epoch": 48.70564859494749, "grad_norm": 0.3516513705253601, "learning_rate": 5.131450468350838e-05, "loss": 0.00466107577085495, "step": 171590 }, { "epoch": 48.708487084870846, "grad_norm": 0.32015863060951233, "learning_rate": 5.1311666193585015e-05, "loss": 0.00390034094452858, "step": 171600 }, { "epoch": 48.71132557479421, "grad_norm": 3.952726125717163, "learning_rate": 5.1308827703661657e-05, "loss": 0.004910657927393913, "step": 171610 }, { "epoch": 48.71416406471757, "grad_norm": 0.30980437994003296, "learning_rate": 5.130598921373829e-05, "loss": 0.001830965094268322, "step": 171620 }, { "epoch": 48.71700255464093, "grad_norm": 0.47968441247940063, "learning_rate": 5.130315072381493e-05, "loss": 0.0014920564368367194, "step": 171630 }, { "epoch": 48.71984104456429, "grad_norm": 0.056100063025951385, "learning_rate": 5.130031223389157e-05, "loss": 0.0008868316188454628, "step": 171640 }, { "epoch": 48.72267953448765, "grad_norm": 0.9908788800239563, "learning_rate": 5.1297473743968215e-05, "loss": 0.0017683958634734155, "step": 171650 }, { "epoch": 48.725518024411016, "grad_norm": 6.060591220855713, "learning_rate": 5.129463525404486e-05, "loss": 0.0015589671209454536, "step": 171660 }, { "epoch": 48.72835651433437, "grad_norm": 0.11978467553853989, "learning_rate": 5.129179676412149e-05, "loss": 0.0018501009792089463, "step": 171670 }, { "epoch": 48.731195004257735, "grad_norm": 0.09514296054840088, "learning_rate": 5.1288958274198126e-05, "loss": 0.0019240105524659158, "step": 171680 }, { "epoch": 48.7340334941811, "grad_norm": 0.02538694441318512, "learning_rate": 5.128611978427477e-05, "loss": 0.0005737496539950371, "step": 171690 }, { "epoch": 48.736871984104454, "grad_norm": 0.5992187857627869, "learning_rate": 5.12832812943514e-05, "loss": 0.0018726563081145286, "step": 171700 }, { "epoch": 48.73971047402782, "grad_norm": 0.06297511607408524, "learning_rate": 5.128044280442804e-05, "loss": 0.00034196358174085615, "step": 171710 }, { "epoch": 48.74254896395118, "grad_norm": 0.04354177042841911, "learning_rate": 5.127760431450469e-05, "loss": 0.0056247469037771225, "step": 171720 }, { "epoch": 48.745387453874535, "grad_norm": 0.15472543239593506, "learning_rate": 5.1274765824581326e-05, "loss": 0.0014159411191940307, "step": 171730 }, { "epoch": 48.7482259437979, "grad_norm": 0.6958574652671814, "learning_rate": 5.127192733465797e-05, "loss": 0.003483116254210472, "step": 171740 }, { "epoch": 48.75106443372126, "grad_norm": 0.13745221495628357, "learning_rate": 5.12690888447346e-05, "loss": 0.0018445182591676711, "step": 171750 }, { "epoch": 48.753902923644624, "grad_norm": 0.5727301836013794, "learning_rate": 5.126625035481124e-05, "loss": 0.0014442389830946923, "step": 171760 }, { "epoch": 48.75674141356798, "grad_norm": 0.03319557011127472, "learning_rate": 5.126341186488788e-05, "loss": 0.00045747347176074984, "step": 171770 }, { "epoch": 48.75957990349134, "grad_norm": 0.1531682163476944, "learning_rate": 5.1260573374964526e-05, "loss": 0.0010006040334701537, "step": 171780 }, { "epoch": 48.762418393414706, "grad_norm": 4.754243850708008, "learning_rate": 5.125773488504117e-05, "loss": 0.0009909659624099732, "step": 171790 }, { "epoch": 48.76525688333806, "grad_norm": 0.38498616218566895, "learning_rate": 5.12548963951178e-05, "loss": 0.0008324358612298965, "step": 171800 }, { "epoch": 48.768095373261424, "grad_norm": 0.1062794178724289, "learning_rate": 5.125205790519444e-05, "loss": 0.0004904672503471374, "step": 171810 }, { "epoch": 48.77093386318479, "grad_norm": 0.1093926876783371, "learning_rate": 5.124921941527108e-05, "loss": 0.0005360066890716553, "step": 171820 }, { "epoch": 48.77377235310815, "grad_norm": 0.018153851851820946, "learning_rate": 5.124638092534771e-05, "loss": 0.0010227154940366744, "step": 171830 }, { "epoch": 48.776610843031506, "grad_norm": 1.4325839281082153, "learning_rate": 5.1243542435424354e-05, "loss": 0.000879974290728569, "step": 171840 }, { "epoch": 48.77944933295487, "grad_norm": 0.0067153675481677055, "learning_rate": 5.1240703945501e-05, "loss": 0.0006906948983669281, "step": 171850 }, { "epoch": 48.78228782287823, "grad_norm": 0.11794984340667725, "learning_rate": 5.123786545557764e-05, "loss": 0.0006513083353638649, "step": 171860 }, { "epoch": 48.78512631280159, "grad_norm": 0.03766554221510887, "learning_rate": 5.123502696565428e-05, "loss": 0.0017198644578456878, "step": 171870 }, { "epoch": 48.78796480272495, "grad_norm": 0.36724409461021423, "learning_rate": 5.123218847573091e-05, "loss": 0.001821814477443695, "step": 171880 }, { "epoch": 48.79080329264831, "grad_norm": 1.3753690719604492, "learning_rate": 5.1229349985807554e-05, "loss": 0.0020224038511514665, "step": 171890 }, { "epoch": 48.79364178257167, "grad_norm": 9.933741569519043, "learning_rate": 5.122651149588419e-05, "loss": 0.011322507262229919, "step": 171900 }, { "epoch": 48.79648027249503, "grad_norm": 2.224764585494995, "learning_rate": 5.122367300596082e-05, "loss": 0.006666841357946396, "step": 171910 }, { "epoch": 48.799318762418395, "grad_norm": 0.19863854348659515, "learning_rate": 5.122083451603747e-05, "loss": 0.0015585789456963538, "step": 171920 }, { "epoch": 48.80215725234176, "grad_norm": 0.2558833062648773, "learning_rate": 5.121799602611411e-05, "loss": 0.004019550979137421, "step": 171930 }, { "epoch": 48.80499574226511, "grad_norm": 0.14411549270153046, "learning_rate": 5.121515753619075e-05, "loss": 0.0002683686092495918, "step": 171940 }, { "epoch": 48.807834232188476, "grad_norm": 0.17288127541542053, "learning_rate": 5.121231904626739e-05, "loss": 0.0005569819360971451, "step": 171950 }, { "epoch": 48.81067272211184, "grad_norm": 0.01469586230814457, "learning_rate": 5.120948055634402e-05, "loss": 0.0021493881940841675, "step": 171960 }, { "epoch": 48.813511212035195, "grad_norm": 0.04532536491751671, "learning_rate": 5.1206642066420665e-05, "loss": 0.0008125390857458114, "step": 171970 }, { "epoch": 48.81634970195856, "grad_norm": 0.05526276305317879, "learning_rate": 5.120380357649731e-05, "loss": 0.00020254477858543395, "step": 171980 }, { "epoch": 48.81918819188192, "grad_norm": 0.4725193679332733, "learning_rate": 5.120096508657395e-05, "loss": 0.0010133270174264909, "step": 171990 }, { "epoch": 48.822026681805276, "grad_norm": 5.523651123046875, "learning_rate": 5.119812659665059e-05, "loss": 0.005498120188713073, "step": 172000 }, { "epoch": 48.822026681805276, "eval_accuracy": 0.9779360335728365, "eval_loss": 0.07996834814548492, "eval_runtime": 32.5093, "eval_samples_per_second": 483.769, "eval_steps_per_second": 7.567, "step": 172000 }, { "epoch": 48.82486517172864, "grad_norm": 0.18724176287651062, "learning_rate": 5.1195288106727223e-05, "loss": 0.0008900420740246773, "step": 172010 }, { "epoch": 48.827703661652, "grad_norm": 2.2936644554138184, "learning_rate": 5.119244961680386e-05, "loss": 0.005339031666517257, "step": 172020 }, { "epoch": 48.830542151575365, "grad_norm": 0.3006945848464966, "learning_rate": 5.11896111268805e-05, "loss": 0.0007886774837970734, "step": 172030 }, { "epoch": 48.83338064149872, "grad_norm": 0.05989944934844971, "learning_rate": 5.1186772636957134e-05, "loss": 0.0011230131611227989, "step": 172040 }, { "epoch": 48.836219131422084, "grad_norm": 0.022534262388944626, "learning_rate": 5.118393414703378e-05, "loss": 0.0012328688055276871, "step": 172050 }, { "epoch": 48.83905762134545, "grad_norm": 10.427884101867676, "learning_rate": 5.1181095657110424e-05, "loss": 0.010115724802017213, "step": 172060 }, { "epoch": 48.8418961112688, "grad_norm": 1.2002466917037964, "learning_rate": 5.117825716718706e-05, "loss": 0.0031574338674545286, "step": 172070 }, { "epoch": 48.844734601192165, "grad_norm": 1.10024893283844, "learning_rate": 5.11754186772637e-05, "loss": 0.002984492480754852, "step": 172080 }, { "epoch": 48.84757309111553, "grad_norm": 0.20414020121097565, "learning_rate": 5.1172580187340334e-05, "loss": 0.0024426614865660666, "step": 172090 }, { "epoch": 48.850411581038884, "grad_norm": 0.3845486044883728, "learning_rate": 5.1169741697416975e-05, "loss": 0.005111327022314071, "step": 172100 }, { "epoch": 48.85325007096225, "grad_norm": 0.08331666886806488, "learning_rate": 5.116690320749361e-05, "loss": 0.0017271287739276885, "step": 172110 }, { "epoch": 48.85608856088561, "grad_norm": 6.246562957763672, "learning_rate": 5.116406471757026e-05, "loss": 0.003239167481660843, "step": 172120 }, { "epoch": 48.85892705080897, "grad_norm": 1.20320725440979, "learning_rate": 5.11612262276469e-05, "loss": 0.0011236019432544707, "step": 172130 }, { "epoch": 48.86176554073233, "grad_norm": 2.4668877124786377, "learning_rate": 5.1158387737723534e-05, "loss": 0.0016177073121070862, "step": 172140 }, { "epoch": 48.86460403065569, "grad_norm": 0.17081069946289062, "learning_rate": 5.115554924780017e-05, "loss": 0.0019013697281479835, "step": 172150 }, { "epoch": 48.867442520579054, "grad_norm": 0.0636497288942337, "learning_rate": 5.115271075787681e-05, "loss": 0.0009447552263736724, "step": 172160 }, { "epoch": 48.87028101050241, "grad_norm": 0.7080081701278687, "learning_rate": 5.1149872267953445e-05, "loss": 0.0006501093506813049, "step": 172170 }, { "epoch": 48.87311950042577, "grad_norm": 1.296241283416748, "learning_rate": 5.114731762702243e-05, "loss": 0.00796903446316719, "step": 172180 }, { "epoch": 48.875957990349136, "grad_norm": 0.4087015986442566, "learning_rate": 5.1144479137099066e-05, "loss": 0.002808142453432083, "step": 172190 }, { "epoch": 48.8787964802725, "grad_norm": 0.10935381799936295, "learning_rate": 5.11416406471757e-05, "loss": 0.0013132559135556221, "step": 172200 }, { "epoch": 48.881634970195854, "grad_norm": 0.10228131711483002, "learning_rate": 5.113880215725234e-05, "loss": 0.0010452998802065849, "step": 172210 }, { "epoch": 48.88447346011922, "grad_norm": 0.011633284389972687, "learning_rate": 5.113596366732898e-05, "loss": 0.0005109105259180069, "step": 172220 }, { "epoch": 48.88731195004258, "grad_norm": 7.366385459899902, "learning_rate": 5.1133125177405625e-05, "loss": 0.0042848832905292514, "step": 172230 }, { "epoch": 48.890150439965936, "grad_norm": 0.05976659432053566, "learning_rate": 5.1130286687482266e-05, "loss": 0.000361105240881443, "step": 172240 }, { "epoch": 48.8929889298893, "grad_norm": 0.1869400292634964, "learning_rate": 5.11274481975589e-05, "loss": 0.005433176457881927, "step": 172250 }, { "epoch": 48.89582741981266, "grad_norm": 0.3396529257297516, "learning_rate": 5.112460970763554e-05, "loss": 0.000697348639369011, "step": 172260 }, { "epoch": 48.89866590973602, "grad_norm": 0.04140743985772133, "learning_rate": 5.112177121771218e-05, "loss": 0.0008406154811382294, "step": 172270 }, { "epoch": 48.90150439965938, "grad_norm": 0.16311980783939362, "learning_rate": 5.111893272778882e-05, "loss": 0.0002374660223722458, "step": 172280 }, { "epoch": 48.90434288958274, "grad_norm": 0.06974630802869797, "learning_rate": 5.111609423786545e-05, "loss": 0.0002673132345080376, "step": 172290 }, { "epoch": 48.907181379506106, "grad_norm": 0.00860648974776268, "learning_rate": 5.11132557479421e-05, "loss": 0.0006512820720672608, "step": 172300 }, { "epoch": 48.91001986942946, "grad_norm": 0.3189452588558197, "learning_rate": 5.111041725801874e-05, "loss": 0.0008086696267127991, "step": 172310 }, { "epoch": 48.912858359352825, "grad_norm": 0.05550479143857956, "learning_rate": 5.110757876809538e-05, "loss": 0.0004661312326788902, "step": 172320 }, { "epoch": 48.91569684927619, "grad_norm": 0.6365115642547607, "learning_rate": 5.110474027817201e-05, "loss": 0.006756998598575592, "step": 172330 }, { "epoch": 48.91853533919954, "grad_norm": 0.013937837444245815, "learning_rate": 5.110190178824865e-05, "loss": 0.005842366814613342, "step": 172340 }, { "epoch": 48.921373829122906, "grad_norm": 0.023381661623716354, "learning_rate": 5.109906329832529e-05, "loss": 0.0057539395987987515, "step": 172350 }, { "epoch": 48.92421231904627, "grad_norm": 5.716965198516846, "learning_rate": 5.1096224808401936e-05, "loss": 0.004662581533193588, "step": 172360 }, { "epoch": 48.927050808969625, "grad_norm": 4.085431098937988, "learning_rate": 5.109367016747091e-05, "loss": 0.012604504823684692, "step": 172370 }, { "epoch": 48.92988929889299, "grad_norm": 0.062256645411252975, "learning_rate": 5.109083167754754e-05, "loss": 0.001525731012225151, "step": 172380 }, { "epoch": 48.93272778881635, "grad_norm": 0.30070123076438904, "learning_rate": 5.1087993187624185e-05, "loss": 0.009692084789276124, "step": 172390 }, { "epoch": 48.935566278739714, "grad_norm": 3.0615694522857666, "learning_rate": 5.108515469770082e-05, "loss": 0.0038044527173042297, "step": 172400 }, { "epoch": 48.93840476866307, "grad_norm": 8.432422637939453, "learning_rate": 5.108231620777747e-05, "loss": 0.007222647219896317, "step": 172410 }, { "epoch": 48.94124325858643, "grad_norm": 5.4584455490112305, "learning_rate": 5.107947771785411e-05, "loss": 0.00397547036409378, "step": 172420 }, { "epoch": 48.944081748509795, "grad_norm": 12.874959945678711, "learning_rate": 5.1076639227930743e-05, "loss": 0.0063656076788902284, "step": 172430 }, { "epoch": 48.94692023843315, "grad_norm": 0.10854694247245789, "learning_rate": 5.1073800738007385e-05, "loss": 0.001983114704489708, "step": 172440 }, { "epoch": 48.949758728356514, "grad_norm": 0.17944559454917908, "learning_rate": 5.107096224808402e-05, "loss": 0.000785338506102562, "step": 172450 }, { "epoch": 48.95259721827988, "grad_norm": 2.413919448852539, "learning_rate": 5.106812375816066e-05, "loss": 0.007317013293504715, "step": 172460 }, { "epoch": 48.95543570820323, "grad_norm": 2.8163747787475586, "learning_rate": 5.1065285268237295e-05, "loss": 0.004812180250883103, "step": 172470 }, { "epoch": 48.958274198126595, "grad_norm": 0.6288138031959534, "learning_rate": 5.1062446778313944e-05, "loss": 0.004755834490060807, "step": 172480 }, { "epoch": 48.96111268804996, "grad_norm": 13.701496124267578, "learning_rate": 5.105960828839058e-05, "loss": 0.009347520023584365, "step": 172490 }, { "epoch": 48.96395117797332, "grad_norm": 0.17137959599494934, "learning_rate": 5.105676979846722e-05, "loss": 0.001433514431118965, "step": 172500 }, { "epoch": 48.96395117797332, "eval_accuracy": 0.9775545240668914, "eval_loss": 0.08699586242437363, "eval_runtime": 32.6146, "eval_samples_per_second": 482.207, "eval_steps_per_second": 7.543, "step": 172500 }, { "epoch": 48.96678966789668, "grad_norm": 0.5708220601081848, "learning_rate": 5.1053931308543854e-05, "loss": 0.004009757936000824, "step": 172510 }, { "epoch": 48.96962815782004, "grad_norm": 0.34977808594703674, "learning_rate": 5.1051092818620496e-05, "loss": 0.0009630687534809112, "step": 172520 }, { "epoch": 48.9724666477434, "grad_norm": 13.491650581359863, "learning_rate": 5.104825432869713e-05, "loss": 0.004478693753480911, "step": 172530 }, { "epoch": 48.97530513766676, "grad_norm": 15.53306770324707, "learning_rate": 5.104541583877378e-05, "loss": 0.008974905312061309, "step": 172540 }, { "epoch": 48.97814362759012, "grad_norm": 0.057033028453588486, "learning_rate": 5.104257734885042e-05, "loss": 0.005124781653285027, "step": 172550 }, { "epoch": 48.980982117513484, "grad_norm": 0.7872393131256104, "learning_rate": 5.1039738858927054e-05, "loss": 0.002710718661546707, "step": 172560 }, { "epoch": 48.98382060743684, "grad_norm": 0.550419807434082, "learning_rate": 5.1036900369003696e-05, "loss": 0.0023342955857515333, "step": 172570 }, { "epoch": 48.9866590973602, "grad_norm": 1.0960137844085693, "learning_rate": 5.103406187908033e-05, "loss": 0.0026195770129561423, "step": 172580 }, { "epoch": 48.989497587283566, "grad_norm": 0.20212888717651367, "learning_rate": 5.1031223389156965e-05, "loss": 0.0021183524280786515, "step": 172590 }, { "epoch": 48.99233607720693, "grad_norm": 1.2823057174682617, "learning_rate": 5.1028384899233606e-05, "loss": 0.0014798562973737718, "step": 172600 }, { "epoch": 48.995174567130285, "grad_norm": 1.5165798664093018, "learning_rate": 5.1025546409310254e-05, "loss": 0.007388165593147278, "step": 172610 }, { "epoch": 48.99801305705365, "grad_norm": 0.2570347189903259, "learning_rate": 5.102270791938689e-05, "loss": 0.002708026394248009, "step": 172620 }, { "epoch": 49.00085154697701, "grad_norm": 0.011321907863020897, "learning_rate": 5.101986942946353e-05, "loss": 0.004464778676629066, "step": 172630 }, { "epoch": 49.003690036900366, "grad_norm": 0.13006596267223358, "learning_rate": 5.1017030939540165e-05, "loss": 0.0012980271130800247, "step": 172640 }, { "epoch": 49.00652852682373, "grad_norm": 0.31325390934944153, "learning_rate": 5.1014192449616806e-05, "loss": 0.00035820621997117996, "step": 172650 }, { "epoch": 49.00936701674709, "grad_norm": 0.05022968351840973, "learning_rate": 5.101135395969344e-05, "loss": 0.0007009321823716163, "step": 172660 }, { "epoch": 49.012205506670455, "grad_norm": 0.5981322526931763, "learning_rate": 5.100851546977008e-05, "loss": 0.0017971133813261986, "step": 172670 }, { "epoch": 49.01504399659381, "grad_norm": 0.010666106827557087, "learning_rate": 5.100567697984673e-05, "loss": 0.00042894445359706876, "step": 172680 }, { "epoch": 49.01788248651717, "grad_norm": 0.2878568470478058, "learning_rate": 5.1002838489923365e-05, "loss": 0.012633153796195984, "step": 172690 }, { "epoch": 49.020720976440536, "grad_norm": 0.02159702032804489, "learning_rate": 5.1000000000000006e-05, "loss": 0.00570906400680542, "step": 172700 }, { "epoch": 49.02355946636389, "grad_norm": 0.2105906456708908, "learning_rate": 5.099716151007664e-05, "loss": 0.0004220360890030861, "step": 172710 }, { "epoch": 49.026397956287255, "grad_norm": 0.13678349554538727, "learning_rate": 5.0994323020153276e-05, "loss": 0.00037243887782096864, "step": 172720 }, { "epoch": 49.02923644621062, "grad_norm": 0.2747247517108917, "learning_rate": 5.099148453022992e-05, "loss": 0.004150266572833061, "step": 172730 }, { "epoch": 49.032074936133974, "grad_norm": 0.10745176672935486, "learning_rate": 5.0988646040306565e-05, "loss": 0.00033308565616607666, "step": 172740 }, { "epoch": 49.03491342605734, "grad_norm": 0.008018656633794308, "learning_rate": 5.09858075503832e-05, "loss": 0.0005752561613917351, "step": 172750 }, { "epoch": 49.0377519159807, "grad_norm": 0.013525616377592087, "learning_rate": 5.098296906045984e-05, "loss": 0.0004135463386774063, "step": 172760 }, { "epoch": 49.04059040590406, "grad_norm": 0.16271071135997772, "learning_rate": 5.0980130570536476e-05, "loss": 0.0003116823732852936, "step": 172770 }, { "epoch": 49.04342889582742, "grad_norm": 0.0540117546916008, "learning_rate": 5.097729208061312e-05, "loss": 0.0006945980712771416, "step": 172780 }, { "epoch": 49.04626738575078, "grad_norm": 0.1240612342953682, "learning_rate": 5.097445359068975e-05, "loss": 0.0005830543115735054, "step": 172790 }, { "epoch": 49.049105875674144, "grad_norm": 1.4307292699813843, "learning_rate": 5.097161510076639e-05, "loss": 0.0013180430978536606, "step": 172800 }, { "epoch": 49.0519443655975, "grad_norm": 0.1365261673927307, "learning_rate": 5.096877661084304e-05, "loss": 0.0004808057099580765, "step": 172810 }, { "epoch": 49.05478285552086, "grad_norm": 0.0333840548992157, "learning_rate": 5.0965938120919676e-05, "loss": 0.00033517032861709597, "step": 172820 }, { "epoch": 49.057621345444225, "grad_norm": 0.06113322824239731, "learning_rate": 5.096309963099632e-05, "loss": 0.00031305812299251554, "step": 172830 }, { "epoch": 49.06045983536758, "grad_norm": 0.09961499273777008, "learning_rate": 5.096026114107295e-05, "loss": 0.00017212294042110444, "step": 172840 }, { "epoch": 49.063298325290944, "grad_norm": 5.2695722579956055, "learning_rate": 5.0957422651149586e-05, "loss": 0.0011304600164294242, "step": 172850 }, { "epoch": 49.06613681521431, "grad_norm": 0.2888909578323364, "learning_rate": 5.095458416122623e-05, "loss": 0.0003456804901361465, "step": 172860 }, { "epoch": 49.06897530513767, "grad_norm": 0.05498090758919716, "learning_rate": 5.095174567130286e-05, "loss": 0.0005265653133392334, "step": 172870 }, { "epoch": 49.071813795061026, "grad_norm": 0.0338624007999897, "learning_rate": 5.094890718137951e-05, "loss": 0.0007517552003264427, "step": 172880 }, { "epoch": 49.07465228498439, "grad_norm": 1.2004364728927612, "learning_rate": 5.094606869145615e-05, "loss": 0.0006098782643675805, "step": 172890 }, { "epoch": 49.07749077490775, "grad_norm": 0.7085698843002319, "learning_rate": 5.0943230201532786e-05, "loss": 0.0010296367108821869, "step": 172900 }, { "epoch": 49.08032926483111, "grad_norm": 0.20880892872810364, "learning_rate": 5.094039171160943e-05, "loss": 0.0004785487428307533, "step": 172910 }, { "epoch": 49.08316775475447, "grad_norm": 0.6015651226043701, "learning_rate": 5.093755322168606e-05, "loss": 0.0006080139428377151, "step": 172920 }, { "epoch": 49.08600624467783, "grad_norm": 0.10781806707382202, "learning_rate": 5.0934714731762704e-05, "loss": 0.0022604286670684813, "step": 172930 }, { "epoch": 49.08884473460119, "grad_norm": 15.786813735961914, "learning_rate": 5.093187624183935e-05, "loss": 0.005497681349515915, "step": 172940 }, { "epoch": 49.09168322452455, "grad_norm": 0.014808000065386295, "learning_rate": 5.0929037751915987e-05, "loss": 0.0007314698770642281, "step": 172950 }, { "epoch": 49.094521714447914, "grad_norm": 15.514394760131836, "learning_rate": 5.092619926199262e-05, "loss": 0.013802260160446167, "step": 172960 }, { "epoch": 49.09736020437128, "grad_norm": 0.09080936014652252, "learning_rate": 5.092336077206926e-05, "loss": 0.005931773781776428, "step": 172970 }, { "epoch": 49.10019869429463, "grad_norm": 0.35394608974456787, "learning_rate": 5.09205222821459e-05, "loss": 0.0037084750831127165, "step": 172980 }, { "epoch": 49.103037184217996, "grad_norm": 0.18272143602371216, "learning_rate": 5.091768379222254e-05, "loss": 0.005183426290750503, "step": 172990 }, { "epoch": 49.10587567414136, "grad_norm": 0.030214393511414528, "learning_rate": 5.091484530229917e-05, "loss": 0.0004182647913694382, "step": 173000 }, { "epoch": 49.10587567414136, "eval_accuracy": 0.977045844725631, "eval_loss": 0.08575211465358734, "eval_runtime": 32.4098, "eval_samples_per_second": 485.255, "eval_steps_per_second": 7.59, "step": 173000 }, { "epoch": 49.108714164064715, "grad_norm": 0.12613116204738617, "learning_rate": 5.091200681237582e-05, "loss": 0.00155723188072443, "step": 173010 }, { "epoch": 49.11155265398808, "grad_norm": 0.09215208888053894, "learning_rate": 5.090916832245246e-05, "loss": 0.0025695061311125757, "step": 173020 }, { "epoch": 49.11439114391144, "grad_norm": 0.04675302281975746, "learning_rate": 5.09063298325291e-05, "loss": 0.0010211061686277389, "step": 173030 }, { "epoch": 49.1172296338348, "grad_norm": 0.15003721415996552, "learning_rate": 5.090349134260574e-05, "loss": 0.00041568893939256666, "step": 173040 }, { "epoch": 49.12006812375816, "grad_norm": 0.15591321885585785, "learning_rate": 5.090065285268237e-05, "loss": 0.001656535640358925, "step": 173050 }, { "epoch": 49.12290661368152, "grad_norm": 0.18946810066699982, "learning_rate": 5.089781436275901e-05, "loss": 0.0022673474624752997, "step": 173060 }, { "epoch": 49.125745103604885, "grad_norm": 0.43806007504463196, "learning_rate": 5.089497587283565e-05, "loss": 0.0005615003407001496, "step": 173070 }, { "epoch": 49.12858359352824, "grad_norm": 0.03690282627940178, "learning_rate": 5.08921373829123e-05, "loss": 0.0005021881312131882, "step": 173080 }, { "epoch": 49.131422083451604, "grad_norm": 0.389499306678772, "learning_rate": 5.088929889298893e-05, "loss": 0.001154518872499466, "step": 173090 }, { "epoch": 49.134260573374966, "grad_norm": 0.31621673703193665, "learning_rate": 5.088646040306557e-05, "loss": 0.0006406363099813461, "step": 173100 }, { "epoch": 49.13709906329832, "grad_norm": 0.7544806599617004, "learning_rate": 5.088362191314221e-05, "loss": 0.00033389125019311905, "step": 173110 }, { "epoch": 49.139937553221685, "grad_norm": 0.05639801174402237, "learning_rate": 5.088078342321885e-05, "loss": 0.000821429118514061, "step": 173120 }, { "epoch": 49.14277604314505, "grad_norm": 4.913681507110596, "learning_rate": 5.0877944933295484e-05, "loss": 0.0013337507843971252, "step": 173130 }, { "epoch": 49.14561453306841, "grad_norm": 0.8026243448257446, "learning_rate": 5.087510644337213e-05, "loss": 0.001129472069442272, "step": 173140 }, { "epoch": 49.14845302299177, "grad_norm": 9.207498550415039, "learning_rate": 5.087226795344877e-05, "loss": 0.004255690053105355, "step": 173150 }, { "epoch": 49.15129151291513, "grad_norm": 0.10501831769943237, "learning_rate": 5.086942946352541e-05, "loss": 0.0014639021828770638, "step": 173160 }, { "epoch": 49.15413000283849, "grad_norm": 0.1757003664970398, "learning_rate": 5.086659097360205e-05, "loss": 0.0011401575058698655, "step": 173170 }, { "epoch": 49.15696849276185, "grad_norm": 4.435089111328125, "learning_rate": 5.0863752483678684e-05, "loss": 0.002386649325489998, "step": 173180 }, { "epoch": 49.15980698268521, "grad_norm": 0.05456741899251938, "learning_rate": 5.086091399375532e-05, "loss": 0.00017779506742954254, "step": 173190 }, { "epoch": 49.162645472608574, "grad_norm": 0.2861558198928833, "learning_rate": 5.085807550383196e-05, "loss": 0.0006563998758792877, "step": 173200 }, { "epoch": 49.16548396253193, "grad_norm": 0.005178770050406456, "learning_rate": 5.085523701390861e-05, "loss": 0.00020892135798931122, "step": 173210 }, { "epoch": 49.16832245245529, "grad_norm": 1.080743432044983, "learning_rate": 5.085239852398524e-05, "loss": 0.000543312169611454, "step": 173220 }, { "epoch": 49.171160942378656, "grad_norm": 0.23394814133644104, "learning_rate": 5.0849560034061884e-05, "loss": 0.000432051345705986, "step": 173230 }, { "epoch": 49.17399943230202, "grad_norm": 0.1966971457004547, "learning_rate": 5.084672154413852e-05, "loss": 0.0004761883988976479, "step": 173240 }, { "epoch": 49.176837922225374, "grad_norm": 0.0465129129588604, "learning_rate": 5.084388305421516e-05, "loss": 0.0002175329253077507, "step": 173250 }, { "epoch": 49.17967641214874, "grad_norm": 0.0520174466073513, "learning_rate": 5.0841044564291795e-05, "loss": 0.0003683961927890778, "step": 173260 }, { "epoch": 49.1825149020721, "grad_norm": 0.02840975485742092, "learning_rate": 5.0838206074368436e-05, "loss": 0.0002465972676873207, "step": 173270 }, { "epoch": 49.185353391995456, "grad_norm": 0.1266687959432602, "learning_rate": 5.0835367584445084e-05, "loss": 0.0020509181544184686, "step": 173280 }, { "epoch": 49.18819188191882, "grad_norm": 0.015617385506629944, "learning_rate": 5.083252909452172e-05, "loss": 0.00039960816502571106, "step": 173290 }, { "epoch": 49.19103037184218, "grad_norm": 0.32313403487205505, "learning_rate": 5.082969060459836e-05, "loss": 0.0010525776073336602, "step": 173300 }, { "epoch": 49.19386886176554, "grad_norm": 0.01434552762657404, "learning_rate": 5.0826852114674995e-05, "loss": 0.0011583685874938965, "step": 173310 }, { "epoch": 49.1967073516889, "grad_norm": 0.011542484164237976, "learning_rate": 5.082401362475163e-05, "loss": 0.008152759075164795, "step": 173320 }, { "epoch": 49.19954584161226, "grad_norm": 0.02312752977013588, "learning_rate": 5.082117513482827e-05, "loss": 0.0035766873508691786, "step": 173330 }, { "epoch": 49.202384331535626, "grad_norm": 0.7306753396987915, "learning_rate": 5.081833664490492e-05, "loss": 0.0008628932759165764, "step": 173340 }, { "epoch": 49.20522282145898, "grad_norm": 0.08036074787378311, "learning_rate": 5.0815498154981553e-05, "loss": 0.001309681124985218, "step": 173350 }, { "epoch": 49.208061311382345, "grad_norm": 0.19072997570037842, "learning_rate": 5.0812659665058195e-05, "loss": 0.006792020797729492, "step": 173360 }, { "epoch": 49.21089980130571, "grad_norm": 2.7617101669311523, "learning_rate": 5.080982117513483e-05, "loss": 0.006394731998443604, "step": 173370 }, { "epoch": 49.21373829122906, "grad_norm": 2.834892511367798, "learning_rate": 5.080698268521147e-05, "loss": 0.002821844257414341, "step": 173380 }, { "epoch": 49.216576781152426, "grad_norm": 0.024129508063197136, "learning_rate": 5.0804144195288105e-05, "loss": 0.0021660128608345986, "step": 173390 }, { "epoch": 49.21941527107579, "grad_norm": 0.09458238631486893, "learning_rate": 5.080130570536475e-05, "loss": 0.0017084361985325812, "step": 173400 }, { "epoch": 49.22225376099915, "grad_norm": 0.024649200960993767, "learning_rate": 5.0798467215441395e-05, "loss": 0.005533041059970855, "step": 173410 }, { "epoch": 49.22509225092251, "grad_norm": 0.004047004040330648, "learning_rate": 5.079562872551803e-05, "loss": 0.003920510411262512, "step": 173420 }, { "epoch": 49.22793074084587, "grad_norm": 0.267958402633667, "learning_rate": 5.0792790235594664e-05, "loss": 0.01486324667930603, "step": 173430 }, { "epoch": 49.23076923076923, "grad_norm": 0.36376041173934937, "learning_rate": 5.0789951745671305e-05, "loss": 0.0010496888309717179, "step": 173440 }, { "epoch": 49.23360772069259, "grad_norm": 0.5658524632453918, "learning_rate": 5.078711325574794e-05, "loss": 0.0006689749658107758, "step": 173450 }, { "epoch": 49.23644621061595, "grad_norm": 0.1821237951517105, "learning_rate": 5.078427476582458e-05, "loss": 0.003654921054840088, "step": 173460 }, { "epoch": 49.239284700539315, "grad_norm": 0.05096187815070152, "learning_rate": 5.0781436275901216e-05, "loss": 0.0006558354943990707, "step": 173470 }, { "epoch": 49.24212319046267, "grad_norm": 5.636626720428467, "learning_rate": 5.0778597785977864e-05, "loss": 0.0021102624014019966, "step": 173480 }, { "epoch": 49.244961680386034, "grad_norm": 0.12602224946022034, "learning_rate": 5.0775759296054506e-05, "loss": 0.0022053269669413565, "step": 173490 }, { "epoch": 49.2478001703094, "grad_norm": 6.852323532104492, "learning_rate": 5.077292080613114e-05, "loss": 0.002283021621406078, "step": 173500 }, { "epoch": 49.2478001703094, "eval_accuracy": 0.9737394290074395, "eval_loss": 0.09817315638065338, "eval_runtime": 32.7124, "eval_samples_per_second": 480.765, "eval_steps_per_second": 7.52, "step": 173500 }, { "epoch": 49.25063866023276, "grad_norm": 6.805729389190674, "learning_rate": 5.077008231620778e-05, "loss": 0.0036963626742362978, "step": 173510 }, { "epoch": 49.253477150156115, "grad_norm": 2.2525441646575928, "learning_rate": 5.0767243826284416e-05, "loss": 0.0015326056629419327, "step": 173520 }, { "epoch": 49.25631564007948, "grad_norm": 0.33189085125923157, "learning_rate": 5.076440533636105e-05, "loss": 0.001049601472914219, "step": 173530 }, { "epoch": 49.25915413000284, "grad_norm": 0.1237260103225708, "learning_rate": 5.0761566846437706e-05, "loss": 0.0008194318041205406, "step": 173540 }, { "epoch": 49.2619926199262, "grad_norm": 0.08516093343496323, "learning_rate": 5.075872835651434e-05, "loss": 0.0005874171853065491, "step": 173550 }, { "epoch": 49.26483110984956, "grad_norm": 0.05770125240087509, "learning_rate": 5.0755889866590975e-05, "loss": 0.0006643613800406456, "step": 173560 }, { "epoch": 49.26766959977292, "grad_norm": 0.027042362838983536, "learning_rate": 5.0753051376667616e-05, "loss": 0.0003014327958226204, "step": 173570 }, { "epoch": 49.27050808969628, "grad_norm": 0.05333235114812851, "learning_rate": 5.075021288674425e-05, "loss": 0.000378747284412384, "step": 173580 }, { "epoch": 49.27334657961964, "grad_norm": 0.06277239322662354, "learning_rate": 5.074737439682089e-05, "loss": 0.000751454010605812, "step": 173590 }, { "epoch": 49.276185069543004, "grad_norm": 0.013695844449102879, "learning_rate": 5.074453590689753e-05, "loss": 0.0005365692079067231, "step": 173600 }, { "epoch": 49.27902355946637, "grad_norm": 0.8405146598815918, "learning_rate": 5.0741697416974175e-05, "loss": 0.00044798608869314196, "step": 173610 }, { "epoch": 49.28186204938972, "grad_norm": 0.028053848072886467, "learning_rate": 5.0738858927050816e-05, "loss": 0.0004679374396800995, "step": 173620 }, { "epoch": 49.284700539313086, "grad_norm": 0.06331754475831985, "learning_rate": 5.073602043712745e-05, "loss": 0.005630004033446312, "step": 173630 }, { "epoch": 49.28753902923645, "grad_norm": 1.0597145557403564, "learning_rate": 5.073318194720409e-05, "loss": 0.006686075031757355, "step": 173640 }, { "epoch": 49.290377519159804, "grad_norm": 0.01150739286094904, "learning_rate": 5.073034345728073e-05, "loss": 0.0003741389140486717, "step": 173650 }, { "epoch": 49.29321600908317, "grad_norm": 0.007601823192089796, "learning_rate": 5.072750496735736e-05, "loss": 0.001813308335840702, "step": 173660 }, { "epoch": 49.29605449900653, "grad_norm": 0.12178615480661392, "learning_rate": 5.072466647743401e-05, "loss": 0.0036561667919158937, "step": 173670 }, { "epoch": 49.298892988929886, "grad_norm": 0.09903335571289062, "learning_rate": 5.072182798751065e-05, "loss": 0.006555713713169098, "step": 173680 }, { "epoch": 49.30173147885325, "grad_norm": 0.07839127629995346, "learning_rate": 5.0718989497587286e-05, "loss": 0.0010848145931959153, "step": 173690 }, { "epoch": 49.30456996877661, "grad_norm": 0.07298515737056732, "learning_rate": 5.071615100766393e-05, "loss": 0.0077747412025928496, "step": 173700 }, { "epoch": 49.307408458699975, "grad_norm": 0.03916887566447258, "learning_rate": 5.071331251774056e-05, "loss": 0.0070011667907238, "step": 173710 }, { "epoch": 49.31024694862333, "grad_norm": 0.07214349508285522, "learning_rate": 5.07104740278172e-05, "loss": 0.0012357614934444428, "step": 173720 }, { "epoch": 49.31308543854669, "grad_norm": 0.7011746168136597, "learning_rate": 5.070763553789384e-05, "loss": 0.003491102159023285, "step": 173730 }, { "epoch": 49.315923928470056, "grad_norm": 0.09081356227397919, "learning_rate": 5.0704797047970486e-05, "loss": 0.007144607603549957, "step": 173740 }, { "epoch": 49.31876241839341, "grad_norm": 0.021544350311160088, "learning_rate": 5.070195855804713e-05, "loss": 0.005827154964208603, "step": 173750 }, { "epoch": 49.321600908316775, "grad_norm": 2.2530667781829834, "learning_rate": 5.069912006812376e-05, "loss": 0.007123856246471405, "step": 173760 }, { "epoch": 49.32443939824014, "grad_norm": 0.8668180704116821, "learning_rate": 5.06962815782004e-05, "loss": 0.005804193764925003, "step": 173770 }, { "epoch": 49.32727788816349, "grad_norm": 17.37893295288086, "learning_rate": 5.069344308827704e-05, "loss": 0.013988442718982697, "step": 173780 }, { "epoch": 49.330116378086856, "grad_norm": 0.2568034827709198, "learning_rate": 5.069060459835367e-05, "loss": 0.002850036509335041, "step": 173790 }, { "epoch": 49.33295486801022, "grad_norm": 0.3624034523963928, "learning_rate": 5.0687766108430314e-05, "loss": 0.0020668381825089456, "step": 173800 }, { "epoch": 49.33579335793358, "grad_norm": 0.11971236765384674, "learning_rate": 5.068492761850696e-05, "loss": 0.0018747434020042419, "step": 173810 }, { "epoch": 49.33863184785694, "grad_norm": 0.023391762748360634, "learning_rate": 5.0682089128583596e-05, "loss": 0.0005408026278018952, "step": 173820 }, { "epoch": 49.3414703377803, "grad_norm": 0.9461982250213623, "learning_rate": 5.067925063866024e-05, "loss": 0.001001550815999508, "step": 173830 }, { "epoch": 49.344308827703664, "grad_norm": 0.059548716992139816, "learning_rate": 5.067641214873687e-05, "loss": 0.004326153546571732, "step": 173840 }, { "epoch": 49.34714731762702, "grad_norm": 0.13651373982429504, "learning_rate": 5.0673573658813514e-05, "loss": 0.005768497288227081, "step": 173850 }, { "epoch": 49.34998580755038, "grad_norm": 0.08145644515752792, "learning_rate": 5.067073516889015e-05, "loss": 0.001885322853922844, "step": 173860 }, { "epoch": 49.352824297473745, "grad_norm": 0.14749662578105927, "learning_rate": 5.0667896678966796e-05, "loss": 0.0013126792386174203, "step": 173870 }, { "epoch": 49.35566278739711, "grad_norm": 0.47245869040489197, "learning_rate": 5.066505818904344e-05, "loss": 0.022169597446918488, "step": 173880 }, { "epoch": 49.358501277320464, "grad_norm": 0.13764461874961853, "learning_rate": 5.066221969912007e-05, "loss": 0.0028109893202781676, "step": 173890 }, { "epoch": 49.36133976724383, "grad_norm": 0.08403323590755463, "learning_rate": 5.065938120919671e-05, "loss": 0.007163897156715393, "step": 173900 }, { "epoch": 49.36417825716719, "grad_norm": 0.2353680282831192, "learning_rate": 5.065654271927335e-05, "loss": 0.002231322415173054, "step": 173910 }, { "epoch": 49.367016747090545, "grad_norm": 0.08180417865514755, "learning_rate": 5.065370422934998e-05, "loss": 0.008885100483894348, "step": 173920 }, { "epoch": 49.36985523701391, "grad_norm": 0.4971647560596466, "learning_rate": 5.0650865739426624e-05, "loss": 0.0025781694799661635, "step": 173930 }, { "epoch": 49.37269372693727, "grad_norm": 3.8541276454925537, "learning_rate": 5.064802724950327e-05, "loss": 0.000815461203455925, "step": 173940 }, { "epoch": 49.37553221686063, "grad_norm": 0.16474762558937073, "learning_rate": 5.064518875957991e-05, "loss": 0.0011359967291355133, "step": 173950 }, { "epoch": 49.37837070678399, "grad_norm": 0.15303142368793488, "learning_rate": 5.064235026965655e-05, "loss": 0.0018501266837120055, "step": 173960 }, { "epoch": 49.38120919670735, "grad_norm": 0.06253893673419952, "learning_rate": 5.063951177973318e-05, "loss": 0.006607683748006821, "step": 173970 }, { "epoch": 49.384047686630716, "grad_norm": 4.195091247558594, "learning_rate": 5.0636673289809825e-05, "loss": 0.004405527934432029, "step": 173980 }, { "epoch": 49.38688617655407, "grad_norm": 0.2868594527244568, "learning_rate": 5.063383479988646e-05, "loss": 0.0035739287734031676, "step": 173990 }, { "epoch": 49.389724666477434, "grad_norm": 0.09643464535474777, "learning_rate": 5.0630996309963094e-05, "loss": 0.009132219851016999, "step": 174000 }, { "epoch": 49.389724666477434, "eval_accuracy": 0.9771094296432886, "eval_loss": 0.08226440101861954, "eval_runtime": 32.2818, "eval_samples_per_second": 487.178, "eval_steps_per_second": 7.62, "step": 174000 }, { "epoch": 49.3925631564008, "grad_norm": 1.6048071384429932, "learning_rate": 5.062815782003975e-05, "loss": 0.003418990224599838, "step": 174010 }, { "epoch": 49.39540164632415, "grad_norm": 0.023365017026662827, "learning_rate": 5.062531933011638e-05, "loss": 0.00246965941041708, "step": 174020 }, { "epoch": 49.398240136247516, "grad_norm": 0.2861991226673126, "learning_rate": 5.062248084019302e-05, "loss": 0.0014650985598564149, "step": 174030 }, { "epoch": 49.40107862617088, "grad_norm": 0.48803043365478516, "learning_rate": 5.061964235026966e-05, "loss": 0.002677516266703606, "step": 174040 }, { "epoch": 49.403917116094235, "grad_norm": 0.050629086792469025, "learning_rate": 5.0616803860346294e-05, "loss": 0.0010527187958359718, "step": 174050 }, { "epoch": 49.4067556060176, "grad_norm": 0.12454574555158615, "learning_rate": 5.0613965370422935e-05, "loss": 0.011010089516639709, "step": 174060 }, { "epoch": 49.40959409594096, "grad_norm": 0.01813160814344883, "learning_rate": 5.061112688049958e-05, "loss": 0.0060628712177276615, "step": 174070 }, { "epoch": 49.41243258586432, "grad_norm": 0.13506701588630676, "learning_rate": 5.060828839057622e-05, "loss": 0.011777782440185547, "step": 174080 }, { "epoch": 49.41527107578768, "grad_norm": 0.047128163278102875, "learning_rate": 5.060544990065286e-05, "loss": 0.025590035319328307, "step": 174090 }, { "epoch": 49.41810956571104, "grad_norm": 0.43338021636009216, "learning_rate": 5.0602611410729494e-05, "loss": 0.008002544939517974, "step": 174100 }, { "epoch": 49.420948055634405, "grad_norm": 0.05247509106993675, "learning_rate": 5.0599772920806135e-05, "loss": 0.00029188450425863264, "step": 174110 }, { "epoch": 49.42378654555776, "grad_norm": 0.10351300984621048, "learning_rate": 5.059693443088277e-05, "loss": 0.0015583477914333343, "step": 174120 }, { "epoch": 49.42662503548112, "grad_norm": 0.04541488736867905, "learning_rate": 5.0594095940959404e-05, "loss": 0.0020386604592204094, "step": 174130 }, { "epoch": 49.429463525404486, "grad_norm": 0.04769646003842354, "learning_rate": 5.059125745103605e-05, "loss": 0.004193255305290222, "step": 174140 }, { "epoch": 49.43230201532784, "grad_norm": 0.0363120473921299, "learning_rate": 5.0588418961112694e-05, "loss": 0.000612378679215908, "step": 174150 }, { "epoch": 49.435140505251205, "grad_norm": 1.1993077993392944, "learning_rate": 5.058558047118933e-05, "loss": 0.0023366983979940413, "step": 174160 }, { "epoch": 49.43797899517457, "grad_norm": 0.21437665820121765, "learning_rate": 5.058274198126597e-05, "loss": 0.0014089716598391533, "step": 174170 }, { "epoch": 49.44081748509793, "grad_norm": 0.34311437606811523, "learning_rate": 5.0579903491342605e-05, "loss": 0.0007108062505722046, "step": 174180 }, { "epoch": 49.44365597502129, "grad_norm": 0.3064103424549103, "learning_rate": 5.0577065001419246e-05, "loss": 0.0007103627547621727, "step": 174190 }, { "epoch": 49.44649446494465, "grad_norm": 0.060519468039274216, "learning_rate": 5.057422651149588e-05, "loss": 0.0029462510719895363, "step": 174200 }, { "epoch": 49.44933295486801, "grad_norm": 0.08160970360040665, "learning_rate": 5.057138802157253e-05, "loss": 0.002896803803741932, "step": 174210 }, { "epoch": 49.45217144479137, "grad_norm": 0.2061879187822342, "learning_rate": 5.056854953164917e-05, "loss": 0.000956176221370697, "step": 174220 }, { "epoch": 49.45500993471473, "grad_norm": 0.027024801820516586, "learning_rate": 5.0565711041725805e-05, "loss": 0.0005043752491474152, "step": 174230 }, { "epoch": 49.457848424638094, "grad_norm": 0.19731086492538452, "learning_rate": 5.056287255180244e-05, "loss": 0.000964079424738884, "step": 174240 }, { "epoch": 49.46068691456146, "grad_norm": 0.4043397009372711, "learning_rate": 5.056003406187908e-05, "loss": 0.000355483777821064, "step": 174250 }, { "epoch": 49.46352540448481, "grad_norm": 0.02666647918522358, "learning_rate": 5.0557195571955715e-05, "loss": 0.004449402913451195, "step": 174260 }, { "epoch": 49.466363894408175, "grad_norm": 0.12688389420509338, "learning_rate": 5.0554357082032363e-05, "loss": 0.008807403594255447, "step": 174270 }, { "epoch": 49.46920238433154, "grad_norm": 0.15351034700870514, "learning_rate": 5.0551518592109005e-05, "loss": 0.00037506837397813795, "step": 174280 }, { "epoch": 49.472040874254894, "grad_norm": 0.22444675862789154, "learning_rate": 5.054868010218564e-05, "loss": 0.0022159697487950324, "step": 174290 }, { "epoch": 49.47487936417826, "grad_norm": 0.42663490772247314, "learning_rate": 5.054584161226228e-05, "loss": 0.0010086042806506158, "step": 174300 }, { "epoch": 49.47771785410162, "grad_norm": 0.05571358650922775, "learning_rate": 5.0543003122338915e-05, "loss": 0.001645335741341114, "step": 174310 }, { "epoch": 49.480556344024976, "grad_norm": 0.09672936052083969, "learning_rate": 5.054016463241556e-05, "loss": 0.0010848598554730414, "step": 174320 }, { "epoch": 49.48339483394834, "grad_norm": 0.03758428245782852, "learning_rate": 5.053732614249219e-05, "loss": 0.0004982728511095047, "step": 174330 }, { "epoch": 49.4862333238717, "grad_norm": 1.3514416217803955, "learning_rate": 5.053448765256884e-05, "loss": 0.0009937770664691925, "step": 174340 }, { "epoch": 49.489071813795064, "grad_norm": 0.10619716346263885, "learning_rate": 5.053164916264548e-05, "loss": 0.004194065183401108, "step": 174350 }, { "epoch": 49.49191030371842, "grad_norm": 1.3667157888412476, "learning_rate": 5.0528810672722115e-05, "loss": 0.00941515490412712, "step": 174360 }, { "epoch": 49.49474879364178, "grad_norm": 0.9064038395881653, "learning_rate": 5.052597218279875e-05, "loss": 0.01582586467266083, "step": 174370 }, { "epoch": 49.497587283565146, "grad_norm": 0.3536556661128998, "learning_rate": 5.052313369287539e-05, "loss": 0.0056289248168468475, "step": 174380 }, { "epoch": 49.5004257734885, "grad_norm": 0.0658712238073349, "learning_rate": 5.0520295202952026e-05, "loss": 0.0012128373607993125, "step": 174390 }, { "epoch": 49.503264263411864, "grad_norm": 0.14651751518249512, "learning_rate": 5.051745671302867e-05, "loss": 0.0003593388944864273, "step": 174400 }, { "epoch": 49.50610275333523, "grad_norm": 0.04782366380095482, "learning_rate": 5.0514618223105316e-05, "loss": 0.0006188154220581055, "step": 174410 }, { "epoch": 49.50894124325858, "grad_norm": 1.1683348417282104, "learning_rate": 5.051177973318195e-05, "loss": 0.0007347853854298592, "step": 174420 }, { "epoch": 49.511779733181946, "grad_norm": 0.8962810635566711, "learning_rate": 5.050894124325859e-05, "loss": 0.0015039181336760521, "step": 174430 }, { "epoch": 49.51461822310531, "grad_norm": 0.1680213212966919, "learning_rate": 5.0506102753335226e-05, "loss": 0.0026662373915314675, "step": 174440 }, { "epoch": 49.51745671302867, "grad_norm": 12.828865051269531, "learning_rate": 5.050326426341187e-05, "loss": 0.0023170053958892823, "step": 174450 }, { "epoch": 49.52029520295203, "grad_norm": 0.5616059899330139, "learning_rate": 5.05004257734885e-05, "loss": 0.004547327756881714, "step": 174460 }, { "epoch": 49.52313369287539, "grad_norm": 11.061531066894531, "learning_rate": 5.049758728356515e-05, "loss": 0.002197488769888878, "step": 174470 }, { "epoch": 49.52597218279875, "grad_norm": 0.262590616941452, "learning_rate": 5.049474879364179e-05, "loss": 0.0052189499139785765, "step": 174480 }, { "epoch": 49.52881067272211, "grad_norm": 0.6213337182998657, "learning_rate": 5.0491910303718426e-05, "loss": 0.00125887431204319, "step": 174490 }, { "epoch": 49.53164916264547, "grad_norm": 0.016047660261392593, "learning_rate": 5.048907181379506e-05, "loss": 0.0003376699984073639, "step": 174500 }, { "epoch": 49.53164916264547, "eval_accuracy": 0.9762828257137407, "eval_loss": 0.08225994557142258, "eval_runtime": 32.8805, "eval_samples_per_second": 478.308, "eval_steps_per_second": 7.482, "step": 174500 }, { "epoch": 49.534487652568835, "grad_norm": 0.09457311779260635, "learning_rate": 5.04862333238717e-05, "loss": 0.0007161464542150498, "step": 174510 }, { "epoch": 49.53732614249219, "grad_norm": 0.06291300058364868, "learning_rate": 5.048339483394834e-05, "loss": 0.0009557103738188743, "step": 174520 }, { "epoch": 49.540164632415554, "grad_norm": 0.04507777839899063, "learning_rate": 5.048055634402498e-05, "loss": 0.0007106008008122445, "step": 174530 }, { "epoch": 49.54300312233892, "grad_norm": 0.02703217975795269, "learning_rate": 5.0477717854101626e-05, "loss": 0.00061690304428339, "step": 174540 }, { "epoch": 49.54584161226228, "grad_norm": 0.020704973489046097, "learning_rate": 5.047487936417826e-05, "loss": 0.0011949406936764718, "step": 174550 }, { "epoch": 49.548680102185635, "grad_norm": 0.02430592104792595, "learning_rate": 5.04720408742549e-05, "loss": 0.00041502602398395537, "step": 174560 }, { "epoch": 49.551518592109, "grad_norm": 1.1303411722183228, "learning_rate": 5.046920238433154e-05, "loss": 0.0004589153453707695, "step": 174570 }, { "epoch": 49.55435708203236, "grad_norm": 0.08333845436573029, "learning_rate": 5.046636389440818e-05, "loss": 0.002490334399044514, "step": 174580 }, { "epoch": 49.55719557195572, "grad_norm": 2.0511646270751953, "learning_rate": 5.046352540448481e-05, "loss": 0.0012020649388432502, "step": 174590 }, { "epoch": 49.56003406187908, "grad_norm": 0.011292465031147003, "learning_rate": 5.046068691456145e-05, "loss": 0.0007081996649503708, "step": 174600 }, { "epoch": 49.56287255180244, "grad_norm": 0.0370759591460228, "learning_rate": 5.0457848424638096e-05, "loss": 0.0012294072657823563, "step": 174610 }, { "epoch": 49.565711041725805, "grad_norm": 0.1219487264752388, "learning_rate": 5.045500993471474e-05, "loss": 0.0030302319675683973, "step": 174620 }, { "epoch": 49.56854953164916, "grad_norm": 9.172586441040039, "learning_rate": 5.045217144479137e-05, "loss": 0.002801471762359142, "step": 174630 }, { "epoch": 49.571388021572524, "grad_norm": 0.10189906507730484, "learning_rate": 5.044933295486801e-05, "loss": 0.0002078576013445854, "step": 174640 }, { "epoch": 49.57422651149589, "grad_norm": 0.027512622997164726, "learning_rate": 5.044649446494465e-05, "loss": 0.0013089664280414581, "step": 174650 }, { "epoch": 49.57706500141924, "grad_norm": 7.966211318969727, "learning_rate": 5.044365597502129e-05, "loss": 0.0016883127391338348, "step": 174660 }, { "epoch": 49.579903491342606, "grad_norm": 2.1473007202148438, "learning_rate": 5.044081748509794e-05, "loss": 0.0038018207997083664, "step": 174670 }, { "epoch": 49.58274198126597, "grad_norm": 0.08792694658041, "learning_rate": 5.043797899517457e-05, "loss": 0.0035412661731243134, "step": 174680 }, { "epoch": 49.585580471189324, "grad_norm": 2.688619613647461, "learning_rate": 5.043514050525121e-05, "loss": 0.0013900652527809143, "step": 174690 }, { "epoch": 49.58841896111269, "grad_norm": 0.04414433240890503, "learning_rate": 5.043230201532785e-05, "loss": 0.00034538041800260546, "step": 174700 }, { "epoch": 49.59125745103605, "grad_norm": 0.5002400875091553, "learning_rate": 5.042946352540448e-05, "loss": 0.0006683496758341789, "step": 174710 }, { "epoch": 49.59409594095941, "grad_norm": 0.6112593412399292, "learning_rate": 5.0426625035481124e-05, "loss": 0.0008251957595348358, "step": 174720 }, { "epoch": 49.59693443088277, "grad_norm": 0.19376496970653534, "learning_rate": 5.042378654555776e-05, "loss": 0.00026604775339365003, "step": 174730 }, { "epoch": 49.59977292080613, "grad_norm": 0.020368777215480804, "learning_rate": 5.0420948055634406e-05, "loss": 0.0018425839021801948, "step": 174740 }, { "epoch": 49.602611410729494, "grad_norm": 0.010004108771681786, "learning_rate": 5.041810956571105e-05, "loss": 0.0015065003186464309, "step": 174750 }, { "epoch": 49.60544990065285, "grad_norm": 0.03885076567530632, "learning_rate": 5.041527107578768e-05, "loss": 0.0004271429032087326, "step": 174760 }, { "epoch": 49.60828839057621, "grad_norm": 1.101487159729004, "learning_rate": 5.0412432585864324e-05, "loss": 0.0006877217441797256, "step": 174770 }, { "epoch": 49.611126880499576, "grad_norm": 0.7713820934295654, "learning_rate": 5.040959409594096e-05, "loss": 0.0012678943574428558, "step": 174780 }, { "epoch": 49.61396537042293, "grad_norm": 0.027248458936810493, "learning_rate": 5.04067556060176e-05, "loss": 0.0003350406885147095, "step": 174790 }, { "epoch": 49.616803860346295, "grad_norm": 0.03299420326948166, "learning_rate": 5.040391711609425e-05, "loss": 0.0005190249532461167, "step": 174800 }, { "epoch": 49.61964235026966, "grad_norm": 0.22925002872943878, "learning_rate": 5.040107862617088e-05, "loss": 0.0018724218010902406, "step": 174810 }, { "epoch": 49.62248084019302, "grad_norm": 0.030862247571349144, "learning_rate": 5.0398240136247524e-05, "loss": 0.0006745636463165283, "step": 174820 }, { "epoch": 49.625319330116376, "grad_norm": 0.29360970854759216, "learning_rate": 5.039540164632416e-05, "loss": 0.0005116831511259079, "step": 174830 }, { "epoch": 49.62815782003974, "grad_norm": 6.483983993530273, "learning_rate": 5.039256315640079e-05, "loss": 0.0023964446038007734, "step": 174840 }, { "epoch": 49.6309963099631, "grad_norm": 1.4613324403762817, "learning_rate": 5.0389724666477434e-05, "loss": 0.001032392680644989, "step": 174850 }, { "epoch": 49.63383479988646, "grad_norm": 0.03379906341433525, "learning_rate": 5.038688617655407e-05, "loss": 0.0005904011428356171, "step": 174860 }, { "epoch": 49.63667328980982, "grad_norm": 0.05205250531435013, "learning_rate": 5.038404768663072e-05, "loss": 0.002123118005692959, "step": 174870 }, { "epoch": 49.63951177973318, "grad_norm": 0.017428213730454445, "learning_rate": 5.038120919670736e-05, "loss": 0.0006925828754901886, "step": 174880 }, { "epoch": 49.64235026965654, "grad_norm": 0.04248816519975662, "learning_rate": 5.037837070678399e-05, "loss": 0.004625849425792694, "step": 174890 }, { "epoch": 49.6451887595799, "grad_norm": 0.08191556483507156, "learning_rate": 5.0375532216860634e-05, "loss": 0.005106126889586449, "step": 174900 }, { "epoch": 49.648027249503265, "grad_norm": 0.04353724420070648, "learning_rate": 5.037269372693727e-05, "loss": 0.0011774029582738877, "step": 174910 }, { "epoch": 49.65086573942663, "grad_norm": 0.13645096123218536, "learning_rate": 5.036985523701391e-05, "loss": 0.0010683424770832062, "step": 174920 }, { "epoch": 49.653704229349984, "grad_norm": 0.010712743736803532, "learning_rate": 5.0367016747090545e-05, "loss": 0.0018770337104797364, "step": 174930 }, { "epoch": 49.65654271927335, "grad_norm": 2.0343637466430664, "learning_rate": 5.036417825716719e-05, "loss": 0.0024123938754200937, "step": 174940 }, { "epoch": 49.65938120919671, "grad_norm": 4.162909507751465, "learning_rate": 5.0361339767243835e-05, "loss": 0.005811486393213272, "step": 174950 }, { "epoch": 49.662219699120065, "grad_norm": 11.690189361572266, "learning_rate": 5.035850127732047e-05, "loss": 0.012012768536806107, "step": 174960 }, { "epoch": 49.66505818904343, "grad_norm": 6.432911396026611, "learning_rate": 5.0355662787397104e-05, "loss": 0.009525948762893676, "step": 174970 }, { "epoch": 49.66789667896679, "grad_norm": 0.5019909143447876, "learning_rate": 5.0352824297473745e-05, "loss": 0.00040815435349941256, "step": 174980 }, { "epoch": 49.67073516889015, "grad_norm": 0.08197805285453796, "learning_rate": 5.034998580755038e-05, "loss": 0.0010222945362329482, "step": 174990 }, { "epoch": 49.67357365881351, "grad_norm": 1.085543155670166, "learning_rate": 5.034714731762703e-05, "loss": 0.00593641847372055, "step": 175000 }, { "epoch": 49.67357365881351, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.08713710308074951, "eval_runtime": 32.8422, "eval_samples_per_second": 478.865, "eval_steps_per_second": 7.49, "step": 175000 }, { "epoch": 49.67641214873687, "grad_norm": 7.617450714111328, "learning_rate": 5.034430882770367e-05, "loss": 0.0028795527294278147, "step": 175010 }, { "epoch": 49.679250638660235, "grad_norm": 8.637292861938477, "learning_rate": 5.0341470337780304e-05, "loss": 0.005108456313610077, "step": 175020 }, { "epoch": 49.68208912858359, "grad_norm": 0.023896286264061928, "learning_rate": 5.0338631847856945e-05, "loss": 0.0012277422472834587, "step": 175030 }, { "epoch": 49.684927618506954, "grad_norm": 0.08153923600912094, "learning_rate": 5.033579335793358e-05, "loss": 0.008194569498300552, "step": 175040 }, { "epoch": 49.68776610843032, "grad_norm": 0.7620654702186584, "learning_rate": 5.033295486801022e-05, "loss": 0.002302536927163601, "step": 175050 }, { "epoch": 49.69060459835367, "grad_norm": 5.171926498413086, "learning_rate": 5.0330116378086856e-05, "loss": 0.001172569766640663, "step": 175060 }, { "epoch": 49.693443088277036, "grad_norm": 5.460896968841553, "learning_rate": 5.0327277888163504e-05, "loss": 0.0018888220191001892, "step": 175070 }, { "epoch": 49.6962815782004, "grad_norm": 1.1599875688552856, "learning_rate": 5.032443939824014e-05, "loss": 0.002580607309937477, "step": 175080 }, { "epoch": 49.69912006812376, "grad_norm": 0.4760423004627228, "learning_rate": 5.032160090831678e-05, "loss": 0.00266636461019516, "step": 175090 }, { "epoch": 49.70195855804712, "grad_norm": 0.3911263644695282, "learning_rate": 5.0318762418393415e-05, "loss": 0.0008474301546812057, "step": 175100 }, { "epoch": 49.70479704797048, "grad_norm": 1.6237149238586426, "learning_rate": 5.0315923928470056e-05, "loss": 0.0015234140679240226, "step": 175110 }, { "epoch": 49.70763553789384, "grad_norm": 0.2679058611392975, "learning_rate": 5.031308543854669e-05, "loss": 0.0026421252638101577, "step": 175120 }, { "epoch": 49.7104740278172, "grad_norm": 0.05848897248506546, "learning_rate": 5.031024694862333e-05, "loss": 0.006892288476228714, "step": 175130 }, { "epoch": 49.71331251774056, "grad_norm": 0.15998360514640808, "learning_rate": 5.030740845869998e-05, "loss": 0.0013499166816473007, "step": 175140 }, { "epoch": 49.716151007663925, "grad_norm": 0.25701963901519775, "learning_rate": 5.0304569968776615e-05, "loss": 0.004136099666357041, "step": 175150 }, { "epoch": 49.71898949758728, "grad_norm": 0.3531850278377533, "learning_rate": 5.0301731478853256e-05, "loss": 0.001028701476752758, "step": 175160 }, { "epoch": 49.72182798751064, "grad_norm": 0.177789568901062, "learning_rate": 5.029889298892989e-05, "loss": 0.0008823325857520104, "step": 175170 }, { "epoch": 49.724666477434006, "grad_norm": 0.044498004019260406, "learning_rate": 5.0296054499006525e-05, "loss": 0.0008461998775601387, "step": 175180 }, { "epoch": 49.72750496735737, "grad_norm": 0.09205108880996704, "learning_rate": 5.0293216009083167e-05, "loss": 0.0017942890524864196, "step": 175190 }, { "epoch": 49.730343457280725, "grad_norm": 1.0957984924316406, "learning_rate": 5.0290377519159815e-05, "loss": 0.0009455930441617966, "step": 175200 }, { "epoch": 49.73318194720409, "grad_norm": 0.14448638260364532, "learning_rate": 5.028753902923645e-05, "loss": 0.0026460645720362665, "step": 175210 }, { "epoch": 49.73602043712745, "grad_norm": 0.17612239718437195, "learning_rate": 5.028470053931309e-05, "loss": 0.0011737575754523278, "step": 175220 }, { "epoch": 49.738858927050806, "grad_norm": 0.5082954168319702, "learning_rate": 5.0281862049389725e-05, "loss": 0.0008183961734175682, "step": 175230 }, { "epoch": 49.74169741697417, "grad_norm": 0.27348488569259644, "learning_rate": 5.027902355946637e-05, "loss": 0.0007488925009965896, "step": 175240 }, { "epoch": 49.74453590689753, "grad_norm": 1.489981770515442, "learning_rate": 5.0276185069543e-05, "loss": 0.0008515970781445503, "step": 175250 }, { "epoch": 49.74737439682089, "grad_norm": 0.03553125634789467, "learning_rate": 5.027334657961964e-05, "loss": 0.0027698434889316557, "step": 175260 }, { "epoch": 49.75021288674425, "grad_norm": 0.5971530079841614, "learning_rate": 5.027050808969629e-05, "loss": 0.0009780852124094963, "step": 175270 }, { "epoch": 49.753051376667614, "grad_norm": 0.2780238687992096, "learning_rate": 5.0267669599772925e-05, "loss": 0.0014037508517503738, "step": 175280 }, { "epoch": 49.75588986659098, "grad_norm": 0.12754395604133606, "learning_rate": 5.026483110984957e-05, "loss": 0.000493558682501316, "step": 175290 }, { "epoch": 49.75872835651433, "grad_norm": 0.33132442831993103, "learning_rate": 5.02619926199262e-05, "loss": 0.0009608305990695953, "step": 175300 }, { "epoch": 49.761566846437695, "grad_norm": 0.03972368687391281, "learning_rate": 5.0259154130002836e-05, "loss": 0.0016151823103427887, "step": 175310 }, { "epoch": 49.76440533636106, "grad_norm": 0.7699304819107056, "learning_rate": 5.025631564007948e-05, "loss": 0.00039414633065462115, "step": 175320 }, { "epoch": 49.767243826284414, "grad_norm": 0.01738104037940502, "learning_rate": 5.025347715015611e-05, "loss": 0.0006595015525817871, "step": 175330 }, { "epoch": 49.77008231620778, "grad_norm": 2.201671838760376, "learning_rate": 5.025063866023276e-05, "loss": 0.0015249321237206458, "step": 175340 }, { "epoch": 49.77292080613114, "grad_norm": 0.012536357156932354, "learning_rate": 5.02478001703094e-05, "loss": 0.0011964490637183189, "step": 175350 }, { "epoch": 49.7757592960545, "grad_norm": 0.45947346091270447, "learning_rate": 5.0244961680386036e-05, "loss": 0.002071053348481655, "step": 175360 }, { "epoch": 49.77859778597786, "grad_norm": 1.0450935363769531, "learning_rate": 5.024212319046268e-05, "loss": 0.0012564558535814285, "step": 175370 }, { "epoch": 49.78143627590122, "grad_norm": 0.03372954949736595, "learning_rate": 5.023928470053931e-05, "loss": 0.0012946862727403641, "step": 175380 }, { "epoch": 49.784274765824584, "grad_norm": 0.15097546577453613, "learning_rate": 5.0236446210615953e-05, "loss": 0.0017166053876280785, "step": 175390 }, { "epoch": 49.78711325574794, "grad_norm": 0.3945470154285431, "learning_rate": 5.02336077206926e-05, "loss": 0.0020857272669672967, "step": 175400 }, { "epoch": 49.7899517456713, "grad_norm": 0.12289819121360779, "learning_rate": 5.0230769230769236e-05, "loss": 0.0007977455854415894, "step": 175410 }, { "epoch": 49.792790235594666, "grad_norm": 2.9969444274902344, "learning_rate": 5.022793074084588e-05, "loss": 0.003031211718916893, "step": 175420 }, { "epoch": 49.79562872551802, "grad_norm": 0.20918305218219757, "learning_rate": 5.022509225092251e-05, "loss": 0.0013728175312280656, "step": 175430 }, { "epoch": 49.798467215441384, "grad_norm": 0.5579810738563538, "learning_rate": 5.022225376099915e-05, "loss": 0.0017795367166399956, "step": 175440 }, { "epoch": 49.80130570536475, "grad_norm": 0.254611074924469, "learning_rate": 5.021941527107579e-05, "loss": 0.0014431871473789214, "step": 175450 }, { "epoch": 49.80414419528811, "grad_norm": 0.2649161219596863, "learning_rate": 5.021657678115242e-05, "loss": 0.0010729063302278518, "step": 175460 }, { "epoch": 49.806982685211466, "grad_norm": 0.037268947809934616, "learning_rate": 5.021373829122907e-05, "loss": 0.001562812365591526, "step": 175470 }, { "epoch": 49.80982117513483, "grad_norm": 0.8643369674682617, "learning_rate": 5.021089980130571e-05, "loss": 0.0012505130842328071, "step": 175480 }, { "epoch": 49.81265966505819, "grad_norm": 0.04570915177464485, "learning_rate": 5.020806131138235e-05, "loss": 0.0012321338057518006, "step": 175490 }, { "epoch": 49.81549815498155, "grad_norm": 1.856271505355835, "learning_rate": 5.020522282145899e-05, "loss": 0.0013563914224505424, "step": 175500 }, { "epoch": 49.81549815498155, "eval_accuracy": 0.972722070324919, "eval_loss": 0.10761478543281555, "eval_runtime": 32.4377, "eval_samples_per_second": 484.837, "eval_steps_per_second": 7.584, "step": 175500 }, { "epoch": 49.81833664490491, "grad_norm": 0.08523158729076385, "learning_rate": 5.020238433153562e-05, "loss": 0.003979005664587021, "step": 175510 }, { "epoch": 49.82117513482827, "grad_norm": 0.20873010158538818, "learning_rate": 5.0199545841612264e-05, "loss": 0.0012896081432700157, "step": 175520 }, { "epoch": 49.82401362475163, "grad_norm": 10.26535415649414, "learning_rate": 5.01967073516889e-05, "loss": 0.0028474293649196625, "step": 175530 }, { "epoch": 49.82685211467499, "grad_norm": 0.047118719667196274, "learning_rate": 5.019386886176555e-05, "loss": 0.000449572317302227, "step": 175540 }, { "epoch": 49.829690604598355, "grad_norm": 0.12147834151983261, "learning_rate": 5.019103037184218e-05, "loss": 0.0072993718087673186, "step": 175550 }, { "epoch": 49.83252909452172, "grad_norm": 0.22156867384910583, "learning_rate": 5.018819188191882e-05, "loss": 0.0014776095747947692, "step": 175560 }, { "epoch": 49.83536758444507, "grad_norm": 0.18199506402015686, "learning_rate": 5.018535339199546e-05, "loss": 0.0015635907649993897, "step": 175570 }, { "epoch": 49.838206074368436, "grad_norm": 0.0061673372983932495, "learning_rate": 5.01825149020721e-05, "loss": 0.004207996651530266, "step": 175580 }, { "epoch": 49.8410445642918, "grad_norm": 0.3063407242298126, "learning_rate": 5.0179676412148733e-05, "loss": 0.004273391515016556, "step": 175590 }, { "epoch": 49.843883054215155, "grad_norm": 0.18359903991222382, "learning_rate": 5.017683792222538e-05, "loss": 0.000860634632408619, "step": 175600 }, { "epoch": 49.84672154413852, "grad_norm": 0.6394650936126709, "learning_rate": 5.017399943230202e-05, "loss": 0.0022802097722887994, "step": 175610 }, { "epoch": 49.84956003406188, "grad_norm": 0.2010713666677475, "learning_rate": 5.017116094237866e-05, "loss": 0.000846301019191742, "step": 175620 }, { "epoch": 49.85239852398524, "grad_norm": 0.03978755697607994, "learning_rate": 5.01683224524553e-05, "loss": 0.0013067258521914482, "step": 175630 }, { "epoch": 49.8552370139086, "grad_norm": 0.0057344259694218636, "learning_rate": 5.0165483962531934e-05, "loss": 0.0007142610847949982, "step": 175640 }, { "epoch": 49.85807550383196, "grad_norm": 0.9011965394020081, "learning_rate": 5.016264547260857e-05, "loss": 0.0006152540445327759, "step": 175650 }, { "epoch": 49.860913993755325, "grad_norm": 0.10104791074991226, "learning_rate": 5.015980698268521e-05, "loss": 0.0005251636728644371, "step": 175660 }, { "epoch": 49.86375248367868, "grad_norm": 0.023529106751084328, "learning_rate": 5.015696849276186e-05, "loss": 0.002229335904121399, "step": 175670 }, { "epoch": 49.866590973602044, "grad_norm": 0.1872633844614029, "learning_rate": 5.015413000283849e-05, "loss": 0.004377792775630951, "step": 175680 }, { "epoch": 49.86942946352541, "grad_norm": 0.20909199118614197, "learning_rate": 5.0151291512915134e-05, "loss": 0.00166688933968544, "step": 175690 }, { "epoch": 49.87226795344876, "grad_norm": 0.10742336511611938, "learning_rate": 5.014845302299177e-05, "loss": 0.0012144222855567932, "step": 175700 }, { "epoch": 49.875106443372125, "grad_norm": 0.04432913288474083, "learning_rate": 5.014561453306841e-05, "loss": 0.0005228672176599503, "step": 175710 }, { "epoch": 49.87794493329549, "grad_norm": 0.06133781000971794, "learning_rate": 5.0142776043145044e-05, "loss": 0.0024652579799294473, "step": 175720 }, { "epoch": 49.880783423218844, "grad_norm": 0.02541930414736271, "learning_rate": 5.0139937553221686e-05, "loss": 0.0008047346025705337, "step": 175730 }, { "epoch": 49.88362191314221, "grad_norm": 0.10522088408470154, "learning_rate": 5.0137099063298334e-05, "loss": 0.013824224472045898, "step": 175740 }, { "epoch": 49.88646040306557, "grad_norm": 0.04452231526374817, "learning_rate": 5.013426057337497e-05, "loss": 0.002551809512078762, "step": 175750 }, { "epoch": 49.88929889298893, "grad_norm": 0.5581021308898926, "learning_rate": 5.013142208345161e-05, "loss": 0.002123919874429703, "step": 175760 }, { "epoch": 49.89213738291229, "grad_norm": 0.20246925950050354, "learning_rate": 5.0128583593528244e-05, "loss": 0.0008541466668248177, "step": 175770 }, { "epoch": 49.89497587283565, "grad_norm": 1.4875656366348267, "learning_rate": 5.012574510360488e-05, "loss": 0.0004628987982869148, "step": 175780 }, { "epoch": 49.897814362759014, "grad_norm": 0.18628545105457306, "learning_rate": 5.012290661368152e-05, "loss": 0.0006874006241559983, "step": 175790 }, { "epoch": 49.90065285268237, "grad_norm": 0.01880861073732376, "learning_rate": 5.012006812375817e-05, "loss": 0.000714532844722271, "step": 175800 }, { "epoch": 49.90349134260573, "grad_norm": 0.3142130374908447, "learning_rate": 5.01172296338348e-05, "loss": 0.0009714033454656601, "step": 175810 }, { "epoch": 49.906329832529096, "grad_norm": 0.03135831654071808, "learning_rate": 5.0114391143911444e-05, "loss": 0.0008255355060100555, "step": 175820 }, { "epoch": 49.90916832245246, "grad_norm": 0.11629349738359451, "learning_rate": 5.011155265398808e-05, "loss": 0.00041448790580034256, "step": 175830 }, { "epoch": 49.912006812375814, "grad_norm": 0.013747074641287327, "learning_rate": 5.010871416406472e-05, "loss": 0.0013789813965559006, "step": 175840 }, { "epoch": 49.91484530229918, "grad_norm": 0.01844453439116478, "learning_rate": 5.0105875674141355e-05, "loss": 0.0007831752300262451, "step": 175850 }, { "epoch": 49.91768379222254, "grad_norm": 0.05346502736210823, "learning_rate": 5.0103037184217996e-05, "loss": 0.0007181094959378242, "step": 175860 }, { "epoch": 49.920522282145896, "grad_norm": 7.567402362823486, "learning_rate": 5.0100198694294645e-05, "loss": 0.0020791573449969293, "step": 175870 }, { "epoch": 49.92336077206926, "grad_norm": 0.2983570396900177, "learning_rate": 5.009736020437128e-05, "loss": 0.0012483488768339156, "step": 175880 }, { "epoch": 49.92619926199262, "grad_norm": 0.021005239337682724, "learning_rate": 5.009452171444792e-05, "loss": 0.0011861098930239678, "step": 175890 }, { "epoch": 49.92903775191598, "grad_norm": 0.21002040803432465, "learning_rate": 5.0091683224524555e-05, "loss": 0.006847255676984787, "step": 175900 }, { "epoch": 49.93187624183934, "grad_norm": 0.06860771030187607, "learning_rate": 5.008884473460119e-05, "loss": 0.0009809765964746476, "step": 175910 }, { "epoch": 49.9347147317627, "grad_norm": 0.38959622383117676, "learning_rate": 5.008600624467783e-05, "loss": 0.0010251175612211228, "step": 175920 }, { "epoch": 49.937553221686066, "grad_norm": 0.2003687024116516, "learning_rate": 5.0083167754754466e-05, "loss": 0.0016873536631464958, "step": 175930 }, { "epoch": 49.94039171160942, "grad_norm": 0.7171655893325806, "learning_rate": 5.0080329264831114e-05, "loss": 0.005209941789507866, "step": 175940 }, { "epoch": 49.943230201532785, "grad_norm": 0.18133193254470825, "learning_rate": 5.0077490774907755e-05, "loss": 0.0007899625226855278, "step": 175950 }, { "epoch": 49.94606869145615, "grad_norm": 1.4987937211990356, "learning_rate": 5.007465228498439e-05, "loss": 0.0011442881077528, "step": 175960 }, { "epoch": 49.948907181379504, "grad_norm": 0.6878413558006287, "learning_rate": 5.007181379506103e-05, "loss": 0.0023087671026587485, "step": 175970 }, { "epoch": 49.95174567130287, "grad_norm": 0.13091632723808289, "learning_rate": 5.0068975305137666e-05, "loss": 0.006896325200796127, "step": 175980 }, { "epoch": 49.95458416122623, "grad_norm": 2.2272679805755615, "learning_rate": 5.006613681521431e-05, "loss": 0.012304463237524033, "step": 175990 }, { "epoch": 49.957422651149585, "grad_norm": 6.90089225769043, "learning_rate": 5.0063298325290955e-05, "loss": 0.0037517033517360686, "step": 176000 }, { "epoch": 49.957422651149585, "eval_accuracy": 0.9685890506771794, "eval_loss": 0.11283570528030396, "eval_runtime": 32.6927, "eval_samples_per_second": 481.055, "eval_steps_per_second": 7.525, "step": 176000 }, { "epoch": 49.96026114107295, "grad_norm": 0.6998717784881592, "learning_rate": 5.006045983536759e-05, "loss": 0.0036075346171855925, "step": 176010 }, { "epoch": 49.96309963099631, "grad_norm": 1.8785208463668823, "learning_rate": 5.0057621345444225e-05, "loss": 0.0015206236392259597, "step": 176020 }, { "epoch": 49.965938120919674, "grad_norm": 0.2636459767818451, "learning_rate": 5.0054782855520866e-05, "loss": 0.000851638801395893, "step": 176030 }, { "epoch": 49.96877661084303, "grad_norm": 0.7934018969535828, "learning_rate": 5.00519443655975e-05, "loss": 0.005938491225242615, "step": 176040 }, { "epoch": 49.97161510076639, "grad_norm": 0.10201861709356308, "learning_rate": 5.004910587567414e-05, "loss": 0.0025740481913089754, "step": 176050 }, { "epoch": 49.974453590689755, "grad_norm": 0.08057184517383575, "learning_rate": 5.0046267385750776e-05, "loss": 0.002357472479343414, "step": 176060 }, { "epoch": 49.97729208061311, "grad_norm": 3.975041151046753, "learning_rate": 5.0043428895827425e-05, "loss": 0.0011539261788129807, "step": 176070 }, { "epoch": 49.980130570536474, "grad_norm": 9.44887638092041, "learning_rate": 5.0040590405904066e-05, "loss": 0.002850980870425701, "step": 176080 }, { "epoch": 49.98296906045984, "grad_norm": 0.028105955570936203, "learning_rate": 5.00377519159807e-05, "loss": 0.003853920102119446, "step": 176090 }, { "epoch": 49.98580755038319, "grad_norm": 0.0209950041025877, "learning_rate": 5.003491342605734e-05, "loss": 0.0007055636495351791, "step": 176100 }, { "epoch": 49.988646040306556, "grad_norm": 2.3295576572418213, "learning_rate": 5.0032074936133977e-05, "loss": 0.002795134112238884, "step": 176110 }, { "epoch": 49.99148453022992, "grad_norm": 0.11044897139072418, "learning_rate": 5.002923644621061e-05, "loss": 0.00271771214902401, "step": 176120 }, { "epoch": 49.99432302015328, "grad_norm": 0.08135133981704712, "learning_rate": 5.0026397956287266e-05, "loss": 0.0027631301432847976, "step": 176130 }, { "epoch": 49.99716151007664, "grad_norm": 0.47417858242988586, "learning_rate": 5.00235594663639e-05, "loss": 0.0013587038964033126, "step": 176140 }, { "epoch": 50.0, "grad_norm": 0.189507856965065, "learning_rate": 5.0020720976440535e-05, "loss": 0.0007840776816010475, "step": 176150 }, { "epoch": 50.00283848992336, "grad_norm": 3.7724413871765137, "learning_rate": 5.001788248651718e-05, "loss": 0.0008784815669059754, "step": 176160 }, { "epoch": 50.00567697984672, "grad_norm": 0.2793523967266083, "learning_rate": 5.001504399659381e-05, "loss": 0.0010044803842902184, "step": 176170 }, { "epoch": 50.00851546977008, "grad_norm": 7.323808193206787, "learning_rate": 5.001220550667045e-05, "loss": 0.0017520207911729813, "step": 176180 }, { "epoch": 50.011353959693444, "grad_norm": 0.02302386984229088, "learning_rate": 5.000936701674709e-05, "loss": 0.0020531518384814262, "step": 176190 }, { "epoch": 50.01419244961681, "grad_norm": 0.2943992614746094, "learning_rate": 5.0006528526823735e-05, "loss": 0.0007233167067170143, "step": 176200 }, { "epoch": 50.01703093954016, "grad_norm": 0.007859306409955025, "learning_rate": 5.000369003690038e-05, "loss": 0.0010492430999875068, "step": 176210 }, { "epoch": 50.019869429463526, "grad_norm": 0.5912808775901794, "learning_rate": 5.000085154697701e-05, "loss": 0.0013657096773386002, "step": 176220 }, { "epoch": 50.02270791938689, "grad_norm": 1.8536204099655151, "learning_rate": 4.999801305705365e-05, "loss": 0.0008431902155280113, "step": 176230 }, { "epoch": 50.025546409310245, "grad_norm": 6.110299110412598, "learning_rate": 4.999517456713029e-05, "loss": 0.0024310681968927384, "step": 176240 }, { "epoch": 50.02838489923361, "grad_norm": 0.6818021535873413, "learning_rate": 4.999233607720693e-05, "loss": 0.005994686484336853, "step": 176250 }, { "epoch": 50.03122338915697, "grad_norm": 0.08530093729496002, "learning_rate": 4.998949758728357e-05, "loss": 0.0017384717240929604, "step": 176260 }, { "epoch": 50.034061879080326, "grad_norm": 1.48838210105896, "learning_rate": 4.9986659097360205e-05, "loss": 0.0006951028481125832, "step": 176270 }, { "epoch": 50.03690036900369, "grad_norm": 0.06297750771045685, "learning_rate": 4.9983820607436846e-05, "loss": 0.0005120839923620224, "step": 176280 }, { "epoch": 50.03973885892705, "grad_norm": 0.02089560218155384, "learning_rate": 4.998098211751349e-05, "loss": 0.0008932847529649734, "step": 176290 }, { "epoch": 50.042577348850415, "grad_norm": 0.06646229326725006, "learning_rate": 4.997814362759012e-05, "loss": 0.0009349917992949486, "step": 176300 }, { "epoch": 50.04541583877377, "grad_norm": 0.1431989222764969, "learning_rate": 4.997530513766676e-05, "loss": 0.00023363307118415833, "step": 176310 }, { "epoch": 50.04825432869713, "grad_norm": 0.020652173087000847, "learning_rate": 4.9972466647743405e-05, "loss": 0.0023580309003591537, "step": 176320 }, { "epoch": 50.051092818620496, "grad_norm": 0.05032404512166977, "learning_rate": 4.996962815782004e-05, "loss": 0.0007284469902515411, "step": 176330 }, { "epoch": 50.05393130854385, "grad_norm": 0.2373400777578354, "learning_rate": 4.996678966789668e-05, "loss": 0.0009558342397212982, "step": 176340 }, { "epoch": 50.056769798467215, "grad_norm": 5.793634414672852, "learning_rate": 4.996395117797332e-05, "loss": 0.0010962445288896562, "step": 176350 }, { "epoch": 50.05960828839058, "grad_norm": 0.13304241001605988, "learning_rate": 4.9961112688049963e-05, "loss": 0.0009592054411768913, "step": 176360 }, { "epoch": 50.062446778313934, "grad_norm": 2.5221426486968994, "learning_rate": 4.99582741981266e-05, "loss": 0.0008399656042456626, "step": 176370 }, { "epoch": 50.0652852682373, "grad_norm": 0.046720437705516815, "learning_rate": 4.995543570820324e-05, "loss": 0.0006598435342311859, "step": 176380 }, { "epoch": 50.06812375816066, "grad_norm": 0.4744413197040558, "learning_rate": 4.995259721827988e-05, "loss": 0.0022697716951370238, "step": 176390 }, { "epoch": 50.07096224808402, "grad_norm": 0.08460473269224167, "learning_rate": 4.9949758728356515e-05, "loss": 0.0026431258767843245, "step": 176400 }, { "epoch": 50.07380073800738, "grad_norm": 9.851479530334473, "learning_rate": 4.994692023843316e-05, "loss": 0.003089432045817375, "step": 176410 }, { "epoch": 50.07663922793074, "grad_norm": 0.8279853463172913, "learning_rate": 4.99440817485098e-05, "loss": 0.0012674272060394288, "step": 176420 }, { "epoch": 50.079477717854104, "grad_norm": 0.18671581149101257, "learning_rate": 4.994124325858643e-05, "loss": 0.00387532040476799, "step": 176430 }, { "epoch": 50.08231620777746, "grad_norm": 0.878939688205719, "learning_rate": 4.9938404768663074e-05, "loss": 0.0006136523559689521, "step": 176440 }, { "epoch": 50.08515469770082, "grad_norm": 0.11840343475341797, "learning_rate": 4.9935566278739716e-05, "loss": 0.009156681597232819, "step": 176450 }, { "epoch": 50.087993187624186, "grad_norm": 0.08839007467031479, "learning_rate": 4.993272778881635e-05, "loss": 0.0035822682082653047, "step": 176460 }, { "epoch": 50.09083167754754, "grad_norm": 0.08188849687576294, "learning_rate": 4.992988929889299e-05, "loss": 0.0022962478920817374, "step": 176470 }, { "epoch": 50.093670167470904, "grad_norm": 2.2806150913238525, "learning_rate": 4.992705080896963e-05, "loss": 0.0011621389538049697, "step": 176480 }, { "epoch": 50.09650865739427, "grad_norm": 0.2724352777004242, "learning_rate": 4.9924496168038606e-05, "loss": 0.0025118185207247735, "step": 176490 }, { "epoch": 50.09934714731763, "grad_norm": 1.164432406425476, "learning_rate": 4.992165767811525e-05, "loss": 0.0037373602390289307, "step": 176500 }, { "epoch": 50.09934714731763, "eval_accuracy": 0.9769822598079736, "eval_loss": 0.08625923097133636, "eval_runtime": 32.2141, "eval_samples_per_second": 488.202, "eval_steps_per_second": 7.636, "step": 176500 }, { "epoch": 50.102185637240986, "grad_norm": 1.7083172798156738, "learning_rate": 4.991910303718422e-05, "loss": 0.008183462172746658, "step": 176510 }, { "epoch": 50.10502412716435, "grad_norm": 1.135770320892334, "learning_rate": 4.991626454726086e-05, "loss": 0.0005703866481781006, "step": 176520 }, { "epoch": 50.10786261708771, "grad_norm": 0.09532252699136734, "learning_rate": 4.9913426057337497e-05, "loss": 0.0009808547794818879, "step": 176530 }, { "epoch": 50.11070110701107, "grad_norm": 0.4745347499847412, "learning_rate": 4.991058756741414e-05, "loss": 0.0028731677681207656, "step": 176540 }, { "epoch": 50.11353959693443, "grad_norm": 0.03605005145072937, "learning_rate": 4.990774907749078e-05, "loss": 0.0009749777615070343, "step": 176550 }, { "epoch": 50.11637808685779, "grad_norm": 0.07686666399240494, "learning_rate": 4.9904910587567414e-05, "loss": 0.0005639469251036644, "step": 176560 }, { "epoch": 50.119216576781156, "grad_norm": 0.006282479967921972, "learning_rate": 4.9902072097644055e-05, "loss": 0.0007820015773177147, "step": 176570 }, { "epoch": 50.12205506670451, "grad_norm": 0.02165645733475685, "learning_rate": 4.98992336077207e-05, "loss": 0.005430639907717705, "step": 176580 }, { "epoch": 50.124893556627875, "grad_norm": 15.569891929626465, "learning_rate": 4.989639511779733e-05, "loss": 0.0063955865800380705, "step": 176590 }, { "epoch": 50.12773204655124, "grad_norm": 0.2452220916748047, "learning_rate": 4.989355662787397e-05, "loss": 0.002078629843890667, "step": 176600 }, { "epoch": 50.13057053647459, "grad_norm": 0.0274581890553236, "learning_rate": 4.9890718137950614e-05, "loss": 0.0007554735988378525, "step": 176610 }, { "epoch": 50.133409026397956, "grad_norm": 1.1781631708145142, "learning_rate": 4.9887879648027255e-05, "loss": 0.000506405532360077, "step": 176620 }, { "epoch": 50.13624751632132, "grad_norm": 0.04099762812256813, "learning_rate": 4.988504115810389e-05, "loss": 0.0004459936171770096, "step": 176630 }, { "epoch": 50.139086006244675, "grad_norm": 0.028696412220597267, "learning_rate": 4.9882202668180525e-05, "loss": 0.002434944733977318, "step": 176640 }, { "epoch": 50.14192449616804, "grad_norm": 0.014150276780128479, "learning_rate": 4.987936417825717e-05, "loss": 0.0015130940824747085, "step": 176650 }, { "epoch": 50.1447629860914, "grad_norm": 0.3179625868797302, "learning_rate": 4.987652568833381e-05, "loss": 0.0006388736888766289, "step": 176660 }, { "epoch": 50.14760147601476, "grad_norm": 0.3257445991039276, "learning_rate": 4.987368719841045e-05, "loss": 0.0005356330424547195, "step": 176670 }, { "epoch": 50.15043996593812, "grad_norm": 0.028779519721865654, "learning_rate": 4.987084870848709e-05, "loss": 0.0004261115565896034, "step": 176680 }, { "epoch": 50.15327845586148, "grad_norm": 0.9095824360847473, "learning_rate": 4.9868010218563725e-05, "loss": 0.0019007945433259011, "step": 176690 }, { "epoch": 50.156116945784845, "grad_norm": 0.6246103048324585, "learning_rate": 4.9865171728640366e-05, "loss": 0.0005518237128853797, "step": 176700 }, { "epoch": 50.1589554357082, "grad_norm": 0.14597567915916443, "learning_rate": 4.986233323871701e-05, "loss": 0.0005241403356194496, "step": 176710 }, { "epoch": 50.161793925631564, "grad_norm": 0.062584787607193, "learning_rate": 4.985949474879364e-05, "loss": 0.0012103330343961715, "step": 176720 }, { "epoch": 50.16463241555493, "grad_norm": 0.15316635370254517, "learning_rate": 4.9856656258870283e-05, "loss": 0.0030560286715626715, "step": 176730 }, { "epoch": 50.16747090547828, "grad_norm": 0.02110178954899311, "learning_rate": 4.985381776894692e-05, "loss": 0.0046000611037015915, "step": 176740 }, { "epoch": 50.170309395401645, "grad_norm": 0.04300222545862198, "learning_rate": 4.9850979279023566e-05, "loss": 0.00060130525380373, "step": 176750 }, { "epoch": 50.17314788532501, "grad_norm": 0.6389912962913513, "learning_rate": 4.98481407891002e-05, "loss": 0.0014501934871077537, "step": 176760 }, { "epoch": 50.17598637524837, "grad_norm": 0.0434531532227993, "learning_rate": 4.9845302299176835e-05, "loss": 0.0004605159163475037, "step": 176770 }, { "epoch": 50.17882486517173, "grad_norm": 1.0250457525253296, "learning_rate": 4.9842463809253483e-05, "loss": 0.0008345304057002067, "step": 176780 }, { "epoch": 50.18166335509509, "grad_norm": 0.05345132201910019, "learning_rate": 4.983962531933012e-05, "loss": 0.0004011526703834534, "step": 176790 }, { "epoch": 50.18450184501845, "grad_norm": 1.5893410444259644, "learning_rate": 4.983678682940676e-05, "loss": 0.015104100108146667, "step": 176800 }, { "epoch": 50.18734033494181, "grad_norm": 2.648761510848999, "learning_rate": 4.98339483394834e-05, "loss": 0.0007993241772055626, "step": 176810 }, { "epoch": 50.19017882486517, "grad_norm": 0.4990914463996887, "learning_rate": 4.9831109849560035e-05, "loss": 0.002957923151552677, "step": 176820 }, { "epoch": 50.193017314788534, "grad_norm": 0.1258476823568344, "learning_rate": 4.982827135963668e-05, "loss": 0.0008682165294885636, "step": 176830 }, { "epoch": 50.19585580471189, "grad_norm": 0.21630902588367462, "learning_rate": 4.982543286971331e-05, "loss": 0.000866316445171833, "step": 176840 }, { "epoch": 50.19869429463525, "grad_norm": 0.09617657959461212, "learning_rate": 4.982259437978995e-05, "loss": 0.0005035484209656715, "step": 176850 }, { "epoch": 50.201532784558616, "grad_norm": 0.01744590885937214, "learning_rate": 4.9819755889866594e-05, "loss": 0.0004816923290491104, "step": 176860 }, { "epoch": 50.20437127448198, "grad_norm": 0.052794624119997025, "learning_rate": 4.981691739994323e-05, "loss": 0.0017745234072208405, "step": 176870 }, { "epoch": 50.207209764405334, "grad_norm": 0.07953483611345291, "learning_rate": 4.981407891001987e-05, "loss": 0.001958402805030346, "step": 176880 }, { "epoch": 50.2100482543287, "grad_norm": 0.11982344835996628, "learning_rate": 4.981124042009651e-05, "loss": 0.00464974045753479, "step": 176890 }, { "epoch": 50.21288674425206, "grad_norm": 0.5792988538742065, "learning_rate": 4.9808401930173146e-05, "loss": 0.001023976132273674, "step": 176900 }, { "epoch": 50.215725234175416, "grad_norm": 2.3434958457946777, "learning_rate": 4.9805563440249794e-05, "loss": 0.003122660145163536, "step": 176910 }, { "epoch": 50.21856372409878, "grad_norm": 0.6093628406524658, "learning_rate": 4.980272495032643e-05, "loss": 0.0005148198455572128, "step": 176920 }, { "epoch": 50.22140221402214, "grad_norm": 0.030891595408320427, "learning_rate": 4.9799886460403063e-05, "loss": 0.0015668433159589767, "step": 176930 }, { "epoch": 50.2242407039455, "grad_norm": 0.2774398922920227, "learning_rate": 4.979704797047971e-05, "loss": 0.0003255235031247139, "step": 176940 }, { "epoch": 50.22707919386886, "grad_norm": 8.698624610900879, "learning_rate": 4.9794209480556346e-05, "loss": 0.0018035238608717918, "step": 176950 }, { "epoch": 50.22991768379222, "grad_norm": 2.889691114425659, "learning_rate": 4.979137099063299e-05, "loss": 0.002668721787631512, "step": 176960 }, { "epoch": 50.232756173715586, "grad_norm": 7.766643524169922, "learning_rate": 4.978853250070962e-05, "loss": 0.00206392128020525, "step": 176970 }, { "epoch": 50.23559466363894, "grad_norm": 9.352635383605957, "learning_rate": 4.9785694010786264e-05, "loss": 0.0016606124117970467, "step": 176980 }, { "epoch": 50.238433153562305, "grad_norm": 1.1609859466552734, "learning_rate": 4.9782855520862905e-05, "loss": 0.0018675822764635086, "step": 176990 }, { "epoch": 50.24127164348567, "grad_norm": 0.11830850690603256, "learning_rate": 4.978001703093954e-05, "loss": 0.008323868364095688, "step": 177000 }, { "epoch": 50.24127164348567, "eval_accuracy": 0.9779360335728365, "eval_loss": 0.0856231302022934, "eval_runtime": 32.778, "eval_samples_per_second": 479.804, "eval_steps_per_second": 7.505, "step": 177000 }, { "epoch": 50.24411013340902, "grad_norm": 0.08256211876869202, "learning_rate": 4.977717854101618e-05, "loss": 0.002439967170357704, "step": 177010 }, { "epoch": 50.246948623332386, "grad_norm": 0.19302710890769958, "learning_rate": 4.977434005109282e-05, "loss": 0.0024215953424572946, "step": 177020 }, { "epoch": 50.24978711325575, "grad_norm": 0.1892315000295639, "learning_rate": 4.977150156116946e-05, "loss": 0.0012735802680253982, "step": 177030 }, { "epoch": 50.25262560317911, "grad_norm": 0.07982230186462402, "learning_rate": 4.9768663071246105e-05, "loss": 0.0009816793724894523, "step": 177040 }, { "epoch": 50.25546409310247, "grad_norm": 0.041709598153829575, "learning_rate": 4.976582458132274e-05, "loss": 0.0005294255912303925, "step": 177050 }, { "epoch": 50.25830258302583, "grad_norm": 2.273650884628296, "learning_rate": 4.9762986091399374e-05, "loss": 0.001293467730283737, "step": 177060 }, { "epoch": 50.261141072949194, "grad_norm": 0.1955438256263733, "learning_rate": 4.9760147601476016e-05, "loss": 0.0007217144593596458, "step": 177070 }, { "epoch": 50.26397956287255, "grad_norm": 0.39188092947006226, "learning_rate": 4.975730911155266e-05, "loss": 0.0003591010347008705, "step": 177080 }, { "epoch": 50.26681805279591, "grad_norm": 0.05045894905924797, "learning_rate": 4.97544706216293e-05, "loss": 0.0004187233746051788, "step": 177090 }, { "epoch": 50.269656542719275, "grad_norm": 0.04422523453831673, "learning_rate": 4.975163213170593e-05, "loss": 0.00031277742236852647, "step": 177100 }, { "epoch": 50.27249503264263, "grad_norm": 0.19990792870521545, "learning_rate": 4.9748793641782574e-05, "loss": 0.0007353005930781364, "step": 177110 }, { "epoch": 50.275333522565994, "grad_norm": 0.06600464135408401, "learning_rate": 4.9745955151859216e-05, "loss": 0.0002290608361363411, "step": 177120 }, { "epoch": 50.27817201248936, "grad_norm": 0.2030678689479828, "learning_rate": 4.974311666193585e-05, "loss": 0.0007766973227262497, "step": 177130 }, { "epoch": 50.28101050241272, "grad_norm": 0.7113814353942871, "learning_rate": 4.974027817201249e-05, "loss": 0.0005529282614588737, "step": 177140 }, { "epoch": 50.283848992336075, "grad_norm": 0.02137497067451477, "learning_rate": 4.973743968208913e-05, "loss": 0.0005377311259508133, "step": 177150 }, { "epoch": 50.28668748225944, "grad_norm": 0.2531367540359497, "learning_rate": 4.973460119216577e-05, "loss": 0.000562996044754982, "step": 177160 }, { "epoch": 50.2895259721828, "grad_norm": 0.45157748460769653, "learning_rate": 4.973176270224241e-05, "loss": 0.001690037176012993, "step": 177170 }, { "epoch": 50.29236446210616, "grad_norm": 0.05742061883211136, "learning_rate": 4.972892421231905e-05, "loss": 0.0003670386970043182, "step": 177180 }, { "epoch": 50.29520295202952, "grad_norm": 0.07757249474525452, "learning_rate": 4.9726085722395685e-05, "loss": 0.0037764310836791992, "step": 177190 }, { "epoch": 50.29804144195288, "grad_norm": 0.055256836116313934, "learning_rate": 4.9723247232472326e-05, "loss": 0.0029304096475243568, "step": 177200 }, { "epoch": 50.30087993187624, "grad_norm": 0.5304828882217407, "learning_rate": 4.972040874254897e-05, "loss": 0.002909406274557114, "step": 177210 }, { "epoch": 50.3037184217996, "grad_norm": 0.33761054277420044, "learning_rate": 4.971757025262561e-05, "loss": 0.00031282082200050353, "step": 177220 }, { "epoch": 50.306556911722964, "grad_norm": 0.15963734686374664, "learning_rate": 4.9714731762702244e-05, "loss": 0.0008289923891425133, "step": 177230 }, { "epoch": 50.30939540164633, "grad_norm": 1.5332616567611694, "learning_rate": 4.9711893272778885e-05, "loss": 0.0015551738440990448, "step": 177240 }, { "epoch": 50.31223389156968, "grad_norm": 0.3817329704761505, "learning_rate": 4.9709054782855526e-05, "loss": 0.0006673812866210937, "step": 177250 }, { "epoch": 50.315072381493046, "grad_norm": 1.096095323562622, "learning_rate": 4.970621629293216e-05, "loss": 0.003609811142086983, "step": 177260 }, { "epoch": 50.31791087141641, "grad_norm": 0.5516811013221741, "learning_rate": 4.97033778030088e-05, "loss": 0.003701005131006241, "step": 177270 }, { "epoch": 50.320749361339765, "grad_norm": 0.24736420810222626, "learning_rate": 4.9700539313085444e-05, "loss": 0.0010272614657878877, "step": 177280 }, { "epoch": 50.32358785126313, "grad_norm": 0.030993130058050156, "learning_rate": 4.969770082316208e-05, "loss": 0.00017837658524513244, "step": 177290 }, { "epoch": 50.32642634118649, "grad_norm": 0.24044036865234375, "learning_rate": 4.969486233323872e-05, "loss": 0.0009753476828336715, "step": 177300 }, { "epoch": 50.329264831109846, "grad_norm": 0.24940486252307892, "learning_rate": 4.969202384331536e-05, "loss": 0.0012340404093265534, "step": 177310 }, { "epoch": 50.33210332103321, "grad_norm": 0.1399960219860077, "learning_rate": 4.9689185353391996e-05, "loss": 0.0009991338476538659, "step": 177320 }, { "epoch": 50.33494181095657, "grad_norm": 0.1798580437898636, "learning_rate": 4.968634686346864e-05, "loss": 0.001671215333044529, "step": 177330 }, { "epoch": 50.337780300879935, "grad_norm": 1.7044034004211426, "learning_rate": 4.968350837354528e-05, "loss": 0.0015520596876740455, "step": 177340 }, { "epoch": 50.34061879080329, "grad_norm": 0.1086382195353508, "learning_rate": 4.968066988362191e-05, "loss": 0.0027397431433200834, "step": 177350 }, { "epoch": 50.34345728072665, "grad_norm": 0.1624901294708252, "learning_rate": 4.9677831393698554e-05, "loss": 0.0011369818821549416, "step": 177360 }, { "epoch": 50.346295770650016, "grad_norm": 0.6583725214004517, "learning_rate": 4.967499290377519e-05, "loss": 0.0006082981824874877, "step": 177370 }, { "epoch": 50.34913426057337, "grad_norm": 0.11071088910102844, "learning_rate": 4.967215441385184e-05, "loss": 0.0009104721248149872, "step": 177380 }, { "epoch": 50.351972750496735, "grad_norm": 0.2696910798549652, "learning_rate": 4.966931592392847e-05, "loss": 0.00036047566682100295, "step": 177390 }, { "epoch": 50.3548112404201, "grad_norm": 0.13019660115242004, "learning_rate": 4.9666477434005106e-05, "loss": 0.0005082773044705391, "step": 177400 }, { "epoch": 50.35764973034346, "grad_norm": 1.0782768726348877, "learning_rate": 4.9663638944081755e-05, "loss": 0.0008891647681593895, "step": 177410 }, { "epoch": 50.36048822026682, "grad_norm": 0.05414819344878197, "learning_rate": 4.966080045415839e-05, "loss": 0.0029627135023474692, "step": 177420 }, { "epoch": 50.36332671019018, "grad_norm": 0.4545585513114929, "learning_rate": 4.965796196423503e-05, "loss": 0.0007574697956442833, "step": 177430 }, { "epoch": 50.36616520011354, "grad_norm": 0.0877997875213623, "learning_rate": 4.965512347431167e-05, "loss": 0.006679043173789978, "step": 177440 }, { "epoch": 50.3690036900369, "grad_norm": 1.6697880029678345, "learning_rate": 4.9652284984388307e-05, "loss": 0.0015411537140607833, "step": 177450 }, { "epoch": 50.37184217996026, "grad_norm": 0.09440036863088608, "learning_rate": 4.964944649446495e-05, "loss": 0.000862899050116539, "step": 177460 }, { "epoch": 50.374680669883624, "grad_norm": 0.03411230072379112, "learning_rate": 4.964660800454158e-05, "loss": 0.00018202271312475204, "step": 177470 }, { "epoch": 50.37751915980698, "grad_norm": 0.01655156910419464, "learning_rate": 4.9643769514618224e-05, "loss": 0.004170828312635422, "step": 177480 }, { "epoch": 50.38035764973034, "grad_norm": 0.7139278650283813, "learning_rate": 4.9640931024694865e-05, "loss": 0.002236265130341053, "step": 177490 }, { "epoch": 50.383196139653705, "grad_norm": 0.014876579865813255, "learning_rate": 4.96380925347715e-05, "loss": 0.0014686668291687965, "step": 177500 }, { "epoch": 50.383196139653705, "eval_accuracy": 0.9734850893368093, "eval_loss": 0.09695994853973389, "eval_runtime": 31.8462, "eval_samples_per_second": 493.842, "eval_steps_per_second": 7.725, "step": 177500 }, { "epoch": 50.38603462957707, "grad_norm": 0.05836835876107216, "learning_rate": 4.963525404484815e-05, "loss": 0.005047713220119476, "step": 177510 }, { "epoch": 50.388873119500424, "grad_norm": 3.8501880168914795, "learning_rate": 4.963241555492478e-05, "loss": 0.001256764866411686, "step": 177520 }, { "epoch": 50.39171160942379, "grad_norm": 4.1185221672058105, "learning_rate": 4.962957706500142e-05, "loss": 0.0011665267869830132, "step": 177530 }, { "epoch": 50.39455009934715, "grad_norm": 0.35036447644233704, "learning_rate": 4.9626738575078065e-05, "loss": 0.0007903749123215675, "step": 177540 }, { "epoch": 50.397388589270506, "grad_norm": 0.024981167167425156, "learning_rate": 4.96239000851547e-05, "loss": 0.0005061108618974686, "step": 177550 }, { "epoch": 50.40022707919387, "grad_norm": 0.05890714004635811, "learning_rate": 4.962106159523134e-05, "loss": 0.0007998129352927208, "step": 177560 }, { "epoch": 50.40306556911723, "grad_norm": 21.335351943969727, "learning_rate": 4.9618223105307976e-05, "loss": 0.007528568059206009, "step": 177570 }, { "epoch": 50.40590405904059, "grad_norm": 0.0112476646900177, "learning_rate": 4.961538461538462e-05, "loss": 0.0004099428653717041, "step": 177580 }, { "epoch": 50.40874254896395, "grad_norm": 0.032216690480709076, "learning_rate": 4.961254612546126e-05, "loss": 0.0012944817543029785, "step": 177590 }, { "epoch": 50.41158103888731, "grad_norm": 0.3729014992713928, "learning_rate": 4.960970763553789e-05, "loss": 0.00034929364919662477, "step": 177600 }, { "epoch": 50.414419528810676, "grad_norm": 0.05313735455274582, "learning_rate": 4.9606869145614535e-05, "loss": 0.0041382797062397, "step": 177610 }, { "epoch": 50.41725801873403, "grad_norm": 0.07106073945760727, "learning_rate": 4.9604030655691176e-05, "loss": 0.0027822446078062057, "step": 177620 }, { "epoch": 50.420096508657394, "grad_norm": 0.6923015117645264, "learning_rate": 4.960119216576781e-05, "loss": 0.004545788839459419, "step": 177630 }, { "epoch": 50.42293499858076, "grad_norm": 0.026344675570726395, "learning_rate": 4.959835367584446e-05, "loss": 0.002169114165008068, "step": 177640 }, { "epoch": 50.42577348850411, "grad_norm": 0.8514630198478699, "learning_rate": 4.959551518592109e-05, "loss": 0.00214039608836174, "step": 177650 }, { "epoch": 50.428611978427476, "grad_norm": 0.5716798901557922, "learning_rate": 4.959267669599773e-05, "loss": 0.000827709399163723, "step": 177660 }, { "epoch": 50.43145046835084, "grad_norm": 0.10842598974704742, "learning_rate": 4.958983820607437e-05, "loss": 0.00046628396958112715, "step": 177670 }, { "epoch": 50.434288958274195, "grad_norm": 0.09555017203092575, "learning_rate": 4.958699971615101e-05, "loss": 0.003782496228814125, "step": 177680 }, { "epoch": 50.43712744819756, "grad_norm": 0.4245949983596802, "learning_rate": 4.958416122622765e-05, "loss": 0.0003491690382361412, "step": 177690 }, { "epoch": 50.43996593812092, "grad_norm": 0.32890450954437256, "learning_rate": 4.958132273630429e-05, "loss": 0.0003079770132899284, "step": 177700 }, { "epoch": 50.44280442804428, "grad_norm": 0.3969707489013672, "learning_rate": 4.957848424638093e-05, "loss": 0.0004984445869922638, "step": 177710 }, { "epoch": 50.44564291796764, "grad_norm": 0.04004652425646782, "learning_rate": 4.957564575645757e-05, "loss": 0.0008567538112401962, "step": 177720 }, { "epoch": 50.448481407891, "grad_norm": 0.058560408651828766, "learning_rate": 4.9572807266534204e-05, "loss": 0.003626786172389984, "step": 177730 }, { "epoch": 50.451319897814365, "grad_norm": 0.005180516745895147, "learning_rate": 4.9569968776610845e-05, "loss": 0.0005284024402499199, "step": 177740 }, { "epoch": 50.45415838773772, "grad_norm": 0.027004335075616837, "learning_rate": 4.956713028668749e-05, "loss": 0.004837240651249885, "step": 177750 }, { "epoch": 50.45699687766108, "grad_norm": 0.04382847994565964, "learning_rate": 4.956429179676412e-05, "loss": 0.0019002886489033698, "step": 177760 }, { "epoch": 50.459835367584446, "grad_norm": 4.791123390197754, "learning_rate": 4.956145330684076e-05, "loss": 0.0010612225160002708, "step": 177770 }, { "epoch": 50.46267385750781, "grad_norm": 0.4115423262119293, "learning_rate": 4.9558614816917404e-05, "loss": 0.001125345379114151, "step": 177780 }, { "epoch": 50.465512347431165, "grad_norm": 0.16639269888401031, "learning_rate": 4.955577632699404e-05, "loss": 0.0008646093308925628, "step": 177790 }, { "epoch": 50.46835083735453, "grad_norm": 0.32309722900390625, "learning_rate": 4.955293783707068e-05, "loss": 0.0010424047708511353, "step": 177800 }, { "epoch": 50.47118932727789, "grad_norm": 0.03370222821831703, "learning_rate": 4.955009934714732e-05, "loss": 0.001813761331140995, "step": 177810 }, { "epoch": 50.47402781720125, "grad_norm": 0.12336213886737823, "learning_rate": 4.9547260857223956e-05, "loss": 0.000892116129398346, "step": 177820 }, { "epoch": 50.47686630712461, "grad_norm": 2.7869176864624023, "learning_rate": 4.95444223673006e-05, "loss": 0.0014268592000007629, "step": 177830 }, { "epoch": 50.47970479704797, "grad_norm": 0.17018850147724152, "learning_rate": 4.954158387737724e-05, "loss": 0.0004818037152290344, "step": 177840 }, { "epoch": 50.48254328697133, "grad_norm": 0.029928898438811302, "learning_rate": 4.953874538745388e-05, "loss": 0.004192335158586502, "step": 177850 }, { "epoch": 50.48538177689469, "grad_norm": 0.12719881534576416, "learning_rate": 4.9535906897530515e-05, "loss": 0.0013388942927122117, "step": 177860 }, { "epoch": 50.488220266818054, "grad_norm": 0.2800428569316864, "learning_rate": 4.953306840760715e-05, "loss": 0.0008534926921129227, "step": 177870 }, { "epoch": 50.49105875674142, "grad_norm": 2.415271282196045, "learning_rate": 4.95302299176838e-05, "loss": 0.016553825139999388, "step": 177880 }, { "epoch": 50.49389724666477, "grad_norm": 0.05258976295590401, "learning_rate": 4.952739142776043e-05, "loss": 0.00021453816443681717, "step": 177890 }, { "epoch": 50.496735736588136, "grad_norm": 0.007284042425453663, "learning_rate": 4.9524552937837074e-05, "loss": 0.005002042278647423, "step": 177900 }, { "epoch": 50.4995742265115, "grad_norm": 0.09343832731246948, "learning_rate": 4.9521714447913715e-05, "loss": 0.0035232074558734896, "step": 177910 }, { "epoch": 50.502412716434854, "grad_norm": 0.05552128329873085, "learning_rate": 4.951887595799035e-05, "loss": 0.0007843418046832084, "step": 177920 }, { "epoch": 50.50525120635822, "grad_norm": 0.9645230770111084, "learning_rate": 4.951603746806699e-05, "loss": 0.0014153802767395973, "step": 177930 }, { "epoch": 50.50808969628158, "grad_norm": 0.0924747884273529, "learning_rate": 4.951319897814363e-05, "loss": 0.0008359776809811592, "step": 177940 }, { "epoch": 50.510928186204936, "grad_norm": 0.07320868968963623, "learning_rate": 4.951036048822027e-05, "loss": 0.0016082949936389924, "step": 177950 }, { "epoch": 50.5137666761283, "grad_norm": 1.2412312030792236, "learning_rate": 4.950752199829691e-05, "loss": 0.0036428429186344145, "step": 177960 }, { "epoch": 50.51660516605166, "grad_norm": 0.0991537794470787, "learning_rate": 4.950468350837354e-05, "loss": 0.0006276082247495651, "step": 177970 }, { "epoch": 50.519443655975024, "grad_norm": 8.58150863647461, "learning_rate": 4.950184501845019e-05, "loss": 0.00971858948469162, "step": 177980 }, { "epoch": 50.52228214589838, "grad_norm": 0.22930099070072174, "learning_rate": 4.9499006528526826e-05, "loss": 0.000516694039106369, "step": 177990 }, { "epoch": 50.52512063582174, "grad_norm": 0.22646944224834442, "learning_rate": 4.949616803860346e-05, "loss": 0.0010945620015263557, "step": 178000 }, { "epoch": 50.52512063582174, "eval_accuracy": 0.9749475424429326, "eval_loss": 0.08912072330713272, "eval_runtime": 33.1405, "eval_samples_per_second": 474.555, "eval_steps_per_second": 7.423, "step": 178000 }, { "epoch": 50.527959125745106, "grad_norm": 1.298165202140808, "learning_rate": 4.949332954868011e-05, "loss": 0.0007663747295737266, "step": 178010 }, { "epoch": 50.53079761566846, "grad_norm": 0.33097919821739197, "learning_rate": 4.949049105875674e-05, "loss": 0.0020462434738874437, "step": 178020 }, { "epoch": 50.533636105591825, "grad_norm": 0.4576626420021057, "learning_rate": 4.9487652568833384e-05, "loss": 0.003384851664304733, "step": 178030 }, { "epoch": 50.53647459551519, "grad_norm": 0.1948893964290619, "learning_rate": 4.9484814078910026e-05, "loss": 0.002604338712990284, "step": 178040 }, { "epoch": 50.53931308543854, "grad_norm": 3.9826879501342773, "learning_rate": 4.948197558898666e-05, "loss": 0.0019319184124469758, "step": 178050 }, { "epoch": 50.542151575361906, "grad_norm": 0.1248917356133461, "learning_rate": 4.94791370990633e-05, "loss": 0.007322826236486435, "step": 178060 }, { "epoch": 50.54499006528527, "grad_norm": 0.3652973771095276, "learning_rate": 4.9476298609139936e-05, "loss": 0.010866234451532364, "step": 178070 }, { "epoch": 50.54782855520863, "grad_norm": 0.18217891454696655, "learning_rate": 4.947346011921658e-05, "loss": 0.0010630324482917785, "step": 178080 }, { "epoch": 50.55066704513199, "grad_norm": 0.15132637321949005, "learning_rate": 4.947062162929322e-05, "loss": 0.001171666383743286, "step": 178090 }, { "epoch": 50.55350553505535, "grad_norm": 5.3725152015686035, "learning_rate": 4.9467783139369854e-05, "loss": 0.017546015977859496, "step": 178100 }, { "epoch": 50.55634402497871, "grad_norm": 0.033957455307245255, "learning_rate": 4.94649446494465e-05, "loss": 0.0017675373703241349, "step": 178110 }, { "epoch": 50.55918251490207, "grad_norm": 1.643140435218811, "learning_rate": 4.9462106159523136e-05, "loss": 0.0020062139257788656, "step": 178120 }, { "epoch": 50.56202100482543, "grad_norm": 0.14635750651359558, "learning_rate": 4.945926766959977e-05, "loss": 0.0037103869020938874, "step": 178130 }, { "epoch": 50.564859494748795, "grad_norm": 0.24740934371948242, "learning_rate": 4.945642917967642e-05, "loss": 0.004420775175094605, "step": 178140 }, { "epoch": 50.56769798467215, "grad_norm": 0.18603527545928955, "learning_rate": 4.9453590689753054e-05, "loss": 0.008466163277626037, "step": 178150 }, { "epoch": 50.570536474595514, "grad_norm": 14.504068374633789, "learning_rate": 4.9450752199829695e-05, "loss": 0.007627817243337632, "step": 178160 }, { "epoch": 50.57337496451888, "grad_norm": 0.22988714277744293, "learning_rate": 4.944791370990633e-05, "loss": 0.0030602091923356057, "step": 178170 }, { "epoch": 50.57621345444224, "grad_norm": 0.24587509036064148, "learning_rate": 4.944507521998297e-05, "loss": 0.0007264429703354835, "step": 178180 }, { "epoch": 50.579051944365595, "grad_norm": 0.4983689486980438, "learning_rate": 4.944223673005961e-05, "loss": 0.0042855173349380495, "step": 178190 }, { "epoch": 50.58189043428896, "grad_norm": 0.05465778708457947, "learning_rate": 4.943939824013625e-05, "loss": 0.0018220921978354455, "step": 178200 }, { "epoch": 50.58472892421232, "grad_norm": 0.20462851226329803, "learning_rate": 4.943655975021289e-05, "loss": 0.0008056052029132843, "step": 178210 }, { "epoch": 50.58756741413568, "grad_norm": 0.1778930425643921, "learning_rate": 4.943372126028953e-05, "loss": 0.0003673093393445015, "step": 178220 }, { "epoch": 50.59040590405904, "grad_norm": 0.22313767671585083, "learning_rate": 4.9430882770366164e-05, "loss": 0.0012095678597688676, "step": 178230 }, { "epoch": 50.5932443939824, "grad_norm": 2.6927945613861084, "learning_rate": 4.9428044280442806e-05, "loss": 0.006263354420661926, "step": 178240 }, { "epoch": 50.596082883905765, "grad_norm": 0.06281709671020508, "learning_rate": 4.942520579051945e-05, "loss": 0.0009450465440750122, "step": 178250 }, { "epoch": 50.59892137382912, "grad_norm": 9.54774284362793, "learning_rate": 4.942236730059608e-05, "loss": 0.0029483195394277573, "step": 178260 }, { "epoch": 50.601759863752484, "grad_norm": 17.819562911987305, "learning_rate": 4.941952881067273e-05, "loss": 0.012881028652191161, "step": 178270 }, { "epoch": 50.60459835367585, "grad_norm": 7.835615634918213, "learning_rate": 4.9416690320749364e-05, "loss": 0.016581083834171294, "step": 178280 }, { "epoch": 50.6074368435992, "grad_norm": 6.273597240447998, "learning_rate": 4.9413851830826e-05, "loss": 0.012644153833389283, "step": 178290 }, { "epoch": 50.610275333522566, "grad_norm": 4.978204250335693, "learning_rate": 4.941101334090264e-05, "loss": 0.005877928435802459, "step": 178300 }, { "epoch": 50.61311382344593, "grad_norm": 0.323255717754364, "learning_rate": 4.940817485097928e-05, "loss": 0.001150345429778099, "step": 178310 }, { "epoch": 50.615952313369284, "grad_norm": 6.733771800994873, "learning_rate": 4.940533636105592e-05, "loss": 0.0015473274514079093, "step": 178320 }, { "epoch": 50.61879080329265, "grad_norm": 0.3099766671657562, "learning_rate": 4.940249787113256e-05, "loss": 0.0008110819384455681, "step": 178330 }, { "epoch": 50.62162929321601, "grad_norm": 0.06060781702399254, "learning_rate": 4.93996593812092e-05, "loss": 0.003547689691185951, "step": 178340 }, { "epoch": 50.62446778313937, "grad_norm": 0.07607654482126236, "learning_rate": 4.939682089128584e-05, "loss": 0.0015209093689918518, "step": 178350 }, { "epoch": 50.62730627306273, "grad_norm": 0.299899160861969, "learning_rate": 4.9393982401362475e-05, "loss": 0.0015377139672636986, "step": 178360 }, { "epoch": 50.63014476298609, "grad_norm": 0.05727801471948624, "learning_rate": 4.9391143911439116e-05, "loss": 0.0020029693841934204, "step": 178370 }, { "epoch": 50.632983252909455, "grad_norm": 0.11769679188728333, "learning_rate": 4.938830542151576e-05, "loss": 0.003923571109771729, "step": 178380 }, { "epoch": 50.63582174283281, "grad_norm": 1.423798680305481, "learning_rate": 4.938546693159239e-05, "loss": 0.0036637675017118453, "step": 178390 }, { "epoch": 50.63866023275617, "grad_norm": 0.17247925698757172, "learning_rate": 4.9382628441669034e-05, "loss": 0.0014694530516862869, "step": 178400 }, { "epoch": 50.641498722679536, "grad_norm": 1.3631196022033691, "learning_rate": 4.9379789951745675e-05, "loss": 0.0015047889202833176, "step": 178410 }, { "epoch": 50.64433721260289, "grad_norm": 1.2381564378738403, "learning_rate": 4.937695146182231e-05, "loss": 0.008275463432073592, "step": 178420 }, { "epoch": 50.647175702526255, "grad_norm": 0.1554345041513443, "learning_rate": 4.937411297189895e-05, "loss": 0.011019628494977951, "step": 178430 }, { "epoch": 50.65001419244962, "grad_norm": 5.878764629364014, "learning_rate": 4.937127448197559e-05, "loss": 0.0020541947335004805, "step": 178440 }, { "epoch": 50.65285268237298, "grad_norm": 0.12758630514144897, "learning_rate": 4.9368435992052234e-05, "loss": 0.017905624210834505, "step": 178450 }, { "epoch": 50.655691172296336, "grad_norm": 0.11042506247758865, "learning_rate": 4.936559750212887e-05, "loss": 0.0018763771280646325, "step": 178460 }, { "epoch": 50.6585296622197, "grad_norm": 0.4924071133136749, "learning_rate": 4.936275901220551e-05, "loss": 0.001947961002588272, "step": 178470 }, { "epoch": 50.66136815214306, "grad_norm": 0.06848179548978806, "learning_rate": 4.935992052228215e-05, "loss": 0.0004623718559741974, "step": 178480 }, { "epoch": 50.66420664206642, "grad_norm": 0.05665064975619316, "learning_rate": 4.9357082032358786e-05, "loss": 0.000892830267548561, "step": 178490 }, { "epoch": 50.66704513198978, "grad_norm": 0.16801804304122925, "learning_rate": 4.935424354243543e-05, "loss": 0.0005209440365433693, "step": 178500 }, { "epoch": 50.66704513198978, "eval_accuracy": 0.97475678768996, "eval_loss": 0.08886351436376572, "eval_runtime": 32.6215, "eval_samples_per_second": 482.105, "eval_steps_per_second": 7.541, "step": 178500 }, { "epoch": 50.669883621913144, "grad_norm": 1.6069427728652954, "learning_rate": 4.935140505251207e-05, "loss": 0.0026915842667222024, "step": 178510 }, { "epoch": 50.67272211183651, "grad_norm": 0.15120673179626465, "learning_rate": 4.93485665625887e-05, "loss": 0.0005074212327599526, "step": 178520 }, { "epoch": 50.67556060175986, "grad_norm": 0.3359042704105377, "learning_rate": 4.9345728072665345e-05, "loss": 0.0019681137055158615, "step": 178530 }, { "epoch": 50.678399091683225, "grad_norm": 0.024479566141963005, "learning_rate": 4.9342889582741986e-05, "loss": 0.0022592568770051003, "step": 178540 }, { "epoch": 50.68123758160659, "grad_norm": 0.21077264845371246, "learning_rate": 4.934005109281862e-05, "loss": 0.002022714912891388, "step": 178550 }, { "epoch": 50.684076071529944, "grad_norm": 0.09945801645517349, "learning_rate": 4.933721260289526e-05, "loss": 0.0009286802262067795, "step": 178560 }, { "epoch": 50.68691456145331, "grad_norm": 0.8508023023605347, "learning_rate": 4.93343741129719e-05, "loss": 0.002763545513153076, "step": 178570 }, { "epoch": 50.68975305137667, "grad_norm": 0.2934623956680298, "learning_rate": 4.9331535623048545e-05, "loss": 0.00554654449224472, "step": 178580 }, { "epoch": 50.692591541300025, "grad_norm": 0.31937262415885925, "learning_rate": 4.932869713312518e-05, "loss": 0.003822198137640953, "step": 178590 }, { "epoch": 50.69543003122339, "grad_norm": 0.1112031638622284, "learning_rate": 4.9325858643201814e-05, "loss": 0.0033473484218120575, "step": 178600 }, { "epoch": 50.69826852114675, "grad_norm": 0.4033728837966919, "learning_rate": 4.932302015327846e-05, "loss": 0.000270291231572628, "step": 178610 }, { "epoch": 50.701107011070114, "grad_norm": 0.2086223065853119, "learning_rate": 4.93201816633551e-05, "loss": 0.0006224347278475761, "step": 178620 }, { "epoch": 50.70394550099347, "grad_norm": 2.71340012550354, "learning_rate": 4.931734317343174e-05, "loss": 0.001313019171357155, "step": 178630 }, { "epoch": 50.70678399091683, "grad_norm": 0.42862969636917114, "learning_rate": 4.931450468350838e-05, "loss": 0.0010548047721385956, "step": 178640 }, { "epoch": 50.709622480840196, "grad_norm": 0.14810322225093842, "learning_rate": 4.9311666193585014e-05, "loss": 0.001307407021522522, "step": 178650 }, { "epoch": 50.71246097076355, "grad_norm": 0.08268176019191742, "learning_rate": 4.9308827703661655e-05, "loss": 0.0032232481986284255, "step": 178660 }, { "epoch": 50.715299460686914, "grad_norm": 0.021411698311567307, "learning_rate": 4.93059892137383e-05, "loss": 0.0007195794954895973, "step": 178670 }, { "epoch": 50.71813795061028, "grad_norm": 6.673913955688477, "learning_rate": 4.930315072381493e-05, "loss": 0.0022778794169425963, "step": 178680 }, { "epoch": 50.72097644053363, "grad_norm": 0.48466309905052185, "learning_rate": 4.930031223389157e-05, "loss": 0.0021695947274565698, "step": 178690 }, { "epoch": 50.723814930456996, "grad_norm": 3.3368430137634277, "learning_rate": 4.929747374396821e-05, "loss": 0.0017875708639621735, "step": 178700 }, { "epoch": 50.72665342038036, "grad_norm": 0.0369260348379612, "learning_rate": 4.929463525404485e-05, "loss": 0.005566006153821945, "step": 178710 }, { "epoch": 50.72949191030372, "grad_norm": 4.205667972564697, "learning_rate": 4.929179676412149e-05, "loss": 0.0022798661142587663, "step": 178720 }, { "epoch": 50.73233040022708, "grad_norm": 0.022319070994853973, "learning_rate": 4.9288958274198125e-05, "loss": 0.0007448522374033928, "step": 178730 }, { "epoch": 50.73516889015044, "grad_norm": 0.010256805457174778, "learning_rate": 4.928611978427477e-05, "loss": 0.00022689811885356903, "step": 178740 }, { "epoch": 50.7380073800738, "grad_norm": 0.2731415927410126, "learning_rate": 4.928328129435141e-05, "loss": 0.005032230168581009, "step": 178750 }, { "epoch": 50.74084586999716, "grad_norm": 0.6842676997184753, "learning_rate": 4.928044280442804e-05, "loss": 0.004304073750972748, "step": 178760 }, { "epoch": 50.74368435992052, "grad_norm": 0.5024015307426453, "learning_rate": 4.927760431450469e-05, "loss": 0.0008814016357064248, "step": 178770 }, { "epoch": 50.746522849843885, "grad_norm": 2.7455503940582275, "learning_rate": 4.9274765824581325e-05, "loss": 0.0012464871630072593, "step": 178780 }, { "epoch": 50.74936133976724, "grad_norm": 5.226221084594727, "learning_rate": 4.9271927334657966e-05, "loss": 0.001848204992711544, "step": 178790 }, { "epoch": 50.7521998296906, "grad_norm": 0.10118193924427032, "learning_rate": 4.92690888447346e-05, "loss": 0.001522783562541008, "step": 178800 }, { "epoch": 50.755038319613966, "grad_norm": 2.0859222412109375, "learning_rate": 4.926625035481124e-05, "loss": 0.004864646494388581, "step": 178810 }, { "epoch": 50.75787680953733, "grad_norm": 0.3066481947898865, "learning_rate": 4.9263411864887883e-05, "loss": 0.0010640375316143037, "step": 178820 }, { "epoch": 50.760715299460685, "grad_norm": 0.09536521136760712, "learning_rate": 4.926057337496452e-05, "loss": 0.010853983461856842, "step": 178830 }, { "epoch": 50.76355378938405, "grad_norm": 0.32673078775405884, "learning_rate": 4.925773488504116e-05, "loss": 0.0016717568039894104, "step": 178840 }, { "epoch": 50.76639227930741, "grad_norm": 0.192287415266037, "learning_rate": 4.92548963951178e-05, "loss": 0.0014872996136546135, "step": 178850 }, { "epoch": 50.76923076923077, "grad_norm": 0.2720217704772949, "learning_rate": 4.9252057905194435e-05, "loss": 0.0011731542646884919, "step": 178860 }, { "epoch": 50.77206925915413, "grad_norm": 0.7712118625640869, "learning_rate": 4.9249219415271084e-05, "loss": 0.0014049982652068137, "step": 178870 }, { "epoch": 50.77490774907749, "grad_norm": 0.01615619845688343, "learning_rate": 4.924638092534772e-05, "loss": 0.004363653063774109, "step": 178880 }, { "epoch": 50.77774623900085, "grad_norm": 3.0605239868164062, "learning_rate": 4.924354243542435e-05, "loss": 0.003465009480714798, "step": 178890 }, { "epoch": 50.78058472892421, "grad_norm": 0.038306254893541336, "learning_rate": 4.9240703945500994e-05, "loss": 0.0007870623841881752, "step": 178900 }, { "epoch": 50.783423218847574, "grad_norm": 0.19215263426303864, "learning_rate": 4.9237865455577636e-05, "loss": 0.0005981309339404106, "step": 178910 }, { "epoch": 50.78626170877094, "grad_norm": 0.06646416336297989, "learning_rate": 4.923502696565428e-05, "loss": 0.000516049936413765, "step": 178920 }, { "epoch": 50.78910019869429, "grad_norm": 1.3104448318481445, "learning_rate": 4.923218847573091e-05, "loss": 0.0004753680899739265, "step": 178930 }, { "epoch": 50.791938688617655, "grad_norm": 1.341243028640747, "learning_rate": 4.922934998580755e-05, "loss": 0.003874048590660095, "step": 178940 }, { "epoch": 50.79477717854102, "grad_norm": 0.6288641095161438, "learning_rate": 4.9226511495884194e-05, "loss": 0.006040126830339432, "step": 178950 }, { "epoch": 50.797615668464374, "grad_norm": 2.2384088039398193, "learning_rate": 4.922367300596083e-05, "loss": 0.0010082099586725235, "step": 178960 }, { "epoch": 50.80045415838774, "grad_norm": 1.2499902248382568, "learning_rate": 4.922083451603747e-05, "loss": 0.005352294072508812, "step": 178970 }, { "epoch": 50.8032926483111, "grad_norm": 0.16821342706680298, "learning_rate": 4.921799602611411e-05, "loss": 0.0006681712344288826, "step": 178980 }, { "epoch": 50.80613113823446, "grad_norm": 0.017941802740097046, "learning_rate": 4.9215157536190746e-05, "loss": 0.003770666569471359, "step": 178990 }, { "epoch": 50.80896962815782, "grad_norm": 0.05893326550722122, "learning_rate": 4.921231904626739e-05, "loss": 0.0016591912135481834, "step": 179000 }, { "epoch": 50.80896962815782, "eval_accuracy": 0.9757741463724805, "eval_loss": 0.09130814671516418, "eval_runtime": 32.5125, "eval_samples_per_second": 483.721, "eval_steps_per_second": 7.566, "step": 179000 }, { "epoch": 50.81180811808118, "grad_norm": 0.05623128265142441, "learning_rate": 4.920948055634403e-05, "loss": 0.002649303339421749, "step": 179010 }, { "epoch": 50.814646608004544, "grad_norm": 22.51488494873047, "learning_rate": 4.9206642066420664e-05, "loss": 0.013958887755870819, "step": 179020 }, { "epoch": 50.8174850979279, "grad_norm": 0.018028760328888893, "learning_rate": 4.9203803576497305e-05, "loss": 0.0006909333169460297, "step": 179030 }, { "epoch": 50.82032358785126, "grad_norm": 0.049799077212810516, "learning_rate": 4.9200965086573946e-05, "loss": 0.0005367718636989594, "step": 179040 }, { "epoch": 50.823162077774626, "grad_norm": 0.024345967918634415, "learning_rate": 4.919812659665059e-05, "loss": 0.0006771998479962349, "step": 179050 }, { "epoch": 50.82600056769798, "grad_norm": 0.11605974286794662, "learning_rate": 4.919528810672722e-05, "loss": 0.0007713280618190765, "step": 179060 }, { "epoch": 50.828839057621344, "grad_norm": 0.29047030210494995, "learning_rate": 4.9192449616803864e-05, "loss": 0.000330309197306633, "step": 179070 }, { "epoch": 50.83167754754471, "grad_norm": 0.19378119707107544, "learning_rate": 4.9189611126880505e-05, "loss": 0.0008186059072613716, "step": 179080 }, { "epoch": 50.83451603746807, "grad_norm": 0.19748885929584503, "learning_rate": 4.918677263695714e-05, "loss": 0.0006861373782157898, "step": 179090 }, { "epoch": 50.837354527391426, "grad_norm": 0.3749293386936188, "learning_rate": 4.918393414703378e-05, "loss": 0.0015106894075870513, "step": 179100 }, { "epoch": 50.84019301731479, "grad_norm": 0.18366852402687073, "learning_rate": 4.918109565711042e-05, "loss": 0.0010784173384308815, "step": 179110 }, { "epoch": 50.84303150723815, "grad_norm": 0.18460649251937866, "learning_rate": 4.917825716718706e-05, "loss": 0.0017572684213519097, "step": 179120 }, { "epoch": 50.84586999716151, "grad_norm": 0.23588702082633972, "learning_rate": 4.91754186772637e-05, "loss": 0.0022995347157120706, "step": 179130 }, { "epoch": 50.84870848708487, "grad_norm": 0.24003693461418152, "learning_rate": 4.917258018734034e-05, "loss": 0.005397206544876099, "step": 179140 }, { "epoch": 50.85154697700823, "grad_norm": 0.7501184344291687, "learning_rate": 4.9169741697416974e-05, "loss": 0.006804966181516647, "step": 179150 }, { "epoch": 50.85438546693159, "grad_norm": 0.07630418986082077, "learning_rate": 4.9166903207493616e-05, "loss": 0.0011077145114541053, "step": 179160 }, { "epoch": 50.85722395685495, "grad_norm": 2.4324944019317627, "learning_rate": 4.916406471757026e-05, "loss": 0.003729851543903351, "step": 179170 }, { "epoch": 50.860062446778315, "grad_norm": 0.37616318464279175, "learning_rate": 4.916122622764689e-05, "loss": 0.0009873485192656516, "step": 179180 }, { "epoch": 50.86290093670168, "grad_norm": 0.08013641089200974, "learning_rate": 4.915838773772353e-05, "loss": 0.0012565094977617264, "step": 179190 }, { "epoch": 50.865739426625034, "grad_norm": 3.9856531620025635, "learning_rate": 4.915554924780017e-05, "loss": 0.0021447960287332537, "step": 179200 }, { "epoch": 50.868577916548396, "grad_norm": 0.040843307971954346, "learning_rate": 4.9152710757876816e-05, "loss": 0.0030737029388546943, "step": 179210 }, { "epoch": 50.87141640647176, "grad_norm": 0.12800690531730652, "learning_rate": 4.914987226795345e-05, "loss": 0.000683315098285675, "step": 179220 }, { "epoch": 50.874254896395115, "grad_norm": 0.21204321086406708, "learning_rate": 4.9147033778030085e-05, "loss": 0.0013605091720819474, "step": 179230 }, { "epoch": 50.87709338631848, "grad_norm": 0.02820630557835102, "learning_rate": 4.914419528810673e-05, "loss": 0.0009277824312448502, "step": 179240 }, { "epoch": 50.87993187624184, "grad_norm": 0.06900962442159653, "learning_rate": 4.914135679818337e-05, "loss": 0.002055242471396923, "step": 179250 }, { "epoch": 50.8827703661652, "grad_norm": 0.1176263764500618, "learning_rate": 4.913851830826001e-05, "loss": 0.009094487130641937, "step": 179260 }, { "epoch": 50.88560885608856, "grad_norm": 0.05066773295402527, "learning_rate": 4.913567981833665e-05, "loss": 0.0075528621673583984, "step": 179270 }, { "epoch": 50.88844734601192, "grad_norm": 0.05010965093970299, "learning_rate": 4.9132841328413285e-05, "loss": 0.0011628611013293267, "step": 179280 }, { "epoch": 50.891285835935285, "grad_norm": 0.5996926426887512, "learning_rate": 4.9130002838489926e-05, "loss": 0.004261218011379242, "step": 179290 }, { "epoch": 50.89412432585864, "grad_norm": 0.3129023015499115, "learning_rate": 4.912716434856656e-05, "loss": 0.0010878697037696838, "step": 179300 }, { "epoch": 50.896962815782004, "grad_norm": 0.015143539756536484, "learning_rate": 4.91243258586432e-05, "loss": 0.000557640939950943, "step": 179310 }, { "epoch": 50.89980130570537, "grad_norm": 0.015455203130841255, "learning_rate": 4.9121487368719844e-05, "loss": 0.0013242602348327638, "step": 179320 }, { "epoch": 50.90263979562872, "grad_norm": 0.09173936396837234, "learning_rate": 4.911864887879648e-05, "loss": 0.0007935767993330955, "step": 179330 }, { "epoch": 50.905478285552086, "grad_norm": 1.9988716840744019, "learning_rate": 4.9115810388873127e-05, "loss": 0.0011340508237481118, "step": 179340 }, { "epoch": 50.90831677547545, "grad_norm": 0.16947871446609497, "learning_rate": 4.911297189894976e-05, "loss": 0.002062997780740261, "step": 179350 }, { "epoch": 50.91115526539881, "grad_norm": 0.11406555771827698, "learning_rate": 4.9110133409026396e-05, "loss": 0.0010280873626470565, "step": 179360 }, { "epoch": 50.91399375532217, "grad_norm": 1.2644504308700562, "learning_rate": 4.9107294919103044e-05, "loss": 0.0014095757156610488, "step": 179370 }, { "epoch": 50.91683224524553, "grad_norm": 0.024860622361302376, "learning_rate": 4.910445642917968e-05, "loss": 0.0003766695037484169, "step": 179380 }, { "epoch": 50.91967073516889, "grad_norm": 8.560633659362793, "learning_rate": 4.910161793925632e-05, "loss": 0.0036016374826431273, "step": 179390 }, { "epoch": 50.92250922509225, "grad_norm": 0.04153590276837349, "learning_rate": 4.9098779449332954e-05, "loss": 0.0003776244819164276, "step": 179400 }, { "epoch": 50.92534771501561, "grad_norm": 1.5761879682540894, "learning_rate": 4.9095940959409596e-05, "loss": 0.0016282957047224044, "step": 179410 }, { "epoch": 50.928186204938974, "grad_norm": 0.5046162009239197, "learning_rate": 4.909310246948624e-05, "loss": 0.0026831995695829392, "step": 179420 }, { "epoch": 50.93102469486233, "grad_norm": 24.203166961669922, "learning_rate": 4.909026397956287e-05, "loss": 0.01174393817782402, "step": 179430 }, { "epoch": 50.93386318478569, "grad_norm": 0.7535839080810547, "learning_rate": 4.908742548963951e-05, "loss": 0.0023954806849360464, "step": 179440 }, { "epoch": 50.936701674709056, "grad_norm": 0.2901063859462738, "learning_rate": 4.9084586999716155e-05, "loss": 0.0008005311712622643, "step": 179450 }, { "epoch": 50.93954016463242, "grad_norm": 0.23094689846038818, "learning_rate": 4.908174850979279e-05, "loss": 0.001790887862443924, "step": 179460 }, { "epoch": 50.942378654555775, "grad_norm": 0.055532876402139664, "learning_rate": 4.907891001986944e-05, "loss": 0.0006162568926811219, "step": 179470 }, { "epoch": 50.94521714447914, "grad_norm": 4.2882981300354, "learning_rate": 4.907607152994607e-05, "loss": 0.0014024326577782632, "step": 179480 }, { "epoch": 50.9480556344025, "grad_norm": 0.021220499649643898, "learning_rate": 4.9073233040022707e-05, "loss": 0.004764564335346222, "step": 179490 }, { "epoch": 50.950894124325856, "grad_norm": 1.666385531425476, "learning_rate": 4.9070394550099355e-05, "loss": 0.0011418627575039864, "step": 179500 }, { "epoch": 50.950894124325856, "eval_accuracy": 0.9743752781840147, "eval_loss": 0.09286370128393173, "eval_runtime": 32.4708, "eval_samples_per_second": 484.344, "eval_steps_per_second": 7.576, "step": 179500 }, { "epoch": 50.95373261424922, "grad_norm": 0.16561318933963776, "learning_rate": 4.906755606017599e-05, "loss": 0.004696987569332123, "step": 179510 }, { "epoch": 50.95657110417258, "grad_norm": 0.07594060897827148, "learning_rate": 4.906471757025263e-05, "loss": 0.0009161263704299926, "step": 179520 }, { "epoch": 50.95940959409594, "grad_norm": 0.021128594875335693, "learning_rate": 4.9061879080329265e-05, "loss": 0.0011955585330724716, "step": 179530 }, { "epoch": 50.9622480840193, "grad_norm": 0.2908976376056671, "learning_rate": 4.905904059040591e-05, "loss": 0.0007049527019262313, "step": 179540 }, { "epoch": 50.96508657394266, "grad_norm": 9.747366905212402, "learning_rate": 4.905620210048255e-05, "loss": 0.0020374804735183716, "step": 179550 }, { "epoch": 50.967925063866026, "grad_norm": 0.05633821710944176, "learning_rate": 4.905336361055918e-05, "loss": 0.0004924533888697625, "step": 179560 }, { "epoch": 50.97076355378938, "grad_norm": 0.9094576239585876, "learning_rate": 4.9050525120635824e-05, "loss": 0.0017846930772066117, "step": 179570 }, { "epoch": 50.973602043712745, "grad_norm": 0.9898324012756348, "learning_rate": 4.9047686630712465e-05, "loss": 0.0005733927711844445, "step": 179580 }, { "epoch": 50.97644053363611, "grad_norm": 0.1377558708190918, "learning_rate": 4.90448481407891e-05, "loss": 0.0008543012663722038, "step": 179590 }, { "epoch": 50.979279023559464, "grad_norm": 0.30294978618621826, "learning_rate": 4.904200965086574e-05, "loss": 0.0017169589176774026, "step": 179600 }, { "epoch": 50.98211751348283, "grad_norm": 0.03887096047401428, "learning_rate": 4.903917116094238e-05, "loss": 0.0006159618496894836, "step": 179610 }, { "epoch": 50.98495600340619, "grad_norm": 0.07467129081487656, "learning_rate": 4.903633267101902e-05, "loss": 0.007407064735889435, "step": 179620 }, { "epoch": 50.987794493329545, "grad_norm": 0.026289066299796104, "learning_rate": 4.903349418109566e-05, "loss": 0.0006912363693118096, "step": 179630 }, { "epoch": 50.99063298325291, "grad_norm": 0.1056104227900505, "learning_rate": 4.90306556911723e-05, "loss": 0.00022765006870031356, "step": 179640 }, { "epoch": 50.99347147317627, "grad_norm": 0.04278640076518059, "learning_rate": 4.9027817201248935e-05, "loss": 0.0003650272265076637, "step": 179650 }, { "epoch": 50.996309963099634, "grad_norm": 0.4679018259048462, "learning_rate": 4.9024978711325576e-05, "loss": 0.0005566198378801346, "step": 179660 }, { "epoch": 50.99914845302299, "grad_norm": 0.020620033144950867, "learning_rate": 4.902214022140222e-05, "loss": 0.0005242584273219108, "step": 179670 }, { "epoch": 51.00198694294635, "grad_norm": 0.08303036540746689, "learning_rate": 4.901930173147886e-05, "loss": 0.0004657704383134842, "step": 179680 }, { "epoch": 51.004825432869715, "grad_norm": 0.15461541712284088, "learning_rate": 4.901646324155549e-05, "loss": 0.0004808560013771057, "step": 179690 }, { "epoch": 51.00766392279307, "grad_norm": 0.008365842513740063, "learning_rate": 4.9013624751632135e-05, "loss": 0.00023199021816253663, "step": 179700 }, { "epoch": 51.010502412716434, "grad_norm": 0.01753336377441883, "learning_rate": 4.9010786261708776e-05, "loss": 0.00024949200451374054, "step": 179710 }, { "epoch": 51.0133409026398, "grad_norm": 0.00863791722804308, "learning_rate": 4.900794777178541e-05, "loss": 0.00026589669287204745, "step": 179720 }, { "epoch": 51.01617939256316, "grad_norm": 0.2756747305393219, "learning_rate": 4.900510928186205e-05, "loss": 0.0007994692772626877, "step": 179730 }, { "epoch": 51.019017882486516, "grad_norm": 0.037193067371845245, "learning_rate": 4.9002270791938693e-05, "loss": 0.0008436944335699081, "step": 179740 }, { "epoch": 51.02185637240988, "grad_norm": 0.19737833738327026, "learning_rate": 4.899943230201533e-05, "loss": 0.0003716334700584412, "step": 179750 }, { "epoch": 51.02469486233324, "grad_norm": 2.1401031017303467, "learning_rate": 4.899659381209197e-05, "loss": 0.0008911542594432831, "step": 179760 }, { "epoch": 51.0275333522566, "grad_norm": 0.017984740436077118, "learning_rate": 4.899375532216861e-05, "loss": 0.0013545835390686989, "step": 179770 }, { "epoch": 51.03037184217996, "grad_norm": 0.011006012558937073, "learning_rate": 4.8990916832245245e-05, "loss": 0.0045109190046787265, "step": 179780 }, { "epoch": 51.03321033210332, "grad_norm": 0.1501239538192749, "learning_rate": 4.898807834232189e-05, "loss": 0.0006913190707564354, "step": 179790 }, { "epoch": 51.03604882202668, "grad_norm": 0.03708026558160782, "learning_rate": 4.898523985239853e-05, "loss": 0.0006171310320496559, "step": 179800 }, { "epoch": 51.03888731195004, "grad_norm": 0.717915415763855, "learning_rate": 4.898240136247517e-05, "loss": 0.00019870921969413757, "step": 179810 }, { "epoch": 51.041725801873405, "grad_norm": 0.0492561049759388, "learning_rate": 4.8979562872551804e-05, "loss": 0.0005968088284134865, "step": 179820 }, { "epoch": 51.04456429179677, "grad_norm": 2.8632595539093018, "learning_rate": 4.897672438262844e-05, "loss": 0.0010893121361732483, "step": 179830 }, { "epoch": 51.04740278172012, "grad_norm": 0.017252415418624878, "learning_rate": 4.897388589270509e-05, "loss": 0.00029211267828941343, "step": 179840 }, { "epoch": 51.050241271643486, "grad_norm": 0.15318366885185242, "learning_rate": 4.897104740278172e-05, "loss": 0.003840245306491852, "step": 179850 }, { "epoch": 51.05307976156685, "grad_norm": 0.00788168702274561, "learning_rate": 4.896820891285836e-05, "loss": 0.0019046250730752946, "step": 179860 }, { "epoch": 51.055918251490205, "grad_norm": 1.8242930173873901, "learning_rate": 4.8965370422935004e-05, "loss": 0.0009945519268512726, "step": 179870 }, { "epoch": 51.05875674141357, "grad_norm": 2.6354260444641113, "learning_rate": 4.896253193301164e-05, "loss": 0.0025006502866744993, "step": 179880 }, { "epoch": 51.06159523133693, "grad_norm": 0.11894006282091141, "learning_rate": 4.895969344308828e-05, "loss": 0.0011612219735980035, "step": 179890 }, { "epoch": 51.064433721260286, "grad_norm": 0.220514178276062, "learning_rate": 4.895685495316492e-05, "loss": 0.0004450822249054909, "step": 179900 }, { "epoch": 51.06727221118365, "grad_norm": 0.2767716944217682, "learning_rate": 4.8954016463241556e-05, "loss": 0.00031970683485269544, "step": 179910 }, { "epoch": 51.07011070110701, "grad_norm": 5.96195125579834, "learning_rate": 4.89511779733182e-05, "loss": 0.0029579455032944678, "step": 179920 }, { "epoch": 51.072949191030375, "grad_norm": 0.15815524756908417, "learning_rate": 4.894833948339483e-05, "loss": 0.0034724690020084383, "step": 179930 }, { "epoch": 51.07578768095373, "grad_norm": 0.5208958387374878, "learning_rate": 4.894550099347148e-05, "loss": 0.00039660818874835967, "step": 179940 }, { "epoch": 51.078626170877094, "grad_norm": 0.018688667565584183, "learning_rate": 4.8942662503548115e-05, "loss": 0.0009561549872159958, "step": 179950 }, { "epoch": 51.08146466080046, "grad_norm": 0.03799564018845558, "learning_rate": 4.893982401362475e-05, "loss": 0.0006402885541319847, "step": 179960 }, { "epoch": 51.08430315072381, "grad_norm": 6.493662357330322, "learning_rate": 4.89369855237014e-05, "loss": 0.0016361810266971588, "step": 179970 }, { "epoch": 51.087141640647175, "grad_norm": 18.471843719482422, "learning_rate": 4.893414703377803e-05, "loss": 0.0025501562282443046, "step": 179980 }, { "epoch": 51.08998013057054, "grad_norm": 0.53755784034729, "learning_rate": 4.8931308543854674e-05, "loss": 0.00204334557056427, "step": 179990 }, { "epoch": 51.092818620493894, "grad_norm": 0.2069019377231598, "learning_rate": 4.8928470053931315e-05, "loss": 0.007746119052171707, "step": 180000 }, { "epoch": 51.092818620493894, "eval_accuracy": 0.975837731290138, "eval_loss": 0.08971923589706421, "eval_runtime": 32.6144, "eval_samples_per_second": 482.211, "eval_steps_per_second": 7.543, "step": 180000 }, { "epoch": 51.09565711041726, "grad_norm": 0.38862478733062744, "learning_rate": 4.892563156400795e-05, "loss": 0.01819635331630707, "step": 180010 }, { "epoch": 51.09849560034062, "grad_norm": 0.39510875940322876, "learning_rate": 4.892279307408459e-05, "loss": 0.0018758226186037063, "step": 180020 }, { "epoch": 51.10133409026398, "grad_norm": 7.894891262054443, "learning_rate": 4.8919954584161226e-05, "loss": 0.0052969329059124, "step": 180030 }, { "epoch": 51.10417258018734, "grad_norm": 1.0198668241500854, "learning_rate": 4.891711609423787e-05, "loss": 0.012655633687973022, "step": 180040 }, { "epoch": 51.1070110701107, "grad_norm": 0.10073983669281006, "learning_rate": 4.891427760431451e-05, "loss": 0.006173787266016006, "step": 180050 }, { "epoch": 51.109849560034064, "grad_norm": 1.8462492227554321, "learning_rate": 4.891143911439114e-05, "loss": 0.0018522180616855621, "step": 180060 }, { "epoch": 51.11268804995742, "grad_norm": 0.10347336530685425, "learning_rate": 4.8908600624467784e-05, "loss": 0.0011287227272987366, "step": 180070 }, { "epoch": 51.11552653988078, "grad_norm": 0.07740754634141922, "learning_rate": 4.8905762134544426e-05, "loss": 0.00034088008105754853, "step": 180080 }, { "epoch": 51.118365029804146, "grad_norm": 0.06252571195363998, "learning_rate": 4.890292364462106e-05, "loss": 0.0022137897089123727, "step": 180090 }, { "epoch": 51.1212035197275, "grad_norm": 4.828277111053467, "learning_rate": 4.890008515469771e-05, "loss": 0.0016985338181257248, "step": 180100 }, { "epoch": 51.124042009650864, "grad_norm": 0.24035215377807617, "learning_rate": 4.889724666477434e-05, "loss": 0.0011762615293264389, "step": 180110 }, { "epoch": 51.12688049957423, "grad_norm": 0.011230031959712505, "learning_rate": 4.889440817485098e-05, "loss": 0.0009894957765936852, "step": 180120 }, { "epoch": 51.12971898949759, "grad_norm": 0.11687051504850388, "learning_rate": 4.889156968492762e-05, "loss": 0.0010034220293164253, "step": 180130 }, { "epoch": 51.132557479420946, "grad_norm": 3.432638645172119, "learning_rate": 4.888873119500426e-05, "loss": 0.005461366474628448, "step": 180140 }, { "epoch": 51.13539596934431, "grad_norm": 0.10272270441055298, "learning_rate": 4.88858927050809e-05, "loss": 0.0036858487874269485, "step": 180150 }, { "epoch": 51.13823445926767, "grad_norm": 0.6963006854057312, "learning_rate": 4.8883054215157536e-05, "loss": 0.0051358573138713835, "step": 180160 }, { "epoch": 51.14107294919103, "grad_norm": 5.506946086883545, "learning_rate": 4.888021572523418e-05, "loss": 0.008579801768064499, "step": 180170 }, { "epoch": 51.14391143911439, "grad_norm": 0.04704684391617775, "learning_rate": 4.887766108430316e-05, "loss": 0.003477158397436142, "step": 180180 }, { "epoch": 51.14674992903775, "grad_norm": 10.09178352355957, "learning_rate": 4.887482259437979e-05, "loss": 0.002485775202512741, "step": 180190 }, { "epoch": 51.149588418961116, "grad_norm": 0.4808120131492615, "learning_rate": 4.8871984104456434e-05, "loss": 0.0012505196034908294, "step": 180200 }, { "epoch": 51.15242690888447, "grad_norm": 0.07344196736812592, "learning_rate": 4.886914561453307e-05, "loss": 0.0018891185522079469, "step": 180210 }, { "epoch": 51.155265398807835, "grad_norm": 0.5004212856292725, "learning_rate": 4.886630712460971e-05, "loss": 0.0009931311011314391, "step": 180220 }, { "epoch": 51.1581038887312, "grad_norm": 2.5172946453094482, "learning_rate": 4.886346863468635e-05, "loss": 0.00524277463555336, "step": 180230 }, { "epoch": 51.16094237865455, "grad_norm": 0.08604004234075546, "learning_rate": 4.8860630144762986e-05, "loss": 0.0007053487002849579, "step": 180240 }, { "epoch": 51.163780868577916, "grad_norm": 1.566454291343689, "learning_rate": 4.885779165483963e-05, "loss": 0.00382319837808609, "step": 180250 }, { "epoch": 51.16661935850128, "grad_norm": 0.04597785323858261, "learning_rate": 4.885495316491627e-05, "loss": 0.0007988559082150459, "step": 180260 }, { "epoch": 51.169457848424635, "grad_norm": 0.06951954215765, "learning_rate": 4.88521146749929e-05, "loss": 0.001137450896203518, "step": 180270 }, { "epoch": 51.172296338348, "grad_norm": 0.1777539849281311, "learning_rate": 4.884927618506955e-05, "loss": 0.0053015690296888355, "step": 180280 }, { "epoch": 51.17513482827136, "grad_norm": 0.1762663573026657, "learning_rate": 4.8846437695146186e-05, "loss": 0.0038615263998508453, "step": 180290 }, { "epoch": 51.177973318194724, "grad_norm": 6.532079219818115, "learning_rate": 4.884359920522282e-05, "loss": 0.0020250273868441583, "step": 180300 }, { "epoch": 51.18081180811808, "grad_norm": 0.9653957486152649, "learning_rate": 4.884076071529946e-05, "loss": 0.0004380591213703156, "step": 180310 }, { "epoch": 51.18365029804144, "grad_norm": 0.050387512892484665, "learning_rate": 4.88379222253761e-05, "loss": 0.0029013574123382567, "step": 180320 }, { "epoch": 51.186488787964805, "grad_norm": 0.018023531883955002, "learning_rate": 4.8835083735452744e-05, "loss": 0.0013168906792998315, "step": 180330 }, { "epoch": 51.18932727788816, "grad_norm": 0.08583804965019226, "learning_rate": 4.883224524552938e-05, "loss": 0.0011037260293960572, "step": 180340 }, { "epoch": 51.192165767811524, "grad_norm": 0.1388053148984909, "learning_rate": 4.882940675560602e-05, "loss": 0.001609784923493862, "step": 180350 }, { "epoch": 51.19500425773489, "grad_norm": 0.024241134524345398, "learning_rate": 4.882656826568266e-05, "loss": 0.0015512334182858467, "step": 180360 }, { "epoch": 51.19784274765824, "grad_norm": 0.0329434908926487, "learning_rate": 4.8823729775759296e-05, "loss": 0.004232289642095566, "step": 180370 }, { "epoch": 51.200681237581605, "grad_norm": 0.0504172220826149, "learning_rate": 4.882089128583594e-05, "loss": 0.0009813189506530761, "step": 180380 }, { "epoch": 51.20351972750497, "grad_norm": 0.017061954364180565, "learning_rate": 4.881805279591258e-05, "loss": 0.0006106648594141007, "step": 180390 }, { "epoch": 51.20635821742833, "grad_norm": 0.3209627866744995, "learning_rate": 4.8815214305989214e-05, "loss": 0.0005499124526977539, "step": 180400 }, { "epoch": 51.20919670735169, "grad_norm": 3.182741641998291, "learning_rate": 4.8812375816065855e-05, "loss": 0.0008940545842051506, "step": 180410 }, { "epoch": 51.21203519727505, "grad_norm": 1.943410038948059, "learning_rate": 4.8809537326142496e-05, "loss": 0.0005300072953104973, "step": 180420 }, { "epoch": 51.21487368719841, "grad_norm": 0.04560404643416405, "learning_rate": 4.880669883621913e-05, "loss": 0.0009103551506996155, "step": 180430 }, { "epoch": 51.21771217712177, "grad_norm": 0.15289495885372162, "learning_rate": 4.880386034629577e-05, "loss": 0.0003016771748661995, "step": 180440 }, { "epoch": 51.22055066704513, "grad_norm": 0.1808972954750061, "learning_rate": 4.8801021856372414e-05, "loss": 0.00039109475910663604, "step": 180450 }, { "epoch": 51.223389156968494, "grad_norm": 0.2849595546722412, "learning_rate": 4.879818336644905e-05, "loss": 0.00045471377670764924, "step": 180460 }, { "epoch": 51.22622764689185, "grad_norm": 0.11283183097839355, "learning_rate": 4.879534487652569e-05, "loss": 0.002761823497712612, "step": 180470 }, { "epoch": 51.22906613681521, "grad_norm": 0.06183494254946709, "learning_rate": 4.879250638660233e-05, "loss": 0.0010355474427342416, "step": 180480 }, { "epoch": 51.231904626738576, "grad_norm": 0.07126171886920929, "learning_rate": 4.878966789667897e-05, "loss": 0.00028327684849500657, "step": 180490 }, { "epoch": 51.23474311666194, "grad_norm": 1.5550838708877563, "learning_rate": 4.878682940675561e-05, "loss": 0.0009197643026709557, "step": 180500 }, { "epoch": 51.23474311666194, "eval_accuracy": 0.977045844725631, "eval_loss": 0.08801767975091934, "eval_runtime": 32.6738, "eval_samples_per_second": 481.334, "eval_steps_per_second": 7.529, "step": 180500 }, { "epoch": 51.237581606585294, "grad_norm": 0.21365058422088623, "learning_rate": 4.878399091683225e-05, "loss": 0.000344541110098362, "step": 180510 }, { "epoch": 51.24042009650866, "grad_norm": 0.40562233328819275, "learning_rate": 4.878115242690889e-05, "loss": 0.0006029009819030762, "step": 180520 }, { "epoch": 51.24325858643202, "grad_norm": 0.04293714836239815, "learning_rate": 4.8778313936985524e-05, "loss": 0.001121486723423004, "step": 180530 }, { "epoch": 51.246097076355376, "grad_norm": 0.1427977979183197, "learning_rate": 4.8775475447062166e-05, "loss": 0.0028390195220708847, "step": 180540 }, { "epoch": 51.24893556627874, "grad_norm": 0.2921789288520813, "learning_rate": 4.877263695713881e-05, "loss": 0.003016146831214428, "step": 180550 }, { "epoch": 51.2517740562021, "grad_norm": 0.08235447853803635, "learning_rate": 4.876979846721544e-05, "loss": 0.0005880095064640045, "step": 180560 }, { "epoch": 51.254612546125465, "grad_norm": 0.0403439961373806, "learning_rate": 4.876695997729208e-05, "loss": 0.0011786442250013352, "step": 180570 }, { "epoch": 51.25745103604882, "grad_norm": 2.6774117946624756, "learning_rate": 4.8764121487368725e-05, "loss": 0.001136874221265316, "step": 180580 }, { "epoch": 51.26028952597218, "grad_norm": 0.11026768386363983, "learning_rate": 4.876128299744536e-05, "loss": 0.0003934638574719429, "step": 180590 }, { "epoch": 51.263128015895546, "grad_norm": 0.13399673998355865, "learning_rate": 4.8758444507522e-05, "loss": 0.0002336740493774414, "step": 180600 }, { "epoch": 51.2659665058189, "grad_norm": 0.11144089698791504, "learning_rate": 4.8755606017598635e-05, "loss": 0.0008956875652074813, "step": 180610 }, { "epoch": 51.268804995742265, "grad_norm": 0.28044575452804565, "learning_rate": 4.875276752767528e-05, "loss": 0.003932081162929535, "step": 180620 }, { "epoch": 51.27164348566563, "grad_norm": 0.1427331566810608, "learning_rate": 4.874992903775192e-05, "loss": 0.00586228296160698, "step": 180630 }, { "epoch": 51.274481975588984, "grad_norm": 0.3159143328666687, "learning_rate": 4.874709054782855e-05, "loss": 0.0013567646965384483, "step": 180640 }, { "epoch": 51.27732046551235, "grad_norm": 0.03928321227431297, "learning_rate": 4.87442520579052e-05, "loss": 0.000447937473654747, "step": 180650 }, { "epoch": 51.28015895543571, "grad_norm": 0.20527687668800354, "learning_rate": 4.8741413567981835e-05, "loss": 0.0024988552555441856, "step": 180660 }, { "epoch": 51.28299744535907, "grad_norm": 8.317633628845215, "learning_rate": 4.8738575078058477e-05, "loss": 0.004004713147878647, "step": 180670 }, { "epoch": 51.28583593528243, "grad_norm": 0.058568768203258514, "learning_rate": 4.873573658813512e-05, "loss": 0.00302742887288332, "step": 180680 }, { "epoch": 51.28867442520579, "grad_norm": 10.257464408874512, "learning_rate": 4.873289809821175e-05, "loss": 0.007389913499355316, "step": 180690 }, { "epoch": 51.291512915129154, "grad_norm": 0.24839149415493011, "learning_rate": 4.8730059608288394e-05, "loss": 0.00042100641876459123, "step": 180700 }, { "epoch": 51.29435140505251, "grad_norm": 16.516530990600586, "learning_rate": 4.872722111836503e-05, "loss": 0.00713203102350235, "step": 180710 }, { "epoch": 51.29718989497587, "grad_norm": 0.08153164386749268, "learning_rate": 4.872438262844167e-05, "loss": 0.0002146560698747635, "step": 180720 }, { "epoch": 51.300028384899235, "grad_norm": 0.004099681507796049, "learning_rate": 4.872154413851831e-05, "loss": 0.0004123905673623085, "step": 180730 }, { "epoch": 51.30286687482259, "grad_norm": 0.026017921045422554, "learning_rate": 4.8718705648594946e-05, "loss": 0.0005426462739706039, "step": 180740 }, { "epoch": 51.305705364745954, "grad_norm": 6.821203231811523, "learning_rate": 4.8715867158671594e-05, "loss": 0.00504787340760231, "step": 180750 }, { "epoch": 51.30854385466932, "grad_norm": 0.02022259496152401, "learning_rate": 4.871302866874823e-05, "loss": 0.0022015584632754325, "step": 180760 }, { "epoch": 51.31138234459268, "grad_norm": 3.727421283721924, "learning_rate": 4.871019017882486e-05, "loss": 0.0010901762172579766, "step": 180770 }, { "epoch": 51.314220834516036, "grad_norm": 0.06664789468050003, "learning_rate": 4.870735168890151e-05, "loss": 0.0006896872073411942, "step": 180780 }, { "epoch": 51.3170593244394, "grad_norm": 0.11181101202964783, "learning_rate": 4.8704513198978146e-05, "loss": 0.0011407820507884025, "step": 180790 }, { "epoch": 51.31989781436276, "grad_norm": 0.09372278302907944, "learning_rate": 4.870167470905479e-05, "loss": 0.0016520094126462937, "step": 180800 }, { "epoch": 51.32273630428612, "grad_norm": 0.055271636694669724, "learning_rate": 4.869883621913142e-05, "loss": 0.0005426280200481415, "step": 180810 }, { "epoch": 51.32557479420948, "grad_norm": 0.23932403326034546, "learning_rate": 4.869599772920806e-05, "loss": 0.0003681229427456856, "step": 180820 }, { "epoch": 51.32841328413284, "grad_norm": 0.03279741853475571, "learning_rate": 4.8693159239284705e-05, "loss": 0.0002476053312420845, "step": 180830 }, { "epoch": 51.3312517740562, "grad_norm": 0.3722105920314789, "learning_rate": 4.869032074936134e-05, "loss": 0.0006588449701666832, "step": 180840 }, { "epoch": 51.33409026397956, "grad_norm": 0.026470595970749855, "learning_rate": 4.868748225943798e-05, "loss": 0.0012213436886668206, "step": 180850 }, { "epoch": 51.336928753902924, "grad_norm": 0.0803748220205307, "learning_rate": 4.868464376951462e-05, "loss": 0.004335398226976395, "step": 180860 }, { "epoch": 51.33976724382629, "grad_norm": 0.0489346943795681, "learning_rate": 4.868180527959126e-05, "loss": 0.000897197239100933, "step": 180870 }, { "epoch": 51.34260573374964, "grad_norm": 0.15893396735191345, "learning_rate": 4.86789667896679e-05, "loss": 0.00025262255221605303, "step": 180880 }, { "epoch": 51.345444223673006, "grad_norm": 0.015027850866317749, "learning_rate": 4.867612829974454e-05, "loss": 0.0011533098295331001, "step": 180890 }, { "epoch": 51.34828271359637, "grad_norm": 0.6291434168815613, "learning_rate": 4.8673289809821174e-05, "loss": 0.0012927433475852014, "step": 180900 }, { "epoch": 51.351121203519725, "grad_norm": 0.8774008750915527, "learning_rate": 4.8670451319897815e-05, "loss": 0.0009439870715141297, "step": 180910 }, { "epoch": 51.35395969344309, "grad_norm": 0.04107557609677315, "learning_rate": 4.866761282997446e-05, "loss": 0.000671207718551159, "step": 180920 }, { "epoch": 51.35679818336645, "grad_norm": 0.35322102904319763, "learning_rate": 4.866477434005109e-05, "loss": 0.0003599001094698906, "step": 180930 }, { "epoch": 51.35963667328981, "grad_norm": 0.08499258011579514, "learning_rate": 4.866193585012773e-05, "loss": 0.0003437289968132973, "step": 180940 }, { "epoch": 51.36247516321317, "grad_norm": 0.07135917246341705, "learning_rate": 4.8659097360204374e-05, "loss": 0.005439245700836181, "step": 180950 }, { "epoch": 51.36531365313653, "grad_norm": 0.012154940515756607, "learning_rate": 4.8656258870281015e-05, "loss": 0.0032960034906864165, "step": 180960 }, { "epoch": 51.368152143059895, "grad_norm": 9.94111442565918, "learning_rate": 4.865342038035765e-05, "loss": 0.003684813156723976, "step": 180970 }, { "epoch": 51.37099063298325, "grad_norm": 0.032539766281843185, "learning_rate": 4.865058189043429e-05, "loss": 0.0010061433538794517, "step": 180980 }, { "epoch": 51.37382912290661, "grad_norm": 0.013985106721520424, "learning_rate": 4.864774340051093e-05, "loss": 0.00496593713760376, "step": 180990 }, { "epoch": 51.376667612829976, "grad_norm": 0.0863175019621849, "learning_rate": 4.864490491058757e-05, "loss": 0.0013937652111053467, "step": 181000 }, { "epoch": 51.376667612829976, "eval_accuracy": 0.9727856552425764, "eval_loss": 0.10613223910331726, "eval_runtime": 32.5739, "eval_samples_per_second": 482.81, "eval_steps_per_second": 7.552, "step": 181000 }, { "epoch": 51.37950610275333, "grad_norm": 1.217592716217041, "learning_rate": 4.864206642066421e-05, "loss": 0.0027171896770596506, "step": 181010 }, { "epoch": 51.382344592676695, "grad_norm": 0.02334403246641159, "learning_rate": 4.863922793074085e-05, "loss": 0.00397578626871109, "step": 181020 }, { "epoch": 51.38518308260006, "grad_norm": 0.614835798740387, "learning_rate": 4.8636389440817485e-05, "loss": 0.002307300828397274, "step": 181030 }, { "epoch": 51.38802157252342, "grad_norm": 0.11029272526502609, "learning_rate": 4.8633550950894126e-05, "loss": 0.007133351266384124, "step": 181040 }, { "epoch": 51.39086006244678, "grad_norm": 0.4057632088661194, "learning_rate": 4.863071246097077e-05, "loss": 0.003668259456753731, "step": 181050 }, { "epoch": 51.39369855237014, "grad_norm": 0.03350900113582611, "learning_rate": 4.86278739710474e-05, "loss": 0.002091832645237446, "step": 181060 }, { "epoch": 51.3965370422935, "grad_norm": 0.04151994735002518, "learning_rate": 4.8625035481124043e-05, "loss": 0.0015479713678359986, "step": 181070 }, { "epoch": 51.39937553221686, "grad_norm": 2.130741596221924, "learning_rate": 4.8622196991200685e-05, "loss": 0.0029365856200456618, "step": 181080 }, { "epoch": 51.40221402214022, "grad_norm": 0.05362674966454506, "learning_rate": 4.8619358501277326e-05, "loss": 0.0008785046637058258, "step": 181090 }, { "epoch": 51.405052512063584, "grad_norm": 14.57697868347168, "learning_rate": 4.861652001135396e-05, "loss": 0.005883552134037018, "step": 181100 }, { "epoch": 51.40789100198694, "grad_norm": 13.36205768585205, "learning_rate": 4.8613681521430595e-05, "loss": 0.0037763576954603197, "step": 181110 }, { "epoch": 51.4107294919103, "grad_norm": 0.015633828938007355, "learning_rate": 4.8610843031507244e-05, "loss": 0.0013071518391370774, "step": 181120 }, { "epoch": 51.413567981833665, "grad_norm": 0.03230821713805199, "learning_rate": 4.860800454158388e-05, "loss": 0.00046955756843090055, "step": 181130 }, { "epoch": 51.41640647175703, "grad_norm": 0.03939720615744591, "learning_rate": 4.860516605166052e-05, "loss": 0.0005629988387227059, "step": 181140 }, { "epoch": 51.419244961680384, "grad_norm": 0.03258715942502022, "learning_rate": 4.860232756173716e-05, "loss": 0.000768083706498146, "step": 181150 }, { "epoch": 51.42208345160375, "grad_norm": 0.3541862964630127, "learning_rate": 4.8599489071813796e-05, "loss": 0.001079992763698101, "step": 181160 }, { "epoch": 51.42492194152711, "grad_norm": 16.229467391967773, "learning_rate": 4.859665058189044e-05, "loss": 0.006977045536041259, "step": 181170 }, { "epoch": 51.427760431450466, "grad_norm": 1.6912175416946411, "learning_rate": 4.859381209196708e-05, "loss": 0.0006770160049200058, "step": 181180 }, { "epoch": 51.43059892137383, "grad_norm": 0.16344580054283142, "learning_rate": 4.859097360204371e-05, "loss": 0.0015537645667791367, "step": 181190 }, { "epoch": 51.43343741129719, "grad_norm": 0.021976374089717865, "learning_rate": 4.8588135112120354e-05, "loss": 0.002021470107138157, "step": 181200 }, { "epoch": 51.43627590122055, "grad_norm": 0.08889459818601608, "learning_rate": 4.8585296622196996e-05, "loss": 0.0006487442180514336, "step": 181210 }, { "epoch": 51.43911439114391, "grad_norm": 0.3332346975803375, "learning_rate": 4.858245813227364e-05, "loss": 0.0008749550208449364, "step": 181220 }, { "epoch": 51.44195288106727, "grad_norm": 0.6767814755439758, "learning_rate": 4.857961964235027e-05, "loss": 0.0006916450336575508, "step": 181230 }, { "epoch": 51.444791370990636, "grad_norm": 0.02048870176076889, "learning_rate": 4.8576781152426906e-05, "loss": 0.0010262200608849525, "step": 181240 }, { "epoch": 51.44762986091399, "grad_norm": 0.037782955914735794, "learning_rate": 4.8573942662503554e-05, "loss": 0.0003974897786974907, "step": 181250 }, { "epoch": 51.450468350837355, "grad_norm": 0.09114035964012146, "learning_rate": 4.857110417258019e-05, "loss": 0.0005215317010879517, "step": 181260 }, { "epoch": 51.45330684076072, "grad_norm": 0.054964207112789154, "learning_rate": 4.856826568265683e-05, "loss": 0.0018994625657796859, "step": 181270 }, { "epoch": 51.45614533068407, "grad_norm": 0.012142033316195011, "learning_rate": 4.856542719273347e-05, "loss": 0.0015295058488845824, "step": 181280 }, { "epoch": 51.458983820607436, "grad_norm": 0.07273957133293152, "learning_rate": 4.8562588702810106e-05, "loss": 0.0006283344700932503, "step": 181290 }, { "epoch": 51.4618223105308, "grad_norm": 0.11205682903528214, "learning_rate": 4.855975021288675e-05, "loss": 0.002934711053967476, "step": 181300 }, { "epoch": 51.464660800454155, "grad_norm": 0.45976153016090393, "learning_rate": 4.855691172296339e-05, "loss": 0.0006705144420266152, "step": 181310 }, { "epoch": 51.46749929037752, "grad_norm": 0.05666870251297951, "learning_rate": 4.8554073233040024e-05, "loss": 0.0005197223275899887, "step": 181320 }, { "epoch": 51.47033778030088, "grad_norm": 0.08819159865379333, "learning_rate": 4.8551234743116665e-05, "loss": 0.002376114763319492, "step": 181330 }, { "epoch": 51.47317627022424, "grad_norm": 0.059031471610069275, "learning_rate": 4.85483962531933e-05, "loss": 0.001571834273636341, "step": 181340 }, { "epoch": 51.4760147601476, "grad_norm": 0.03961729258298874, "learning_rate": 4.854555776326994e-05, "loss": 0.006097172200679779, "step": 181350 }, { "epoch": 51.47885325007096, "grad_norm": 0.37075722217559814, "learning_rate": 4.854271927334658e-05, "loss": 0.0006198080256581307, "step": 181360 }, { "epoch": 51.481691739994325, "grad_norm": 0.28974345326423645, "learning_rate": 4.853988078342322e-05, "loss": 0.0010362718254327773, "step": 181370 }, { "epoch": 51.48453022991768, "grad_norm": 1.238527536392212, "learning_rate": 4.8537042293499865e-05, "loss": 0.0005213011056184769, "step": 181380 }, { "epoch": 51.487368719841044, "grad_norm": 0.1479216367006302, "learning_rate": 4.85342038035765e-05, "loss": 0.0003423018380999565, "step": 181390 }, { "epoch": 51.49020720976441, "grad_norm": 0.17771846055984497, "learning_rate": 4.8531365313653134e-05, "loss": 0.001193930022418499, "step": 181400 }, { "epoch": 51.49304569968777, "grad_norm": 0.7106952667236328, "learning_rate": 4.852852682372978e-05, "loss": 0.00037361718714237215, "step": 181410 }, { "epoch": 51.495884189611125, "grad_norm": 0.02444724552333355, "learning_rate": 4.852568833380642e-05, "loss": 0.0016379190608859062, "step": 181420 }, { "epoch": 51.49872267953449, "grad_norm": 0.3195110857486725, "learning_rate": 4.852284984388306e-05, "loss": 0.004004678130149842, "step": 181430 }, { "epoch": 51.50156116945785, "grad_norm": 0.07824578136205673, "learning_rate": 4.852001135395969e-05, "loss": 0.0004601573571562767, "step": 181440 }, { "epoch": 51.50439965938121, "grad_norm": 0.06756619364023209, "learning_rate": 4.8517172864036334e-05, "loss": 0.0007849743589758873, "step": 181450 }, { "epoch": 51.50723814930457, "grad_norm": 8.194310188293457, "learning_rate": 4.8514334374112976e-05, "loss": 0.0022716274484992026, "step": 181460 }, { "epoch": 51.51007663922793, "grad_norm": 0.7217859029769897, "learning_rate": 4.851149588418961e-05, "loss": 0.0008877860382199287, "step": 181470 }, { "epoch": 51.51291512915129, "grad_norm": 0.08673321455717087, "learning_rate": 4.850865739426625e-05, "loss": 0.0003693487495183945, "step": 181480 }, { "epoch": 51.51575361907465, "grad_norm": 4.938827991485596, "learning_rate": 4.850581890434289e-05, "loss": 0.0008981414139270782, "step": 181490 }, { "epoch": 51.518592108998014, "grad_norm": 0.6222519278526306, "learning_rate": 4.850298041441953e-05, "loss": 0.0006748436018824578, "step": 181500 }, { "epoch": 51.518592108998014, "eval_accuracy": 0.9764735804667133, "eval_loss": 0.08601141721010208, "eval_runtime": 32.561, "eval_samples_per_second": 483.002, "eval_steps_per_second": 7.555, "step": 181500 }, { "epoch": 51.52143059892138, "grad_norm": 0.06601157784461975, "learning_rate": 4.8500141924496176e-05, "loss": 0.000710977241396904, "step": 181510 }, { "epoch": 51.52426908884473, "grad_norm": 0.49527034163475037, "learning_rate": 4.849730343457281e-05, "loss": 0.0006310498341917991, "step": 181520 }, { "epoch": 51.527107578768096, "grad_norm": 0.044814515858888626, "learning_rate": 4.8494464944649445e-05, "loss": 0.0023737261071801186, "step": 181530 }, { "epoch": 51.52994606869146, "grad_norm": 0.3510282635688782, "learning_rate": 4.8491626454726086e-05, "loss": 0.0007682416588068008, "step": 181540 }, { "epoch": 51.532784558614814, "grad_norm": 3.006649971008301, "learning_rate": 4.848878796480273e-05, "loss": 0.001893463358283043, "step": 181550 }, { "epoch": 51.53562304853818, "grad_norm": 1.2473188638687134, "learning_rate": 4.848594947487937e-05, "loss": 0.0004672415554523468, "step": 181560 }, { "epoch": 51.53846153846154, "grad_norm": 0.27910885214805603, "learning_rate": 4.8483110984956004e-05, "loss": 0.003571612760424614, "step": 181570 }, { "epoch": 51.541300028384896, "grad_norm": 0.20243224501609802, "learning_rate": 4.8480272495032645e-05, "loss": 0.0002509405836462975, "step": 181580 }, { "epoch": 51.54413851830826, "grad_norm": 0.04568805918097496, "learning_rate": 4.8477434005109287e-05, "loss": 0.0012143231928348542, "step": 181590 }, { "epoch": 51.54697700823162, "grad_norm": 0.23504769802093506, "learning_rate": 4.847459551518592e-05, "loss": 0.0004699230194091797, "step": 181600 }, { "epoch": 51.549815498154985, "grad_norm": 0.0416623055934906, "learning_rate": 4.847175702526256e-05, "loss": 0.0005585474893450737, "step": 181610 }, { "epoch": 51.55265398807834, "grad_norm": 0.11152494698762894, "learning_rate": 4.8468918535339204e-05, "loss": 0.0008632868528366088, "step": 181620 }, { "epoch": 51.5554924780017, "grad_norm": 13.38485336303711, "learning_rate": 4.846608004541584e-05, "loss": 0.004079372808337212, "step": 181630 }, { "epoch": 51.558330967925066, "grad_norm": 0.08424478769302368, "learning_rate": 4.846324155549248e-05, "loss": 0.0015043659135699273, "step": 181640 }, { "epoch": 51.56116945784842, "grad_norm": 3.046485662460327, "learning_rate": 4.846040306556912e-05, "loss": 0.0023901436477899553, "step": 181650 }, { "epoch": 51.564007947771785, "grad_norm": 0.2820039689540863, "learning_rate": 4.8457564575645756e-05, "loss": 0.0006506204605102539, "step": 181660 }, { "epoch": 51.56684643769515, "grad_norm": 0.11082679778337479, "learning_rate": 4.84547260857224e-05, "loss": 0.005555438995361328, "step": 181670 }, { "epoch": 51.56968492761851, "grad_norm": 0.1286991834640503, "learning_rate": 4.845188759579904e-05, "loss": 0.0005170630291104316, "step": 181680 }, { "epoch": 51.572523417541866, "grad_norm": 0.03374123573303223, "learning_rate": 4.844904910587568e-05, "loss": 0.0008635444566607476, "step": 181690 }, { "epoch": 51.57536190746523, "grad_norm": 0.13898295164108276, "learning_rate": 4.8446210615952315e-05, "loss": 0.003147616982460022, "step": 181700 }, { "epoch": 51.57820039738859, "grad_norm": 1.1623297929763794, "learning_rate": 4.8443372126028956e-05, "loss": 0.0006249109283089638, "step": 181710 }, { "epoch": 51.58103888731195, "grad_norm": 0.1495249718427658, "learning_rate": 4.84405336361056e-05, "loss": 0.0005292003974318504, "step": 181720 }, { "epoch": 51.58387737723531, "grad_norm": 0.07027989625930786, "learning_rate": 4.843769514618223e-05, "loss": 0.0012970726937055587, "step": 181730 }, { "epoch": 51.586715867158674, "grad_norm": 0.04861418157815933, "learning_rate": 4.843485665625887e-05, "loss": 0.0006477834656834602, "step": 181740 }, { "epoch": 51.58955435708203, "grad_norm": 0.3068985641002655, "learning_rate": 4.8432018166335515e-05, "loss": 0.0005508480593562126, "step": 181750 }, { "epoch": 51.59239284700539, "grad_norm": 0.29374852776527405, "learning_rate": 4.842917967641215e-05, "loss": 0.00033357590436935427, "step": 181760 }, { "epoch": 51.595231336928755, "grad_norm": 6.635913372039795, "learning_rate": 4.842634118648879e-05, "loss": 0.0017150992527604104, "step": 181770 }, { "epoch": 51.59806982685212, "grad_norm": 2.9442191123962402, "learning_rate": 4.842350269656543e-05, "loss": 0.001280095800757408, "step": 181780 }, { "epoch": 51.600908316775474, "grad_norm": 0.13151171803474426, "learning_rate": 4.8420664206642067e-05, "loss": 0.001746627502143383, "step": 181790 }, { "epoch": 51.60374680669884, "grad_norm": 0.11686323583126068, "learning_rate": 4.841782571671871e-05, "loss": 0.0040001943707466125, "step": 181800 }, { "epoch": 51.6065852966222, "grad_norm": 0.4985428750514984, "learning_rate": 4.841498722679535e-05, "loss": 0.0023714551702141763, "step": 181810 }, { "epoch": 51.609423786545555, "grad_norm": 0.020097579807043076, "learning_rate": 4.8412148736871984e-05, "loss": 0.008575651794672012, "step": 181820 }, { "epoch": 51.61226227646892, "grad_norm": 1.050213098526001, "learning_rate": 4.8409310246948625e-05, "loss": 0.0006308304145932198, "step": 181830 }, { "epoch": 51.61510076639228, "grad_norm": 0.2779833972454071, "learning_rate": 4.840647175702526e-05, "loss": 0.0024436334148049354, "step": 181840 }, { "epoch": 51.61793925631564, "grad_norm": 1.8336868286132812, "learning_rate": 4.840363326710191e-05, "loss": 0.0058536045253276825, "step": 181850 }, { "epoch": 51.620777746239, "grad_norm": 0.08608710020780563, "learning_rate": 4.840079477717854e-05, "loss": 0.01229449436068535, "step": 181860 }, { "epoch": 51.62361623616236, "grad_norm": 7.038613796234131, "learning_rate": 4.839795628725518e-05, "loss": 0.007699941098690033, "step": 181870 }, { "epoch": 51.626454726085726, "grad_norm": 15.456930160522461, "learning_rate": 4.8395117797331825e-05, "loss": 0.011249026656150818, "step": 181880 }, { "epoch": 51.62929321600908, "grad_norm": 1.8425384759902954, "learning_rate": 4.839227930740846e-05, "loss": 0.001823759078979492, "step": 181890 }, { "epoch": 51.632131705932444, "grad_norm": 0.4331265687942505, "learning_rate": 4.83894408174851e-05, "loss": 0.002013492025434971, "step": 181900 }, { "epoch": 51.63497019585581, "grad_norm": 0.8078008890151978, "learning_rate": 4.838660232756174e-05, "loss": 0.002008094824850559, "step": 181910 }, { "epoch": 51.63780868577916, "grad_norm": 0.22775200009346008, "learning_rate": 4.838376383763838e-05, "loss": 0.0004917610436677933, "step": 181920 }, { "epoch": 51.640647175702526, "grad_norm": 0.018183346837759018, "learning_rate": 4.838092534771502e-05, "loss": 0.0008903389796614646, "step": 181930 }, { "epoch": 51.64348566562589, "grad_norm": 0.11603771895170212, "learning_rate": 4.837808685779165e-05, "loss": 0.0004816558212041855, "step": 181940 }, { "epoch": 51.646324155549244, "grad_norm": 0.05986776575446129, "learning_rate": 4.8375248367868295e-05, "loss": 0.00038706324994564056, "step": 181950 }, { "epoch": 51.64916264547261, "grad_norm": 0.10174457728862762, "learning_rate": 4.8372409877944936e-05, "loss": 0.0005039626732468605, "step": 181960 }, { "epoch": 51.65200113539597, "grad_norm": 7.768913269042969, "learning_rate": 4.836957138802157e-05, "loss": 0.003003590553998947, "step": 181970 }, { "epoch": 51.65483962531933, "grad_norm": 0.0953037291765213, "learning_rate": 4.836673289809822e-05, "loss": 0.0007136184722185135, "step": 181980 }, { "epoch": 51.65767811524269, "grad_norm": 0.450955867767334, "learning_rate": 4.8363894408174853e-05, "loss": 0.002825357764959335, "step": 181990 }, { "epoch": 51.66051660516605, "grad_norm": 1.63673996925354, "learning_rate": 4.836105591825149e-05, "loss": 0.0014265939593315125, "step": 182000 }, { "epoch": 51.66051660516605, "eval_accuracy": 0.9742481083486997, "eval_loss": 0.09733858704566956, "eval_runtime": 32.4231, "eval_samples_per_second": 485.056, "eval_steps_per_second": 7.587, "step": 182000 }, { "epoch": 51.663355095089415, "grad_norm": 0.07253444939851761, "learning_rate": 4.8358217428328136e-05, "loss": 0.0018283488228917123, "step": 182010 }, { "epoch": 51.66619358501277, "grad_norm": 0.11729633808135986, "learning_rate": 4.835537893840477e-05, "loss": 0.001674944907426834, "step": 182020 }, { "epoch": 51.66903207493613, "grad_norm": 0.14158402383327484, "learning_rate": 4.835254044848141e-05, "loss": 0.0029733633622527123, "step": 182030 }, { "epoch": 51.671870564859496, "grad_norm": 2.668970823287964, "learning_rate": 4.834970195855805e-05, "loss": 0.005384045466780662, "step": 182040 }, { "epoch": 51.67470905478285, "grad_norm": 1.6362977027893066, "learning_rate": 4.834686346863469e-05, "loss": 0.0010470721870660781, "step": 182050 }, { "epoch": 51.677547544706215, "grad_norm": 0.2941102087497711, "learning_rate": 4.834402497871133e-05, "loss": 0.002261085994541645, "step": 182060 }, { "epoch": 51.68038603462958, "grad_norm": 0.8117838501930237, "learning_rate": 4.8341186488787964e-05, "loss": 0.000460306741297245, "step": 182070 }, { "epoch": 51.68322452455294, "grad_norm": 0.4067847728729248, "learning_rate": 4.8338347998864605e-05, "loss": 0.011284461617469788, "step": 182080 }, { "epoch": 51.6860630144763, "grad_norm": 10.003881454467773, "learning_rate": 4.833550950894125e-05, "loss": 0.005481652915477753, "step": 182090 }, { "epoch": 51.68890150439966, "grad_norm": 0.10897538810968399, "learning_rate": 4.833267101901788e-05, "loss": 0.0007394958287477493, "step": 182100 }, { "epoch": 51.69173999432302, "grad_norm": 6.69999361038208, "learning_rate": 4.832983252909453e-05, "loss": 0.017944443225860595, "step": 182110 }, { "epoch": 51.69457848424638, "grad_norm": 1.956730604171753, "learning_rate": 4.8326994039171164e-05, "loss": 0.0030435658991336823, "step": 182120 }, { "epoch": 51.69741697416974, "grad_norm": 0.934472382068634, "learning_rate": 4.83241555492478e-05, "loss": 0.010732844471931458, "step": 182130 }, { "epoch": 51.700255464093104, "grad_norm": 5.449108123779297, "learning_rate": 4.832131705932444e-05, "loss": 0.0023874159902334212, "step": 182140 }, { "epoch": 51.70309395401647, "grad_norm": 0.15881489217281342, "learning_rate": 4.831847856940108e-05, "loss": 0.0009800756350159645, "step": 182150 }, { "epoch": 51.70593244393982, "grad_norm": 13.191521644592285, "learning_rate": 4.831564007947772e-05, "loss": 0.0035543959587812425, "step": 182160 }, { "epoch": 51.708770933863185, "grad_norm": 4.631021022796631, "learning_rate": 4.831280158955436e-05, "loss": 0.0025419492274522783, "step": 182170 }, { "epoch": 51.71160942378655, "grad_norm": 0.06105424836277962, "learning_rate": 4.8309963099631e-05, "loss": 0.0008065775036811829, "step": 182180 }, { "epoch": 51.714447913709904, "grad_norm": 0.7231377363204956, "learning_rate": 4.830712460970764e-05, "loss": 0.0009502695873379707, "step": 182190 }, { "epoch": 51.71728640363327, "grad_norm": 0.36902305483818054, "learning_rate": 4.8304286119784275e-05, "loss": 0.0009264951571822166, "step": 182200 }, { "epoch": 51.72012489355663, "grad_norm": 0.06052348390221596, "learning_rate": 4.8301447629860916e-05, "loss": 0.0007219156250357628, "step": 182210 }, { "epoch": 51.722963383479986, "grad_norm": 0.1284666210412979, "learning_rate": 4.829860913993756e-05, "loss": 0.004949063062667847, "step": 182220 }, { "epoch": 51.72580187340335, "grad_norm": 0.509452223777771, "learning_rate": 4.829577065001419e-05, "loss": 0.00042866673320531847, "step": 182230 }, { "epoch": 51.72864036332671, "grad_norm": 0.2914709448814392, "learning_rate": 4.8292932160090834e-05, "loss": 0.00347202867269516, "step": 182240 }, { "epoch": 51.731478853250074, "grad_norm": 0.10682666301727295, "learning_rate": 4.8290093670167475e-05, "loss": 0.00019755307585000992, "step": 182250 }, { "epoch": 51.73431734317343, "grad_norm": 0.013890299014747143, "learning_rate": 4.828725518024411e-05, "loss": 0.00042697135359048843, "step": 182260 }, { "epoch": 51.73715583309679, "grad_norm": 1.1726351976394653, "learning_rate": 4.828441669032075e-05, "loss": 0.00043001510202884675, "step": 182270 }, { "epoch": 51.739994323020156, "grad_norm": 0.03203336149454117, "learning_rate": 4.828157820039739e-05, "loss": 0.00021441206336021424, "step": 182280 }, { "epoch": 51.74283281294351, "grad_norm": 0.059880051761865616, "learning_rate": 4.827873971047403e-05, "loss": 0.0006886312738060951, "step": 182290 }, { "epoch": 51.745671302866874, "grad_norm": 0.0780503898859024, "learning_rate": 4.827590122055067e-05, "loss": 0.00047068726271390913, "step": 182300 }, { "epoch": 51.74850979279024, "grad_norm": 0.06790386140346527, "learning_rate": 4.827306273062731e-05, "loss": 0.0002838464453816414, "step": 182310 }, { "epoch": 51.75134828271359, "grad_norm": 0.09590662270784378, "learning_rate": 4.827022424070395e-05, "loss": 0.0005536878481507302, "step": 182320 }, { "epoch": 51.754186772636956, "grad_norm": 0.015578238293528557, "learning_rate": 4.8267385750780586e-05, "loss": 0.0005279622972011566, "step": 182330 }, { "epoch": 51.75702526256032, "grad_norm": 0.958501398563385, "learning_rate": 4.826454726085722e-05, "loss": 0.0008314261212944984, "step": 182340 }, { "epoch": 51.75986375248368, "grad_norm": 0.14000454545021057, "learning_rate": 4.826170877093387e-05, "loss": 0.0009681940078735352, "step": 182350 }, { "epoch": 51.76270224240704, "grad_norm": 2.396366834640503, "learning_rate": 4.82588702810105e-05, "loss": 0.0008987881243228913, "step": 182360 }, { "epoch": 51.7655407323304, "grad_norm": 0.047076817601919174, "learning_rate": 4.8256031791087144e-05, "loss": 0.0007864663377404213, "step": 182370 }, { "epoch": 51.76837922225376, "grad_norm": 0.07016611844301224, "learning_rate": 4.8253193301163786e-05, "loss": 0.0008623959496617317, "step": 182380 }, { "epoch": 51.77121771217712, "grad_norm": 0.8966825604438782, "learning_rate": 4.825035481124042e-05, "loss": 0.00121581070125103, "step": 182390 }, { "epoch": 51.77405620210048, "grad_norm": 0.015400027856230736, "learning_rate": 4.824751632131706e-05, "loss": 0.0014248896390199662, "step": 182400 }, { "epoch": 51.776894692023845, "grad_norm": 0.052285660058259964, "learning_rate": 4.82446778313937e-05, "loss": 0.002049084007740021, "step": 182410 }, { "epoch": 51.7797331819472, "grad_norm": 0.015802329406142235, "learning_rate": 4.824183934147034e-05, "loss": 0.0025892384350299836, "step": 182420 }, { "epoch": 51.78257167187056, "grad_norm": 0.011272271163761616, "learning_rate": 4.823900085154698e-05, "loss": 0.0059128038585186, "step": 182430 }, { "epoch": 51.785410161793926, "grad_norm": 1.2104628086090088, "learning_rate": 4.8236162361623614e-05, "loss": 0.0006790978834033013, "step": 182440 }, { "epoch": 51.78824865171729, "grad_norm": 0.05820819362998009, "learning_rate": 4.823332387170026e-05, "loss": 0.015612594783306122, "step": 182450 }, { "epoch": 51.791087141640645, "grad_norm": 0.3581760823726654, "learning_rate": 4.8230485381776896e-05, "loss": 0.004312196373939514, "step": 182460 }, { "epoch": 51.79392563156401, "grad_norm": 0.08177873492240906, "learning_rate": 4.822764689185353e-05, "loss": 0.003569342941045761, "step": 182470 }, { "epoch": 51.79676412148737, "grad_norm": 0.05191491171717644, "learning_rate": 4.822480840193018e-05, "loss": 0.004858438298106194, "step": 182480 }, { "epoch": 51.79960261141073, "grad_norm": 0.03919440880417824, "learning_rate": 4.8221969912006814e-05, "loss": 0.0023818347603082658, "step": 182490 }, { "epoch": 51.80244110133409, "grad_norm": 0.12623044848442078, "learning_rate": 4.8219131422083455e-05, "loss": 0.009125805646181106, "step": 182500 }, { "epoch": 51.80244110133409, "eval_accuracy": 0.9734850893368093, "eval_loss": 0.09479783475399017, "eval_runtime": 32.4514, "eval_samples_per_second": 484.632, "eval_steps_per_second": 7.581, "step": 182500 }, { "epoch": 51.80527959125745, "grad_norm": 0.9531261324882507, "learning_rate": 4.8216292932160097e-05, "loss": 0.0010429421439766885, "step": 182510 }, { "epoch": 51.808118081180815, "grad_norm": 0.2409517765045166, "learning_rate": 4.821345444223673e-05, "loss": 0.0012009555473923683, "step": 182520 }, { "epoch": 51.81095657110417, "grad_norm": 0.2287381887435913, "learning_rate": 4.821061595231337e-05, "loss": 0.0013151943683624267, "step": 182530 }, { "epoch": 51.813795061027534, "grad_norm": 0.13147073984146118, "learning_rate": 4.8207777462390014e-05, "loss": 0.005388849601149559, "step": 182540 }, { "epoch": 51.8166335509509, "grad_norm": 0.07902240008115768, "learning_rate": 4.820493897246665e-05, "loss": 0.00048444122076034545, "step": 182550 }, { "epoch": 51.81947204087425, "grad_norm": 0.11406946182250977, "learning_rate": 4.820210048254329e-05, "loss": 0.0032390281558036804, "step": 182560 }, { "epoch": 51.822310530797616, "grad_norm": 0.10355222970247269, "learning_rate": 4.8199261992619924e-05, "loss": 0.0005752943456172943, "step": 182570 }, { "epoch": 51.82514902072098, "grad_norm": 0.3657318651676178, "learning_rate": 4.819642350269657e-05, "loss": 0.0014988573268055916, "step": 182580 }, { "epoch": 51.827987510644334, "grad_norm": 0.014269332401454449, "learning_rate": 4.819358501277321e-05, "loss": 0.010669919103384018, "step": 182590 }, { "epoch": 51.8308260005677, "grad_norm": 0.3653144836425781, "learning_rate": 4.819074652284984e-05, "loss": 0.0015364862978458405, "step": 182600 }, { "epoch": 51.83366449049106, "grad_norm": 0.162485733628273, "learning_rate": 4.818790803292649e-05, "loss": 0.0010139100253582002, "step": 182610 }, { "epoch": 51.83650298041442, "grad_norm": 0.16376501321792603, "learning_rate": 4.8185069543003125e-05, "loss": 0.0007864033803343772, "step": 182620 }, { "epoch": 51.83934147033778, "grad_norm": 0.10423777997493744, "learning_rate": 4.8182231053079766e-05, "loss": 0.0018844639882445336, "step": 182630 }, { "epoch": 51.84217996026114, "grad_norm": 0.41506412625312805, "learning_rate": 4.817939256315641e-05, "loss": 0.0006255175918340683, "step": 182640 }, { "epoch": 51.845018450184504, "grad_norm": 0.05386204272508621, "learning_rate": 4.817655407323304e-05, "loss": 0.00047949235886335373, "step": 182650 }, { "epoch": 51.84785694010786, "grad_norm": 0.06201737001538277, "learning_rate": 4.817371558330968e-05, "loss": 0.00077715665102005, "step": 182660 }, { "epoch": 51.85069543003122, "grad_norm": 0.22596240043640137, "learning_rate": 4.817087709338632e-05, "loss": 0.0015824969857931137, "step": 182670 }, { "epoch": 51.853533919954586, "grad_norm": 0.05849112942814827, "learning_rate": 4.816803860346296e-05, "loss": 0.0004113098606467247, "step": 182680 }, { "epoch": 51.85637240987794, "grad_norm": 0.10807622224092484, "learning_rate": 4.81652001135396e-05, "loss": 0.0003912497311830521, "step": 182690 }, { "epoch": 51.859210899801305, "grad_norm": 0.014222614467144012, "learning_rate": 4.8162361623616235e-05, "loss": 0.0005474014207720757, "step": 182700 }, { "epoch": 51.86204938972467, "grad_norm": 0.13809043169021606, "learning_rate": 4.8159523133692877e-05, "loss": 0.0005332110449671745, "step": 182710 }, { "epoch": 51.86488787964803, "grad_norm": 0.2838243246078491, "learning_rate": 4.815668464376952e-05, "loss": 0.0017236709594726563, "step": 182720 }, { "epoch": 51.867726369571386, "grad_norm": 0.1773257553577423, "learning_rate": 4.815384615384615e-05, "loss": 0.0016954099759459496, "step": 182730 }, { "epoch": 51.87056485949475, "grad_norm": 0.1669190675020218, "learning_rate": 4.81510076639228e-05, "loss": 0.0025507261976599692, "step": 182740 }, { "epoch": 51.87340334941811, "grad_norm": 0.09507136046886444, "learning_rate": 4.8148169173999435e-05, "loss": 0.0006633238866925239, "step": 182750 }, { "epoch": 51.87624183934147, "grad_norm": 0.16149522364139557, "learning_rate": 4.814533068407607e-05, "loss": 0.0041283808648586275, "step": 182760 }, { "epoch": 51.87908032926483, "grad_norm": 0.07657840847969055, "learning_rate": 4.814249219415271e-05, "loss": 0.0007986057549715042, "step": 182770 }, { "epoch": 51.88191881918819, "grad_norm": 0.21423909068107605, "learning_rate": 4.813965370422935e-05, "loss": 0.004993660002946853, "step": 182780 }, { "epoch": 51.88475730911155, "grad_norm": 0.5809764862060547, "learning_rate": 4.8136815214305994e-05, "loss": 0.0007304297760128975, "step": 182790 }, { "epoch": 51.88759579903491, "grad_norm": 1.5357874631881714, "learning_rate": 4.813397672438263e-05, "loss": 0.0007596768438816071, "step": 182800 }, { "epoch": 51.890434288958275, "grad_norm": 0.26369741559028625, "learning_rate": 4.813113823445927e-05, "loss": 0.00036773253232240676, "step": 182810 }, { "epoch": 51.89327277888164, "grad_norm": 0.2780047059059143, "learning_rate": 4.812829974453591e-05, "loss": 0.010024514794349671, "step": 182820 }, { "epoch": 51.896111268804994, "grad_norm": 0.12321890145540237, "learning_rate": 4.8125461254612546e-05, "loss": 0.0017388559877872466, "step": 182830 }, { "epoch": 51.89894975872836, "grad_norm": 1.678441047668457, "learning_rate": 4.812262276468919e-05, "loss": 0.0013358509168028831, "step": 182840 }, { "epoch": 51.90178824865172, "grad_norm": 0.35110482573509216, "learning_rate": 4.811978427476583e-05, "loss": 0.0017352698370814324, "step": 182850 }, { "epoch": 51.904626738575075, "grad_norm": 0.02774232253432274, "learning_rate": 4.811694578484246e-05, "loss": 0.0008305624127388, "step": 182860 }, { "epoch": 51.90746522849844, "grad_norm": 5.005304336547852, "learning_rate": 4.8114107294919105e-05, "loss": 0.0038928404450416566, "step": 182870 }, { "epoch": 51.9103037184218, "grad_norm": 4.239023208618164, "learning_rate": 4.8111268804995746e-05, "loss": 0.001853250525891781, "step": 182880 }, { "epoch": 51.913142208345164, "grad_norm": 14.860383033752441, "learning_rate": 4.810843031507238e-05, "loss": 0.007831629365682602, "step": 182890 }, { "epoch": 51.91598069826852, "grad_norm": 2.194275379180908, "learning_rate": 4.810559182514902e-05, "loss": 0.0012575622648000717, "step": 182900 }, { "epoch": 51.91881918819188, "grad_norm": 7.52362585067749, "learning_rate": 4.8102753335225663e-05, "loss": 0.0018234940245747567, "step": 182910 }, { "epoch": 51.921657678115245, "grad_norm": 0.20958444476127625, "learning_rate": 4.8099914845302305e-05, "loss": 0.00027543045580387117, "step": 182920 }, { "epoch": 51.9244961680386, "grad_norm": 0.03363151475787163, "learning_rate": 4.809707635537894e-05, "loss": 0.0011302854865789414, "step": 182930 }, { "epoch": 51.927334657961964, "grad_norm": 0.03240484744310379, "learning_rate": 4.809423786545558e-05, "loss": 0.0013516105711460114, "step": 182940 }, { "epoch": 51.93017314788533, "grad_norm": 0.18886597454547882, "learning_rate": 4.809139937553222e-05, "loss": 0.000336373969912529, "step": 182950 }, { "epoch": 51.93301163780868, "grad_norm": 0.1809409111738205, "learning_rate": 4.808856088560886e-05, "loss": 0.0010003114119172096, "step": 182960 }, { "epoch": 51.935850127732046, "grad_norm": 5.938627243041992, "learning_rate": 4.80857223956855e-05, "loss": 0.0027820289134979246, "step": 182970 }, { "epoch": 51.93868861765541, "grad_norm": 0.10181485861539841, "learning_rate": 4.808288390576214e-05, "loss": 0.0008155753836035729, "step": 182980 }, { "epoch": 51.94152710757877, "grad_norm": 0.31481724977493286, "learning_rate": 4.8080045415838774e-05, "loss": 0.0008360465988516807, "step": 182990 }, { "epoch": 51.94436559750213, "grad_norm": 0.14639221131801605, "learning_rate": 4.8077490774907754e-05, "loss": 0.010876993834972381, "step": 183000 }, { "epoch": 51.94436559750213, "eval_accuracy": 0.9767279201373434, "eval_loss": 0.08172839879989624, "eval_runtime": 32.4181, "eval_samples_per_second": 485.13, "eval_steps_per_second": 7.588, "step": 183000 }, { "epoch": 51.94720408742549, "grad_norm": 0.24690350890159607, "learning_rate": 4.807465228498439e-05, "loss": 0.00031044557690620425, "step": 183010 }, { "epoch": 51.95004257734885, "grad_norm": 0.048013534396886826, "learning_rate": 4.807181379506103e-05, "loss": 0.0006077568978071213, "step": 183020 }, { "epoch": 51.95288106727221, "grad_norm": 0.035825103521347046, "learning_rate": 4.806897530513767e-05, "loss": 0.0020835012197494506, "step": 183030 }, { "epoch": 51.95571955719557, "grad_norm": 0.06610327214002609, "learning_rate": 4.8066136815214306e-05, "loss": 0.0003492157906293869, "step": 183040 }, { "epoch": 51.958558047118935, "grad_norm": 0.6760591864585876, "learning_rate": 4.806329832529095e-05, "loss": 0.00025796499103307723, "step": 183050 }, { "epoch": 51.96139653704229, "grad_norm": 0.14896906912326813, "learning_rate": 4.806045983536759e-05, "loss": 0.000361521914601326, "step": 183060 }, { "epoch": 51.96423502696565, "grad_norm": 0.08462279289960861, "learning_rate": 4.805762134544422e-05, "loss": 0.0005348850041627884, "step": 183070 }, { "epoch": 51.967073516889016, "grad_norm": 0.9394897818565369, "learning_rate": 4.8054782855520865e-05, "loss": 0.0003594519570469856, "step": 183080 }, { "epoch": 51.96991200681238, "grad_norm": 3.250230550765991, "learning_rate": 4.8051944365597506e-05, "loss": 0.00048760809004306794, "step": 183090 }, { "epoch": 51.972750496735735, "grad_norm": 0.013411608524620533, "learning_rate": 4.804910587567415e-05, "loss": 0.0009473580867052078, "step": 183100 }, { "epoch": 51.9755889866591, "grad_norm": 0.07319562882184982, "learning_rate": 4.804626738575078e-05, "loss": 0.0006691792979836464, "step": 183110 }, { "epoch": 51.97842747658246, "grad_norm": 0.0706510841846466, "learning_rate": 4.8043428895827423e-05, "loss": 0.0005858691409230232, "step": 183120 }, { "epoch": 51.981265966505816, "grad_norm": 0.741838812828064, "learning_rate": 4.8040590405904065e-05, "loss": 0.0012753462418913842, "step": 183130 }, { "epoch": 51.98410445642918, "grad_norm": 0.07140675187110901, "learning_rate": 4.80377519159807e-05, "loss": 0.0004938645288348198, "step": 183140 }, { "epoch": 51.98694294635254, "grad_norm": 0.4427141845226288, "learning_rate": 4.803491342605734e-05, "loss": 0.005382928252220154, "step": 183150 }, { "epoch": 51.9897814362759, "grad_norm": 2.9594504833221436, "learning_rate": 4.803207493613398e-05, "loss": 0.0012240985408425332, "step": 183160 }, { "epoch": 51.99261992619926, "grad_norm": 0.4567752182483673, "learning_rate": 4.802923644621062e-05, "loss": 0.014815299212932587, "step": 183170 }, { "epoch": 51.995458416122624, "grad_norm": 0.09555605798959732, "learning_rate": 4.802639795628726e-05, "loss": 0.0009070837870240212, "step": 183180 }, { "epoch": 51.99829690604599, "grad_norm": 0.24449460208415985, "learning_rate": 4.80235594663639e-05, "loss": 0.0015728767961263657, "step": 183190 }, { "epoch": 52.00113539596934, "grad_norm": 0.1252463459968567, "learning_rate": 4.8020720976440534e-05, "loss": 0.0011314516887068748, "step": 183200 }, { "epoch": 52.003973885892705, "grad_norm": 0.17552568018436432, "learning_rate": 4.8017882486517175e-05, "loss": 0.0063804671168327335, "step": 183210 }, { "epoch": 52.00681237581607, "grad_norm": 0.136989563703537, "learning_rate": 4.801504399659382e-05, "loss": 0.0010395416989922523, "step": 183220 }, { "epoch": 52.009650865739424, "grad_norm": 4.250365257263184, "learning_rate": 4.801220550667045e-05, "loss": 0.001101909577846527, "step": 183230 }, { "epoch": 52.01248935566279, "grad_norm": 0.3450939953327179, "learning_rate": 4.800936701674709e-05, "loss": 0.0005226006731390953, "step": 183240 }, { "epoch": 52.01532784558615, "grad_norm": 0.9420564770698547, "learning_rate": 4.800652852682373e-05, "loss": 0.0012619102373719216, "step": 183250 }, { "epoch": 52.018166335509505, "grad_norm": 0.03450402244925499, "learning_rate": 4.8003690036900376e-05, "loss": 0.00045227594673633573, "step": 183260 }, { "epoch": 52.02100482543287, "grad_norm": 0.020162923261523247, "learning_rate": 4.800085154697701e-05, "loss": 0.000719030387699604, "step": 183270 }, { "epoch": 52.02384331535623, "grad_norm": 0.33634984493255615, "learning_rate": 4.7998013057053645e-05, "loss": 0.0009289706125855446, "step": 183280 }, { "epoch": 52.026681805279594, "grad_norm": 0.11174613982439041, "learning_rate": 4.799517456713029e-05, "loss": 0.001429719477891922, "step": 183290 }, { "epoch": 52.02952029520295, "grad_norm": 0.02389918640255928, "learning_rate": 4.799233607720693e-05, "loss": 0.0015846051275730133, "step": 183300 }, { "epoch": 52.03235878512631, "grad_norm": 0.21122485399246216, "learning_rate": 4.798949758728357e-05, "loss": 0.0014102132990956306, "step": 183310 }, { "epoch": 52.035197275049676, "grad_norm": 0.025034114718437195, "learning_rate": 4.798665909736021e-05, "loss": 0.0007824892178177834, "step": 183320 }, { "epoch": 52.03803576497303, "grad_norm": 1.564091682434082, "learning_rate": 4.7983820607436845e-05, "loss": 0.000429689884185791, "step": 183330 }, { "epoch": 52.040874254896394, "grad_norm": 0.26598823070526123, "learning_rate": 4.7980982117513486e-05, "loss": 0.0026240132749080656, "step": 183340 }, { "epoch": 52.04371274481976, "grad_norm": 4.473158359527588, "learning_rate": 4.797814362759012e-05, "loss": 0.002162446640431881, "step": 183350 }, { "epoch": 52.04655123474312, "grad_norm": 0.46561720967292786, "learning_rate": 4.797530513766676e-05, "loss": 0.005860726162791252, "step": 183360 }, { "epoch": 52.049389724666476, "grad_norm": 0.031022032722830772, "learning_rate": 4.7972466647743404e-05, "loss": 0.0005652682855725288, "step": 183370 }, { "epoch": 52.05222821458984, "grad_norm": 0.0655103474855423, "learning_rate": 4.796962815782004e-05, "loss": 0.0006406722590327263, "step": 183380 }, { "epoch": 52.0550667045132, "grad_norm": 8.915067672729492, "learning_rate": 4.7966789667896686e-05, "loss": 0.0018107224255800246, "step": 183390 }, { "epoch": 52.05790519443656, "grad_norm": 0.05562408268451691, "learning_rate": 4.796395117797332e-05, "loss": 0.004964353144168853, "step": 183400 }, { "epoch": 52.06074368435992, "grad_norm": 0.00911193247884512, "learning_rate": 4.7961112688049956e-05, "loss": 0.0023705745115876198, "step": 183410 }, { "epoch": 52.06358217428328, "grad_norm": 1.438262939453125, "learning_rate": 4.7958274198126604e-05, "loss": 0.001288844272494316, "step": 183420 }, { "epoch": 52.06642066420664, "grad_norm": 0.26436740159988403, "learning_rate": 4.795543570820324e-05, "loss": 0.00023554451763629913, "step": 183430 }, { "epoch": 52.06925915413, "grad_norm": 0.018051110208034515, "learning_rate": 4.795259721827988e-05, "loss": 0.0009310223162174225, "step": 183440 }, { "epoch": 52.072097644053365, "grad_norm": 2.080995559692383, "learning_rate": 4.7949758728356514e-05, "loss": 0.004716078191995621, "step": 183450 }, { "epoch": 52.07493613397673, "grad_norm": 0.39596524834632874, "learning_rate": 4.7946920238433156e-05, "loss": 0.003822232037782669, "step": 183460 }, { "epoch": 52.07777462390008, "grad_norm": 0.05577684938907623, "learning_rate": 4.79440817485098e-05, "loss": 0.0008683588355779647, "step": 183470 }, { "epoch": 52.080613113823446, "grad_norm": 0.06712818890810013, "learning_rate": 4.794124325858643e-05, "loss": 0.001541576161980629, "step": 183480 }, { "epoch": 52.08345160374681, "grad_norm": 0.22391265630722046, "learning_rate": 4.793840476866307e-05, "loss": 0.009365933388471604, "step": 183490 }, { "epoch": 52.086290093670165, "grad_norm": 0.1344362050294876, "learning_rate": 4.7935566278739714e-05, "loss": 0.0015834558755159378, "step": 183500 }, { "epoch": 52.086290093670165, "eval_accuracy": 0.9775545240668914, "eval_loss": 0.07857996970415115, "eval_runtime": 32.6865, "eval_samples_per_second": 481.146, "eval_steps_per_second": 7.526, "step": 183500 }, { "epoch": 52.08912858359353, "grad_norm": 0.15102817118167877, "learning_rate": 4.793272778881635e-05, "loss": 0.0009012224152684211, "step": 183510 }, { "epoch": 52.09196707351689, "grad_norm": 0.08173829317092896, "learning_rate": 4.7929889298893e-05, "loss": 0.0009478798136115074, "step": 183520 }, { "epoch": 52.09480556344025, "grad_norm": 0.024196408689022064, "learning_rate": 4.792705080896963e-05, "loss": 0.0007890662178397179, "step": 183530 }, { "epoch": 52.09764405336361, "grad_norm": 0.01097149308770895, "learning_rate": 4.7924212319046266e-05, "loss": 0.0004108775407075882, "step": 183540 }, { "epoch": 52.10048254328697, "grad_norm": 0.06965126842260361, "learning_rate": 4.792137382912291e-05, "loss": 0.0013867853209376334, "step": 183550 }, { "epoch": 52.103321033210335, "grad_norm": 0.1719314455986023, "learning_rate": 4.791853533919955e-05, "loss": 0.0038325943052768706, "step": 183560 }, { "epoch": 52.10615952313369, "grad_norm": 0.03397070989012718, "learning_rate": 4.791569684927619e-05, "loss": 0.00030316244810819627, "step": 183570 }, { "epoch": 52.108998013057054, "grad_norm": 0.0243116095662117, "learning_rate": 4.7912858359352825e-05, "loss": 0.0005455181002616882, "step": 183580 }, { "epoch": 52.11183650298042, "grad_norm": 1.5040119886398315, "learning_rate": 4.7910019869429466e-05, "loss": 0.0007761558517813683, "step": 183590 }, { "epoch": 52.11467499290377, "grad_norm": 0.30178847908973694, "learning_rate": 4.790718137950611e-05, "loss": 0.00043684784322977065, "step": 183600 }, { "epoch": 52.117513482827135, "grad_norm": 0.1653946489095688, "learning_rate": 4.790434288958274e-05, "loss": 0.005474375188350677, "step": 183610 }, { "epoch": 52.1203519727505, "grad_norm": 0.4449189305305481, "learning_rate": 4.7901504399659384e-05, "loss": 0.0003239834681153297, "step": 183620 }, { "epoch": 52.123190462673854, "grad_norm": 0.19438432157039642, "learning_rate": 4.7898665909736025e-05, "loss": 0.0009518662467598915, "step": 183630 }, { "epoch": 52.12602895259722, "grad_norm": 0.11911072582006454, "learning_rate": 4.789582741981266e-05, "loss": 0.0005753114819526672, "step": 183640 }, { "epoch": 52.12886744252058, "grad_norm": 0.030836397781968117, "learning_rate": 4.78929889298893e-05, "loss": 0.0013831334188580513, "step": 183650 }, { "epoch": 52.13170593244394, "grad_norm": 1.0039864778518677, "learning_rate": 4.789015043996594e-05, "loss": 0.0004141075536608696, "step": 183660 }, { "epoch": 52.1345444223673, "grad_norm": 0.08138184994459152, "learning_rate": 4.788731195004258e-05, "loss": 0.000584898516535759, "step": 183670 }, { "epoch": 52.13738291229066, "grad_norm": 0.04388280585408211, "learning_rate": 4.788447346011922e-05, "loss": 0.000628044456243515, "step": 183680 }, { "epoch": 52.140221402214024, "grad_norm": 21.203325271606445, "learning_rate": 4.788163497019586e-05, "loss": 0.009185659885406493, "step": 183690 }, { "epoch": 52.14305989213738, "grad_norm": 0.20027989149093628, "learning_rate": 4.7878796480272494e-05, "loss": 0.0003276161849498749, "step": 183700 }, { "epoch": 52.14589838206074, "grad_norm": 0.0775650218129158, "learning_rate": 4.7875957990349136e-05, "loss": 0.0003987180069088936, "step": 183710 }, { "epoch": 52.148736871984106, "grad_norm": 0.048422377556562424, "learning_rate": 4.787311950042578e-05, "loss": 0.0010486222803592683, "step": 183720 }, { "epoch": 52.15157536190747, "grad_norm": 5.641684055328369, "learning_rate": 4.787028101050242e-05, "loss": 0.0013720091432332993, "step": 183730 }, { "epoch": 52.154413851830824, "grad_norm": 0.057831570506095886, "learning_rate": 4.786744252057905e-05, "loss": 0.001147693768143654, "step": 183740 }, { "epoch": 52.15725234175419, "grad_norm": 0.09249673783779144, "learning_rate": 4.786460403065569e-05, "loss": 0.011926360428333282, "step": 183750 }, { "epoch": 52.16009083167755, "grad_norm": 0.012170925736427307, "learning_rate": 4.7861765540732336e-05, "loss": 0.0013270171359181405, "step": 183760 }, { "epoch": 52.162929321600906, "grad_norm": 0.08099169284105301, "learning_rate": 4.785892705080897e-05, "loss": 0.0007620403543114662, "step": 183770 }, { "epoch": 52.16576781152427, "grad_norm": 0.09675470739603043, "learning_rate": 4.785608856088561e-05, "loss": 0.004948173463344574, "step": 183780 }, { "epoch": 52.16860630144763, "grad_norm": 0.17229188978672028, "learning_rate": 4.785325007096225e-05, "loss": 0.0008942348882555962, "step": 183790 }, { "epoch": 52.17144479137099, "grad_norm": 2.768332004547119, "learning_rate": 4.785041158103889e-05, "loss": 0.0013775531202554702, "step": 183800 }, { "epoch": 52.17428328129435, "grad_norm": 0.02013053372502327, "learning_rate": 4.784757309111553e-05, "loss": 0.00039181839674711226, "step": 183810 }, { "epoch": 52.17712177121771, "grad_norm": 0.11338331550359726, "learning_rate": 4.784473460119217e-05, "loss": 0.00220294464379549, "step": 183820 }, { "epoch": 52.179960261141076, "grad_norm": 0.24792633950710297, "learning_rate": 4.7841896111268805e-05, "loss": 0.002382390759885311, "step": 183830 }, { "epoch": 52.18279875106443, "grad_norm": 0.03318275138735771, "learning_rate": 4.7839057621345447e-05, "loss": 0.0046615440398454664, "step": 183840 }, { "epoch": 52.185637240987795, "grad_norm": 0.7485451698303223, "learning_rate": 4.783621913142208e-05, "loss": 0.0020092591643333436, "step": 183850 }, { "epoch": 52.18847573091116, "grad_norm": 0.20020534098148346, "learning_rate": 4.783338064149873e-05, "loss": 0.006860274076461792, "step": 183860 }, { "epoch": 52.19131422083451, "grad_norm": 7.625820159912109, "learning_rate": 4.7830542151575364e-05, "loss": 0.008323963731527328, "step": 183870 }, { "epoch": 52.194152710757876, "grad_norm": 0.12388471513986588, "learning_rate": 4.7827703661652e-05, "loss": 0.0005649343132972717, "step": 183880 }, { "epoch": 52.19699120068124, "grad_norm": 21.527563095092773, "learning_rate": 4.782486517172865e-05, "loss": 0.005670201778411865, "step": 183890 }, { "epoch": 52.199829690604595, "grad_norm": 0.045184019953012466, "learning_rate": 4.782202668180528e-05, "loss": 0.002153523080050945, "step": 183900 }, { "epoch": 52.20266818052796, "grad_norm": 0.3825509250164032, "learning_rate": 4.781918819188192e-05, "loss": 0.0012369973585009575, "step": 183910 }, { "epoch": 52.20550667045132, "grad_norm": 0.05174838751554489, "learning_rate": 4.7816349701958564e-05, "loss": 0.002726239711046219, "step": 183920 }, { "epoch": 52.208345160374684, "grad_norm": 0.3884969651699066, "learning_rate": 4.78135112120352e-05, "loss": 0.005254042148590088, "step": 183930 }, { "epoch": 52.21118365029804, "grad_norm": 0.32423657178878784, "learning_rate": 4.781067272211184e-05, "loss": 0.004890571534633637, "step": 183940 }, { "epoch": 52.2140221402214, "grad_norm": 4.699454307556152, "learning_rate": 4.7807834232188475e-05, "loss": 0.0020103521645069124, "step": 183950 }, { "epoch": 52.216860630144765, "grad_norm": 2.506873846054077, "learning_rate": 4.7804995742265116e-05, "loss": 0.0008382083848118782, "step": 183960 }, { "epoch": 52.21969912006812, "grad_norm": 0.03574621304869652, "learning_rate": 4.780215725234176e-05, "loss": 0.0018414055928587913, "step": 183970 }, { "epoch": 52.222537609991484, "grad_norm": 0.00886073149740696, "learning_rate": 4.779931876241839e-05, "loss": 0.001334230788052082, "step": 183980 }, { "epoch": 52.22537609991485, "grad_norm": 0.3629567623138428, "learning_rate": 4.779648027249504e-05, "loss": 0.0005370743572711944, "step": 183990 }, { "epoch": 52.2282145898382, "grad_norm": 0.038913846015930176, "learning_rate": 4.7793641782571675e-05, "loss": 0.001140449196100235, "step": 184000 }, { "epoch": 52.2282145898382, "eval_accuracy": 0.9759649011254531, "eval_loss": 0.0853082463145256, "eval_runtime": 32.5995, "eval_samples_per_second": 482.431, "eval_steps_per_second": 7.546, "step": 184000 }, { "epoch": 52.231053079761566, "grad_norm": 0.12194695323705673, "learning_rate": 4.779080329264831e-05, "loss": 0.0014708971604704857, "step": 184010 }, { "epoch": 52.23389156968493, "grad_norm": 0.06730790436267853, "learning_rate": 4.778796480272496e-05, "loss": 0.00044020526111125945, "step": 184020 }, { "epoch": 52.23673005960829, "grad_norm": 21.550878524780273, "learning_rate": 4.778512631280159e-05, "loss": 0.005208000168204307, "step": 184030 }, { "epoch": 52.23956854953165, "grad_norm": 0.07917027175426483, "learning_rate": 4.778228782287823e-05, "loss": 0.00042155440896749494, "step": 184040 }, { "epoch": 52.24240703945501, "grad_norm": 0.20390169322490692, "learning_rate": 4.777944933295487e-05, "loss": 0.0012712359428405761, "step": 184050 }, { "epoch": 52.24524552937837, "grad_norm": 0.11394552141427994, "learning_rate": 4.777661084303151e-05, "loss": 0.0010580187663435936, "step": 184060 }, { "epoch": 52.24808401930173, "grad_norm": 0.07907111197710037, "learning_rate": 4.777377235310815e-05, "loss": 0.002778307907283306, "step": 184070 }, { "epoch": 52.25092250922509, "grad_norm": 0.4431132972240448, "learning_rate": 4.7770933863184785e-05, "loss": 0.002066568098962307, "step": 184080 }, { "epoch": 52.253760999148454, "grad_norm": 0.15419727563858032, "learning_rate": 4.776809537326143e-05, "loss": 0.00034906230866909026, "step": 184090 }, { "epoch": 52.25659948907182, "grad_norm": 2.434131622314453, "learning_rate": 4.776525688333807e-05, "loss": 0.0032071176916360854, "step": 184100 }, { "epoch": 52.25943797899517, "grad_norm": 0.40985676646232605, "learning_rate": 4.77624183934147e-05, "loss": 0.0009425994008779526, "step": 184110 }, { "epoch": 52.262276468918536, "grad_norm": 0.33366069197654724, "learning_rate": 4.7759579903491344e-05, "loss": 0.0006732998415827751, "step": 184120 }, { "epoch": 52.2651149588419, "grad_norm": 0.49148792028427124, "learning_rate": 4.7756741413567985e-05, "loss": 0.004441948980093003, "step": 184130 }, { "epoch": 52.267953448765255, "grad_norm": 0.07162880897521973, "learning_rate": 4.775390292364462e-05, "loss": 0.00047065373510122297, "step": 184140 }, { "epoch": 52.27079193868862, "grad_norm": 0.015660328790545464, "learning_rate": 4.775106443372126e-05, "loss": 0.0003439469262957573, "step": 184150 }, { "epoch": 52.27363042861198, "grad_norm": 0.07236897945404053, "learning_rate": 4.77482259437979e-05, "loss": 0.004213414341211319, "step": 184160 }, { "epoch": 52.276468918535336, "grad_norm": 0.04823959246277809, "learning_rate": 4.774538745387454e-05, "loss": 0.004126560688018799, "step": 184170 }, { "epoch": 52.2793074084587, "grad_norm": 2.9902758598327637, "learning_rate": 4.774254896395118e-05, "loss": 0.0023122189566493036, "step": 184180 }, { "epoch": 52.28214589838206, "grad_norm": 0.015466309152543545, "learning_rate": 4.773971047402782e-05, "loss": 0.0005363108590245247, "step": 184190 }, { "epoch": 52.284984388305425, "grad_norm": 1.1628447771072388, "learning_rate": 4.773687198410446e-05, "loss": 0.0013002697378396988, "step": 184200 }, { "epoch": 52.28782287822878, "grad_norm": 0.16024670004844666, "learning_rate": 4.7734033494181096e-05, "loss": 0.0004971085116267204, "step": 184210 }, { "epoch": 52.29066136815214, "grad_norm": 0.04302060231566429, "learning_rate": 4.773119500425774e-05, "loss": 0.0015284229069948197, "step": 184220 }, { "epoch": 52.293499858075506, "grad_norm": 0.09796073287725449, "learning_rate": 4.772835651433438e-05, "loss": 0.0007480617612600326, "step": 184230 }, { "epoch": 52.29633834799886, "grad_norm": 5.639986038208008, "learning_rate": 4.7725518024411013e-05, "loss": 0.003276053071022034, "step": 184240 }, { "epoch": 52.299176837922225, "grad_norm": 0.6984824538230896, "learning_rate": 4.7722679534487655e-05, "loss": 0.0024738194420933723, "step": 184250 }, { "epoch": 52.30201532784559, "grad_norm": 0.043838758021593094, "learning_rate": 4.7719841044564296e-05, "loss": 0.000746610201895237, "step": 184260 }, { "epoch": 52.304853817768944, "grad_norm": 0.29030969738960266, "learning_rate": 4.771700255464093e-05, "loss": 0.0034334458410739898, "step": 184270 }, { "epoch": 52.30769230769231, "grad_norm": 1.4350874423980713, "learning_rate": 4.771416406471757e-05, "loss": 0.0008537506684660912, "step": 184280 }, { "epoch": 52.31053079761567, "grad_norm": 0.02729605883359909, "learning_rate": 4.7711325574794214e-05, "loss": 0.00159029234200716, "step": 184290 }, { "epoch": 52.31336928753903, "grad_norm": 0.2532888352870941, "learning_rate": 4.770848708487085e-05, "loss": 0.0021553006023168563, "step": 184300 }, { "epoch": 52.31620777746239, "grad_norm": 0.13101926445960999, "learning_rate": 4.770564859494749e-05, "loss": 0.01797635108232498, "step": 184310 }, { "epoch": 52.31904626738575, "grad_norm": 0.5800873041152954, "learning_rate": 4.770281010502413e-05, "loss": 0.004862882941961288, "step": 184320 }, { "epoch": 52.321884757309114, "grad_norm": 0.03931891545653343, "learning_rate": 4.769997161510077e-05, "loss": 0.0023556262254714965, "step": 184330 }, { "epoch": 52.32472324723247, "grad_norm": 8.784247398376465, "learning_rate": 4.769713312517741e-05, "loss": 0.006658436357975006, "step": 184340 }, { "epoch": 52.32756173715583, "grad_norm": 3.140986919403076, "learning_rate": 4.769429463525405e-05, "loss": 0.0011419855058193208, "step": 184350 }, { "epoch": 52.330400227079195, "grad_norm": 12.199102401733398, "learning_rate": 4.769145614533069e-05, "loss": 0.0026628632098436356, "step": 184360 }, { "epoch": 52.33323871700255, "grad_norm": 0.20722025632858276, "learning_rate": 4.7688617655407324e-05, "loss": 0.002056492492556572, "step": 184370 }, { "epoch": 52.336077206925914, "grad_norm": 1.593297004699707, "learning_rate": 4.7685779165483966e-05, "loss": 0.0009723709896206856, "step": 184380 }, { "epoch": 52.33891569684928, "grad_norm": 1.6908036470413208, "learning_rate": 4.768294067556061e-05, "loss": 0.010466842353343964, "step": 184390 }, { "epoch": 52.34175418677264, "grad_norm": 10.084512710571289, "learning_rate": 4.768010218563724e-05, "loss": 0.002477515861392021, "step": 184400 }, { "epoch": 52.344592676695996, "grad_norm": 0.8554307222366333, "learning_rate": 4.767726369571388e-05, "loss": 0.0007222775369882583, "step": 184410 }, { "epoch": 52.34743116661936, "grad_norm": 0.06834488362073898, "learning_rate": 4.7674425205790524e-05, "loss": 0.0016830066218972207, "step": 184420 }, { "epoch": 52.35026965654272, "grad_norm": 0.041436124593019485, "learning_rate": 4.767158671586716e-05, "loss": 0.001099945232272148, "step": 184430 }, { "epoch": 52.35310814646608, "grad_norm": 1.5089346170425415, "learning_rate": 4.76687482259438e-05, "loss": 0.0026598405092954636, "step": 184440 }, { "epoch": 52.35594663638944, "grad_norm": 1.0984470844268799, "learning_rate": 4.766590973602044e-05, "loss": 0.0005424888804554939, "step": 184450 }, { "epoch": 52.3587851263128, "grad_norm": 0.2821011245250702, "learning_rate": 4.7663071246097076e-05, "loss": 0.00337773896753788, "step": 184460 }, { "epoch": 52.36162361623616, "grad_norm": 0.2699981927871704, "learning_rate": 4.766023275617372e-05, "loss": 0.0018408577889204025, "step": 184470 }, { "epoch": 52.36446210615952, "grad_norm": 0.05198495090007782, "learning_rate": 4.765739426625035e-05, "loss": 0.0010284805670380593, "step": 184480 }, { "epoch": 52.367300596082885, "grad_norm": 5.800675868988037, "learning_rate": 4.7654555776327e-05, "loss": 0.0014519184827804566, "step": 184490 }, { "epoch": 52.37013908600625, "grad_norm": 0.33598682284355164, "learning_rate": 4.7651717286403635e-05, "loss": 0.00037855394184589387, "step": 184500 }, { "epoch": 52.37013908600625, "eval_accuracy": 0.9760284860431105, "eval_loss": 0.09127943217754364, "eval_runtime": 32.5253, "eval_samples_per_second": 483.531, "eval_steps_per_second": 7.563, "step": 184500 }, { "epoch": 52.3729775759296, "grad_norm": 0.0159261804074049, "learning_rate": 4.7648878796480276e-05, "loss": 0.006323327124118805, "step": 184510 }, { "epoch": 52.375816065852966, "grad_norm": 0.14504724740982056, "learning_rate": 4.764604030655692e-05, "loss": 0.0005192440003156662, "step": 184520 }, { "epoch": 52.37865455577633, "grad_norm": 0.04463227465748787, "learning_rate": 4.764320181663355e-05, "loss": 0.0008708713576197624, "step": 184530 }, { "epoch": 52.381493045699685, "grad_norm": 0.010425101034343243, "learning_rate": 4.7640363326710194e-05, "loss": 0.001168590411543846, "step": 184540 }, { "epoch": 52.38433153562305, "grad_norm": 0.011947902850806713, "learning_rate": 4.7637524836786835e-05, "loss": 0.0034333862364292144, "step": 184550 }, { "epoch": 52.38717002554641, "grad_norm": 0.12935885787010193, "learning_rate": 4.763468634686347e-05, "loss": 0.0010515183210372924, "step": 184560 }, { "epoch": 52.39000851546977, "grad_norm": 4.406347274780273, "learning_rate": 4.763184785694011e-05, "loss": 0.002359734661877155, "step": 184570 }, { "epoch": 52.39284700539313, "grad_norm": 0.4258795380592346, "learning_rate": 4.7629009367016746e-05, "loss": 0.0021335961297154426, "step": 184580 }, { "epoch": 52.39568549531649, "grad_norm": 0.884838879108429, "learning_rate": 4.762617087709339e-05, "loss": 0.0044537119567394255, "step": 184590 }, { "epoch": 52.398523985239855, "grad_norm": 0.15563571453094482, "learning_rate": 4.762333238717003e-05, "loss": 0.0018550509586930275, "step": 184600 }, { "epoch": 52.40136247516321, "grad_norm": 0.036623600870370865, "learning_rate": 4.762049389724666e-05, "loss": 0.001363677904009819, "step": 184610 }, { "epoch": 52.404200965086574, "grad_norm": 0.14291466772556305, "learning_rate": 4.761765540732331e-05, "loss": 0.001742580346763134, "step": 184620 }, { "epoch": 52.40703945500994, "grad_norm": 0.01803308166563511, "learning_rate": 4.7614816917399946e-05, "loss": 0.00028180405497550963, "step": 184630 }, { "epoch": 52.40987794493329, "grad_norm": 0.07993413507938385, "learning_rate": 4.761197842747658e-05, "loss": 0.0006215602159500122, "step": 184640 }, { "epoch": 52.412716434856655, "grad_norm": 0.039400532841682434, "learning_rate": 4.760913993755323e-05, "loss": 0.000861317664384842, "step": 184650 }, { "epoch": 52.41555492478002, "grad_norm": 0.40908896923065186, "learning_rate": 4.760630144762986e-05, "loss": 0.0020953189581632615, "step": 184660 }, { "epoch": 52.41839341470338, "grad_norm": 0.007140699774026871, "learning_rate": 4.7603462957706504e-05, "loss": 0.0018901146948337554, "step": 184670 }, { "epoch": 52.42123190462674, "grad_norm": 5.458056449890137, "learning_rate": 4.760062446778314e-05, "loss": 0.004056227952241897, "step": 184680 }, { "epoch": 52.4240703945501, "grad_norm": 0.056706395000219345, "learning_rate": 4.759778597785978e-05, "loss": 0.0008314166218042374, "step": 184690 }, { "epoch": 52.42690888447346, "grad_norm": 0.055072952061891556, "learning_rate": 4.759494748793642e-05, "loss": 0.0037443917244672775, "step": 184700 }, { "epoch": 52.42974737439682, "grad_norm": 0.2812627851963043, "learning_rate": 4.7592108998013056e-05, "loss": 0.011396793276071548, "step": 184710 }, { "epoch": 52.43258586432018, "grad_norm": 0.3162594735622406, "learning_rate": 4.75892705080897e-05, "loss": 0.0013636523857712747, "step": 184720 }, { "epoch": 52.435424354243544, "grad_norm": 0.34498757123947144, "learning_rate": 4.758643201816634e-05, "loss": 0.0008828606456518173, "step": 184730 }, { "epoch": 52.4382628441669, "grad_norm": 0.11711649596691132, "learning_rate": 4.7583593528242974e-05, "loss": 0.003614000603556633, "step": 184740 }, { "epoch": 52.44110133409026, "grad_norm": 31.341411590576172, "learning_rate": 4.758075503831962e-05, "loss": 0.011059018969535827, "step": 184750 }, { "epoch": 52.443939824013626, "grad_norm": 0.10356653481721878, "learning_rate": 4.7577916548396257e-05, "loss": 0.000668397918343544, "step": 184760 }, { "epoch": 52.44677831393699, "grad_norm": 0.02240273542702198, "learning_rate": 4.757507805847289e-05, "loss": 0.0008125150576233864, "step": 184770 }, { "epoch": 52.449616803860344, "grad_norm": 0.4690690338611603, "learning_rate": 4.757223956854953e-05, "loss": 0.0026510611176490785, "step": 184780 }, { "epoch": 52.45245529378371, "grad_norm": 0.5949777364730835, "learning_rate": 4.7569401078626174e-05, "loss": 0.00033699870109558103, "step": 184790 }, { "epoch": 52.45529378370707, "grad_norm": 0.01026222389191389, "learning_rate": 4.7566562588702815e-05, "loss": 0.0003689641132950783, "step": 184800 }, { "epoch": 52.458132273630426, "grad_norm": 0.03486157953739166, "learning_rate": 4.756372409877945e-05, "loss": 0.00091804638504982, "step": 184810 }, { "epoch": 52.46097076355379, "grad_norm": 0.03475930169224739, "learning_rate": 4.756088560885609e-05, "loss": 0.0002218225970864296, "step": 184820 }, { "epoch": 52.46380925347715, "grad_norm": 0.15469048917293549, "learning_rate": 4.755804711893273e-05, "loss": 0.0004458896815776825, "step": 184830 }, { "epoch": 52.46664774340051, "grad_norm": 0.02663545124232769, "learning_rate": 4.755520862900937e-05, "loss": 0.001989557594060898, "step": 184840 }, { "epoch": 52.46948623332387, "grad_norm": 0.03500394895672798, "learning_rate": 4.755237013908601e-05, "loss": 0.0005112333223223686, "step": 184850 }, { "epoch": 52.47232472324723, "grad_norm": 0.14612731337547302, "learning_rate": 4.754953164916265e-05, "loss": 0.0006827004253864288, "step": 184860 }, { "epoch": 52.475163213170596, "grad_norm": 0.2962496876716614, "learning_rate": 4.7546693159239285e-05, "loss": 0.006271124631166458, "step": 184870 }, { "epoch": 52.47800170309395, "grad_norm": 0.05187174305319786, "learning_rate": 4.7543854669315926e-05, "loss": 0.0007941052317619324, "step": 184880 }, { "epoch": 52.480840193017315, "grad_norm": 0.15852448344230652, "learning_rate": 4.754101617939257e-05, "loss": 0.0010285772383213042, "step": 184890 }, { "epoch": 52.48367868294068, "grad_norm": 0.06756472587585449, "learning_rate": 4.75381776894692e-05, "loss": 0.0007333695888519287, "step": 184900 }, { "epoch": 52.48651717286403, "grad_norm": 0.91930091381073, "learning_rate": 4.753533919954584e-05, "loss": 0.0005421994253993034, "step": 184910 }, { "epoch": 52.489355662787396, "grad_norm": 0.026963118463754654, "learning_rate": 4.7532500709622485e-05, "loss": 0.0005008872598409652, "step": 184920 }, { "epoch": 52.49219415271076, "grad_norm": 0.02083013765513897, "learning_rate": 4.752966221969912e-05, "loss": 0.0010683590546250344, "step": 184930 }, { "epoch": 52.49503264263412, "grad_norm": 0.02074730210006237, "learning_rate": 4.752682372977576e-05, "loss": 0.001010383665561676, "step": 184940 }, { "epoch": 52.49787113255748, "grad_norm": 0.03191481530666351, "learning_rate": 4.75239852398524e-05, "loss": 0.0006425833329558372, "step": 184950 }, { "epoch": 52.50070962248084, "grad_norm": 1.4525662660598755, "learning_rate": 4.752114674992904e-05, "loss": 0.0005657251924276352, "step": 184960 }, { "epoch": 52.503548112404204, "grad_norm": 12.38812255859375, "learning_rate": 4.751830826000568e-05, "loss": 0.002616007253527641, "step": 184970 }, { "epoch": 52.50638660232756, "grad_norm": 0.9182807803153992, "learning_rate": 4.751546977008231e-05, "loss": 0.0019361067563295365, "step": 184980 }, { "epoch": 52.50922509225092, "grad_norm": 0.0336979404091835, "learning_rate": 4.751263128015896e-05, "loss": 0.0006442580372095108, "step": 184990 }, { "epoch": 52.512063582174285, "grad_norm": 0.038579318672418594, "learning_rate": 4.7509792790235595e-05, "loss": 0.0012181278318166733, "step": 185000 }, { "epoch": 52.512063582174285, "eval_accuracy": 0.9760920709607681, "eval_loss": 0.08229879289865494, "eval_runtime": 32.8146, "eval_samples_per_second": 479.269, "eval_steps_per_second": 7.497, "step": 185000 }, { "epoch": 52.51490207209764, "grad_norm": 0.2812656760215759, "learning_rate": 4.750695430031224e-05, "loss": 0.0019872616976499557, "step": 185010 }, { "epoch": 52.517740562021004, "grad_norm": 0.6278689503669739, "learning_rate": 4.750411581038888e-05, "loss": 0.0011812107637524604, "step": 185020 }, { "epoch": 52.52057905194437, "grad_norm": 0.10104627907276154, "learning_rate": 4.750127732046551e-05, "loss": 0.00023264791816473006, "step": 185030 }, { "epoch": 52.52341754186773, "grad_norm": 0.34198492765426636, "learning_rate": 4.7498438830542154e-05, "loss": 0.00035769641399383543, "step": 185040 }, { "epoch": 52.526256031791085, "grad_norm": 1.1983280181884766, "learning_rate": 4.7495600340618795e-05, "loss": 0.0004389923065900803, "step": 185050 }, { "epoch": 52.52909452171445, "grad_norm": 0.3388192057609558, "learning_rate": 4.749276185069543e-05, "loss": 0.00020617544651031495, "step": 185060 }, { "epoch": 52.53193301163781, "grad_norm": 0.02668917365372181, "learning_rate": 4.748992336077207e-05, "loss": 0.0008501425385475158, "step": 185070 }, { "epoch": 52.53477150156117, "grad_norm": 0.015657059848308563, "learning_rate": 4.7487084870848706e-05, "loss": 0.00034740213304758073, "step": 185080 }, { "epoch": 52.53760999148453, "grad_norm": 0.02161979116499424, "learning_rate": 4.7484246380925354e-05, "loss": 0.0009873220697045326, "step": 185090 }, { "epoch": 52.54044848140789, "grad_norm": 0.23935921490192413, "learning_rate": 4.748140789100199e-05, "loss": 0.0003578871488571167, "step": 185100 }, { "epoch": 52.54328697133125, "grad_norm": 0.15645882487297058, "learning_rate": 4.747856940107862e-05, "loss": 0.005178725719451905, "step": 185110 }, { "epoch": 52.54612546125461, "grad_norm": 0.07009433209896088, "learning_rate": 4.747573091115527e-05, "loss": 0.0007305731996893883, "step": 185120 }, { "epoch": 52.548963951177974, "grad_norm": 0.2810664176940918, "learning_rate": 4.7472892421231906e-05, "loss": 0.0008572075515985489, "step": 185130 }, { "epoch": 52.55180244110134, "grad_norm": 0.10936997830867767, "learning_rate": 4.747005393130855e-05, "loss": 0.0002504771575331688, "step": 185140 }, { "epoch": 52.55464093102469, "grad_norm": 0.1941082924604416, "learning_rate": 4.746721544138519e-05, "loss": 0.0029945846647024153, "step": 185150 }, { "epoch": 52.557479420948056, "grad_norm": 0.05512005090713501, "learning_rate": 4.7464376951461823e-05, "loss": 0.00023591574281454087, "step": 185160 }, { "epoch": 52.56031791087142, "grad_norm": 0.045048169791698456, "learning_rate": 4.7461538461538465e-05, "loss": 0.000295221246778965, "step": 185170 }, { "epoch": 52.563156400794774, "grad_norm": 0.03022882342338562, "learning_rate": 4.74586999716151e-05, "loss": 0.00033792518079280855, "step": 185180 }, { "epoch": 52.56599489071814, "grad_norm": 0.3429563045501709, "learning_rate": 4.745586148169174e-05, "loss": 0.000784645788371563, "step": 185190 }, { "epoch": 52.5688333806415, "grad_norm": 0.3609480559825897, "learning_rate": 4.745302299176838e-05, "loss": 0.003713177889585495, "step": 185200 }, { "epoch": 52.571671870564856, "grad_norm": 0.2989714741706848, "learning_rate": 4.745018450184502e-05, "loss": 0.0015159361064434052, "step": 185210 }, { "epoch": 52.57451036048822, "grad_norm": 0.20835088193416595, "learning_rate": 4.7447346011921665e-05, "loss": 0.0003001958131790161, "step": 185220 }, { "epoch": 52.57734885041158, "grad_norm": 0.2202584594488144, "learning_rate": 4.74445075219983e-05, "loss": 0.0003246534615755081, "step": 185230 }, { "epoch": 52.580187340334945, "grad_norm": 0.21264386177062988, "learning_rate": 4.7441669032074934e-05, "loss": 0.000213053822517395, "step": 185240 }, { "epoch": 52.5830258302583, "grad_norm": 0.27794283628463745, "learning_rate": 4.743883054215158e-05, "loss": 0.0003891423344612122, "step": 185250 }, { "epoch": 52.58586432018166, "grad_norm": 0.011930675245821476, "learning_rate": 4.743599205222822e-05, "loss": 0.0009061414748430252, "step": 185260 }, { "epoch": 52.588702810105026, "grad_norm": 0.6409693956375122, "learning_rate": 4.743315356230486e-05, "loss": 0.0006713824346661568, "step": 185270 }, { "epoch": 52.59154130002838, "grad_norm": 0.23920466005802155, "learning_rate": 4.743031507238149e-05, "loss": 0.0007942656055092811, "step": 185280 }, { "epoch": 52.594379789951745, "grad_norm": 0.13893775641918182, "learning_rate": 4.7427476582458134e-05, "loss": 0.0008586443960666657, "step": 185290 }, { "epoch": 52.59721827987511, "grad_norm": 2.483689546585083, "learning_rate": 4.7424638092534776e-05, "loss": 0.0005111707374453545, "step": 185300 }, { "epoch": 52.60005676979847, "grad_norm": 0.0080666933208704, "learning_rate": 4.742179960261141e-05, "loss": 0.00039760638028383253, "step": 185310 }, { "epoch": 52.60289525972183, "grad_norm": 0.021713584661483765, "learning_rate": 4.741896111268805e-05, "loss": 0.0005626743659377098, "step": 185320 }, { "epoch": 52.60573374964519, "grad_norm": 0.0566079318523407, "learning_rate": 4.741612262276469e-05, "loss": 0.0005209507420659065, "step": 185330 }, { "epoch": 52.60857223956855, "grad_norm": 0.019202305004000664, "learning_rate": 4.741328413284133e-05, "loss": 0.003702656924724579, "step": 185340 }, { "epoch": 52.61141072949191, "grad_norm": 1.7712969779968262, "learning_rate": 4.741044564291797e-05, "loss": 0.0023082386702299117, "step": 185350 }, { "epoch": 52.61424921941527, "grad_norm": 0.527144730091095, "learning_rate": 4.740760715299461e-05, "loss": 0.0005703317001461983, "step": 185360 }, { "epoch": 52.617087709338634, "grad_norm": 0.18114162981510162, "learning_rate": 4.7404768663071245e-05, "loss": 0.0005095869302749634, "step": 185370 }, { "epoch": 52.61992619926199, "grad_norm": 0.018949512392282486, "learning_rate": 4.7401930173147886e-05, "loss": 0.00036325007677078246, "step": 185380 }, { "epoch": 52.62276468918535, "grad_norm": 0.02048935927450657, "learning_rate": 4.739909168322453e-05, "loss": 0.0003137892112135887, "step": 185390 }, { "epoch": 52.625603179108715, "grad_norm": 0.08305579423904419, "learning_rate": 4.739625319330116e-05, "loss": 0.006444612145423889, "step": 185400 }, { "epoch": 52.62844166903208, "grad_norm": 0.28879034519195557, "learning_rate": 4.7393414703377804e-05, "loss": 0.0019057640805840493, "step": 185410 }, { "epoch": 52.631280158955434, "grad_norm": 0.027159877121448517, "learning_rate": 4.7390576213454445e-05, "loss": 0.0006318029016256332, "step": 185420 }, { "epoch": 52.6341186488788, "grad_norm": 0.11528678238391876, "learning_rate": 4.7387737723531086e-05, "loss": 0.0005651568993926048, "step": 185430 }, { "epoch": 52.63695713880216, "grad_norm": 0.20741763710975647, "learning_rate": 4.738489923360772e-05, "loss": 0.0013119548559188843, "step": 185440 }, { "epoch": 52.639795628725516, "grad_norm": 0.26731425523757935, "learning_rate": 4.738206074368436e-05, "loss": 0.0002934100106358528, "step": 185450 }, { "epoch": 52.64263411864888, "grad_norm": 0.03198122978210449, "learning_rate": 4.7379222253761004e-05, "loss": 0.001993107609450817, "step": 185460 }, { "epoch": 52.64547260857224, "grad_norm": 0.03491595387458801, "learning_rate": 4.737638376383764e-05, "loss": 0.0005677128210663795, "step": 185470 }, { "epoch": 52.6483110984956, "grad_norm": 0.09818445891141891, "learning_rate": 4.737354527391428e-05, "loss": 0.0006580352783203125, "step": 185480 }, { "epoch": 52.65114958841896, "grad_norm": 0.3002893626689911, "learning_rate": 4.737070678399092e-05, "loss": 0.007006453722715378, "step": 185490 }, { "epoch": 52.65398807834232, "grad_norm": 0.10338694602251053, "learning_rate": 4.7367868294067556e-05, "loss": 0.0080448217689991, "step": 185500 }, { "epoch": 52.65398807834232, "eval_accuracy": 0.9743752781840147, "eval_loss": 0.0890604555606842, "eval_runtime": 32.6632, "eval_samples_per_second": 481.489, "eval_steps_per_second": 7.531, "step": 185500 }, { "epoch": 52.656826568265686, "grad_norm": 0.12359783053398132, "learning_rate": 4.73650298041442e-05, "loss": 0.0018954411149024963, "step": 185510 }, { "epoch": 52.65966505818904, "grad_norm": 0.02318534255027771, "learning_rate": 4.736219131422084e-05, "loss": 0.0003147747367620468, "step": 185520 }, { "epoch": 52.662503548112404, "grad_norm": 3.2030482292175293, "learning_rate": 4.735935282429747e-05, "loss": 0.0017232675105333327, "step": 185530 }, { "epoch": 52.66534203803577, "grad_norm": 0.02708360180258751, "learning_rate": 4.7356514334374114e-05, "loss": 0.0010398253798484802, "step": 185540 }, { "epoch": 52.66818052795912, "grad_norm": 0.45479533076286316, "learning_rate": 4.7353675844450756e-05, "loss": 0.00043730009347200396, "step": 185550 }, { "epoch": 52.671019017882486, "grad_norm": 0.03316853940486908, "learning_rate": 4.73508373545274e-05, "loss": 0.0005436841398477555, "step": 185560 }, { "epoch": 52.67385750780585, "grad_norm": 0.036265190690755844, "learning_rate": 4.734799886460403e-05, "loss": 0.00048191547393798827, "step": 185570 }, { "epoch": 52.676695997729205, "grad_norm": 0.058726515620946884, "learning_rate": 4.734516037468067e-05, "loss": 0.004901925846934318, "step": 185580 }, { "epoch": 52.67953448765257, "grad_norm": 0.02433466538786888, "learning_rate": 4.7342321884757314e-05, "loss": 0.0009424816817045212, "step": 185590 }, { "epoch": 52.68237297757593, "grad_norm": 0.024017754942178726, "learning_rate": 4.733948339483395e-05, "loss": 0.00549938939511776, "step": 185600 }, { "epoch": 52.68521146749929, "grad_norm": 2.948828935623169, "learning_rate": 4.733664490491059e-05, "loss": 0.000913066603243351, "step": 185610 }, { "epoch": 52.68804995742265, "grad_norm": 0.25217127799987793, "learning_rate": 4.733380641498723e-05, "loss": 0.01155783236026764, "step": 185620 }, { "epoch": 52.69088844734601, "grad_norm": 3.9449269771575928, "learning_rate": 4.7330967925063866e-05, "loss": 0.001362660340964794, "step": 185630 }, { "epoch": 52.693726937269375, "grad_norm": 0.01123631838709116, "learning_rate": 4.732812943514051e-05, "loss": 0.0014895929023623466, "step": 185640 }, { "epoch": 52.69656542719273, "grad_norm": 0.9017177820205688, "learning_rate": 4.732529094521715e-05, "loss": 0.0006215654313564301, "step": 185650 }, { "epoch": 52.69940391711609, "grad_norm": 0.30196502804756165, "learning_rate": 4.7322452455293784e-05, "loss": 0.004408921301364899, "step": 185660 }, { "epoch": 52.702242407039456, "grad_norm": 9.108743667602539, "learning_rate": 4.7319613965370425e-05, "loss": 0.0025856684893369674, "step": 185670 }, { "epoch": 52.70508089696281, "grad_norm": 0.05668089911341667, "learning_rate": 4.7316775475447066e-05, "loss": 0.0004437718540430069, "step": 185680 }, { "epoch": 52.707919386886175, "grad_norm": 2.226569890975952, "learning_rate": 4.731393698552371e-05, "loss": 0.0009616909548640252, "step": 185690 }, { "epoch": 52.71075787680954, "grad_norm": 1.040649652481079, "learning_rate": 4.731109849560034e-05, "loss": 0.0008955013006925582, "step": 185700 }, { "epoch": 52.7135963667329, "grad_norm": 0.026919545605778694, "learning_rate": 4.730826000567698e-05, "loss": 0.0006707839667797089, "step": 185710 }, { "epoch": 52.71643485665626, "grad_norm": 0.1344549059867859, "learning_rate": 4.7305421515753625e-05, "loss": 0.0028807993978261946, "step": 185720 }, { "epoch": 52.71927334657962, "grad_norm": 0.2006743848323822, "learning_rate": 4.730258302583026e-05, "loss": 0.0011252373456954955, "step": 185730 }, { "epoch": 52.72211183650298, "grad_norm": 0.04570501297712326, "learning_rate": 4.72997445359069e-05, "loss": 0.0011382753029465676, "step": 185740 }, { "epoch": 52.72495032642634, "grad_norm": 0.018986402079463005, "learning_rate": 4.729690604598354e-05, "loss": 0.0004973847419023514, "step": 185750 }, { "epoch": 52.7277888163497, "grad_norm": 0.41940921545028687, "learning_rate": 4.729406755606018e-05, "loss": 0.0007176132872700691, "step": 185760 }, { "epoch": 52.730627306273064, "grad_norm": 0.08683253824710846, "learning_rate": 4.729122906613682e-05, "loss": 0.0006362447515130043, "step": 185770 }, { "epoch": 52.73346579619643, "grad_norm": 0.04611833766102791, "learning_rate": 4.728839057621346e-05, "loss": 0.0012508610263466835, "step": 185780 }, { "epoch": 52.73630428611978, "grad_norm": 1.2595043182373047, "learning_rate": 4.7285552086290094e-05, "loss": 0.0008790530264377594, "step": 185790 }, { "epoch": 52.739142776043145, "grad_norm": 0.06525368243455887, "learning_rate": 4.7282713596366736e-05, "loss": 0.0012218631803989411, "step": 185800 }, { "epoch": 52.74198126596651, "grad_norm": 2.1024703979492188, "learning_rate": 4.727987510644337e-05, "loss": 0.0010576851665973664, "step": 185810 }, { "epoch": 52.744819755889864, "grad_norm": 2.0177900791168213, "learning_rate": 4.727703661652001e-05, "loss": 0.006474721431732178, "step": 185820 }, { "epoch": 52.74765824581323, "grad_norm": 0.0555436797440052, "learning_rate": 4.727419812659665e-05, "loss": 0.0025393694639205934, "step": 185830 }, { "epoch": 52.75049673573659, "grad_norm": 0.5443676114082336, "learning_rate": 4.727135963667329e-05, "loss": 0.005507834255695343, "step": 185840 }, { "epoch": 52.753335225659946, "grad_norm": 0.05759714916348457, "learning_rate": 4.7268521146749936e-05, "loss": 0.0024169694632291796, "step": 185850 }, { "epoch": 52.75617371558331, "grad_norm": 0.45984742045402527, "learning_rate": 4.726568265682657e-05, "loss": 0.0012779790908098222, "step": 185860 }, { "epoch": 52.75901220550667, "grad_norm": 0.579883337020874, "learning_rate": 4.7262844166903205e-05, "loss": 0.0009206749498844146, "step": 185870 }, { "epoch": 52.761850695430034, "grad_norm": 1.9006707668304443, "learning_rate": 4.726000567697985e-05, "loss": 0.001442374475300312, "step": 185880 }, { "epoch": 52.76468918535339, "grad_norm": 0.6051242351531982, "learning_rate": 4.725716718705649e-05, "loss": 0.00028705839067697523, "step": 185890 }, { "epoch": 52.76752767527675, "grad_norm": 0.09879337251186371, "learning_rate": 4.725432869713313e-05, "loss": 0.0014525072649121285, "step": 185900 }, { "epoch": 52.770366165200116, "grad_norm": 0.2563883066177368, "learning_rate": 4.7251490207209764e-05, "loss": 0.0006884919479489326, "step": 185910 }, { "epoch": 52.77320465512347, "grad_norm": 0.02914213202893734, "learning_rate": 4.7248651717286405e-05, "loss": 0.0013490477576851845, "step": 185920 }, { "epoch": 52.776043145046835, "grad_norm": 0.0796378031373024, "learning_rate": 4.724581322736305e-05, "loss": 0.0013372285291552543, "step": 185930 }, { "epoch": 52.7788816349702, "grad_norm": 0.046775925904512405, "learning_rate": 4.724297473743968e-05, "loss": 0.0010764306411147118, "step": 185940 }, { "epoch": 52.78172012489355, "grad_norm": 1.439765214920044, "learning_rate": 4.724013624751632e-05, "loss": 0.005131819099187851, "step": 185950 }, { "epoch": 52.784558614816916, "grad_norm": 0.10606741160154343, "learning_rate": 4.7237297757592964e-05, "loss": 0.0022700073197484015, "step": 185960 }, { "epoch": 52.78739710474028, "grad_norm": 0.3596985340118408, "learning_rate": 4.72344592676696e-05, "loss": 0.0007230114191770553, "step": 185970 }, { "epoch": 52.79023559466364, "grad_norm": 0.07961712777614594, "learning_rate": 4.723162077774625e-05, "loss": 0.0010866424068808555, "step": 185980 }, { "epoch": 52.793074084587, "grad_norm": 0.04963191598653793, "learning_rate": 4.722878228782288e-05, "loss": 0.00038391612470149993, "step": 185990 }, { "epoch": 52.79591257451036, "grad_norm": 0.7350358366966248, "learning_rate": 4.7225943797899516e-05, "loss": 0.0004122616723179817, "step": 186000 }, { "epoch": 52.79591257451036, "eval_accuracy": 0.9782539581611241, "eval_loss": 0.07835876941680908, "eval_runtime": 32.8036, "eval_samples_per_second": 479.43, "eval_steps_per_second": 7.499, "step": 186000 }, { "epoch": 52.79875106443372, "grad_norm": 0.43226468563079834, "learning_rate": 4.722310530797616e-05, "loss": 0.000393989123404026, "step": 186010 }, { "epoch": 52.80158955435708, "grad_norm": 0.5629341006278992, "learning_rate": 4.72202668180528e-05, "loss": 0.006162229925394058, "step": 186020 }, { "epoch": 52.80442804428044, "grad_norm": 0.042067691683769226, "learning_rate": 4.721742832812944e-05, "loss": 0.0013523841276764869, "step": 186030 }, { "epoch": 52.807266534203805, "grad_norm": 0.07040529698133469, "learning_rate": 4.7214589838206075e-05, "loss": 0.0010930126532912253, "step": 186040 }, { "epoch": 52.81010502412717, "grad_norm": 0.39801710844039917, "learning_rate": 4.7211751348282716e-05, "loss": 0.00594882033765316, "step": 186050 }, { "epoch": 52.812943514050524, "grad_norm": 3.3025922775268555, "learning_rate": 4.720891285835936e-05, "loss": 0.0012652017176151276, "step": 186060 }, { "epoch": 52.81578200397389, "grad_norm": 5.5879011154174805, "learning_rate": 4.720607436843599e-05, "loss": 0.0012182939797639846, "step": 186070 }, { "epoch": 52.81862049389725, "grad_norm": 0.1153416708111763, "learning_rate": 4.720323587851263e-05, "loss": 0.001368202455341816, "step": 186080 }, { "epoch": 52.821458983820605, "grad_norm": 1.0654881000518799, "learning_rate": 4.7200397388589275e-05, "loss": 0.0005781034007668496, "step": 186090 }, { "epoch": 52.82429747374397, "grad_norm": 1.574164867401123, "learning_rate": 4.719755889866591e-05, "loss": 0.0009738141670823097, "step": 186100 }, { "epoch": 52.82713596366733, "grad_norm": 0.41416653990745544, "learning_rate": 4.719472040874255e-05, "loss": 0.0009036064147949219, "step": 186110 }, { "epoch": 52.82997445359069, "grad_norm": 0.10472802072763443, "learning_rate": 4.719188191881919e-05, "loss": 0.0007203890010714531, "step": 186120 }, { "epoch": 52.83281294351405, "grad_norm": 0.1843029409646988, "learning_rate": 4.718904342889583e-05, "loss": 0.0010707173496484756, "step": 186130 }, { "epoch": 52.83565143343741, "grad_norm": 0.47467759251594543, "learning_rate": 4.718620493897247e-05, "loss": 0.0006647901609539986, "step": 186140 }, { "epoch": 52.838489923360775, "grad_norm": 0.9722204208374023, "learning_rate": 4.718336644904911e-05, "loss": 0.0007989989593625068, "step": 186150 }, { "epoch": 52.84132841328413, "grad_norm": 0.036686379462480545, "learning_rate": 4.718052795912575e-05, "loss": 0.0006733773276209831, "step": 186160 }, { "epoch": 52.844166903207494, "grad_norm": 4.554087162017822, "learning_rate": 4.7177689469202385e-05, "loss": 0.0032998915761709214, "step": 186170 }, { "epoch": 52.84700539313086, "grad_norm": 0.0486115962266922, "learning_rate": 4.717485097927903e-05, "loss": 0.001992819271981716, "step": 186180 }, { "epoch": 52.84984388305421, "grad_norm": 1.5523966550827026, "learning_rate": 4.717201248935567e-05, "loss": 0.0010800985619425774, "step": 186190 }, { "epoch": 52.852682372977576, "grad_norm": 0.2830045819282532, "learning_rate": 4.71691739994323e-05, "loss": 0.0015748050063848495, "step": 186200 }, { "epoch": 52.85552086290094, "grad_norm": 0.33962830901145935, "learning_rate": 4.7166335509508944e-05, "loss": 0.0003678485751152039, "step": 186210 }, { "epoch": 52.858359352824294, "grad_norm": 0.0537160262465477, "learning_rate": 4.7163497019585586e-05, "loss": 0.0017336517572402953, "step": 186220 }, { "epoch": 52.86119784274766, "grad_norm": 0.04691077023744583, "learning_rate": 4.716065852966222e-05, "loss": 0.0009140096604824066, "step": 186230 }, { "epoch": 52.86403633267102, "grad_norm": 0.6862740516662598, "learning_rate": 4.715782003973886e-05, "loss": 0.0038388464599847794, "step": 186240 }, { "epoch": 52.86687482259438, "grad_norm": 0.05020619183778763, "learning_rate": 4.71549815498155e-05, "loss": 0.0009594792500138283, "step": 186250 }, { "epoch": 52.86971331251774, "grad_norm": 0.07432413846254349, "learning_rate": 4.715214305989214e-05, "loss": 0.0010832717642188072, "step": 186260 }, { "epoch": 52.8725518024411, "grad_norm": 0.5585078597068787, "learning_rate": 4.714930456996878e-05, "loss": 0.0011963484808802604, "step": 186270 }, { "epoch": 52.875390292364465, "grad_norm": 0.052770067006349564, "learning_rate": 4.714646608004542e-05, "loss": 0.0014254337176680565, "step": 186280 }, { "epoch": 52.87822878228782, "grad_norm": 0.11159157752990723, "learning_rate": 4.7143627590122055e-05, "loss": 0.0004890399053692817, "step": 186290 }, { "epoch": 52.88106727221118, "grad_norm": 1.0920945405960083, "learning_rate": 4.7140789100198696e-05, "loss": 0.001100941002368927, "step": 186300 }, { "epoch": 52.883905762134546, "grad_norm": 0.03142758458852768, "learning_rate": 4.713795061027533e-05, "loss": 0.0003737971186637878, "step": 186310 }, { "epoch": 52.8867442520579, "grad_norm": 0.7031912803649902, "learning_rate": 4.713511212035198e-05, "loss": 0.0006695404648780823, "step": 186320 }, { "epoch": 52.889582741981265, "grad_norm": 0.29146626591682434, "learning_rate": 4.7132273630428614e-05, "loss": 0.00019972715526819229, "step": 186330 }, { "epoch": 52.89242123190463, "grad_norm": 0.2578912079334259, "learning_rate": 4.712943514050525e-05, "loss": 0.0015983432531356812, "step": 186340 }, { "epoch": 52.89525972182799, "grad_norm": 0.26646336913108826, "learning_rate": 4.7126596650581896e-05, "loss": 0.0025143550708889963, "step": 186350 }, { "epoch": 52.898098211751346, "grad_norm": 0.03392914682626724, "learning_rate": 4.712375816065853e-05, "loss": 0.002839016355574131, "step": 186360 }, { "epoch": 52.90093670167471, "grad_norm": 0.5476954579353333, "learning_rate": 4.712091967073517e-05, "loss": 0.0005209110677242279, "step": 186370 }, { "epoch": 52.90377519159807, "grad_norm": 0.406361848115921, "learning_rate": 4.7118081180811814e-05, "loss": 0.0008580882102251052, "step": 186380 }, { "epoch": 52.90661368152143, "grad_norm": 0.06669175624847412, "learning_rate": 4.711524269088845e-05, "loss": 0.0032583050429821014, "step": 186390 }, { "epoch": 52.90945217144479, "grad_norm": 0.6824454069137573, "learning_rate": 4.711240420096509e-05, "loss": 0.0007803093641996384, "step": 186400 }, { "epoch": 52.912290661368154, "grad_norm": 0.14807383716106415, "learning_rate": 4.7109565711041724e-05, "loss": 0.00147513747215271, "step": 186410 }, { "epoch": 52.91512915129151, "grad_norm": 0.08323166519403458, "learning_rate": 4.7106727221118366e-05, "loss": 0.00987521857023239, "step": 186420 }, { "epoch": 52.91796764121487, "grad_norm": 0.03487098217010498, "learning_rate": 4.710388873119501e-05, "loss": 0.0008615966886281967, "step": 186430 }, { "epoch": 52.920806131138235, "grad_norm": 0.07758292555809021, "learning_rate": 4.710105024127164e-05, "loss": 0.0007901865988969803, "step": 186440 }, { "epoch": 52.9236446210616, "grad_norm": 0.40486881136894226, "learning_rate": 4.709821175134829e-05, "loss": 0.0014066126197576523, "step": 186450 }, { "epoch": 52.926483110984954, "grad_norm": 0.09714283794164658, "learning_rate": 4.7095373261424924e-05, "loss": 0.0012796740978956223, "step": 186460 }, { "epoch": 52.92932160090832, "grad_norm": 0.5464173555374146, "learning_rate": 4.709253477150156e-05, "loss": 0.0073540061712265015, "step": 186470 }, { "epoch": 52.93216009083168, "grad_norm": 1.4453067779541016, "learning_rate": 4.708969628157821e-05, "loss": 0.011840809881687165, "step": 186480 }, { "epoch": 52.934998580755035, "grad_norm": 0.15751346945762634, "learning_rate": 4.708685779165484e-05, "loss": 0.002746947854757309, "step": 186490 }, { "epoch": 52.9378370706784, "grad_norm": 3.499262809753418, "learning_rate": 4.708401930173148e-05, "loss": 0.002546422928571701, "step": 186500 }, { "epoch": 52.9378370706784, "eval_accuracy": 0.976918674890316, "eval_loss": 0.08579359203577042, "eval_runtime": 32.0221, "eval_samples_per_second": 491.129, "eval_steps_per_second": 7.682, "step": 186500 }, { "epoch": 52.94067556060176, "grad_norm": 0.12745369970798492, "learning_rate": 4.708118081180812e-05, "loss": 0.0011678164824843406, "step": 186510 }, { "epoch": 52.943514050525124, "grad_norm": 0.19829964637756348, "learning_rate": 4.707834232188476e-05, "loss": 0.003963455557823181, "step": 186520 }, { "epoch": 52.94635254044848, "grad_norm": 1.4347254037857056, "learning_rate": 4.70755038319614e-05, "loss": 0.001419261284172535, "step": 186530 }, { "epoch": 52.94919103037184, "grad_norm": 0.24300187826156616, "learning_rate": 4.7072665342038035e-05, "loss": 0.009653251618146896, "step": 186540 }, { "epoch": 52.952029520295206, "grad_norm": 0.28136777877807617, "learning_rate": 4.7069826852114676e-05, "loss": 0.0016790073364973067, "step": 186550 }, { "epoch": 52.95486801021856, "grad_norm": 0.1163061112165451, "learning_rate": 4.706698836219132e-05, "loss": 0.0007288211956620217, "step": 186560 }, { "epoch": 52.957706500141924, "grad_norm": 0.14876312017440796, "learning_rate": 4.706414987226795e-05, "loss": 0.002441161312162876, "step": 186570 }, { "epoch": 52.96054499006529, "grad_norm": 0.017498524859547615, "learning_rate": 4.706159523133693e-05, "loss": 0.01781250536441803, "step": 186580 }, { "epoch": 52.96338347998864, "grad_norm": 0.38986697793006897, "learning_rate": 4.705875674141357e-05, "loss": 0.0005735533311963081, "step": 186590 }, { "epoch": 52.966221969912006, "grad_norm": 0.26328518986701965, "learning_rate": 4.705591825149021e-05, "loss": 0.0020713524892926214, "step": 186600 }, { "epoch": 52.96906045983537, "grad_norm": 0.4067014455795288, "learning_rate": 4.705307976156685e-05, "loss": 0.0003866251558065414, "step": 186610 }, { "epoch": 52.97189894975873, "grad_norm": 0.03233945742249489, "learning_rate": 4.7050241271643484e-05, "loss": 0.0003457270562648773, "step": 186620 }, { "epoch": 52.97473743968209, "grad_norm": 0.48162275552749634, "learning_rate": 4.704740278172013e-05, "loss": 0.0005493048578500747, "step": 186630 }, { "epoch": 52.97757592960545, "grad_norm": 0.052287984639406204, "learning_rate": 4.704456429179677e-05, "loss": 0.0007736720144748688, "step": 186640 }, { "epoch": 52.98041441952881, "grad_norm": 0.1726934015750885, "learning_rate": 4.70417258018734e-05, "loss": 0.003924472630023957, "step": 186650 }, { "epoch": 52.98325290945217, "grad_norm": 0.05058598145842552, "learning_rate": 4.703888731195005e-05, "loss": 0.0014989161863923073, "step": 186660 }, { "epoch": 52.98609139937553, "grad_norm": 0.7435520887374878, "learning_rate": 4.7036048822026684e-05, "loss": 0.0009860554710030556, "step": 186670 }, { "epoch": 52.988929889298895, "grad_norm": 0.12682199478149414, "learning_rate": 4.7033210332103326e-05, "loss": 0.0006713606417179108, "step": 186680 }, { "epoch": 52.99176837922225, "grad_norm": 0.0371667705476284, "learning_rate": 4.703037184217996e-05, "loss": 0.007192297279834748, "step": 186690 }, { "epoch": 52.99460686914561, "grad_norm": 0.0615924634039402, "learning_rate": 4.70275333522566e-05, "loss": 0.0004767684265971184, "step": 186700 }, { "epoch": 52.997445359068976, "grad_norm": 3.853525161743164, "learning_rate": 4.702469486233324e-05, "loss": 0.0024151716381311415, "step": 186710 }, { "epoch": 53.00028384899234, "grad_norm": 0.3753420114517212, "learning_rate": 4.702185637240988e-05, "loss": 0.0005496107041835785, "step": 186720 }, { "epoch": 53.003122338915695, "grad_norm": 2.4691920280456543, "learning_rate": 4.701901788248652e-05, "loss": 0.0016416231170296668, "step": 186730 }, { "epoch": 53.00596082883906, "grad_norm": 0.4115088880062103, "learning_rate": 4.701617939256316e-05, "loss": 0.0009204635396599769, "step": 186740 }, { "epoch": 53.00879931876242, "grad_norm": 5.280255317687988, "learning_rate": 4.7013340902639795e-05, "loss": 0.0010163282975554466, "step": 186750 }, { "epoch": 53.01163780868578, "grad_norm": 0.16813445091247559, "learning_rate": 4.7010502412716436e-05, "loss": 0.0009657301008701325, "step": 186760 }, { "epoch": 53.01447629860914, "grad_norm": 0.02061653323471546, "learning_rate": 4.700766392279308e-05, "loss": 0.0038428518921136854, "step": 186770 }, { "epoch": 53.0173147885325, "grad_norm": 0.0222296342253685, "learning_rate": 4.700482543286971e-05, "loss": 0.0005093691870570183, "step": 186780 }, { "epoch": 53.02015327845586, "grad_norm": 0.031990401446819305, "learning_rate": 4.7001986942946354e-05, "loss": 0.0014039330184459687, "step": 186790 }, { "epoch": 53.02299176837922, "grad_norm": 0.11974051594734192, "learning_rate": 4.6999148453022995e-05, "loss": 0.0011685620993375778, "step": 186800 }, { "epoch": 53.025830258302584, "grad_norm": 0.02388240024447441, "learning_rate": 4.699630996309963e-05, "loss": 0.0014687377959489822, "step": 186810 }, { "epoch": 53.02866874822595, "grad_norm": 0.030611790716648102, "learning_rate": 4.699347147317627e-05, "loss": 0.00036798454821109774, "step": 186820 }, { "epoch": 53.0315072381493, "grad_norm": 0.2679491937160492, "learning_rate": 4.699063298325291e-05, "loss": 0.0007893437519669533, "step": 186830 }, { "epoch": 53.034345728072665, "grad_norm": 0.6006653308868408, "learning_rate": 4.6987794493329554e-05, "loss": 0.0006616206839680672, "step": 186840 }, { "epoch": 53.03718421799603, "grad_norm": 0.01810004934668541, "learning_rate": 4.698495600340619e-05, "loss": 0.0002793140709400177, "step": 186850 }, { "epoch": 53.040022707919384, "grad_norm": 0.007617782801389694, "learning_rate": 4.698211751348283e-05, "loss": 0.0007954850792884826, "step": 186860 }, { "epoch": 53.04286119784275, "grad_norm": 1.574866771697998, "learning_rate": 4.697927902355947e-05, "loss": 0.0006692333146929741, "step": 186870 }, { "epoch": 53.04569968776611, "grad_norm": 0.06003246083855629, "learning_rate": 4.6976440533636106e-05, "loss": 0.002177863568067551, "step": 186880 }, { "epoch": 53.04853817768947, "grad_norm": 0.48628848791122437, "learning_rate": 4.697360204371275e-05, "loss": 0.0011009020730853082, "step": 186890 }, { "epoch": 53.05137666761283, "grad_norm": 0.17109133303165436, "learning_rate": 4.697076355378939e-05, "loss": 0.0006325438618659974, "step": 186900 }, { "epoch": 53.05421515753619, "grad_norm": 0.12658186256885529, "learning_rate": 4.696792506386602e-05, "loss": 0.00023299697786569596, "step": 186910 }, { "epoch": 53.057053647459554, "grad_norm": 0.09961030632257462, "learning_rate": 4.6965086573942664e-05, "loss": 0.0007283028215169906, "step": 186920 }, { "epoch": 53.05989213738291, "grad_norm": 0.05417123809456825, "learning_rate": 4.6962248084019306e-05, "loss": 0.0003538798540830612, "step": 186930 }, { "epoch": 53.06273062730627, "grad_norm": 0.13278192281723022, "learning_rate": 4.695940959409594e-05, "loss": 0.0035180777311325075, "step": 186940 }, { "epoch": 53.065569117229636, "grad_norm": 0.12029147148132324, "learning_rate": 4.695657110417258e-05, "loss": 0.006178952753543854, "step": 186950 }, { "epoch": 53.06840760715299, "grad_norm": 0.031747233122587204, "learning_rate": 4.695373261424922e-05, "loss": 0.001958624832332134, "step": 186960 }, { "epoch": 53.071246097076354, "grad_norm": 0.08547956496477127, "learning_rate": 4.6950894124325865e-05, "loss": 0.00025147367268800734, "step": 186970 }, { "epoch": 53.07408458699972, "grad_norm": 1.0183758735656738, "learning_rate": 4.69480556344025e-05, "loss": 0.00044731739908456805, "step": 186980 }, { "epoch": 53.07692307692308, "grad_norm": 0.019348958507180214, "learning_rate": 4.6945217144479134e-05, "loss": 0.0015370257198810578, "step": 186990 }, { "epoch": 53.079761566846436, "grad_norm": 0.04783247411251068, "learning_rate": 4.694237865455578e-05, "loss": 0.0005344085395336152, "step": 187000 }, { "epoch": 53.079761566846436, "eval_accuracy": 0.9713867870541107, "eval_loss": 0.11089169234037399, "eval_runtime": 31.9512, "eval_samples_per_second": 492.22, "eval_steps_per_second": 7.699, "step": 187000 }, { "epoch": 53.0826000567698, "grad_norm": 20.596805572509766, "learning_rate": 4.6939540164632417e-05, "loss": 0.011659535020589829, "step": 187010 }, { "epoch": 53.08543854669316, "grad_norm": 2.3479671478271484, "learning_rate": 4.693670167470906e-05, "loss": 0.0012940609827637672, "step": 187020 }, { "epoch": 53.08827703661652, "grad_norm": 0.03114408068358898, "learning_rate": 4.69338631847857e-05, "loss": 0.0002849854528903961, "step": 187030 }, { "epoch": 53.09111552653988, "grad_norm": 1.9398494958877563, "learning_rate": 4.6931024694862334e-05, "loss": 0.0005921779200434684, "step": 187040 }, { "epoch": 53.09395401646324, "grad_norm": 0.709852397441864, "learning_rate": 4.6928186204938975e-05, "loss": 0.0006474701687693596, "step": 187050 }, { "epoch": 53.0967925063866, "grad_norm": 0.06263653188943863, "learning_rate": 4.6925347715015617e-05, "loss": 0.0006272278726100922, "step": 187060 }, { "epoch": 53.09963099630996, "grad_norm": 0.014547742903232574, "learning_rate": 4.692250922509225e-05, "loss": 0.0002681197598576546, "step": 187070 }, { "epoch": 53.102469486233325, "grad_norm": 0.03502983972430229, "learning_rate": 4.691967073516889e-05, "loss": 0.00023582149296998977, "step": 187080 }, { "epoch": 53.10530797615669, "grad_norm": 0.01701279543340206, "learning_rate": 4.691683224524553e-05, "loss": 0.00021918416023254396, "step": 187090 }, { "epoch": 53.10814646608004, "grad_norm": 0.4748169481754303, "learning_rate": 4.6913993755322175e-05, "loss": 0.0004496075212955475, "step": 187100 }, { "epoch": 53.110984956003406, "grad_norm": 0.06224515661597252, "learning_rate": 4.691115526539881e-05, "loss": 0.00013815425336360931, "step": 187110 }, { "epoch": 53.11382344592677, "grad_norm": 1.3196110725402832, "learning_rate": 4.6908316775475445e-05, "loss": 0.00038871299475431444, "step": 187120 }, { "epoch": 53.116661935850125, "grad_norm": 0.06916163861751556, "learning_rate": 4.690547828555209e-05, "loss": 0.0020409394055604935, "step": 187130 }, { "epoch": 53.11950042577349, "grad_norm": 0.19408981502056122, "learning_rate": 4.690263979562873e-05, "loss": 0.0043735247105360035, "step": 187140 }, { "epoch": 53.12233891569685, "grad_norm": 0.012711308896541595, "learning_rate": 4.689980130570537e-05, "loss": 0.0015541860833764077, "step": 187150 }, { "epoch": 53.12517740562021, "grad_norm": 0.0476750023663044, "learning_rate": 4.689696281578201e-05, "loss": 0.00039815939962863924, "step": 187160 }, { "epoch": 53.12801589554357, "grad_norm": 0.27740389108657837, "learning_rate": 4.6894124325858645e-05, "loss": 0.000287252850830555, "step": 187170 }, { "epoch": 53.13085438546693, "grad_norm": 0.01572190597653389, "learning_rate": 4.6891285835935286e-05, "loss": 0.00041622593998909, "step": 187180 }, { "epoch": 53.133692875390295, "grad_norm": 0.021731248125433922, "learning_rate": 4.688844734601193e-05, "loss": 0.0005726967006921768, "step": 187190 }, { "epoch": 53.13653136531365, "grad_norm": 0.8053247928619385, "learning_rate": 4.688560885608856e-05, "loss": 0.0004643416032195091, "step": 187200 }, { "epoch": 53.139369855237014, "grad_norm": 0.019197138026356697, "learning_rate": 4.68827703661652e-05, "loss": 0.000215916708111763, "step": 187210 }, { "epoch": 53.14220834516038, "grad_norm": 0.4081684350967407, "learning_rate": 4.687993187624184e-05, "loss": 0.0004559928551316261, "step": 187220 }, { "epoch": 53.14504683508373, "grad_norm": 0.04098568484187126, "learning_rate": 4.687709338631848e-05, "loss": 0.0008385729044675827, "step": 187230 }, { "epoch": 53.147885325007096, "grad_norm": 0.07545971125364304, "learning_rate": 4.687425489639512e-05, "loss": 0.00023680906742811202, "step": 187240 }, { "epoch": 53.15072381493046, "grad_norm": 0.17282487452030182, "learning_rate": 4.6871416406471755e-05, "loss": 0.0009677525609731674, "step": 187250 }, { "epoch": 53.15356230485382, "grad_norm": 0.9702873826026917, "learning_rate": 4.6868577916548403e-05, "loss": 0.0036900244653224946, "step": 187260 }, { "epoch": 53.15640079477718, "grad_norm": 0.52781742811203, "learning_rate": 4.686573942662504e-05, "loss": 0.0002894740551710129, "step": 187270 }, { "epoch": 53.15923928470054, "grad_norm": 0.09344497323036194, "learning_rate": 4.686290093670167e-05, "loss": 0.0003575272858142853, "step": 187280 }, { "epoch": 53.1620777746239, "grad_norm": 0.07455617934465408, "learning_rate": 4.686006244677832e-05, "loss": 0.00045306365936994555, "step": 187290 }, { "epoch": 53.16491626454726, "grad_norm": 10.327901840209961, "learning_rate": 4.6857223956854955e-05, "loss": 0.0033307231962680815, "step": 187300 }, { "epoch": 53.16775475447062, "grad_norm": 0.0852927714586258, "learning_rate": 4.68543854669316e-05, "loss": 0.0020900283008813856, "step": 187310 }, { "epoch": 53.170593244393984, "grad_norm": 0.331105500459671, "learning_rate": 4.685154697700823e-05, "loss": 0.000512843020260334, "step": 187320 }, { "epoch": 53.17343173431734, "grad_norm": 0.14804953336715698, "learning_rate": 4.684870848708487e-05, "loss": 0.0033414628356695174, "step": 187330 }, { "epoch": 53.1762702242407, "grad_norm": 0.027433590963482857, "learning_rate": 4.6845869997161514e-05, "loss": 0.004519091546535492, "step": 187340 }, { "epoch": 53.179108714164066, "grad_norm": 0.04188234731554985, "learning_rate": 4.684303150723815e-05, "loss": 0.00019401032477617264, "step": 187350 }, { "epoch": 53.18194720408743, "grad_norm": 0.43340104818344116, "learning_rate": 4.684019301731479e-05, "loss": 0.00018005650490522386, "step": 187360 }, { "epoch": 53.184785694010785, "grad_norm": 0.005684260278940201, "learning_rate": 4.683735452739143e-05, "loss": 0.00040181409567594526, "step": 187370 }, { "epoch": 53.18762418393415, "grad_norm": 0.043361764401197433, "learning_rate": 4.6834516037468066e-05, "loss": 0.00025251749902963636, "step": 187380 }, { "epoch": 53.19046267385751, "grad_norm": 0.007895417511463165, "learning_rate": 4.6831677547544714e-05, "loss": 0.0003590365871787071, "step": 187390 }, { "epoch": 53.193301163780866, "grad_norm": 0.03264365345239639, "learning_rate": 4.682883905762135e-05, "loss": 0.00030274651944637296, "step": 187400 }, { "epoch": 53.19613965370423, "grad_norm": 0.06636297702789307, "learning_rate": 4.6826000567697983e-05, "loss": 0.0016117408871650697, "step": 187410 }, { "epoch": 53.19897814362759, "grad_norm": 0.9597775936126709, "learning_rate": 4.6823162077774625e-05, "loss": 0.0017969051375985145, "step": 187420 }, { "epoch": 53.20181663355095, "grad_norm": 0.1412876397371292, "learning_rate": 4.6820323587851266e-05, "loss": 0.0005128502845764161, "step": 187430 }, { "epoch": 53.20465512347431, "grad_norm": 0.01800248958170414, "learning_rate": 4.681748509792791e-05, "loss": 0.005396492034196854, "step": 187440 }, { "epoch": 53.20749361339767, "grad_norm": 0.05267363414168358, "learning_rate": 4.681464660800454e-05, "loss": 0.00023108404129743577, "step": 187450 }, { "epoch": 53.210332103321036, "grad_norm": 2.6126646995544434, "learning_rate": 4.6811808118081184e-05, "loss": 0.000644209235906601, "step": 187460 }, { "epoch": 53.21317059324439, "grad_norm": 0.15921775996685028, "learning_rate": 4.6808969628157825e-05, "loss": 0.0002822166308760643, "step": 187470 }, { "epoch": 53.216009083167755, "grad_norm": 2.448462724685669, "learning_rate": 4.680613113823446e-05, "loss": 0.0008476134389638901, "step": 187480 }, { "epoch": 53.21884757309112, "grad_norm": 6.471462726593018, "learning_rate": 4.68032926483111e-05, "loss": 0.0024997176602482795, "step": 187490 }, { "epoch": 53.221686063014474, "grad_norm": 0.12524141371250153, "learning_rate": 4.680045415838774e-05, "loss": 0.0007732944563031196, "step": 187500 }, { "epoch": 53.221686063014474, "eval_accuracy": 0.9753926368665352, "eval_loss": 0.09537705034017563, "eval_runtime": 32.919, "eval_samples_per_second": 477.749, "eval_steps_per_second": 7.473, "step": 187500 }, { "epoch": 53.22452455293784, "grad_norm": 0.5902913808822632, "learning_rate": 4.679761566846438e-05, "loss": 0.0014896310865879058, "step": 187510 }, { "epoch": 53.2273630428612, "grad_norm": 0.20311912894248962, "learning_rate": 4.679477717854102e-05, "loss": 0.00042458828538656236, "step": 187520 }, { "epoch": 53.230201532784555, "grad_norm": 0.42265260219573975, "learning_rate": 4.679193868861766e-05, "loss": 0.00038704369217157364, "step": 187530 }, { "epoch": 53.23304002270792, "grad_norm": 0.029061755165457726, "learning_rate": 4.6789100198694294e-05, "loss": 0.0002748539671301842, "step": 187540 }, { "epoch": 53.23587851263128, "grad_norm": 0.0276603102684021, "learning_rate": 4.6786261708770936e-05, "loss": 0.000908457487821579, "step": 187550 }, { "epoch": 53.238717002554644, "grad_norm": 0.3567744493484497, "learning_rate": 4.678342321884758e-05, "loss": 0.0002970263361930847, "step": 187560 }, { "epoch": 53.241555492478, "grad_norm": 0.09496394544839859, "learning_rate": 4.678058472892422e-05, "loss": 0.0023291038349270822, "step": 187570 }, { "epoch": 53.24439398240136, "grad_norm": 0.22010588645935059, "learning_rate": 4.677774623900085e-05, "loss": 0.00023479852825403214, "step": 187580 }, { "epoch": 53.247232472324725, "grad_norm": 0.3240836560726166, "learning_rate": 4.6774907749077494e-05, "loss": 0.0007786054164171218, "step": 187590 }, { "epoch": 53.25007096224808, "grad_norm": 0.06833242624998093, "learning_rate": 4.6772069259154136e-05, "loss": 0.001326226256787777, "step": 187600 }, { "epoch": 53.252909452171444, "grad_norm": 0.09214320033788681, "learning_rate": 4.676923076923077e-05, "loss": 0.00024407096207141877, "step": 187610 }, { "epoch": 53.25574794209481, "grad_norm": 9.006688117980957, "learning_rate": 4.676639227930741e-05, "loss": 0.001843802258372307, "step": 187620 }, { "epoch": 53.25858643201816, "grad_norm": 0.016255563125014305, "learning_rate": 4.676355378938405e-05, "loss": 0.00045562312006950376, "step": 187630 }, { "epoch": 53.261424921941526, "grad_norm": 3.0736217498779297, "learning_rate": 4.676071529946069e-05, "loss": 0.0007492952048778534, "step": 187640 }, { "epoch": 53.26426341186489, "grad_norm": 0.0143976965919137, "learning_rate": 4.675787680953733e-05, "loss": 0.00026582367718219757, "step": 187650 }, { "epoch": 53.26710190178825, "grad_norm": 0.08633128553628922, "learning_rate": 4.675503831961397e-05, "loss": 0.0013951560482382773, "step": 187660 }, { "epoch": 53.26994039171161, "grad_norm": 0.07975874841213226, "learning_rate": 4.6752199829690605e-05, "loss": 0.0010851476341485976, "step": 187670 }, { "epoch": 53.27277888163497, "grad_norm": 3.7503960132598877, "learning_rate": 4.6749361339767246e-05, "loss": 0.0011142130941152572, "step": 187680 }, { "epoch": 53.27561737155833, "grad_norm": 0.08227263391017914, "learning_rate": 4.674652284984389e-05, "loss": 0.0005412077531218529, "step": 187690 }, { "epoch": 53.27845586148169, "grad_norm": 3.2993576526641846, "learning_rate": 4.674368435992052e-05, "loss": 0.003132442757487297, "step": 187700 }, { "epoch": 53.28129435140505, "grad_norm": 0.414966881275177, "learning_rate": 4.6740845869997164e-05, "loss": 0.0006981011480093003, "step": 187710 }, { "epoch": 53.284132841328415, "grad_norm": 0.13751912117004395, "learning_rate": 4.67380073800738e-05, "loss": 0.0011086151003837585, "step": 187720 }, { "epoch": 53.28697133125178, "grad_norm": 0.49899041652679443, "learning_rate": 4.6735168890150446e-05, "loss": 0.0039992030709981915, "step": 187730 }, { "epoch": 53.28980982117513, "grad_norm": 0.011489659547805786, "learning_rate": 4.673233040022708e-05, "loss": 0.0077802278101444244, "step": 187740 }, { "epoch": 53.292648311098496, "grad_norm": 0.052054475992918015, "learning_rate": 4.6729491910303716e-05, "loss": 0.004279228672385216, "step": 187750 }, { "epoch": 53.29548680102186, "grad_norm": 0.08907400816679001, "learning_rate": 4.6726653420380364e-05, "loss": 0.007800862193107605, "step": 187760 }, { "epoch": 53.298325290945215, "grad_norm": 4.105244159698486, "learning_rate": 4.6723814930457e-05, "loss": 0.0018029658123850823, "step": 187770 }, { "epoch": 53.30116378086858, "grad_norm": 0.9128137826919556, "learning_rate": 4.672097644053364e-05, "loss": 0.0006383150815963745, "step": 187780 }, { "epoch": 53.30400227079194, "grad_norm": 6.919089317321777, "learning_rate": 4.671813795061028e-05, "loss": 0.0031527493149042128, "step": 187790 }, { "epoch": 53.306840760715296, "grad_norm": 0.04528863728046417, "learning_rate": 4.6715299460686916e-05, "loss": 0.0020196642726659775, "step": 187800 }, { "epoch": 53.30967925063866, "grad_norm": 16.958234786987305, "learning_rate": 4.671246097076356e-05, "loss": 0.008579444885253907, "step": 187810 }, { "epoch": 53.31251774056202, "grad_norm": 0.09051081538200378, "learning_rate": 4.670962248084019e-05, "loss": 0.0010532019659876823, "step": 187820 }, { "epoch": 53.315356230485385, "grad_norm": 1.0442044734954834, "learning_rate": 4.670678399091683e-05, "loss": 0.002135928347706795, "step": 187830 }, { "epoch": 53.31819472040874, "grad_norm": 4.879495620727539, "learning_rate": 4.6703945500993474e-05, "loss": 0.0016322946175932884, "step": 187840 }, { "epoch": 53.321033210332104, "grad_norm": 0.013169843703508377, "learning_rate": 4.670110701107011e-05, "loss": 0.0007332935929298401, "step": 187850 }, { "epoch": 53.32387170025547, "grad_norm": 0.0702318474650383, "learning_rate": 4.669826852114676e-05, "loss": 0.00375099778175354, "step": 187860 }, { "epoch": 53.32671019017882, "grad_norm": 0.022802142426371574, "learning_rate": 4.669543003122339e-05, "loss": 0.0042123071849346164, "step": 187870 }, { "epoch": 53.329548680102185, "grad_norm": 1.2780405282974243, "learning_rate": 4.6692591541300026e-05, "loss": 0.008054125308990478, "step": 187880 }, { "epoch": 53.33238717002555, "grad_norm": 0.7242178916931152, "learning_rate": 4.6689753051376675e-05, "loss": 0.0015998652204871177, "step": 187890 }, { "epoch": 53.335225659948904, "grad_norm": 0.12846241891384125, "learning_rate": 4.668691456145331e-05, "loss": 0.0004772050306200981, "step": 187900 }, { "epoch": 53.33806414987227, "grad_norm": 0.22185134887695312, "learning_rate": 4.668407607152995e-05, "loss": 0.003217550367116928, "step": 187910 }, { "epoch": 53.34090263979563, "grad_norm": 0.11795251071453094, "learning_rate": 4.6681237581606585e-05, "loss": 0.000369369238615036, "step": 187920 }, { "epoch": 53.34374112971899, "grad_norm": 3.212143659591675, "learning_rate": 4.6678399091683226e-05, "loss": 0.008692929148674011, "step": 187930 }, { "epoch": 53.34657961964235, "grad_norm": 0.010836701840162277, "learning_rate": 4.667556060175987e-05, "loss": 0.00044226516038179395, "step": 187940 }, { "epoch": 53.34941810956571, "grad_norm": 0.01609104312956333, "learning_rate": 4.66727221118365e-05, "loss": 0.0005209090188145638, "step": 187950 }, { "epoch": 53.352256599489074, "grad_norm": 1.2381609678268433, "learning_rate": 4.6669883621913144e-05, "loss": 0.005836763232946396, "step": 187960 }, { "epoch": 53.35509508941243, "grad_norm": 0.10620569437742233, "learning_rate": 4.6667045131989785e-05, "loss": 0.0010946294292807578, "step": 187970 }, { "epoch": 53.35793357933579, "grad_norm": 0.07060200721025467, "learning_rate": 4.666420664206642e-05, "loss": 0.009801150113344193, "step": 187980 }, { "epoch": 53.360772069259156, "grad_norm": 0.0490715317428112, "learning_rate": 4.666136815214307e-05, "loss": 0.004727715998888016, "step": 187990 }, { "epoch": 53.36361055918251, "grad_norm": 0.05600271746516228, "learning_rate": 4.66585296622197e-05, "loss": 0.000669502466917038, "step": 188000 }, { "epoch": 53.36361055918251, "eval_accuracy": 0.9762828257137407, "eval_loss": 0.08823893964290619, "eval_runtime": 32.2328, "eval_samples_per_second": 487.92, "eval_steps_per_second": 7.632, "step": 188000 }, { "epoch": 53.366449049105874, "grad_norm": 5.185880661010742, "learning_rate": 4.665569117229634e-05, "loss": 0.004804622381925583, "step": 188010 }, { "epoch": 53.36928753902924, "grad_norm": 0.04575292766094208, "learning_rate": 4.665285268237298e-05, "loss": 0.000950300320982933, "step": 188020 }, { "epoch": 53.3721260289526, "grad_norm": 0.06251511722803116, "learning_rate": 4.665001419244962e-05, "loss": 0.0020166222006082536, "step": 188030 }, { "epoch": 53.374964518875956, "grad_norm": 0.011868019588291645, "learning_rate": 4.664717570252626e-05, "loss": 0.000693296454846859, "step": 188040 }, { "epoch": 53.37780300879932, "grad_norm": 6.547276973724365, "learning_rate": 4.6644337212602896e-05, "loss": 0.001462644524872303, "step": 188050 }, { "epoch": 53.38064149872268, "grad_norm": 1.185037612915039, "learning_rate": 4.664149872267954e-05, "loss": 0.0010535186156630516, "step": 188060 }, { "epoch": 53.38347998864604, "grad_norm": 0.838546097278595, "learning_rate": 4.663866023275618e-05, "loss": 0.0011111408472061156, "step": 188070 }, { "epoch": 53.3863184785694, "grad_norm": 0.5520305037498474, "learning_rate": 4.663582174283281e-05, "loss": 0.0008367469534277916, "step": 188080 }, { "epoch": 53.38915696849276, "grad_norm": 0.40938836336135864, "learning_rate": 4.6632983252909455e-05, "loss": 0.004319056868553162, "step": 188090 }, { "epoch": 53.391995458416126, "grad_norm": 0.08606300503015518, "learning_rate": 4.6630144762986096e-05, "loss": 0.0005734413862228394, "step": 188100 }, { "epoch": 53.39483394833948, "grad_norm": 0.7917957901954651, "learning_rate": 4.662730627306273e-05, "loss": 0.00047364681959152224, "step": 188110 }, { "epoch": 53.397672438262845, "grad_norm": 0.022059952840209007, "learning_rate": 4.662446778313937e-05, "loss": 0.0017526473850011826, "step": 188120 }, { "epoch": 53.40051092818621, "grad_norm": 2.4283320903778076, "learning_rate": 4.662162929321601e-05, "loss": 0.001446358673274517, "step": 188130 }, { "epoch": 53.40334941810956, "grad_norm": 0.05018947646021843, "learning_rate": 4.661879080329265e-05, "loss": 0.0008096132427453995, "step": 188140 }, { "epoch": 53.406187908032926, "grad_norm": 0.18982112407684326, "learning_rate": 4.661595231336929e-05, "loss": 0.0025374770164489748, "step": 188150 }, { "epoch": 53.40902639795629, "grad_norm": 0.03617572784423828, "learning_rate": 4.661311382344593e-05, "loss": 0.0006389845162630081, "step": 188160 }, { "epoch": 53.411864887879645, "grad_norm": 0.05680306628346443, "learning_rate": 4.6610275333522565e-05, "loss": 0.004915392398834229, "step": 188170 }, { "epoch": 53.41470337780301, "grad_norm": 0.35472002625465393, "learning_rate": 4.660743684359921e-05, "loss": 0.0021022846922278404, "step": 188180 }, { "epoch": 53.41754186772637, "grad_norm": 3.3389973640441895, "learning_rate": 4.660459835367585e-05, "loss": 0.004448512196540832, "step": 188190 }, { "epoch": 53.420380357649734, "grad_norm": 0.07949172705411911, "learning_rate": 4.660175986375249e-05, "loss": 0.0008588759228587151, "step": 188200 }, { "epoch": 53.42321884757309, "grad_norm": 0.0025089362170547247, "learning_rate": 4.6598921373829124e-05, "loss": 0.0019780004397034643, "step": 188210 }, { "epoch": 53.42605733749645, "grad_norm": 0.055848415940999985, "learning_rate": 4.659608288390576e-05, "loss": 0.0012480685487389564, "step": 188220 }, { "epoch": 53.428895827419815, "grad_norm": 0.644150972366333, "learning_rate": 4.659324439398241e-05, "loss": 0.0004986774176359177, "step": 188230 }, { "epoch": 53.43173431734317, "grad_norm": 0.29161128401756287, "learning_rate": 4.659040590405904e-05, "loss": 0.0006886543706059456, "step": 188240 }, { "epoch": 53.434572807266534, "grad_norm": 0.028612801805138588, "learning_rate": 4.658756741413568e-05, "loss": 0.0006515640765428543, "step": 188250 }, { "epoch": 53.4374112971899, "grad_norm": 0.031471848487854004, "learning_rate": 4.6584728924212324e-05, "loss": 0.0003195792436599731, "step": 188260 }, { "epoch": 53.44024978711325, "grad_norm": 0.062422677874565125, "learning_rate": 4.658189043428896e-05, "loss": 0.000794207863509655, "step": 188270 }, { "epoch": 53.443088277036615, "grad_norm": 0.07437340170145035, "learning_rate": 4.65790519443656e-05, "loss": 0.0002399832010269165, "step": 188280 }, { "epoch": 53.44592676695998, "grad_norm": 0.03113231062889099, "learning_rate": 4.657621345444224e-05, "loss": 0.0014466363936662674, "step": 188290 }, { "epoch": 53.44876525688334, "grad_norm": 0.062189262360334396, "learning_rate": 4.6573374964518876e-05, "loss": 0.0003046328201889992, "step": 188300 }, { "epoch": 53.4516037468067, "grad_norm": 0.24986520409584045, "learning_rate": 4.657053647459552e-05, "loss": 0.0009922612458467483, "step": 188310 }, { "epoch": 53.45444223673006, "grad_norm": 0.034784771502017975, "learning_rate": 4.656769798467215e-05, "loss": 0.0002948492765426636, "step": 188320 }, { "epoch": 53.45728072665342, "grad_norm": 2.984663724899292, "learning_rate": 4.65648594947488e-05, "loss": 0.0007460379973053932, "step": 188330 }, { "epoch": 53.46011921657678, "grad_norm": 0.015306895598769188, "learning_rate": 4.6562021004825435e-05, "loss": 0.000284622423350811, "step": 188340 }, { "epoch": 53.46295770650014, "grad_norm": 0.038686010986566544, "learning_rate": 4.655918251490207e-05, "loss": 0.0003249751403927803, "step": 188350 }, { "epoch": 53.465796196423504, "grad_norm": 4.617964267730713, "learning_rate": 4.655634402497872e-05, "loss": 0.003648388013243675, "step": 188360 }, { "epoch": 53.46863468634686, "grad_norm": 0.15376074612140656, "learning_rate": 4.655350553505535e-05, "loss": 0.00040807388722896576, "step": 188370 }, { "epoch": 53.47147317627022, "grad_norm": 0.7541678547859192, "learning_rate": 4.6550667045131993e-05, "loss": 0.0007031701505184173, "step": 188380 }, { "epoch": 53.474311666193586, "grad_norm": 0.12843947112560272, "learning_rate": 4.6547828555208635e-05, "loss": 0.004782354459166527, "step": 188390 }, { "epoch": 53.47715015611695, "grad_norm": 12.120165824890137, "learning_rate": 4.654499006528527e-05, "loss": 0.0024478064849972727, "step": 188400 }, { "epoch": 53.479988646040304, "grad_norm": 1.235603928565979, "learning_rate": 4.654215157536191e-05, "loss": 0.00048064384609460833, "step": 188410 }, { "epoch": 53.48282713596367, "grad_norm": 0.08865918219089508, "learning_rate": 4.6539313085438545e-05, "loss": 0.0010095691308379174, "step": 188420 }, { "epoch": 53.48566562588703, "grad_norm": 0.6434485912322998, "learning_rate": 4.653647459551519e-05, "loss": 0.0009179523214697838, "step": 188430 }, { "epoch": 53.488504115810386, "grad_norm": 0.04104013741016388, "learning_rate": 4.653363610559183e-05, "loss": 0.015262462198734283, "step": 188440 }, { "epoch": 53.49134260573375, "grad_norm": 0.16304485499858856, "learning_rate": 4.653079761566846e-05, "loss": 0.00320955328643322, "step": 188450 }, { "epoch": 53.49418109565711, "grad_norm": 0.041997287422418594, "learning_rate": 4.6527959125745104e-05, "loss": 0.001618834212422371, "step": 188460 }, { "epoch": 53.497019585580475, "grad_norm": 0.14904682338237762, "learning_rate": 4.6525120635821746e-05, "loss": 0.0018872521817684174, "step": 188470 }, { "epoch": 53.49985807550383, "grad_norm": 1.0034269094467163, "learning_rate": 4.652228214589838e-05, "loss": 0.0010557964444160462, "step": 188480 }, { "epoch": 53.50269656542719, "grad_norm": 0.039611585438251495, "learning_rate": 4.651944365597503e-05, "loss": 0.0005894061177968979, "step": 188490 }, { "epoch": 53.505535055350556, "grad_norm": 1.7711520195007324, "learning_rate": 4.651660516605166e-05, "loss": 0.0013704488053917884, "step": 188500 }, { "epoch": 53.505535055350556, "eval_accuracy": 0.9732943345838367, "eval_loss": 0.09527381509542465, "eval_runtime": 32.8245, "eval_samples_per_second": 479.124, "eval_steps_per_second": 7.494, "step": 188500 }, { "epoch": 53.50837354527391, "grad_norm": 1.8463540077209473, "learning_rate": 4.6513766676128304e-05, "loss": 0.005460367351770401, "step": 188510 }, { "epoch": 53.511212035197275, "grad_norm": 0.11110347509384155, "learning_rate": 4.6510928186204946e-05, "loss": 0.0009748652577400207, "step": 188520 }, { "epoch": 53.51405052512064, "grad_norm": 0.13024891912937164, "learning_rate": 4.650808969628158e-05, "loss": 0.00549941435456276, "step": 188530 }, { "epoch": 53.51688901504399, "grad_norm": 1.8181471824645996, "learning_rate": 4.650525120635822e-05, "loss": 0.005132311582565307, "step": 188540 }, { "epoch": 53.519727504967356, "grad_norm": 0.05729495361447334, "learning_rate": 4.6502412716434856e-05, "loss": 0.0017580313608050347, "step": 188550 }, { "epoch": 53.52256599489072, "grad_norm": 0.11723684519529343, "learning_rate": 4.64995742265115e-05, "loss": 0.00388554148375988, "step": 188560 }, { "epoch": 53.52540448481408, "grad_norm": 5.678378105163574, "learning_rate": 4.649673573658814e-05, "loss": 0.0029160957783460617, "step": 188570 }, { "epoch": 53.52824297473744, "grad_norm": 0.46963757276535034, "learning_rate": 4.6493897246664774e-05, "loss": 0.0022098880261182787, "step": 188580 }, { "epoch": 53.5310814646608, "grad_norm": 4.8113789558410645, "learning_rate": 4.6491058756741415e-05, "loss": 0.001940527930855751, "step": 188590 }, { "epoch": 53.533919954584164, "grad_norm": 9.30190372467041, "learning_rate": 4.6488220266818056e-05, "loss": 0.0018904400989413262, "step": 188600 }, { "epoch": 53.53675844450752, "grad_norm": 0.6030286550521851, "learning_rate": 4.648538177689469e-05, "loss": 0.002278678864240646, "step": 188610 }, { "epoch": 53.53959693443088, "grad_norm": 0.3396587371826172, "learning_rate": 4.648254328697134e-05, "loss": 0.005025509744882584, "step": 188620 }, { "epoch": 53.542435424354245, "grad_norm": 0.01260585431009531, "learning_rate": 4.6479704797047974e-05, "loss": 0.001262352056801319, "step": 188630 }, { "epoch": 53.5452739142776, "grad_norm": 0.09240014851093292, "learning_rate": 4.647686630712461e-05, "loss": 0.0048409514129161835, "step": 188640 }, { "epoch": 53.548112404200964, "grad_norm": 0.011994143016636372, "learning_rate": 4.647402781720125e-05, "loss": 0.0007047509774565696, "step": 188650 }, { "epoch": 53.55095089412433, "grad_norm": 0.06757330894470215, "learning_rate": 4.647118932727789e-05, "loss": 0.0009589048102498054, "step": 188660 }, { "epoch": 53.55378938404769, "grad_norm": 0.03461061790585518, "learning_rate": 4.646835083735453e-05, "loss": 0.0031213924288749696, "step": 188670 }, { "epoch": 53.556627873971046, "grad_norm": 0.9232422113418579, "learning_rate": 4.646551234743117e-05, "loss": 0.002472261153161526, "step": 188680 }, { "epoch": 53.55946636389441, "grad_norm": 3.757460832595825, "learning_rate": 4.646267385750781e-05, "loss": 0.0028620751574635504, "step": 188690 }, { "epoch": 53.56230485381777, "grad_norm": 0.05308603122830391, "learning_rate": 4.645983536758445e-05, "loss": 0.0006343841552734375, "step": 188700 }, { "epoch": 53.56514334374113, "grad_norm": 0.1945359855890274, "learning_rate": 4.6456996877661084e-05, "loss": 0.0047975372523069385, "step": 188710 }, { "epoch": 53.56798183366449, "grad_norm": 0.04334292188286781, "learning_rate": 4.6454158387737726e-05, "loss": 0.0011134112253785133, "step": 188720 }, { "epoch": 53.57082032358785, "grad_norm": 0.46612629294395447, "learning_rate": 4.645131989781437e-05, "loss": 0.0021153725683689117, "step": 188730 }, { "epoch": 53.57365881351121, "grad_norm": 0.017947977408766747, "learning_rate": 4.6448481407891e-05, "loss": 0.0009453095495700836, "step": 188740 }, { "epoch": 53.57649730343457, "grad_norm": 0.07249636948108673, "learning_rate": 4.644564291796764e-05, "loss": 0.0007467998191714286, "step": 188750 }, { "epoch": 53.579335793357934, "grad_norm": 0.028150703758001328, "learning_rate": 4.6442804428044284e-05, "loss": 0.002425053343176842, "step": 188760 }, { "epoch": 53.5821742832813, "grad_norm": 0.12547074258327484, "learning_rate": 4.643996593812092e-05, "loss": 0.0013765463605523109, "step": 188770 }, { "epoch": 53.58501277320465, "grad_norm": 3.709806203842163, "learning_rate": 4.643712744819756e-05, "loss": 0.0021960305050015448, "step": 188780 }, { "epoch": 53.587851263128016, "grad_norm": 0.17636407911777496, "learning_rate": 4.64342889582742e-05, "loss": 0.0013036832213401794, "step": 188790 }, { "epoch": 53.59068975305138, "grad_norm": 20.381999969482422, "learning_rate": 4.643145046835084e-05, "loss": 0.005438613146543503, "step": 188800 }, { "epoch": 53.593528242974735, "grad_norm": 0.3026883602142334, "learning_rate": 4.642861197842748e-05, "loss": 0.002001916617155075, "step": 188810 }, { "epoch": 53.5963667328981, "grad_norm": 0.1791962832212448, "learning_rate": 4.642577348850412e-05, "loss": 0.0005995603278279305, "step": 188820 }, { "epoch": 53.59920522282146, "grad_norm": 0.0670161023736, "learning_rate": 4.642293499858076e-05, "loss": 0.0018175350502133369, "step": 188830 }, { "epoch": 53.602043712744816, "grad_norm": 0.5099794268608093, "learning_rate": 4.6420096508657395e-05, "loss": 0.0035638704895973204, "step": 188840 }, { "epoch": 53.60488220266818, "grad_norm": 0.22814388573169708, "learning_rate": 4.6417258018734036e-05, "loss": 0.001583326980471611, "step": 188850 }, { "epoch": 53.60772069259154, "grad_norm": 0.037258636206388474, "learning_rate": 4.641441952881068e-05, "loss": 0.0024504045024514197, "step": 188860 }, { "epoch": 53.610559182514905, "grad_norm": 1.7237563133239746, "learning_rate": 4.641158103888731e-05, "loss": 0.002083294466137886, "step": 188870 }, { "epoch": 53.61339767243826, "grad_norm": 0.183261901140213, "learning_rate": 4.6408742548963954e-05, "loss": 0.0003583701327443123, "step": 188880 }, { "epoch": 53.61623616236162, "grad_norm": 0.011732768267393112, "learning_rate": 4.6405904059040595e-05, "loss": 0.0017732346430420875, "step": 188890 }, { "epoch": 53.619074652284986, "grad_norm": 0.028748994693160057, "learning_rate": 4.640306556911723e-05, "loss": 0.0004817206412553787, "step": 188900 }, { "epoch": 53.62191314220834, "grad_norm": 0.1272525191307068, "learning_rate": 4.640022707919387e-05, "loss": 0.0009630387648940086, "step": 188910 }, { "epoch": 53.624751632131705, "grad_norm": 4.592484951019287, "learning_rate": 4.639738858927051e-05, "loss": 0.0011784028261899947, "step": 188920 }, { "epoch": 53.62759012205507, "grad_norm": 0.0870702862739563, "learning_rate": 4.639455009934715e-05, "loss": 0.003085043281316757, "step": 188930 }, { "epoch": 53.63042861197843, "grad_norm": 0.0361563116312027, "learning_rate": 4.639171160942379e-05, "loss": 0.0009189007803797721, "step": 188940 }, { "epoch": 53.63326710190179, "grad_norm": 0.12388407438993454, "learning_rate": 4.638887311950042e-05, "loss": 0.0013304010033607482, "step": 188950 }, { "epoch": 53.63610559182515, "grad_norm": 0.6145620942115784, "learning_rate": 4.638603462957707e-05, "loss": 0.00045438967645168306, "step": 188960 }, { "epoch": 53.63894408174851, "grad_norm": 3.5897934436798096, "learning_rate": 4.6383196139653706e-05, "loss": 0.001373514160513878, "step": 188970 }, { "epoch": 53.64178257167187, "grad_norm": 1.138921856880188, "learning_rate": 4.638035764973034e-05, "loss": 0.007842230051755905, "step": 188980 }, { "epoch": 53.64462106159523, "grad_norm": 0.7738030552864075, "learning_rate": 4.637751915980699e-05, "loss": 0.0030894435942173002, "step": 188990 }, { "epoch": 53.647459551518594, "grad_norm": 0.30103740096092224, "learning_rate": 4.637468066988362e-05, "loss": 0.0022830281406641005, "step": 189000 }, { "epoch": 53.647459551518594, "eval_accuracy": 0.9762192407960831, "eval_loss": 0.08706940710544586, "eval_runtime": 32.4851, "eval_samples_per_second": 484.13, "eval_steps_per_second": 7.573, "step": 189000 }, { "epoch": 53.65029804144195, "grad_norm": 0.6918962001800537, "learning_rate": 4.6371842179960265e-05, "loss": 0.0010351579636335372, "step": 189010 }, { "epoch": 53.65313653136531, "grad_norm": 0.05111856758594513, "learning_rate": 4.6369003690036906e-05, "loss": 0.006209845095872879, "step": 189020 }, { "epoch": 53.655975021288675, "grad_norm": 0.49328985810279846, "learning_rate": 4.636616520011354e-05, "loss": 0.0013452405110001564, "step": 189030 }, { "epoch": 53.65881351121204, "grad_norm": 1.1391907930374146, "learning_rate": 4.636332671019018e-05, "loss": 0.002011183649301529, "step": 189040 }, { "epoch": 53.661652001135394, "grad_norm": 0.01998521387577057, "learning_rate": 4.6360488220266816e-05, "loss": 0.004966780170798301, "step": 189050 }, { "epoch": 53.66449049105876, "grad_norm": 0.3614199161529541, "learning_rate": 4.635764973034346e-05, "loss": 0.00042575821280479433, "step": 189060 }, { "epoch": 53.66732898098212, "grad_norm": 0.14182338118553162, "learning_rate": 4.63548112404201e-05, "loss": 0.0002698248252272606, "step": 189070 }, { "epoch": 53.670167470905476, "grad_norm": 0.15767528116703033, "learning_rate": 4.6351972750496734e-05, "loss": 0.0003722000867128372, "step": 189080 }, { "epoch": 53.67300596082884, "grad_norm": 0.02104571834206581, "learning_rate": 4.634913426057338e-05, "loss": 0.0005773192271590233, "step": 189090 }, { "epoch": 53.6758444507522, "grad_norm": 0.09053391218185425, "learning_rate": 4.6346295770650017e-05, "loss": 0.00041388403624296186, "step": 189100 }, { "epoch": 53.67868294067556, "grad_norm": 0.02403920888900757, "learning_rate": 4.634345728072665e-05, "loss": 0.00041633546352386474, "step": 189110 }, { "epoch": 53.68152143059892, "grad_norm": 0.06246112659573555, "learning_rate": 4.63406187908033e-05, "loss": 0.0012833327054977417, "step": 189120 }, { "epoch": 53.68435992052228, "grad_norm": 0.015193790197372437, "learning_rate": 4.6337780300879934e-05, "loss": 0.0009062131866812706, "step": 189130 }, { "epoch": 53.687198410445646, "grad_norm": 17.454011917114258, "learning_rate": 4.6334941810956575e-05, "loss": 0.004297524690628052, "step": 189140 }, { "epoch": 53.690036900369, "grad_norm": 0.08305272459983826, "learning_rate": 4.633210332103321e-05, "loss": 0.0017290262505412102, "step": 189150 }, { "epoch": 53.692875390292365, "grad_norm": 0.2885409891605377, "learning_rate": 4.632926483110985e-05, "loss": 0.0010030638426542283, "step": 189160 }, { "epoch": 53.69571388021573, "grad_norm": 0.014678586274385452, "learning_rate": 4.632642634118649e-05, "loss": 0.0031177390366792677, "step": 189170 }, { "epoch": 53.69855237013908, "grad_norm": 0.6939734220504761, "learning_rate": 4.632358785126313e-05, "loss": 0.0015747612342238426, "step": 189180 }, { "epoch": 53.701390860062446, "grad_norm": 8.897331237792969, "learning_rate": 4.632074936133977e-05, "loss": 0.0038195673376321794, "step": 189190 }, { "epoch": 53.70422934998581, "grad_norm": 0.08537156134843826, "learning_rate": 4.631791087141641e-05, "loss": 0.0034681320190429687, "step": 189200 }, { "epoch": 53.70706783990917, "grad_norm": 0.33852535486221313, "learning_rate": 4.6315072381493045e-05, "loss": 0.003675052523612976, "step": 189210 }, { "epoch": 53.70990632983253, "grad_norm": 0.03338174521923065, "learning_rate": 4.631223389156969e-05, "loss": 0.00034920331090688707, "step": 189220 }, { "epoch": 53.71274481975589, "grad_norm": 0.4796745777130127, "learning_rate": 4.630939540164633e-05, "loss": 0.0017383914440870286, "step": 189230 }, { "epoch": 53.71558330967925, "grad_norm": 0.23669879138469696, "learning_rate": 4.630655691172296e-05, "loss": 0.0014797637239098548, "step": 189240 }, { "epoch": 53.71842179960261, "grad_norm": 0.10383355617523193, "learning_rate": 4.63037184217996e-05, "loss": 0.0012917250394821167, "step": 189250 }, { "epoch": 53.72126028952597, "grad_norm": 0.17062482237815857, "learning_rate": 4.6300879931876245e-05, "loss": 0.006341440230607986, "step": 189260 }, { "epoch": 53.724098779449335, "grad_norm": 0.21293208003044128, "learning_rate": 4.6298041441952886e-05, "loss": 0.0025379808619618418, "step": 189270 }, { "epoch": 53.72693726937269, "grad_norm": 0.22762413322925568, "learning_rate": 4.629520295202952e-05, "loss": 0.01632435917854309, "step": 189280 }, { "epoch": 53.729775759296054, "grad_norm": 0.05195756256580353, "learning_rate": 4.629236446210616e-05, "loss": 0.00036417804658412934, "step": 189290 }, { "epoch": 53.73261424921942, "grad_norm": 0.14508557319641113, "learning_rate": 4.6289525972182803e-05, "loss": 0.0015911296010017394, "step": 189300 }, { "epoch": 53.73545273914278, "grad_norm": 0.025452902540564537, "learning_rate": 4.628668748225944e-05, "loss": 0.0008297970518469811, "step": 189310 }, { "epoch": 53.738291229066135, "grad_norm": 0.273532509803772, "learning_rate": 4.628384899233608e-05, "loss": 0.006817734241485596, "step": 189320 }, { "epoch": 53.7411297189895, "grad_norm": 0.06399763375520706, "learning_rate": 4.628101050241272e-05, "loss": 0.0016867170110344888, "step": 189330 }, { "epoch": 53.74396820891286, "grad_norm": 1.3764978647232056, "learning_rate": 4.6278172012489355e-05, "loss": 0.0013445986434817315, "step": 189340 }, { "epoch": 53.74680669883622, "grad_norm": 0.05525941401720047, "learning_rate": 4.6275333522566e-05, "loss": 0.001662706397473812, "step": 189350 }, { "epoch": 53.74964518875958, "grad_norm": 0.019307225942611694, "learning_rate": 4.627249503264264e-05, "loss": 0.004553106427192688, "step": 189360 }, { "epoch": 53.75248367868294, "grad_norm": 15.523204803466797, "learning_rate": 4.626965654271927e-05, "loss": 0.006425189226865769, "step": 189370 }, { "epoch": 53.7553221686063, "grad_norm": Infinity, "learning_rate": 4.6266818052795914e-05, "loss": 0.006315889954566956, "step": 189380 }, { "epoch": 53.75816065852966, "grad_norm": 2.023550271987915, "learning_rate": 4.626426341186489e-05, "loss": 0.010281020402908325, "step": 189390 }, { "epoch": 53.760999148453024, "grad_norm": 0.05651474371552467, "learning_rate": 4.626142492194153e-05, "loss": 0.0009592682123184204, "step": 189400 }, { "epoch": 53.76383763837639, "grad_norm": 10.953176498413086, "learning_rate": 4.625858643201817e-05, "loss": 0.012157021462917328, "step": 189410 }, { "epoch": 53.76667612829974, "grad_norm": 0.040942542254924774, "learning_rate": 4.6255747942094805e-05, "loss": 0.016399207711219787, "step": 189420 }, { "epoch": 53.769514618223106, "grad_norm": 0.15226218104362488, "learning_rate": 4.6252909452171446e-05, "loss": 0.004528618603944779, "step": 189430 }, { "epoch": 53.77235310814647, "grad_norm": 18.527626037597656, "learning_rate": 4.625007096224809e-05, "loss": 0.005434627085924149, "step": 189440 }, { "epoch": 53.775191598069824, "grad_norm": 0.02767735719680786, "learning_rate": 4.624723247232472e-05, "loss": 0.005245065316557884, "step": 189450 }, { "epoch": 53.77803008799319, "grad_norm": 0.39277541637420654, "learning_rate": 4.624439398240136e-05, "loss": 0.002950715832412243, "step": 189460 }, { "epoch": 53.78086857791655, "grad_norm": 0.06754680722951889, "learning_rate": 4.6241555492478005e-05, "loss": 0.0018896477296948433, "step": 189470 }, { "epoch": 53.783707067839906, "grad_norm": 0.024361910298466682, "learning_rate": 4.6238717002554646e-05, "loss": 0.0006793098524212837, "step": 189480 }, { "epoch": 53.78654555776327, "grad_norm": 18.31993293762207, "learning_rate": 4.623587851263128e-05, "loss": 0.005990404635667801, "step": 189490 }, { "epoch": 53.78938404768663, "grad_norm": 0.14143411815166473, "learning_rate": 4.623304002270792e-05, "loss": 0.00049457848072052, "step": 189500 }, { "epoch": 53.78938404768663, "eval_accuracy": 0.9772365994786036, "eval_loss": 0.08411398530006409, "eval_runtime": 32.9337, "eval_samples_per_second": 477.535, "eval_steps_per_second": 7.47, "step": 189500 }, { "epoch": 53.792222537609995, "grad_norm": 0.021601110696792603, "learning_rate": 4.6230201532784563e-05, "loss": 0.0021941764280200005, "step": 189510 }, { "epoch": 53.79506102753335, "grad_norm": 9.54564380645752, "learning_rate": 4.62273630428612e-05, "loss": 0.0025540245696902275, "step": 189520 }, { "epoch": 53.79789951745671, "grad_norm": 0.01287209801375866, "learning_rate": 4.622452455293784e-05, "loss": 0.0035553470253944395, "step": 189530 }, { "epoch": 53.800738007380076, "grad_norm": 0.025906408205628395, "learning_rate": 4.622168606301448e-05, "loss": 0.0006148003041744233, "step": 189540 }, { "epoch": 53.80357649730343, "grad_norm": 0.11060051620006561, "learning_rate": 4.6218847573091115e-05, "loss": 0.0012214817106723785, "step": 189550 }, { "epoch": 53.806414987226795, "grad_norm": 0.028840314596891403, "learning_rate": 4.621600908316776e-05, "loss": 0.00043843276798725126, "step": 189560 }, { "epoch": 53.80925347715016, "grad_norm": 0.052746452391147614, "learning_rate": 4.62131705932444e-05, "loss": 0.0007230622693896293, "step": 189570 }, { "epoch": 53.81209196707351, "grad_norm": 0.2922099530696869, "learning_rate": 4.621033210332103e-05, "loss": 0.001693853922188282, "step": 189580 }, { "epoch": 53.814930456996876, "grad_norm": 0.24571073055267334, "learning_rate": 4.6207493613397674e-05, "loss": 0.0003926454111933708, "step": 189590 }, { "epoch": 53.81776894692024, "grad_norm": 0.08323043584823608, "learning_rate": 4.6204655123474315e-05, "loss": 0.0006954755634069442, "step": 189600 }, { "epoch": 53.8206074368436, "grad_norm": 0.024788228794932365, "learning_rate": 4.620181663355096e-05, "loss": 0.00025607813149690627, "step": 189610 }, { "epoch": 53.82344592676696, "grad_norm": 0.2623211145401001, "learning_rate": 4.619897814362759e-05, "loss": 0.00037547256797552107, "step": 189620 }, { "epoch": 53.82628441669032, "grad_norm": 0.05043733865022659, "learning_rate": 4.6196139653704226e-05, "loss": 0.00036613382399082185, "step": 189630 }, { "epoch": 53.829122906613684, "grad_norm": 0.08899254351854324, "learning_rate": 4.6193301163780874e-05, "loss": 0.0002988923341035843, "step": 189640 }, { "epoch": 53.83196139653704, "grad_norm": 1.3906793594360352, "learning_rate": 4.619046267385751e-05, "loss": 0.0015650900080800057, "step": 189650 }, { "epoch": 53.8347998864604, "grad_norm": 0.08224312216043472, "learning_rate": 4.618762418393415e-05, "loss": 0.0003687510266900063, "step": 189660 }, { "epoch": 53.837638376383765, "grad_norm": 0.006065186113119125, "learning_rate": 4.618478569401079e-05, "loss": 0.0024584300816059114, "step": 189670 }, { "epoch": 53.84047686630713, "grad_norm": 0.01789562590420246, "learning_rate": 4.6181947204087426e-05, "loss": 0.0006448995321989059, "step": 189680 }, { "epoch": 53.843315356230484, "grad_norm": 2.570772647857666, "learning_rate": 4.617910871416407e-05, "loss": 0.0010420428588986397, "step": 189690 }, { "epoch": 53.84615384615385, "grad_norm": 0.22996391355991364, "learning_rate": 4.617627022424071e-05, "loss": 0.0009878776967525483, "step": 189700 }, { "epoch": 53.84899233607721, "grad_norm": 0.05956209450960159, "learning_rate": 4.6173431734317343e-05, "loss": 0.00040733665227890017, "step": 189710 }, { "epoch": 53.851830826000565, "grad_norm": 0.6274832487106323, "learning_rate": 4.6170593244393985e-05, "loss": 0.0010274490341544151, "step": 189720 }, { "epoch": 53.85466931592393, "grad_norm": 0.0415818989276886, "learning_rate": 4.616775475447062e-05, "loss": 0.0007280947640538216, "step": 189730 }, { "epoch": 53.85750780584729, "grad_norm": 0.0592634491622448, "learning_rate": 4.616491626454727e-05, "loss": 0.00041999686509370805, "step": 189740 }, { "epoch": 53.86034629577065, "grad_norm": 0.012188628315925598, "learning_rate": 4.61620777746239e-05, "loss": 0.0009052179753780365, "step": 189750 }, { "epoch": 53.86318478569401, "grad_norm": 0.18655340373516083, "learning_rate": 4.615923928470054e-05, "loss": 0.001091170869767666, "step": 189760 }, { "epoch": 53.86602327561737, "grad_norm": 0.08614689856767654, "learning_rate": 4.6156400794777185e-05, "loss": 0.0009487850591540336, "step": 189770 }, { "epoch": 53.868861765540736, "grad_norm": 0.04204598814249039, "learning_rate": 4.615356230485382e-05, "loss": 0.0014914494007825852, "step": 189780 }, { "epoch": 53.87170025546409, "grad_norm": 0.08111171424388885, "learning_rate": 4.615072381493046e-05, "loss": 0.002232424542307854, "step": 189790 }, { "epoch": 53.874538745387454, "grad_norm": 0.25810083746910095, "learning_rate": 4.61478853250071e-05, "loss": 0.0012791948392987251, "step": 189800 }, { "epoch": 53.87737723531082, "grad_norm": 0.19585120677947998, "learning_rate": 4.614504683508374e-05, "loss": 0.0003525331616401672, "step": 189810 }, { "epoch": 53.88021572523417, "grad_norm": 0.13614630699157715, "learning_rate": 4.614220834516038e-05, "loss": 0.0024169130250811578, "step": 189820 }, { "epoch": 53.883054215157536, "grad_norm": 0.014362146146595478, "learning_rate": 4.613936985523701e-05, "loss": 0.0010011771693825722, "step": 189830 }, { "epoch": 53.8858927050809, "grad_norm": 0.00514635443687439, "learning_rate": 4.6136531365313654e-05, "loss": 0.0032917216420173647, "step": 189840 }, { "epoch": 53.888731195004254, "grad_norm": 1.447511911392212, "learning_rate": 4.6133692875390296e-05, "loss": 0.0015402790158987045, "step": 189850 }, { "epoch": 53.89156968492762, "grad_norm": 0.5010764598846436, "learning_rate": 4.613085438546693e-05, "loss": 0.0006503740325570107, "step": 189860 }, { "epoch": 53.89440817485098, "grad_norm": 0.25249022245407104, "learning_rate": 4.612801589554357e-05, "loss": 0.0005416134372353554, "step": 189870 }, { "epoch": 53.89724666477434, "grad_norm": 0.07815248519182205, "learning_rate": 4.612517740562021e-05, "loss": 0.003799545019865036, "step": 189880 }, { "epoch": 53.9000851546977, "grad_norm": 0.05863397568464279, "learning_rate": 4.612233891569685e-05, "loss": 0.003936757147312164, "step": 189890 }, { "epoch": 53.90292364462106, "grad_norm": 0.03863385692238808, "learning_rate": 4.6119500425773496e-05, "loss": 0.0003835948184132576, "step": 189900 }, { "epoch": 53.905762134544425, "grad_norm": 4.592468738555908, "learning_rate": 4.611666193585013e-05, "loss": 0.0011073751375079155, "step": 189910 }, { "epoch": 53.90860062446778, "grad_norm": 8.734415054321289, "learning_rate": 4.6113823445926765e-05, "loss": 0.002851998247206211, "step": 189920 }, { "epoch": 53.91143911439114, "grad_norm": 0.20079463720321655, "learning_rate": 4.6110984956003406e-05, "loss": 0.0034388467669487, "step": 189930 }, { "epoch": 53.914277604314506, "grad_norm": 0.9759150743484497, "learning_rate": 4.610814646608005e-05, "loss": 0.0005862940102815628, "step": 189940 }, { "epoch": 53.91711609423786, "grad_norm": 0.07106303423643112, "learning_rate": 4.610530797615669e-05, "loss": 0.0006806841120123863, "step": 189950 }, { "epoch": 53.919954584161225, "grad_norm": 0.2194651961326599, "learning_rate": 4.6102469486233324e-05, "loss": 0.0002784525975584984, "step": 189960 }, { "epoch": 53.92279307408459, "grad_norm": 0.018796708434820175, "learning_rate": 4.6099630996309965e-05, "loss": 0.00027405768632888795, "step": 189970 }, { "epoch": 53.92563156400795, "grad_norm": 0.40771278738975525, "learning_rate": 4.6096792506386606e-05, "loss": 0.0004953578114509583, "step": 189980 }, { "epoch": 53.928470053931306, "grad_norm": 0.01927034556865692, "learning_rate": 4.609395401646324e-05, "loss": 0.0003737468272447586, "step": 189990 }, { "epoch": 53.93130854385467, "grad_norm": 0.27646583318710327, "learning_rate": 4.609111552653988e-05, "loss": 0.0006116572767496109, "step": 190000 }, { "epoch": 53.93130854385467, "eval_accuracy": 0.9788262224200419, "eval_loss": 0.07482069730758667, "eval_runtime": 32.6115, "eval_samples_per_second": 482.253, "eval_steps_per_second": 7.543, "step": 190000 }, { "epoch": 53.93414703377803, "grad_norm": 0.4058345556259155, "learning_rate": 4.6088277036616524e-05, "loss": 0.0002704864367842674, "step": 190010 }, { "epoch": 53.93698552370139, "grad_norm": 0.023325767368078232, "learning_rate": 4.608543854669316e-05, "loss": 0.0012160364538431167, "step": 190020 }, { "epoch": 53.93982401362475, "grad_norm": 0.2957730293273926, "learning_rate": 4.60826000567698e-05, "loss": 0.002289293520152569, "step": 190030 }, { "epoch": 53.942662503548114, "grad_norm": 0.20680375397205353, "learning_rate": 4.607976156684644e-05, "loss": 0.0005347423255443573, "step": 190040 }, { "epoch": 53.94550099347148, "grad_norm": 1.0706795454025269, "learning_rate": 4.6076923076923076e-05, "loss": 0.0006216930225491524, "step": 190050 }, { "epoch": 53.94833948339483, "grad_norm": 0.028042763471603394, "learning_rate": 4.607408458699972e-05, "loss": 0.0005715759471058846, "step": 190060 }, { "epoch": 53.951177973318195, "grad_norm": 0.022818604484200478, "learning_rate": 4.607124609707636e-05, "loss": 0.000824689120054245, "step": 190070 }, { "epoch": 53.95401646324156, "grad_norm": 1.9552831649780273, "learning_rate": 4.6068407607153e-05, "loss": 0.001447206549346447, "step": 190080 }, { "epoch": 53.956854953164914, "grad_norm": 0.13339178264141083, "learning_rate": 4.6065569117229634e-05, "loss": 0.0003341201692819595, "step": 190090 }, { "epoch": 53.95969344308828, "grad_norm": 0.37993094325065613, "learning_rate": 4.6062730627306276e-05, "loss": 0.0004221992567181587, "step": 190100 }, { "epoch": 53.96253193301164, "grad_norm": 0.16393068432807922, "learning_rate": 4.605989213738292e-05, "loss": 0.0015563966706395149, "step": 190110 }, { "epoch": 53.965370422934996, "grad_norm": 0.06460926681756973, "learning_rate": 4.605705364745955e-05, "loss": 0.004735343903303146, "step": 190120 }, { "epoch": 53.96820891285836, "grad_norm": 0.09176139533519745, "learning_rate": 4.605421515753619e-05, "loss": 0.0048593848943710325, "step": 190130 }, { "epoch": 53.97104740278172, "grad_norm": 3.7055065631866455, "learning_rate": 4.6051376667612835e-05, "loss": 0.0030064450576901434, "step": 190140 }, { "epoch": 53.973885892705084, "grad_norm": 0.1778849959373474, "learning_rate": 4.604853817768947e-05, "loss": 0.01610005795955658, "step": 190150 }, { "epoch": 53.97672438262844, "grad_norm": 0.14732691645622253, "learning_rate": 4.604569968776611e-05, "loss": 0.0024450957775115968, "step": 190160 }, { "epoch": 53.9795628725518, "grad_norm": 0.9671622514724731, "learning_rate": 4.604286119784275e-05, "loss": 0.0006380550563335419, "step": 190170 }, { "epoch": 53.982401362475166, "grad_norm": 0.10074272006750107, "learning_rate": 4.6040022707919386e-05, "loss": 0.0007599027827382088, "step": 190180 }, { "epoch": 53.98523985239852, "grad_norm": 0.008153036236763, "learning_rate": 4.603718421799603e-05, "loss": 0.0017909526824951172, "step": 190190 }, { "epoch": 53.988078342321884, "grad_norm": 12.13166332244873, "learning_rate": 4.603434572807267e-05, "loss": 0.0018133584409952164, "step": 190200 }, { "epoch": 53.99091683224525, "grad_norm": 0.18008406460285187, "learning_rate": 4.603150723814931e-05, "loss": 0.0021529370918869973, "step": 190210 }, { "epoch": 53.9937553221686, "grad_norm": 0.1943129152059555, "learning_rate": 4.6028668748225945e-05, "loss": 0.0006942916661500931, "step": 190220 }, { "epoch": 53.996593812091966, "grad_norm": 0.07712505757808685, "learning_rate": 4.6025830258302587e-05, "loss": 0.0045285351574420925, "step": 190230 }, { "epoch": 53.99943230201533, "grad_norm": 0.01945650205016136, "learning_rate": 4.602299176837923e-05, "loss": 0.0012741034850478172, "step": 190240 }, { "epoch": 54.00227079193869, "grad_norm": 0.04336756467819214, "learning_rate": 4.602015327845586e-05, "loss": 0.0004376681987196207, "step": 190250 }, { "epoch": 54.00510928186205, "grad_norm": 0.058626607060432434, "learning_rate": 4.6017314788532504e-05, "loss": 0.002380247227847576, "step": 190260 }, { "epoch": 54.00794777178541, "grad_norm": 0.025904756039381027, "learning_rate": 4.6014476298609145e-05, "loss": 0.00265309102833271, "step": 190270 }, { "epoch": 54.01078626170877, "grad_norm": 0.10968022048473358, "learning_rate": 4.601163780868578e-05, "loss": 0.0013353103771805764, "step": 190280 }, { "epoch": 54.01362475163213, "grad_norm": 0.08156608790159225, "learning_rate": 4.600879931876242e-05, "loss": 0.004735436290502548, "step": 190290 }, { "epoch": 54.01646324155549, "grad_norm": 0.02988324500620365, "learning_rate": 4.600596082883906e-05, "loss": 0.0008209524676203728, "step": 190300 }, { "epoch": 54.019301731478855, "grad_norm": 0.03307041525840759, "learning_rate": 4.60031223389157e-05, "loss": 0.0004901405423879624, "step": 190310 }, { "epoch": 54.02214022140221, "grad_norm": 0.2678154706954956, "learning_rate": 4.600028384899234e-05, "loss": 0.00025896038860082624, "step": 190320 }, { "epoch": 54.02497871132557, "grad_norm": 0.23837795853614807, "learning_rate": 4.599744535906898e-05, "loss": 0.0006772223860025406, "step": 190330 }, { "epoch": 54.027817201248936, "grad_norm": 0.1294577270746231, "learning_rate": 4.5994606869145615e-05, "loss": 0.0017785361036658288, "step": 190340 }, { "epoch": 54.0306556911723, "grad_norm": 0.33269912004470825, "learning_rate": 4.5991768379222256e-05, "loss": 0.00022858977317810058, "step": 190350 }, { "epoch": 54.033494181095655, "grad_norm": 0.008971957489848137, "learning_rate": 4.598892988929889e-05, "loss": 0.0008467480540275574, "step": 190360 }, { "epoch": 54.03633267101902, "grad_norm": 1.3784480094909668, "learning_rate": 4.598609139937554e-05, "loss": 0.0035009533166885376, "step": 190370 }, { "epoch": 54.03917116094238, "grad_norm": 0.03688269108533859, "learning_rate": 4.598325290945217e-05, "loss": 0.00036978591233491895, "step": 190380 }, { "epoch": 54.04200965086574, "grad_norm": 2.610508918762207, "learning_rate": 4.598041441952881e-05, "loss": 0.0009543802589178085, "step": 190390 }, { "epoch": 54.0448481407891, "grad_norm": 0.015336356125772, "learning_rate": 4.5977575929605456e-05, "loss": 0.0007450997829437256, "step": 190400 }, { "epoch": 54.04768663071246, "grad_norm": 0.07187741994857788, "learning_rate": 4.597473743968209e-05, "loss": 0.0008507147431373596, "step": 190410 }, { "epoch": 54.050525120635825, "grad_norm": 0.11248627305030823, "learning_rate": 4.597189894975873e-05, "loss": 0.00016391444951295852, "step": 190420 }, { "epoch": 54.05336361055918, "grad_norm": 0.18101564049720764, "learning_rate": 4.5969060459835373e-05, "loss": 0.0005840940400958061, "step": 190430 }, { "epoch": 54.056202100482544, "grad_norm": 0.06716714799404144, "learning_rate": 4.596622196991201e-05, "loss": 0.0004270797595381737, "step": 190440 }, { "epoch": 54.05904059040591, "grad_norm": 0.039141811430454254, "learning_rate": 4.596338347998865e-05, "loss": 0.000286259688436985, "step": 190450 }, { "epoch": 54.06187908032926, "grad_norm": 0.027115758508443832, "learning_rate": 4.5960544990065284e-05, "loss": 0.00038269311189651487, "step": 190460 }, { "epoch": 54.064717570252625, "grad_norm": 0.036761052906513214, "learning_rate": 4.5957706500141925e-05, "loss": 0.00014310982078313828, "step": 190470 }, { "epoch": 54.06755606017599, "grad_norm": 0.011145690456032753, "learning_rate": 4.595486801021857e-05, "loss": 0.0002455353736877441, "step": 190480 }, { "epoch": 54.070394550099344, "grad_norm": 0.007584948092699051, "learning_rate": 4.59520295202952e-05, "loss": 0.0004253394901752472, "step": 190490 }, { "epoch": 54.07323304002271, "grad_norm": 0.03196874260902405, "learning_rate": 4.594919103037185e-05, "loss": 0.0029203345999121665, "step": 190500 }, { "epoch": 54.07323304002271, "eval_accuracy": 0.9783811279964393, "eval_loss": 0.0743328109383583, "eval_runtime": 32.6456, "eval_samples_per_second": 481.749, "eval_steps_per_second": 7.535, "step": 190500 }, { "epoch": 54.07607152994607, "grad_norm": 7.898156642913818, "learning_rate": 4.5946352540448484e-05, "loss": 0.001129193603992462, "step": 190510 }, { "epoch": 54.07891001986943, "grad_norm": 0.10997507721185684, "learning_rate": 4.594351405052512e-05, "loss": 0.0012220559641718865, "step": 190520 }, { "epoch": 54.08174850979279, "grad_norm": 0.03728827089071274, "learning_rate": 4.594067556060177e-05, "loss": 0.0008155627176165581, "step": 190530 }, { "epoch": 54.08458699971615, "grad_norm": 1.9674068689346313, "learning_rate": 4.59378370706784e-05, "loss": 0.008432667702436447, "step": 190540 }, { "epoch": 54.087425489639514, "grad_norm": 0.020622720941901207, "learning_rate": 4.593499858075504e-05, "loss": 0.0005267409607768059, "step": 190550 }, { "epoch": 54.09026397956287, "grad_norm": 0.04700889810919762, "learning_rate": 4.593216009083168e-05, "loss": 0.0004438776522874832, "step": 190560 }, { "epoch": 54.09310246948623, "grad_norm": 0.052473366260528564, "learning_rate": 4.592932160090832e-05, "loss": 0.0005018325522542, "step": 190570 }, { "epoch": 54.095940959409596, "grad_norm": 0.1601518839597702, "learning_rate": 4.592648311098496e-05, "loss": 0.004880205541849136, "step": 190580 }, { "epoch": 54.09877944933295, "grad_norm": 0.056338779628276825, "learning_rate": 4.5923644621061595e-05, "loss": 0.004223726689815521, "step": 190590 }, { "epoch": 54.101617939256315, "grad_norm": 1.8295094966888428, "learning_rate": 4.5920806131138236e-05, "loss": 0.0006453650072216988, "step": 190600 }, { "epoch": 54.10445642917968, "grad_norm": 0.04320772737264633, "learning_rate": 4.591796764121488e-05, "loss": 0.000980471633374691, "step": 190610 }, { "epoch": 54.10729491910304, "grad_norm": 0.32836875319480896, "learning_rate": 4.591512915129151e-05, "loss": 0.0011575426906347275, "step": 190620 }, { "epoch": 54.110133409026396, "grad_norm": 0.10729886591434479, "learning_rate": 4.591229066136816e-05, "loss": 0.00022611748427152633, "step": 190630 }, { "epoch": 54.11297189894976, "grad_norm": 0.04957544431090355, "learning_rate": 4.5909452171444795e-05, "loss": 0.0008996525779366493, "step": 190640 }, { "epoch": 54.11581038887312, "grad_norm": 0.022691527381539345, "learning_rate": 4.590661368152143e-05, "loss": 0.0007292501628398895, "step": 190650 }, { "epoch": 54.11864887879648, "grad_norm": 0.4851768910884857, "learning_rate": 4.590377519159807e-05, "loss": 0.002762913890182972, "step": 190660 }, { "epoch": 54.12148736871984, "grad_norm": 0.043385524302721024, "learning_rate": 4.590093670167471e-05, "loss": 0.005570866167545319, "step": 190670 }, { "epoch": 54.1243258586432, "grad_norm": 0.5570418834686279, "learning_rate": 4.5898098211751354e-05, "loss": 0.0009375890716910362, "step": 190680 }, { "epoch": 54.12716434856656, "grad_norm": 0.050397127866744995, "learning_rate": 4.589525972182799e-05, "loss": 0.00035753827542066573, "step": 190690 }, { "epoch": 54.13000283848992, "grad_norm": 0.0205315463244915, "learning_rate": 4.589242123190463e-05, "loss": 0.0009131802245974541, "step": 190700 }, { "epoch": 54.132841328413285, "grad_norm": 0.05958734452724457, "learning_rate": 4.588958274198127e-05, "loss": 0.001697041280567646, "step": 190710 }, { "epoch": 54.13567981833665, "grad_norm": 6.207794189453125, "learning_rate": 4.5886744252057906e-05, "loss": 0.004091175645589829, "step": 190720 }, { "epoch": 54.138518308260004, "grad_norm": 0.05499005317687988, "learning_rate": 4.588390576213455e-05, "loss": 0.0006162609905004501, "step": 190730 }, { "epoch": 54.14135679818337, "grad_norm": 0.2337942272424698, "learning_rate": 4.588106727221119e-05, "loss": 0.0003521820530295372, "step": 190740 }, { "epoch": 54.14419528810673, "grad_norm": 0.11948271095752716, "learning_rate": 4.587822878228782e-05, "loss": 0.0003924405202269554, "step": 190750 }, { "epoch": 54.147033778030085, "grad_norm": 0.05974902585148811, "learning_rate": 4.5875390292364464e-05, "loss": 0.0004996908828616142, "step": 190760 }, { "epoch": 54.14987226795345, "grad_norm": 0.08094682544469833, "learning_rate": 4.5872551802441106e-05, "loss": 0.00017572548240423201, "step": 190770 }, { "epoch": 54.15271075787681, "grad_norm": 0.005863814614713192, "learning_rate": 4.586971331251774e-05, "loss": 0.0002920014783740044, "step": 190780 }, { "epoch": 54.15554924780017, "grad_norm": 0.15569227933883667, "learning_rate": 4.586687482259438e-05, "loss": 0.000244101881980896, "step": 190790 }, { "epoch": 54.15838773772353, "grad_norm": 0.014053049497306347, "learning_rate": 4.586403633267102e-05, "loss": 0.00016052387654781343, "step": 190800 }, { "epoch": 54.16122622764689, "grad_norm": 0.023193303495645523, "learning_rate": 4.586119784274766e-05, "loss": 0.0002686984837055206, "step": 190810 }, { "epoch": 54.164064717570255, "grad_norm": 6.790426254272461, "learning_rate": 4.58583593528243e-05, "loss": 0.0017693564295768738, "step": 190820 }, { "epoch": 54.16690320749361, "grad_norm": 0.17603667080402374, "learning_rate": 4.585552086290094e-05, "loss": 0.0005218934267759324, "step": 190830 }, { "epoch": 54.169741697416974, "grad_norm": 0.021246036514639854, "learning_rate": 4.585268237297758e-05, "loss": 0.0008829269558191299, "step": 190840 }, { "epoch": 54.17258018734034, "grad_norm": 0.054241664707660675, "learning_rate": 4.5849843883054216e-05, "loss": 0.0005923217162489891, "step": 190850 }, { "epoch": 54.17541867726369, "grad_norm": 0.010939565487205982, "learning_rate": 4.584700539313085e-05, "loss": 0.00018359683454036713, "step": 190860 }, { "epoch": 54.178257167187056, "grad_norm": 0.12357290089130402, "learning_rate": 4.58441669032075e-05, "loss": 0.00020604655146598815, "step": 190870 }, { "epoch": 54.18109565711042, "grad_norm": 0.0812787115573883, "learning_rate": 4.5841328413284134e-05, "loss": 0.00022991299629211427, "step": 190880 }, { "epoch": 54.18393414703378, "grad_norm": 0.02822262980043888, "learning_rate": 4.5838489923360775e-05, "loss": 0.00026959292590618135, "step": 190890 }, { "epoch": 54.18677263695714, "grad_norm": 0.05768641084432602, "learning_rate": 4.5835651433437416e-05, "loss": 0.00035768505185842516, "step": 190900 }, { "epoch": 54.1896111268805, "grad_norm": 0.30397072434425354, "learning_rate": 4.583281294351405e-05, "loss": 0.00040937867015600204, "step": 190910 }, { "epoch": 54.19244961680386, "grad_norm": 0.023846931755542755, "learning_rate": 4.582997445359069e-05, "loss": 0.0002432873472571373, "step": 190920 }, { "epoch": 54.19528810672722, "grad_norm": 2.2404913902282715, "learning_rate": 4.5827135963667334e-05, "loss": 0.0005140436813235282, "step": 190930 }, { "epoch": 54.19812659665058, "grad_norm": 0.12774401903152466, "learning_rate": 4.582429747374397e-05, "loss": 0.0016571661457419396, "step": 190940 }, { "epoch": 54.200965086573945, "grad_norm": 0.23690098524093628, "learning_rate": 4.582145898382061e-05, "loss": 0.0003392687067389488, "step": 190950 }, { "epoch": 54.2038035764973, "grad_norm": 0.03348930552601814, "learning_rate": 4.5818620493897244e-05, "loss": 0.0003225687891244888, "step": 190960 }, { "epoch": 54.20664206642066, "grad_norm": 0.04760325327515602, "learning_rate": 4.581578200397389e-05, "loss": 0.0013542346656322478, "step": 190970 }, { "epoch": 54.209480556344026, "grad_norm": 0.43742257356643677, "learning_rate": 4.581294351405053e-05, "loss": 0.0010713983327150344, "step": 190980 }, { "epoch": 54.21231904626739, "grad_norm": 0.006574250757694244, "learning_rate": 4.581010502412716e-05, "loss": 0.0007963255047798157, "step": 190990 }, { "epoch": 54.215157536190745, "grad_norm": 0.02654959261417389, "learning_rate": 4.580726653420381e-05, "loss": 0.0007808137685060501, "step": 191000 }, { "epoch": 54.215157536190745, "eval_accuracy": 0.9786354676670693, "eval_loss": 0.07990726828575134, "eval_runtime": 32.521, "eval_samples_per_second": 483.595, "eval_steps_per_second": 7.564, "step": 191000 }, { "epoch": 54.21799602611411, "grad_norm": 0.027145924046635628, "learning_rate": 4.5804428044280444e-05, "loss": 0.00040116701275110244, "step": 191010 }, { "epoch": 54.22083451603747, "grad_norm": 2.6159069538116455, "learning_rate": 4.5801589554357086e-05, "loss": 0.0005386099219322205, "step": 191020 }, { "epoch": 54.223673005960826, "grad_norm": 0.7345849871635437, "learning_rate": 4.579875106443373e-05, "loss": 0.0004670847207307816, "step": 191030 }, { "epoch": 54.22651149588419, "grad_norm": 0.3353499472141266, "learning_rate": 4.579591257451036e-05, "loss": 0.0005030401051044464, "step": 191040 }, { "epoch": 54.22934998580755, "grad_norm": 0.7025876641273499, "learning_rate": 4.5793074084587e-05, "loss": 0.005337405204772949, "step": 191050 }, { "epoch": 54.23218847573091, "grad_norm": 0.5406635403633118, "learning_rate": 4.579023559466364e-05, "loss": 0.003859071433544159, "step": 191060 }, { "epoch": 54.23502696565427, "grad_norm": 0.032845284789800644, "learning_rate": 4.578739710474028e-05, "loss": 0.00019309762865304947, "step": 191070 }, { "epoch": 54.237865455577634, "grad_norm": 0.07537852972745895, "learning_rate": 4.578455861481692e-05, "loss": 0.013812538981437684, "step": 191080 }, { "epoch": 54.240703945501, "grad_norm": 0.052238769829273224, "learning_rate": 4.5781720124893555e-05, "loss": 0.00025481339544057845, "step": 191090 }, { "epoch": 54.24354243542435, "grad_norm": 0.03162602335214615, "learning_rate": 4.57788816349702e-05, "loss": 0.0003665471449494362, "step": 191100 }, { "epoch": 54.246380925347715, "grad_norm": 0.03463372960686684, "learning_rate": 4.577604314504684e-05, "loss": 0.002721059322357178, "step": 191110 }, { "epoch": 54.24921941527108, "grad_norm": 0.00753134535625577, "learning_rate": 4.577320465512347e-05, "loss": 0.0022253615781664847, "step": 191120 }, { "epoch": 54.252057905194434, "grad_norm": 0.012002123519778252, "learning_rate": 4.577036616520012e-05, "loss": 0.0007774662226438522, "step": 191130 }, { "epoch": 54.2548963951178, "grad_norm": 0.09558827430009842, "learning_rate": 4.5767527675276755e-05, "loss": 0.0033770311623811723, "step": 191140 }, { "epoch": 54.25773488504116, "grad_norm": 0.05416702851653099, "learning_rate": 4.5764689185353397e-05, "loss": 0.0009279221296310425, "step": 191150 }, { "epoch": 54.260573374964515, "grad_norm": 0.13274389505386353, "learning_rate": 4.576185069543003e-05, "loss": 0.0014615826308727264, "step": 191160 }, { "epoch": 54.26341186488788, "grad_norm": 0.06981343030929565, "learning_rate": 4.575901220550667e-05, "loss": 0.004239465296268463, "step": 191170 }, { "epoch": 54.26625035481124, "grad_norm": 0.011452543549239635, "learning_rate": 4.5756173715583314e-05, "loss": 0.001320442743599415, "step": 191180 }, { "epoch": 54.269088844734604, "grad_norm": 0.6174089908599854, "learning_rate": 4.575333522565995e-05, "loss": 0.0014120515435934067, "step": 191190 }, { "epoch": 54.27192733465796, "grad_norm": 1.648452877998352, "learning_rate": 4.575049673573659e-05, "loss": 0.004841911047697068, "step": 191200 }, { "epoch": 54.27476582458132, "grad_norm": 4.7957844734191895, "learning_rate": 4.574765824581323e-05, "loss": 0.002529970556497574, "step": 191210 }, { "epoch": 54.277604314504686, "grad_norm": 0.18139295279979706, "learning_rate": 4.5744819755889866e-05, "loss": 0.000805499404668808, "step": 191220 }, { "epoch": 54.28044280442804, "grad_norm": 0.7347800135612488, "learning_rate": 4.574198126596651e-05, "loss": 0.0025661103427410126, "step": 191230 }, { "epoch": 54.283281294351404, "grad_norm": 0.7614655494689941, "learning_rate": 4.573914277604315e-05, "loss": 0.0005474206060171128, "step": 191240 }, { "epoch": 54.28611978427477, "grad_norm": 0.7470982670783997, "learning_rate": 4.573630428611978e-05, "loss": 0.00046431403607130053, "step": 191250 }, { "epoch": 54.28895827419813, "grad_norm": 0.1361372172832489, "learning_rate": 4.5733465796196425e-05, "loss": 0.000633927620947361, "step": 191260 }, { "epoch": 54.291796764121486, "grad_norm": 14.023747444152832, "learning_rate": 4.5730627306273066e-05, "loss": 0.004489714652299881, "step": 191270 }, { "epoch": 54.29463525404485, "grad_norm": 0.1560022234916687, "learning_rate": 4.57277888163497e-05, "loss": 0.00188782699406147, "step": 191280 }, { "epoch": 54.29747374396821, "grad_norm": 0.041179411113262177, "learning_rate": 4.572495032642634e-05, "loss": 0.0007719064131379127, "step": 191290 }, { "epoch": 54.30031223389157, "grad_norm": 0.20201550424098969, "learning_rate": 4.572211183650298e-05, "loss": 0.0011780181899666785, "step": 191300 }, { "epoch": 54.30315072381493, "grad_norm": 0.24065889418125153, "learning_rate": 4.5719273346579625e-05, "loss": 0.01725877970457077, "step": 191310 }, { "epoch": 54.30598921373829, "grad_norm": 0.06295903772115707, "learning_rate": 4.571643485665626e-05, "loss": 0.00045367050915956497, "step": 191320 }, { "epoch": 54.30882770366165, "grad_norm": 0.2680228352546692, "learning_rate": 4.57135963667329e-05, "loss": 0.0006093390285968781, "step": 191330 }, { "epoch": 54.31166619358501, "grad_norm": 0.08256310224533081, "learning_rate": 4.571075787680954e-05, "loss": 0.0002137446776032448, "step": 191340 }, { "epoch": 54.314504683508375, "grad_norm": 0.0482572503387928, "learning_rate": 4.5707919386886177e-05, "loss": 0.007931867986917496, "step": 191350 }, { "epoch": 54.31734317343174, "grad_norm": 0.019631007686257362, "learning_rate": 4.570508089696282e-05, "loss": 0.0003249365836381912, "step": 191360 }, { "epoch": 54.32018166335509, "grad_norm": 0.007392289116978645, "learning_rate": 4.570224240703946e-05, "loss": 0.000434829480946064, "step": 191370 }, { "epoch": 54.323020153278456, "grad_norm": 0.025458846241235733, "learning_rate": 4.5699403917116094e-05, "loss": 0.0002754811197519302, "step": 191380 }, { "epoch": 54.32585864320182, "grad_norm": 0.9495893716812134, "learning_rate": 4.5696565427192735e-05, "loss": 0.0008845260366797447, "step": 191390 }, { "epoch": 54.328697133125175, "grad_norm": 0.07244095206260681, "learning_rate": 4.569372693726938e-05, "loss": 0.0005084015429019928, "step": 191400 }, { "epoch": 54.33153562304854, "grad_norm": 0.04121003299951553, "learning_rate": 4.569088844734601e-05, "loss": 0.0019967040047049523, "step": 191410 }, { "epoch": 54.3343741129719, "grad_norm": 0.04737447202205658, "learning_rate": 4.568804995742265e-05, "loss": 0.0006064316257834435, "step": 191420 }, { "epoch": 54.33721260289526, "grad_norm": 0.11682509630918503, "learning_rate": 4.5685211467499294e-05, "loss": 0.003230658173561096, "step": 191430 }, { "epoch": 54.34005109281862, "grad_norm": 0.11286338418722153, "learning_rate": 4.5682372977575935e-05, "loss": 0.00032396893948316574, "step": 191440 }, { "epoch": 54.34288958274198, "grad_norm": 0.02628742903470993, "learning_rate": 4.567953448765257e-05, "loss": 0.0008755277842283249, "step": 191450 }, { "epoch": 54.345728072665345, "grad_norm": 0.10322128236293793, "learning_rate": 4.5676695997729205e-05, "loss": 0.0001767423003911972, "step": 191460 }, { "epoch": 54.3485665625887, "grad_norm": 0.429061621427536, "learning_rate": 4.567385750780585e-05, "loss": 0.00015796199440956115, "step": 191470 }, { "epoch": 54.351405052512064, "grad_norm": 0.03771941736340523, "learning_rate": 4.567101901788249e-05, "loss": 9.199436753988266e-05, "step": 191480 }, { "epoch": 54.35424354243543, "grad_norm": 0.03777378425002098, "learning_rate": 4.566818052795913e-05, "loss": 0.0006996814161539077, "step": 191490 }, { "epoch": 54.35708203235878, "grad_norm": 1.254267692565918, "learning_rate": 4.566534203803577e-05, "loss": 0.005466943979263306, "step": 191500 }, { "epoch": 54.35708203235878, "eval_accuracy": 0.9782539581611241, "eval_loss": 0.08096545934677124, "eval_runtime": 32.7311, "eval_samples_per_second": 480.49, "eval_steps_per_second": 7.516, "step": 191500 }, { "epoch": 54.359920522282145, "grad_norm": 0.8003575205802917, "learning_rate": 4.5662503548112405e-05, "loss": 0.0006389133632183075, "step": 191510 }, { "epoch": 54.36275901220551, "grad_norm": 0.2379906177520752, "learning_rate": 4.5659665058189046e-05, "loss": 0.0004051385447382927, "step": 191520 }, { "epoch": 54.365597502128864, "grad_norm": 0.20272108912467957, "learning_rate": 4.565682656826569e-05, "loss": 0.0007701417431235314, "step": 191530 }, { "epoch": 54.36843599205223, "grad_norm": 0.13115806877613068, "learning_rate": 4.565398807834232e-05, "loss": 0.000862884521484375, "step": 191540 }, { "epoch": 54.37127448197559, "grad_norm": 0.6040709614753723, "learning_rate": 4.5651149588418963e-05, "loss": 0.0005554372444748878, "step": 191550 }, { "epoch": 54.37411297189895, "grad_norm": 0.09156836569309235, "learning_rate": 4.5648311098495605e-05, "loss": 0.004020366817712784, "step": 191560 }, { "epoch": 54.37695146182231, "grad_norm": 0.43785208463668823, "learning_rate": 4.5645472608572246e-05, "loss": 0.011412200331687928, "step": 191570 }, { "epoch": 54.37978995174567, "grad_norm": 0.21632596850395203, "learning_rate": 4.564263411864888e-05, "loss": 0.009718668460845948, "step": 191580 }, { "epoch": 54.382628441669034, "grad_norm": 1.2963097095489502, "learning_rate": 4.5639795628725515e-05, "loss": 0.01190505251288414, "step": 191590 }, { "epoch": 54.38546693159239, "grad_norm": 0.03205480799078941, "learning_rate": 4.5636957138802164e-05, "loss": 0.009194850176572799, "step": 191600 }, { "epoch": 54.38830542151575, "grad_norm": 5.241995811462402, "learning_rate": 4.56341186488788e-05, "loss": 0.007095793634653092, "step": 191610 }, { "epoch": 54.391143911439116, "grad_norm": 4.867777347564697, "learning_rate": 4.563128015895544e-05, "loss": 0.001996377855539322, "step": 191620 }, { "epoch": 54.39398240136248, "grad_norm": 0.162331223487854, "learning_rate": 4.562844166903208e-05, "loss": 0.0006814096122980118, "step": 191630 }, { "epoch": 54.396820891285834, "grad_norm": 0.01757028140127659, "learning_rate": 4.5625603179108715e-05, "loss": 0.0007355289533734321, "step": 191640 }, { "epoch": 54.3996593812092, "grad_norm": 0.07736075669527054, "learning_rate": 4.562276468918536e-05, "loss": 0.0003492781892418861, "step": 191650 }, { "epoch": 54.40249787113256, "grad_norm": 0.40973594784736633, "learning_rate": 4.5619926199262e-05, "loss": 0.0018367763608694077, "step": 191660 }, { "epoch": 54.405336361055916, "grad_norm": 1.6062086820602417, "learning_rate": 4.561708770933863e-05, "loss": 0.0016108294948935509, "step": 191670 }, { "epoch": 54.40817485097928, "grad_norm": 0.07022950798273087, "learning_rate": 4.5614249219415274e-05, "loss": 0.005348193645477295, "step": 191680 }, { "epoch": 54.41101334090264, "grad_norm": 0.13323922455310822, "learning_rate": 4.561141072949191e-05, "loss": 0.00034225676208734514, "step": 191690 }, { "epoch": 54.413851830826, "grad_norm": 0.4637349545955658, "learning_rate": 4.560857223956855e-05, "loss": 0.0010190952569246292, "step": 191700 }, { "epoch": 54.41669032074936, "grad_norm": 5.711320400238037, "learning_rate": 4.560573374964519e-05, "loss": 0.008157269656658172, "step": 191710 }, { "epoch": 54.41952881067272, "grad_norm": 2.7402095794677734, "learning_rate": 4.5602895259721826e-05, "loss": 0.0021525029093027114, "step": 191720 }, { "epoch": 54.422367300596086, "grad_norm": 0.2618250846862793, "learning_rate": 4.5600056769798474e-05, "loss": 0.010515743494033813, "step": 191730 }, { "epoch": 54.42520579051944, "grad_norm": 0.0976276844739914, "learning_rate": 4.559721827987511e-05, "loss": 0.0018458623439073563, "step": 191740 }, { "epoch": 54.428044280442805, "grad_norm": 0.01917622983455658, "learning_rate": 4.5594379789951743e-05, "loss": 0.004306823760271072, "step": 191750 }, { "epoch": 54.43088277036617, "grad_norm": 0.1566542237997055, "learning_rate": 4.559154130002839e-05, "loss": 0.00032712016254663465, "step": 191760 }, { "epoch": 54.43372126028952, "grad_norm": 0.034231625497341156, "learning_rate": 4.5588702810105026e-05, "loss": 0.0007139647379517556, "step": 191770 }, { "epoch": 54.436559750212886, "grad_norm": 0.08058899641036987, "learning_rate": 4.558586432018167e-05, "loss": 0.0005681518465280533, "step": 191780 }, { "epoch": 54.43939824013625, "grad_norm": 0.23930566012859344, "learning_rate": 4.55830258302583e-05, "loss": 0.012514838576316833, "step": 191790 }, { "epoch": 54.442236730059605, "grad_norm": 0.02976282499730587, "learning_rate": 4.5580187340334944e-05, "loss": 0.001925123855471611, "step": 191800 }, { "epoch": 54.44507521998297, "grad_norm": 0.026894567534327507, "learning_rate": 4.5577348850411585e-05, "loss": 0.0004089120775461197, "step": 191810 }, { "epoch": 54.44791370990633, "grad_norm": 0.9611183404922485, "learning_rate": 4.557451036048822e-05, "loss": 0.001247238926589489, "step": 191820 }, { "epoch": 54.450752199829694, "grad_norm": 0.09460238367319107, "learning_rate": 4.557167187056486e-05, "loss": 0.0009379588067531585, "step": 191830 }, { "epoch": 54.45359068975305, "grad_norm": 0.175171360373497, "learning_rate": 4.55688333806415e-05, "loss": 0.0015018945559859276, "step": 191840 }, { "epoch": 54.45642917967641, "grad_norm": Infinity, "learning_rate": 4.556599489071814e-05, "loss": 0.020250675082206727, "step": 191850 }, { "epoch": 54.459267669599775, "grad_norm": 1.2165013551712036, "learning_rate": 4.556344024978712e-05, "loss": 0.00045145489275455475, "step": 191860 }, { "epoch": 54.46210615952313, "grad_norm": 3.818896770477295, "learning_rate": 4.556060175986375e-05, "loss": 0.0018362969160079956, "step": 191870 }, { "epoch": 54.464944649446494, "grad_norm": 0.08423987776041031, "learning_rate": 4.555776326994039e-05, "loss": 0.0026127370074391365, "step": 191880 }, { "epoch": 54.46778313936986, "grad_norm": 1.9855378866195679, "learning_rate": 4.5554924780017034e-05, "loss": 0.0009028496220707894, "step": 191890 }, { "epoch": 54.47062162929321, "grad_norm": 0.06720281392335892, "learning_rate": 4.555208629009367e-05, "loss": 0.0006246268749237061, "step": 191900 }, { "epoch": 54.473460119216575, "grad_norm": 0.00843675434589386, "learning_rate": 4.554924780017032e-05, "loss": 0.0008774057030677795, "step": 191910 }, { "epoch": 54.47629860913994, "grad_norm": 0.053035154938697815, "learning_rate": 4.554640931024695e-05, "loss": 0.0019910309463739397, "step": 191920 }, { "epoch": 54.4791370990633, "grad_norm": 0.06042381748557091, "learning_rate": 4.5543570820323586e-05, "loss": 0.00042624231427907945, "step": 191930 }, { "epoch": 54.48197558898666, "grad_norm": 0.05435601994395256, "learning_rate": 4.5540732330400234e-05, "loss": 0.0016485599800944329, "step": 191940 }, { "epoch": 54.48481407891002, "grad_norm": 16.76787757873535, "learning_rate": 4.553789384047687e-05, "loss": 0.006288465112447739, "step": 191950 }, { "epoch": 54.48765256883338, "grad_norm": 0.1326131671667099, "learning_rate": 4.553505535055351e-05, "loss": 0.0003082128241658211, "step": 191960 }, { "epoch": 54.49049105875674, "grad_norm": 1.0724918842315674, "learning_rate": 4.5532216860630145e-05, "loss": 0.0008005261421203613, "step": 191970 }, { "epoch": 54.4933295486801, "grad_norm": 0.06680697947740555, "learning_rate": 4.5529378370706786e-05, "loss": 0.0012105032801628113, "step": 191980 }, { "epoch": 54.496168038603464, "grad_norm": 0.06247660145163536, "learning_rate": 4.552653988078343e-05, "loss": 0.0002882188186049461, "step": 191990 }, { "epoch": 54.49900652852683, "grad_norm": 0.06340223550796509, "learning_rate": 4.552370139086006e-05, "loss": 0.0006563013419508934, "step": 192000 }, { "epoch": 54.49900652852683, "eval_accuracy": 0.9772365994786036, "eval_loss": 0.08066019415855408, "eval_runtime": 32.2524, "eval_samples_per_second": 487.622, "eval_steps_per_second": 7.627, "step": 192000 }, { "epoch": 54.50184501845018, "grad_norm": 0.07750023156404495, "learning_rate": 4.5520862900936704e-05, "loss": 0.00027098413556814194, "step": 192010 }, { "epoch": 54.504683508373546, "grad_norm": 0.03031003102660179, "learning_rate": 4.5518024411013345e-05, "loss": 0.00022284649312496186, "step": 192020 }, { "epoch": 54.50752199829691, "grad_norm": 0.09574250876903534, "learning_rate": 4.551518592108998e-05, "loss": 0.0011160939931869507, "step": 192030 }, { "epoch": 54.510360488220265, "grad_norm": 0.046857986599206924, "learning_rate": 4.551234743116662e-05, "loss": 0.0003237653523683548, "step": 192040 }, { "epoch": 54.51319897814363, "grad_norm": 0.3462381660938263, "learning_rate": 4.550950894124326e-05, "loss": 0.0025655729696154594, "step": 192050 }, { "epoch": 54.51603746806699, "grad_norm": 0.10085567831993103, "learning_rate": 4.55066704513199e-05, "loss": 0.0005363073199987411, "step": 192060 }, { "epoch": 54.518875957990346, "grad_norm": 0.07148906588554382, "learning_rate": 4.550383196139654e-05, "loss": 0.0006646843627095223, "step": 192070 }, { "epoch": 54.52171444791371, "grad_norm": 1.7412410974502563, "learning_rate": 4.550099347147318e-05, "loss": 0.0007324749603867531, "step": 192080 }, { "epoch": 54.52455293783707, "grad_norm": 0.09977587312459946, "learning_rate": 4.5498154981549814e-05, "loss": 0.00046803466975688936, "step": 192090 }, { "epoch": 54.527391427760435, "grad_norm": 0.042045172303915024, "learning_rate": 4.5495316491626456e-05, "loss": 0.0005581462755799294, "step": 192100 }, { "epoch": 54.53022991768379, "grad_norm": 0.407324880361557, "learning_rate": 4.54924780017031e-05, "loss": 0.002821612358093262, "step": 192110 }, { "epoch": 54.53306840760715, "grad_norm": 2.7339792251586914, "learning_rate": 4.548963951177974e-05, "loss": 0.0010560296475887298, "step": 192120 }, { "epoch": 54.535906897530516, "grad_norm": 0.02168787270784378, "learning_rate": 4.548680102185637e-05, "loss": 0.00045020710676908494, "step": 192130 }, { "epoch": 54.53874538745387, "grad_norm": 0.12661434710025787, "learning_rate": 4.5483962531933014e-05, "loss": 0.0032897204160690307, "step": 192140 }, { "epoch": 54.541583877377235, "grad_norm": 0.5909566879272461, "learning_rate": 4.5481124042009656e-05, "loss": 0.0002688605338335037, "step": 192150 }, { "epoch": 54.5444223673006, "grad_norm": 17.908164978027344, "learning_rate": 4.547828555208629e-05, "loss": 0.006518945097923279, "step": 192160 }, { "epoch": 54.547260857223954, "grad_norm": 0.06297691911458969, "learning_rate": 4.547544706216293e-05, "loss": 0.0009724490344524383, "step": 192170 }, { "epoch": 54.55009934714732, "grad_norm": 0.029532356187701225, "learning_rate": 4.547260857223957e-05, "loss": 0.00041862446814775465, "step": 192180 }, { "epoch": 54.55293783707068, "grad_norm": 0.19218584895133972, "learning_rate": 4.546977008231621e-05, "loss": 0.000959034264087677, "step": 192190 }, { "epoch": 54.55577632699404, "grad_norm": 0.24306516349315643, "learning_rate": 4.546693159239285e-05, "loss": 0.0015700815245509147, "step": 192200 }, { "epoch": 54.5586148169174, "grad_norm": 0.06988168507814407, "learning_rate": 4.546409310246949e-05, "loss": 0.00020565036684274673, "step": 192210 }, { "epoch": 54.56145330684076, "grad_norm": 0.060943227261304855, "learning_rate": 4.5461254612546125e-05, "loss": 0.0007553787901997567, "step": 192220 }, { "epoch": 54.564291796764124, "grad_norm": 0.02212204411625862, "learning_rate": 4.5458416122622766e-05, "loss": 0.0007668403908610344, "step": 192230 }, { "epoch": 54.56713028668748, "grad_norm": 0.2982671856880188, "learning_rate": 4.545557763269941e-05, "loss": 0.0010435983538627624, "step": 192240 }, { "epoch": 54.56996877661084, "grad_norm": 0.040205419063568115, "learning_rate": 4.545273914277605e-05, "loss": 0.00040983259677886965, "step": 192250 }, { "epoch": 54.572807266534205, "grad_norm": 0.011486396193504333, "learning_rate": 4.5449900652852684e-05, "loss": 0.001012999564409256, "step": 192260 }, { "epoch": 54.57564575645756, "grad_norm": 0.01004727091640234, "learning_rate": 4.544706216292932e-05, "loss": 0.0005482640117406845, "step": 192270 }, { "epoch": 54.578484246380924, "grad_norm": 0.02840992622077465, "learning_rate": 4.5444223673005967e-05, "loss": 0.00026895590126514436, "step": 192280 }, { "epoch": 54.58132273630429, "grad_norm": 0.05997065082192421, "learning_rate": 4.54413851830826e-05, "loss": 0.00432254858314991, "step": 192290 }, { "epoch": 54.58416122622765, "grad_norm": 0.012514862231910229, "learning_rate": 4.543854669315924e-05, "loss": 0.00021007414907217025, "step": 192300 }, { "epoch": 54.586999716151006, "grad_norm": 0.16145235300064087, "learning_rate": 4.5435708203235884e-05, "loss": 0.0004065090790390968, "step": 192310 }, { "epoch": 54.58983820607437, "grad_norm": 0.09747538715600967, "learning_rate": 4.543286971331252e-05, "loss": 0.0020884845405817034, "step": 192320 }, { "epoch": 54.59267669599773, "grad_norm": 0.03529169410467148, "learning_rate": 4.543003122338916e-05, "loss": 0.0011177945882081986, "step": 192330 }, { "epoch": 54.59551518592109, "grad_norm": 1.405574917793274, "learning_rate": 4.54271927334658e-05, "loss": 0.005006754398345947, "step": 192340 }, { "epoch": 54.59835367584445, "grad_norm": 0.3183739185333252, "learning_rate": 4.5424354243542436e-05, "loss": 0.0007190024480223656, "step": 192350 }, { "epoch": 54.60119216576781, "grad_norm": 0.09176784008741379, "learning_rate": 4.542151575361908e-05, "loss": 0.0009590830653905868, "step": 192360 }, { "epoch": 54.604030655691176, "grad_norm": 0.5007919669151306, "learning_rate": 4.541867726369571e-05, "loss": 0.006760498136281967, "step": 192370 }, { "epoch": 54.60686914561453, "grad_norm": 2.3587849140167236, "learning_rate": 4.541583877377236e-05, "loss": 0.0063875623047351835, "step": 192380 }, { "epoch": 54.609707635537895, "grad_norm": 5.596359729766846, "learning_rate": 4.5413000283848995e-05, "loss": 0.0062640272080898285, "step": 192390 }, { "epoch": 54.61254612546126, "grad_norm": 0.5585590600967407, "learning_rate": 4.541016179392563e-05, "loss": 0.004681891202926636, "step": 192400 }, { "epoch": 54.61538461538461, "grad_norm": 0.1267448216676712, "learning_rate": 4.540732330400228e-05, "loss": 0.005992340296506882, "step": 192410 }, { "epoch": 54.618223105307976, "grad_norm": 0.028188111260533333, "learning_rate": 4.540448481407891e-05, "loss": 0.0015598913654685021, "step": 192420 }, { "epoch": 54.62106159523134, "grad_norm": 0.17758294939994812, "learning_rate": 4.540164632415555e-05, "loss": 0.00221580695360899, "step": 192430 }, { "epoch": 54.623900085154695, "grad_norm": 0.03579934686422348, "learning_rate": 4.5398807834232195e-05, "loss": 0.0005748918280005455, "step": 192440 }, { "epoch": 54.62673857507806, "grad_norm": 1.8916223049163818, "learning_rate": 4.539596934430883e-05, "loss": 0.0012983901426196099, "step": 192450 }, { "epoch": 54.62957706500142, "grad_norm": 0.11436627060174942, "learning_rate": 4.539313085438547e-05, "loss": 0.0026648728176951408, "step": 192460 }, { "epoch": 54.63241555492478, "grad_norm": 0.04040074348449707, "learning_rate": 4.5390292364462105e-05, "loss": 0.0006347080692648887, "step": 192470 }, { "epoch": 54.63525404484814, "grad_norm": 0.13617153465747833, "learning_rate": 4.5387453874538747e-05, "loss": 0.0013882244005799294, "step": 192480 }, { "epoch": 54.6380925347715, "grad_norm": 0.27459990978240967, "learning_rate": 4.538461538461539e-05, "loss": 0.0011465493589639664, "step": 192490 }, { "epoch": 54.640931024694865, "grad_norm": 0.037793755531311035, "learning_rate": 4.538177689469202e-05, "loss": 0.0018102325499057769, "step": 192500 }, { "epoch": 54.640931024694865, "eval_accuracy": 0.9724677306542888, "eval_loss": 0.09608685970306396, "eval_runtime": 32.5171, "eval_samples_per_second": 483.653, "eval_steps_per_second": 7.565, "step": 192500 }, { "epoch": 54.64376951461822, "grad_norm": 0.35484978556632996, "learning_rate": 4.5378938404768664e-05, "loss": 0.0008193304762244225, "step": 192510 }, { "epoch": 54.646608004541584, "grad_norm": 0.3018094003200531, "learning_rate": 4.5376099914845305e-05, "loss": 0.0027299633249640463, "step": 192520 }, { "epoch": 54.64944649446495, "grad_norm": 0.4788801074028015, "learning_rate": 4.537326142492194e-05, "loss": 0.001204531267285347, "step": 192530 }, { "epoch": 54.6522849843883, "grad_norm": 0.03987901657819748, "learning_rate": 4.537042293499859e-05, "loss": 0.0049213066697120665, "step": 192540 }, { "epoch": 54.655123474311665, "grad_norm": 0.854216992855072, "learning_rate": 4.536758444507522e-05, "loss": 0.0009148487821221352, "step": 192550 }, { "epoch": 54.65796196423503, "grad_norm": 0.4054808020591736, "learning_rate": 4.536474595515186e-05, "loss": 0.0007602997124195099, "step": 192560 }, { "epoch": 54.66080045415839, "grad_norm": 0.3868764638900757, "learning_rate": 4.53619074652285e-05, "loss": 0.0044997379183769224, "step": 192570 }, { "epoch": 54.66363894408175, "grad_norm": 10.473978042602539, "learning_rate": 4.535906897530514e-05, "loss": 0.0036855705082416536, "step": 192580 }, { "epoch": 54.66647743400511, "grad_norm": 0.29570889472961426, "learning_rate": 4.535623048538178e-05, "loss": 0.000539948046207428, "step": 192590 }, { "epoch": 54.66931592392847, "grad_norm": 1.1597963571548462, "learning_rate": 4.5353391995458416e-05, "loss": 0.0014783261343836785, "step": 192600 }, { "epoch": 54.67215441385183, "grad_norm": 5.333010673522949, "learning_rate": 4.535055350553506e-05, "loss": 0.0015417825430631637, "step": 192610 }, { "epoch": 54.67499290377519, "grad_norm": 0.2151205837726593, "learning_rate": 4.53477150156117e-05, "loss": 0.0007778137922286987, "step": 192620 }, { "epoch": 54.677831393698554, "grad_norm": 0.05176310986280441, "learning_rate": 4.534487652568833e-05, "loss": 0.0006184954196214676, "step": 192630 }, { "epoch": 54.68066988362191, "grad_norm": 0.5517154932022095, "learning_rate": 4.5342038035764975e-05, "loss": 0.0014133095741271974, "step": 192640 }, { "epoch": 54.68350837354527, "grad_norm": 0.08922632038593292, "learning_rate": 4.5339199545841616e-05, "loss": 0.005697987973690033, "step": 192650 }, { "epoch": 54.686346863468636, "grad_norm": 0.24071922898292542, "learning_rate": 4.533636105591825e-05, "loss": 0.0003132052719593048, "step": 192660 }, { "epoch": 54.689185353392, "grad_norm": 0.07995827496051788, "learning_rate": 4.533352256599489e-05, "loss": 0.0011485984548926353, "step": 192670 }, { "epoch": 54.692023843315354, "grad_norm": 0.2503539025783539, "learning_rate": 4.5330684076071533e-05, "loss": 0.0024651190266013145, "step": 192680 }, { "epoch": 54.69486233323872, "grad_norm": 0.03767113387584686, "learning_rate": 4.532784558614817e-05, "loss": 0.00045919399708509444, "step": 192690 }, { "epoch": 54.69770082316208, "grad_norm": 9.668213844299316, "learning_rate": 4.532500709622481e-05, "loss": 0.004905151575803757, "step": 192700 }, { "epoch": 54.700539313085436, "grad_norm": 10.275238990783691, "learning_rate": 4.532216860630145e-05, "loss": 0.003293055295944214, "step": 192710 }, { "epoch": 54.7033778030088, "grad_norm": 5.07054328918457, "learning_rate": 4.531933011637809e-05, "loss": 0.002890988439321518, "step": 192720 }, { "epoch": 54.70621629293216, "grad_norm": 0.4986066520214081, "learning_rate": 4.531649162645473e-05, "loss": 0.0004779331386089325, "step": 192730 }, { "epoch": 54.70905478285552, "grad_norm": 0.6505165100097656, "learning_rate": 4.531365313653137e-05, "loss": 0.0008390314877033233, "step": 192740 }, { "epoch": 54.71189327277888, "grad_norm": 1.590277075767517, "learning_rate": 4.531081464660801e-05, "loss": 0.0017888875678181647, "step": 192750 }, { "epoch": 54.71473176270224, "grad_norm": 8.684060096740723, "learning_rate": 4.5307976156684644e-05, "loss": 0.0011608406901359558, "step": 192760 }, { "epoch": 54.717570252625606, "grad_norm": 4.239928722381592, "learning_rate": 4.5305137666761285e-05, "loss": 0.0036556873470544814, "step": 192770 }, { "epoch": 54.72040874254896, "grad_norm": 0.10527706146240234, "learning_rate": 4.530229917683793e-05, "loss": 0.0003862097859382629, "step": 192780 }, { "epoch": 54.723247232472325, "grad_norm": 9.572259902954102, "learning_rate": 4.529946068691456e-05, "loss": 0.0059012148529291155, "step": 192790 }, { "epoch": 54.72608572239569, "grad_norm": 0.07620464265346527, "learning_rate": 4.52966221969912e-05, "loss": 0.00047458857297897337, "step": 192800 }, { "epoch": 54.72892421231904, "grad_norm": 0.18030978739261627, "learning_rate": 4.5293783707067844e-05, "loss": 0.0010402575135231018, "step": 192810 }, { "epoch": 54.731762702242406, "grad_norm": 0.04602370783686638, "learning_rate": 4.529094521714448e-05, "loss": 0.00092079509049654, "step": 192820 }, { "epoch": 54.73460119216577, "grad_norm": 0.19807742536067963, "learning_rate": 4.528810672722112e-05, "loss": 0.0005450891330838203, "step": 192830 }, { "epoch": 54.73743968208913, "grad_norm": 0.06712987273931503, "learning_rate": 4.528526823729776e-05, "loss": 0.0008508488535881042, "step": 192840 }, { "epoch": 54.74027817201249, "grad_norm": 0.07866635918617249, "learning_rate": 4.52824297473744e-05, "loss": 0.0011670637875795364, "step": 192850 }, { "epoch": 54.74311666193585, "grad_norm": 13.692626953125, "learning_rate": 4.527959125745104e-05, "loss": 0.004864080995321274, "step": 192860 }, { "epoch": 54.745955151859214, "grad_norm": 0.2779467701911926, "learning_rate": 4.527675276752767e-05, "loss": 0.0006695106625556946, "step": 192870 }, { "epoch": 54.74879364178257, "grad_norm": 16.66049575805664, "learning_rate": 4.527391427760432e-05, "loss": 0.004803113266825676, "step": 192880 }, { "epoch": 54.75163213170593, "grad_norm": 0.13514883816242218, "learning_rate": 4.5271075787680955e-05, "loss": 0.0004820266738533974, "step": 192890 }, { "epoch": 54.754470621629295, "grad_norm": 0.05200890451669693, "learning_rate": 4.5268237297757596e-05, "loss": 0.001515892520546913, "step": 192900 }, { "epoch": 54.75730911155265, "grad_norm": 0.08665496110916138, "learning_rate": 4.526539880783424e-05, "loss": 0.004966672882437706, "step": 192910 }, { "epoch": 54.760147601476014, "grad_norm": 1.787520408630371, "learning_rate": 4.526256031791087e-05, "loss": 0.0015240205451846124, "step": 192920 }, { "epoch": 54.76298609139938, "grad_norm": 0.05158241093158722, "learning_rate": 4.5259721827987514e-05, "loss": 0.0010311653837561606, "step": 192930 }, { "epoch": 54.76582458132274, "grad_norm": 0.009568643756210804, "learning_rate": 4.5256883338064155e-05, "loss": 0.0012820202857255936, "step": 192940 }, { "epoch": 54.768663071246095, "grad_norm": 4.9963884353637695, "learning_rate": 4.525404484814079e-05, "loss": 0.002583321928977966, "step": 192950 }, { "epoch": 54.77150156116946, "grad_norm": 2.6551594734191895, "learning_rate": 4.525120635821743e-05, "loss": 0.002155821584165096, "step": 192960 }, { "epoch": 54.77434005109282, "grad_norm": 0.12301301956176758, "learning_rate": 4.5248367868294066e-05, "loss": 0.0009524689987301827, "step": 192970 }, { "epoch": 54.77717854101618, "grad_norm": 0.026012232527136803, "learning_rate": 4.524552937837071e-05, "loss": 0.0019410010427236557, "step": 192980 }, { "epoch": 54.78001703093954, "grad_norm": 0.44197386503219604, "learning_rate": 4.524269088844735e-05, "loss": 0.0011832265183329583, "step": 192990 }, { "epoch": 54.7828555208629, "grad_norm": 0.2571312189102173, "learning_rate": 4.523985239852398e-05, "loss": 0.004839348420500755, "step": 193000 }, { "epoch": 54.7828555208629, "eval_accuracy": 0.9767279201373434, "eval_loss": 0.08199021965265274, "eval_runtime": 33.2696, "eval_samples_per_second": 472.714, "eval_steps_per_second": 7.394, "step": 193000 }, { "epoch": 54.78569401078626, "grad_norm": 0.08331780135631561, "learning_rate": 4.523701390860063e-05, "loss": 0.0020491404458880423, "step": 193010 }, { "epoch": 54.78853250070962, "grad_norm": 0.19433677196502686, "learning_rate": 4.5234175418677266e-05, "loss": 0.0013044068589806556, "step": 193020 }, { "epoch": 54.791370990632984, "grad_norm": 0.21276892721652985, "learning_rate": 4.52313369287539e-05, "loss": 0.003513157367706299, "step": 193030 }, { "epoch": 54.79420948055635, "grad_norm": 0.3858931362628937, "learning_rate": 4.522849843883055e-05, "loss": 0.0008163414895534515, "step": 193040 }, { "epoch": 54.7970479704797, "grad_norm": 0.18437929451465607, "learning_rate": 4.522565994890718e-05, "loss": 0.0025547361001372337, "step": 193050 }, { "epoch": 54.799886460403066, "grad_norm": 4.0028605461120605, "learning_rate": 4.5222821458983824e-05, "loss": 0.001390901580452919, "step": 193060 }, { "epoch": 54.80272495032643, "grad_norm": 0.01603393256664276, "learning_rate": 4.521998296906046e-05, "loss": 0.0005649816244840622, "step": 193070 }, { "epoch": 54.805563440249784, "grad_norm": 0.08990688621997833, "learning_rate": 4.52171444791371e-05, "loss": 0.0020457480102777483, "step": 193080 }, { "epoch": 54.80840193017315, "grad_norm": 0.24544215202331543, "learning_rate": 4.521430598921374e-05, "loss": 0.007690902054309845, "step": 193090 }, { "epoch": 54.81124042009651, "grad_norm": 0.20418989658355713, "learning_rate": 4.5211467499290376e-05, "loss": 0.004023807495832444, "step": 193100 }, { "epoch": 54.814078910019866, "grad_norm": 7.985023498535156, "learning_rate": 4.520862900936702e-05, "loss": 0.001903153397142887, "step": 193110 }, { "epoch": 54.81691739994323, "grad_norm": 0.24844545125961304, "learning_rate": 4.520579051944366e-05, "loss": 0.006008155643939972, "step": 193120 }, { "epoch": 54.81975588986659, "grad_norm": 1.743600606918335, "learning_rate": 4.5202952029520294e-05, "loss": 0.0020038703456521033, "step": 193130 }, { "epoch": 54.822594379789955, "grad_norm": 0.07064063847064972, "learning_rate": 4.520011353959694e-05, "loss": 0.0005411414429545402, "step": 193140 }, { "epoch": 54.82543286971331, "grad_norm": 0.26500457525253296, "learning_rate": 4.5197275049673576e-05, "loss": 0.0008420482277870178, "step": 193150 }, { "epoch": 54.82827135963667, "grad_norm": 0.776692807674408, "learning_rate": 4.519443655975021e-05, "loss": 0.000926315225660801, "step": 193160 }, { "epoch": 54.831109849560036, "grad_norm": 0.21970584988594055, "learning_rate": 4.519159806982685e-05, "loss": 0.0006067922338843345, "step": 193170 }, { "epoch": 54.83394833948339, "grad_norm": 4.706866264343262, "learning_rate": 4.5188759579903494e-05, "loss": 0.004598168656229973, "step": 193180 }, { "epoch": 54.836786829406755, "grad_norm": 0.07605768740177155, "learning_rate": 4.5185921089980135e-05, "loss": 0.000839291326701641, "step": 193190 }, { "epoch": 54.83962531933012, "grad_norm": 0.1045413538813591, "learning_rate": 4.518308260005677e-05, "loss": 0.0004973091185092926, "step": 193200 }, { "epoch": 54.84246380925348, "grad_norm": 0.18285517394542694, "learning_rate": 4.518024411013341e-05, "loss": 0.00015832800418138503, "step": 193210 }, { "epoch": 54.845302299176836, "grad_norm": 1.668381929397583, "learning_rate": 4.517740562021005e-05, "loss": 0.0008169155567884445, "step": 193220 }, { "epoch": 54.8481407891002, "grad_norm": 0.08601323515176773, "learning_rate": 4.517456713028669e-05, "loss": 0.0003814948722720146, "step": 193230 }, { "epoch": 54.85097927902356, "grad_norm": 0.08280467242002487, "learning_rate": 4.517172864036333e-05, "loss": 0.0011677952483296395, "step": 193240 }, { "epoch": 54.85381776894692, "grad_norm": 0.02753811702132225, "learning_rate": 4.516889015043997e-05, "loss": 0.00021603237837553024, "step": 193250 }, { "epoch": 54.85665625887028, "grad_norm": 2.4973671436309814, "learning_rate": 4.5166051660516604e-05, "loss": 0.0008990088477730751, "step": 193260 }, { "epoch": 54.859494748793644, "grad_norm": 0.6331532001495361, "learning_rate": 4.516321317059325e-05, "loss": 0.0007856853306293487, "step": 193270 }, { "epoch": 54.862333238717, "grad_norm": 0.06639384478330612, "learning_rate": 4.516037468066989e-05, "loss": 0.0006695657968521119, "step": 193280 }, { "epoch": 54.86517172864036, "grad_norm": 0.01670117676258087, "learning_rate": 4.515753619074652e-05, "loss": 0.0005840536206960679, "step": 193290 }, { "epoch": 54.868010218563725, "grad_norm": 1.174407720565796, "learning_rate": 4.515469770082316e-05, "loss": 0.0006215361878275871, "step": 193300 }, { "epoch": 54.87084870848709, "grad_norm": 0.09297282248735428, "learning_rate": 4.5151859210899804e-05, "loss": 0.0003429649397730827, "step": 193310 }, { "epoch": 54.873687198410444, "grad_norm": 0.49915459752082825, "learning_rate": 4.5149020720976446e-05, "loss": 0.006991142034530639, "step": 193320 }, { "epoch": 54.87652568833381, "grad_norm": 1.1704343557357788, "learning_rate": 4.514618223105308e-05, "loss": 0.0017220772802829742, "step": 193330 }, { "epoch": 54.87936417825717, "grad_norm": 0.034017205238342285, "learning_rate": 4.514334374112972e-05, "loss": 0.0009599035605788231, "step": 193340 }, { "epoch": 54.882202668180526, "grad_norm": 0.03104356676340103, "learning_rate": 4.514050525120636e-05, "loss": 0.001101686805486679, "step": 193350 }, { "epoch": 54.88504115810389, "grad_norm": 0.10257542133331299, "learning_rate": 4.5137666761283e-05, "loss": 0.0011461110785603522, "step": 193360 }, { "epoch": 54.88787964802725, "grad_norm": 0.2692425847053528, "learning_rate": 4.513482827135964e-05, "loss": 0.000727594830095768, "step": 193370 }, { "epoch": 54.89071813795061, "grad_norm": 0.14733290672302246, "learning_rate": 4.513198978143628e-05, "loss": 0.0003593308851122856, "step": 193380 }, { "epoch": 54.89355662787397, "grad_norm": 0.078899085521698, "learning_rate": 4.5129151291512915e-05, "loss": 0.0006048738956451416, "step": 193390 }, { "epoch": 54.89639511779733, "grad_norm": 0.6950100064277649, "learning_rate": 4.5126312801589557e-05, "loss": 0.0010959558188915254, "step": 193400 }, { "epoch": 54.899233607720696, "grad_norm": 6.848596096038818, "learning_rate": 4.51234743116662e-05, "loss": 0.0018643075600266457, "step": 193410 }, { "epoch": 54.90207209764405, "grad_norm": 4.271066188812256, "learning_rate": 4.512063582174283e-05, "loss": 0.0010663041844964027, "step": 193420 }, { "epoch": 54.904910587567414, "grad_norm": 0.3417695462703705, "learning_rate": 4.5117797331819474e-05, "loss": 0.01135827973484993, "step": 193430 }, { "epoch": 54.90774907749078, "grad_norm": 0.35392799973487854, "learning_rate": 4.5114958841896115e-05, "loss": 0.006626708060503006, "step": 193440 }, { "epoch": 54.91058756741413, "grad_norm": 0.010730382986366749, "learning_rate": 4.511212035197275e-05, "loss": 0.0005681196227669716, "step": 193450 }, { "epoch": 54.913426057337496, "grad_norm": 0.28312432765960693, "learning_rate": 4.510928186204939e-05, "loss": 0.0034432146698236466, "step": 193460 }, { "epoch": 54.91626454726086, "grad_norm": 0.3397130072116852, "learning_rate": 4.510644337212603e-05, "loss": 0.0006520602852106095, "step": 193470 }, { "epoch": 54.919103037184215, "grad_norm": 0.05224641039967537, "learning_rate": 4.5103604882202674e-05, "loss": 0.0007474120706319809, "step": 193480 }, { "epoch": 54.92194152710758, "grad_norm": 0.0948987752199173, "learning_rate": 4.510076639227931e-05, "loss": 0.00033192243427038193, "step": 193490 }, { "epoch": 54.92478001703094, "grad_norm": 0.008073879405856133, "learning_rate": 4.509792790235594e-05, "loss": 0.00133442971855402, "step": 193500 }, { "epoch": 54.92478001703094, "eval_accuracy": 0.9786354676670693, "eval_loss": 0.07588300108909607, "eval_runtime": 32.7319, "eval_samples_per_second": 480.48, "eval_steps_per_second": 7.516, "step": 193500 }, { "epoch": 54.9276185069543, "grad_norm": 0.1921972930431366, "learning_rate": 4.509508941243259e-05, "loss": 0.0004267564043402672, "step": 193510 }, { "epoch": 54.93045699687766, "grad_norm": 0.6815023422241211, "learning_rate": 4.5092250922509226e-05, "loss": 0.0007739594206213951, "step": 193520 }, { "epoch": 54.93329548680102, "grad_norm": 0.09581562876701355, "learning_rate": 4.508941243258587e-05, "loss": 0.0017895305529236794, "step": 193530 }, { "epoch": 54.936133976724385, "grad_norm": 0.014870029874145985, "learning_rate": 4.508657394266251e-05, "loss": 0.0023726608604192735, "step": 193540 }, { "epoch": 54.93897246664774, "grad_norm": 0.04893457517027855, "learning_rate": 4.508373545273914e-05, "loss": 0.0003431588411331177, "step": 193550 }, { "epoch": 54.9418109565711, "grad_norm": 1.321195363998413, "learning_rate": 4.5080896962815785e-05, "loss": 0.001604944095015526, "step": 193560 }, { "epoch": 54.944649446494466, "grad_norm": 11.417872428894043, "learning_rate": 4.5078058472892426e-05, "loss": 0.0034023217856884004, "step": 193570 }, { "epoch": 54.94748793641783, "grad_norm": 0.7584550380706787, "learning_rate": 4.507521998296906e-05, "loss": 0.0006143055856227875, "step": 193580 }, { "epoch": 54.950326426341185, "grad_norm": 0.42939451336860657, "learning_rate": 4.50723814930457e-05, "loss": 0.0034742169082164764, "step": 193590 }, { "epoch": 54.95316491626455, "grad_norm": 0.020512780174613, "learning_rate": 4.5069543003122337e-05, "loss": 0.0006411071866750717, "step": 193600 }, { "epoch": 54.95600340618791, "grad_norm": 0.04974746331572533, "learning_rate": 4.5066704513198985e-05, "loss": 0.000949045829474926, "step": 193610 }, { "epoch": 54.95884189611127, "grad_norm": 0.8657918572425842, "learning_rate": 4.506386602327562e-05, "loss": 0.009335582703351974, "step": 193620 }, { "epoch": 54.96168038603463, "grad_norm": 0.019998984411358833, "learning_rate": 4.5061027533352254e-05, "loss": 0.0009070407599210739, "step": 193630 }, { "epoch": 54.96451887595799, "grad_norm": 0.10608475655317307, "learning_rate": 4.50581890434289e-05, "loss": 0.00045571401715278627, "step": 193640 }, { "epoch": 54.96735736588135, "grad_norm": 0.02978459931910038, "learning_rate": 4.505535055350554e-05, "loss": 0.001737365499138832, "step": 193650 }, { "epoch": 54.97019585580471, "grad_norm": 0.13439415395259857, "learning_rate": 4.505251206358218e-05, "loss": 0.0014687672257423401, "step": 193660 }, { "epoch": 54.973034345728074, "grad_norm": 0.202483668923378, "learning_rate": 4.504967357365882e-05, "loss": 0.005338087305426597, "step": 193670 }, { "epoch": 54.97587283565144, "grad_norm": 4.919798374176025, "learning_rate": 4.5046835083735454e-05, "loss": 0.0010338090360164642, "step": 193680 }, { "epoch": 54.97871132557479, "grad_norm": 6.3843183517456055, "learning_rate": 4.5043996593812095e-05, "loss": 0.0020647086203098297, "step": 193690 }, { "epoch": 54.981549815498155, "grad_norm": 0.18966571986675262, "learning_rate": 4.504115810388873e-05, "loss": 0.002423659712076187, "step": 193700 }, { "epoch": 54.98438830542152, "grad_norm": 0.2408057004213333, "learning_rate": 4.503831961396537e-05, "loss": 0.0005019035190343857, "step": 193710 }, { "epoch": 54.987226795344874, "grad_norm": 0.4083288609981537, "learning_rate": 4.503548112404201e-05, "loss": 0.0006309045478701591, "step": 193720 }, { "epoch": 54.99006528526824, "grad_norm": 0.09742613136768341, "learning_rate": 4.503264263411865e-05, "loss": 0.0005408357828855515, "step": 193730 }, { "epoch": 54.9929037751916, "grad_norm": 0.137253075838089, "learning_rate": 4.5029804144195296e-05, "loss": 0.000510379858314991, "step": 193740 }, { "epoch": 54.995742265114956, "grad_norm": 0.03280330449342728, "learning_rate": 4.502696565427193e-05, "loss": 0.0007651934400200843, "step": 193750 }, { "epoch": 54.99858075503832, "grad_norm": 0.17405132949352264, "learning_rate": 4.5024127164348565e-05, "loss": 0.00031260661780834197, "step": 193760 }, { "epoch": 55.00141924496168, "grad_norm": 0.02616366744041443, "learning_rate": 4.502128867442521e-05, "loss": 0.0008971180766820907, "step": 193770 }, { "epoch": 55.004257734885044, "grad_norm": 0.6239033937454224, "learning_rate": 4.501845018450185e-05, "loss": 0.00015637893229722977, "step": 193780 }, { "epoch": 55.0070962248084, "grad_norm": 0.12782365083694458, "learning_rate": 4.501561169457849e-05, "loss": 0.00028281304985284806, "step": 193790 }, { "epoch": 55.00993471473176, "grad_norm": 0.07772985845804214, "learning_rate": 4.5012773204655123e-05, "loss": 0.00031991302967071533, "step": 193800 }, { "epoch": 55.012773204655126, "grad_norm": 0.14819084107875824, "learning_rate": 4.5009934714731765e-05, "loss": 0.002174023352563381, "step": 193810 }, { "epoch": 55.01561169457848, "grad_norm": 0.10162334889173508, "learning_rate": 4.5007096224808406e-05, "loss": 0.0019311131909489632, "step": 193820 }, { "epoch": 55.018450184501845, "grad_norm": 0.009083440527319908, "learning_rate": 4.500425773488504e-05, "loss": 0.0012231888249516487, "step": 193830 }, { "epoch": 55.02128867442521, "grad_norm": 19.13519859313965, "learning_rate": 4.500141924496168e-05, "loss": 0.004359965398907662, "step": 193840 }, { "epoch": 55.02412716434856, "grad_norm": 20.1438045501709, "learning_rate": 4.4998580755038324e-05, "loss": 0.021589916944503785, "step": 193850 }, { "epoch": 55.026965654271926, "grad_norm": 0.11773016303777695, "learning_rate": 4.499574226511496e-05, "loss": 0.0017927112057805061, "step": 193860 }, { "epoch": 55.02980414419529, "grad_norm": 0.005287750158458948, "learning_rate": 4.49929037751916e-05, "loss": 0.0027223965153098106, "step": 193870 }, { "epoch": 55.03264263411865, "grad_norm": 24.35765838623047, "learning_rate": 4.499006528526824e-05, "loss": 0.006979382783174515, "step": 193880 }, { "epoch": 55.03548112404201, "grad_norm": 0.07231361418962479, "learning_rate": 4.4987226795344875e-05, "loss": 0.00035792943090200423, "step": 193890 }, { "epoch": 55.03831961396537, "grad_norm": 0.05828948691487312, "learning_rate": 4.498438830542152e-05, "loss": 0.0013010494410991668, "step": 193900 }, { "epoch": 55.04115810388873, "grad_norm": 0.03026546537876129, "learning_rate": 4.498154981549816e-05, "loss": 0.0007915075868368149, "step": 193910 }, { "epoch": 55.04399659381209, "grad_norm": 0.022937564179301262, "learning_rate": 4.497871132557479e-05, "loss": 0.0001715485006570816, "step": 193920 }, { "epoch": 55.04683508373545, "grad_norm": 0.013667608611285686, "learning_rate": 4.4975872835651434e-05, "loss": 0.00044150408357381823, "step": 193930 }, { "epoch": 55.049673573658815, "grad_norm": 0.15419965982437134, "learning_rate": 4.4973034345728076e-05, "loss": 0.003681786358356476, "step": 193940 }, { "epoch": 55.05251206358217, "grad_norm": 0.12160047143697739, "learning_rate": 4.497019585580472e-05, "loss": 0.0008872700855135918, "step": 193950 }, { "epoch": 55.055350553505534, "grad_norm": 9.364887237548828, "learning_rate": 4.496735736588135e-05, "loss": 0.006453837454319, "step": 193960 }, { "epoch": 55.0581890434289, "grad_norm": 2.1096715927124023, "learning_rate": 4.496451887595799e-05, "loss": 0.003289201110601425, "step": 193970 }, { "epoch": 55.06102753335226, "grad_norm": 0.014158018864691257, "learning_rate": 4.4961680386034634e-05, "loss": 0.003348197788000107, "step": 193980 }, { "epoch": 55.063866023275615, "grad_norm": 0.2770717442035675, "learning_rate": 4.495884189611127e-05, "loss": 0.0012485940009355546, "step": 193990 }, { "epoch": 55.06670451319898, "grad_norm": 0.22681371867656708, "learning_rate": 4.495600340618791e-05, "loss": 0.0007808644324541092, "step": 194000 }, { "epoch": 55.06670451319898, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.0930028110742569, "eval_runtime": 32.8128, "eval_samples_per_second": 479.294, "eval_steps_per_second": 7.497, "step": 194000 }, { "epoch": 55.06954300312234, "grad_norm": 0.029804272577166557, "learning_rate": 4.495316491626455e-05, "loss": 0.0011145146563649178, "step": 194010 }, { "epoch": 55.0723814930457, "grad_norm": 0.1192685067653656, "learning_rate": 4.4950610275333525e-05, "loss": 0.003115515597164631, "step": 194020 }, { "epoch": 55.07521998296906, "grad_norm": 0.020017346367239952, "learning_rate": 4.4947771785410166e-05, "loss": 0.0010362906381487847, "step": 194030 }, { "epoch": 55.07805847289242, "grad_norm": 0.4125311076641083, "learning_rate": 4.49449332954868e-05, "loss": 0.0002985116094350815, "step": 194040 }, { "epoch": 55.080896962815785, "grad_norm": 0.043651483952999115, "learning_rate": 4.494209480556344e-05, "loss": 0.012575843930244445, "step": 194050 }, { "epoch": 55.08373545273914, "grad_norm": 3.485382080078125, "learning_rate": 4.4939256315640084e-05, "loss": 0.000773235410451889, "step": 194060 }, { "epoch": 55.086573942662504, "grad_norm": 0.29404959082603455, "learning_rate": 4.493641782571672e-05, "loss": 0.00020912550389766692, "step": 194070 }, { "epoch": 55.08941243258587, "grad_norm": 0.11801493167877197, "learning_rate": 4.493357933579336e-05, "loss": 0.0007062975317239761, "step": 194080 }, { "epoch": 55.09225092250922, "grad_norm": 0.039400991052389145, "learning_rate": 4.493074084587e-05, "loss": 0.0002997206524014473, "step": 194090 }, { "epoch": 55.095089412432586, "grad_norm": 0.3497559428215027, "learning_rate": 4.4927902355946635e-05, "loss": 0.00040965750813484194, "step": 194100 }, { "epoch": 55.09792790235595, "grad_norm": 9.693792343139648, "learning_rate": 4.492506386602328e-05, "loss": 0.001997986435890198, "step": 194110 }, { "epoch": 55.100766392279304, "grad_norm": 0.0255607720464468, "learning_rate": 4.492222537609992e-05, "loss": 0.0002866353839635849, "step": 194120 }, { "epoch": 55.10360488220267, "grad_norm": 0.06255605816841125, "learning_rate": 4.491938688617656e-05, "loss": 0.0029277056455612183, "step": 194130 }, { "epoch": 55.10644337212603, "grad_norm": 0.013188205659389496, "learning_rate": 4.4916548396253194e-05, "loss": 0.00012948643416166306, "step": 194140 }, { "epoch": 55.10928186204939, "grad_norm": 0.06642240285873413, "learning_rate": 4.4913709906329836e-05, "loss": 0.0012414507567882539, "step": 194150 }, { "epoch": 55.11212035197275, "grad_norm": 0.6423050165176392, "learning_rate": 4.491087141640648e-05, "loss": 0.0002448853105306625, "step": 194160 }, { "epoch": 55.11495884189611, "grad_norm": 0.04526769742369652, "learning_rate": 4.490803292648311e-05, "loss": 0.0002661734819412231, "step": 194170 }, { "epoch": 55.117797331819474, "grad_norm": 0.6339390873908997, "learning_rate": 4.490519443655975e-05, "loss": 0.001001104898750782, "step": 194180 }, { "epoch": 55.12063582174283, "grad_norm": 0.02418268658220768, "learning_rate": 4.4902355946636394e-05, "loss": 0.00039238333702087405, "step": 194190 }, { "epoch": 55.12347431166619, "grad_norm": 0.7863162755966187, "learning_rate": 4.489951745671303e-05, "loss": 0.0004016367718577385, "step": 194200 }, { "epoch": 55.126312801589556, "grad_norm": 0.0613560825586319, "learning_rate": 4.489667896678967e-05, "loss": 0.0010295888409018517, "step": 194210 }, { "epoch": 55.12915129151291, "grad_norm": 0.010939335450530052, "learning_rate": 4.489384047686631e-05, "loss": 0.0012508980929851533, "step": 194220 }, { "epoch": 55.131989781436275, "grad_norm": 0.10237230360507965, "learning_rate": 4.4891001986942946e-05, "loss": 0.0010218488052487374, "step": 194230 }, { "epoch": 55.13482827135964, "grad_norm": 0.02024519257247448, "learning_rate": 4.488816349701959e-05, "loss": 0.0027677293866872786, "step": 194240 }, { "epoch": 55.137666761283, "grad_norm": 5.052537441253662, "learning_rate": 4.488532500709623e-05, "loss": 0.002382362261414528, "step": 194250 }, { "epoch": 55.140505251206356, "grad_norm": 0.8396875262260437, "learning_rate": 4.488248651717287e-05, "loss": 0.0004217498004436493, "step": 194260 }, { "epoch": 55.14334374112972, "grad_norm": 1.4175028800964355, "learning_rate": 4.4879648027249505e-05, "loss": 0.0008722422644495964, "step": 194270 }, { "epoch": 55.14618223105308, "grad_norm": 0.09628958255052567, "learning_rate": 4.487680953732614e-05, "loss": 0.0005133228376507759, "step": 194280 }, { "epoch": 55.14902072097644, "grad_norm": 0.11109407991170883, "learning_rate": 4.487397104740279e-05, "loss": 0.0007476769387722015, "step": 194290 }, { "epoch": 55.1518592108998, "grad_norm": 0.23278552293777466, "learning_rate": 4.487113255747942e-05, "loss": 0.0007284300401806832, "step": 194300 }, { "epoch": 55.154697700823164, "grad_norm": 0.22267752885818481, "learning_rate": 4.4868294067556064e-05, "loss": 0.0003184758126735687, "step": 194310 }, { "epoch": 55.15753619074652, "grad_norm": 0.17797483503818512, "learning_rate": 4.4865455577632705e-05, "loss": 0.0001699177548289299, "step": 194320 }, { "epoch": 55.16037468066988, "grad_norm": 0.06253586709499359, "learning_rate": 4.486261708770934e-05, "loss": 0.0012514950707554818, "step": 194330 }, { "epoch": 55.163213170593245, "grad_norm": 16.923171997070312, "learning_rate": 4.485977859778598e-05, "loss": 0.0036897551268339156, "step": 194340 }, { "epoch": 55.16605166051661, "grad_norm": 2.552764415740967, "learning_rate": 4.485694010786262e-05, "loss": 0.0005691768601536751, "step": 194350 }, { "epoch": 55.168890150439964, "grad_norm": 0.4761795699596405, "learning_rate": 4.485410161793926e-05, "loss": 0.0009078178554773331, "step": 194360 }, { "epoch": 55.17172864036333, "grad_norm": 0.0016080249333754182, "learning_rate": 4.48512631280159e-05, "loss": 0.000337614119052887, "step": 194370 }, { "epoch": 55.17456713028669, "grad_norm": 0.14274680614471436, "learning_rate": 4.484842463809253e-05, "loss": 0.0003740767017006874, "step": 194380 }, { "epoch": 55.177405620210045, "grad_norm": 0.044082410633563995, "learning_rate": 4.4845586148169174e-05, "loss": 0.0003754129633307457, "step": 194390 }, { "epoch": 55.18024411013341, "grad_norm": 0.00775542389601469, "learning_rate": 4.4842747658245816e-05, "loss": 0.0017359802499413491, "step": 194400 }, { "epoch": 55.18308260005677, "grad_norm": 0.038673534989356995, "learning_rate": 4.483990916832245e-05, "loss": 0.00019173212349414824, "step": 194410 }, { "epoch": 55.185921089980134, "grad_norm": 0.5505730509757996, "learning_rate": 4.48370706783991e-05, "loss": 0.00036329366266727445, "step": 194420 }, { "epoch": 55.18875957990349, "grad_norm": 0.2965264916419983, "learning_rate": 4.483423218847573e-05, "loss": 0.00038316380232572556, "step": 194430 }, { "epoch": 55.19159806982685, "grad_norm": 0.03618954122066498, "learning_rate": 4.483139369855237e-05, "loss": 0.0009590055793523789, "step": 194440 }, { "epoch": 55.194436559750216, "grad_norm": 0.06821884214878082, "learning_rate": 4.4828555208629016e-05, "loss": 0.000608360767364502, "step": 194450 }, { "epoch": 55.19727504967357, "grad_norm": 0.06027701869606972, "learning_rate": 4.482571671870565e-05, "loss": 0.0002906423062086105, "step": 194460 }, { "epoch": 55.200113539596934, "grad_norm": 0.0756693184375763, "learning_rate": 4.482287822878229e-05, "loss": 0.0008252138271927833, "step": 194470 }, { "epoch": 55.2029520295203, "grad_norm": 0.12636911869049072, "learning_rate": 4.4820039738858926e-05, "loss": 0.001247130148112774, "step": 194480 }, { "epoch": 55.20579051944365, "grad_norm": 0.1515313684940338, "learning_rate": 4.481720124893557e-05, "loss": 0.0009822078049182893, "step": 194490 }, { "epoch": 55.208629009367016, "grad_norm": 0.07619692385196686, "learning_rate": 4.481436275901221e-05, "loss": 0.00039006806910037997, "step": 194500 }, { "epoch": 55.208629009367016, "eval_accuracy": 0.9786990525847269, "eval_loss": 0.08498641848564148, "eval_runtime": 32.911, "eval_samples_per_second": 477.864, "eval_steps_per_second": 7.475, "step": 194500 }, { "epoch": 55.21146749929038, "grad_norm": 0.22936800122261047, "learning_rate": 4.4811524269088844e-05, "loss": 0.000566130317747593, "step": 194510 }, { "epoch": 55.21430598921374, "grad_norm": 14.665034294128418, "learning_rate": 4.4808685779165485e-05, "loss": 0.014037653803825378, "step": 194520 }, { "epoch": 55.2171444791371, "grad_norm": 0.038242414593696594, "learning_rate": 4.4805847289242126e-05, "loss": 0.0020670820027589797, "step": 194530 }, { "epoch": 55.21998296906046, "grad_norm": 0.07455743104219437, "learning_rate": 4.480300879931876e-05, "loss": 0.002356105111539364, "step": 194540 }, { "epoch": 55.22282145898382, "grad_norm": 0.24001234769821167, "learning_rate": 4.480017030939541e-05, "loss": 0.0019136454910039901, "step": 194550 }, { "epoch": 55.22565994890718, "grad_norm": 0.0326654314994812, "learning_rate": 4.4797331819472044e-05, "loss": 0.00044527091085910797, "step": 194560 }, { "epoch": 55.22849843883054, "grad_norm": 18.23886489868164, "learning_rate": 4.479449332954868e-05, "loss": 0.016130602359771727, "step": 194570 }, { "epoch": 55.231336928753905, "grad_norm": 0.0644712895154953, "learning_rate": 4.479165483962532e-05, "loss": 0.0022852616384625435, "step": 194580 }, { "epoch": 55.23417541867726, "grad_norm": 0.23194275796413422, "learning_rate": 4.478881634970196e-05, "loss": 0.00044996365904808043, "step": 194590 }, { "epoch": 55.23701390860062, "grad_norm": 0.044111207127571106, "learning_rate": 4.47859778597786e-05, "loss": 0.00026917587965726855, "step": 194600 }, { "epoch": 55.239852398523986, "grad_norm": 0.7198662757873535, "learning_rate": 4.478313936985524e-05, "loss": 0.0003333861008286476, "step": 194610 }, { "epoch": 55.24269088844735, "grad_norm": 0.3087533414363861, "learning_rate": 4.478030087993188e-05, "loss": 0.00114208422601223, "step": 194620 }, { "epoch": 55.245529378370705, "grad_norm": 3.340681791305542, "learning_rate": 4.477746239000852e-05, "loss": 0.001962008699774742, "step": 194630 }, { "epoch": 55.24836786829407, "grad_norm": 0.03065573237836361, "learning_rate": 4.4774623900085155e-05, "loss": 0.00047474950551986697, "step": 194640 }, { "epoch": 55.25120635821743, "grad_norm": 0.14722421765327454, "learning_rate": 4.4771785410161796e-05, "loss": 0.0006685396656394005, "step": 194650 }, { "epoch": 55.254044848140786, "grad_norm": 0.027249690145254135, "learning_rate": 4.476894692023844e-05, "loss": 0.00047531258314847946, "step": 194660 }, { "epoch": 55.25688333806415, "grad_norm": 0.05916985869407654, "learning_rate": 4.476610843031507e-05, "loss": 0.0006445864215493202, "step": 194670 }, { "epoch": 55.25972182798751, "grad_norm": 0.8173816800117493, "learning_rate": 4.476326994039171e-05, "loss": 0.0004665181040763855, "step": 194680 }, { "epoch": 55.26256031791087, "grad_norm": 0.2880288362503052, "learning_rate": 4.4760431450468355e-05, "loss": 0.00025918837636709215, "step": 194690 }, { "epoch": 55.26539880783423, "grad_norm": 3.3000266551971436, "learning_rate": 4.475759296054499e-05, "loss": 0.0010233314707875252, "step": 194700 }, { "epoch": 55.268237297757594, "grad_norm": 0.050387244671583176, "learning_rate": 4.475475447062163e-05, "loss": 0.00035357773303985597, "step": 194710 }, { "epoch": 55.27107578768096, "grad_norm": 0.07578729838132858, "learning_rate": 4.475191598069827e-05, "loss": 0.0015855209901928901, "step": 194720 }, { "epoch": 55.27391427760431, "grad_norm": 1.8770846128463745, "learning_rate": 4.474907749077491e-05, "loss": 0.0010290032252669335, "step": 194730 }, { "epoch": 55.276752767527675, "grad_norm": 0.023023571819067, "learning_rate": 4.474623900085155e-05, "loss": 0.00593048632144928, "step": 194740 }, { "epoch": 55.27959125745104, "grad_norm": 0.14154699444770813, "learning_rate": 4.474340051092819e-05, "loss": 0.0002583129331469536, "step": 194750 }, { "epoch": 55.282429747374394, "grad_norm": 0.06609191745519638, "learning_rate": 4.474056202100483e-05, "loss": 0.0024312878027558326, "step": 194760 }, { "epoch": 55.28526823729776, "grad_norm": 0.0890822559595108, "learning_rate": 4.4737723531081465e-05, "loss": 0.00032823048532009124, "step": 194770 }, { "epoch": 55.28810672722112, "grad_norm": 0.03643076494336128, "learning_rate": 4.473488504115811e-05, "loss": 0.0003332842141389847, "step": 194780 }, { "epoch": 55.29094521714448, "grad_norm": 0.05892584100365639, "learning_rate": 4.473204655123475e-05, "loss": 0.00022799372673034667, "step": 194790 }, { "epoch": 55.29378370706784, "grad_norm": 0.06062020733952522, "learning_rate": 4.472920806131138e-05, "loss": 0.00018019899725914003, "step": 194800 }, { "epoch": 55.2966221969912, "grad_norm": 0.15940389037132263, "learning_rate": 4.4726369571388024e-05, "loss": 0.0029679324477910997, "step": 194810 }, { "epoch": 55.299460686914564, "grad_norm": 0.009689678438007832, "learning_rate": 4.4723531081464665e-05, "loss": 0.0033068276941776277, "step": 194820 }, { "epoch": 55.30229917683792, "grad_norm": 0.24689918756484985, "learning_rate": 4.47206925915413e-05, "loss": 0.0003689061850309372, "step": 194830 }, { "epoch": 55.30513766676128, "grad_norm": 1.4904077053070068, "learning_rate": 4.471785410161794e-05, "loss": 0.0010180400684475898, "step": 194840 }, { "epoch": 55.307976156684646, "grad_norm": 0.35899242758750916, "learning_rate": 4.471501561169458e-05, "loss": 0.0007432864978909493, "step": 194850 }, { "epoch": 55.310814646608, "grad_norm": 0.003990854136645794, "learning_rate": 4.471217712177122e-05, "loss": 0.0012456996366381646, "step": 194860 }, { "epoch": 55.313653136531364, "grad_norm": 0.02826351672410965, "learning_rate": 4.470933863184786e-05, "loss": 0.0005749000236392021, "step": 194870 }, { "epoch": 55.31649162645473, "grad_norm": 0.9362640380859375, "learning_rate": 4.470650014192449e-05, "loss": 0.007070305198431015, "step": 194880 }, { "epoch": 55.31933011637809, "grad_norm": 7.644920349121094, "learning_rate": 4.470366165200114e-05, "loss": 0.004437017440795899, "step": 194890 }, { "epoch": 55.322168606301446, "grad_norm": 0.018548686057329178, "learning_rate": 4.4700823162077776e-05, "loss": 0.001053914614021778, "step": 194900 }, { "epoch": 55.32500709622481, "grad_norm": 7.442314147949219, "learning_rate": 4.469798467215441e-05, "loss": 0.003334449976682663, "step": 194910 }, { "epoch": 55.32784558614817, "grad_norm": 0.04250696673989296, "learning_rate": 4.469514618223106e-05, "loss": 0.001729109138250351, "step": 194920 }, { "epoch": 55.33068407607153, "grad_norm": 0.5079478621482849, "learning_rate": 4.4692307692307693e-05, "loss": 0.0005363667383790016, "step": 194930 }, { "epoch": 55.33352256599489, "grad_norm": 1.8001205921173096, "learning_rate": 4.4689469202384335e-05, "loss": 0.009835890680551528, "step": 194940 }, { "epoch": 55.33636105591825, "grad_norm": 0.09766422212123871, "learning_rate": 4.4686630712460976e-05, "loss": 0.00041944757103919985, "step": 194950 }, { "epoch": 55.33919954584161, "grad_norm": 0.15057848393917084, "learning_rate": 4.468379222253761e-05, "loss": 0.006303573399782181, "step": 194960 }, { "epoch": 55.34203803576497, "grad_norm": 0.7030815482139587, "learning_rate": 4.468095373261425e-05, "loss": 0.006318077445030212, "step": 194970 }, { "epoch": 55.344876525688335, "grad_norm": 4.12020206451416, "learning_rate": 4.4678115242690893e-05, "loss": 0.0008585747331380844, "step": 194980 }, { "epoch": 55.3477150156117, "grad_norm": 1.4110859632492065, "learning_rate": 4.467527675276753e-05, "loss": 0.0010818565264344216, "step": 194990 }, { "epoch": 55.35055350553505, "grad_norm": 0.005748797673732042, "learning_rate": 4.467243826284417e-05, "loss": 0.0004417736083269119, "step": 195000 }, { "epoch": 55.35055350553505, "eval_accuracy": 0.9761556558784257, "eval_loss": 0.0904388576745987, "eval_runtime": 32.0171, "eval_samples_per_second": 491.207, "eval_steps_per_second": 7.683, "step": 195000 }, { "epoch": 55.353391995458416, "grad_norm": 0.058975785970687866, "learning_rate": 4.4669599772920804e-05, "loss": 0.00155792236328125, "step": 195010 }, { "epoch": 55.35623048538178, "grad_norm": 0.7122920751571655, "learning_rate": 4.466676128299745e-05, "loss": 0.008237532526254653, "step": 195020 }, { "epoch": 55.359068975305135, "grad_norm": 0.18734413385391235, "learning_rate": 4.466392279307409e-05, "loss": 0.0009437600150704384, "step": 195030 }, { "epoch": 55.3619074652285, "grad_norm": 0.44869399070739746, "learning_rate": 4.466108430315072e-05, "loss": 0.005068421736359597, "step": 195040 }, { "epoch": 55.36474595515186, "grad_norm": 1.7386337518692017, "learning_rate": 4.465824581322737e-05, "loss": 0.004939820617437363, "step": 195050 }, { "epoch": 55.36758444507522, "grad_norm": 0.0189081821590662, "learning_rate": 4.4655407323304004e-05, "loss": 0.001970767229795456, "step": 195060 }, { "epoch": 55.37042293499858, "grad_norm": 0.992856502532959, "learning_rate": 4.4652568833380646e-05, "loss": 0.0006323590874671936, "step": 195070 }, { "epoch": 55.37326142492194, "grad_norm": 0.005068126134574413, "learning_rate": 4.464973034345729e-05, "loss": 0.001990712247788906, "step": 195080 }, { "epoch": 55.376099914845305, "grad_norm": 0.04581119492650032, "learning_rate": 4.464689185353392e-05, "loss": 0.002633177861571312, "step": 195090 }, { "epoch": 55.37893840476866, "grad_norm": 0.00856122374534607, "learning_rate": 4.464405336361056e-05, "loss": 0.00046063978224992753, "step": 195100 }, { "epoch": 55.381776894692024, "grad_norm": 0.3049808144569397, "learning_rate": 4.46412148736872e-05, "loss": 0.0004952913150191307, "step": 195110 }, { "epoch": 55.38461538461539, "grad_norm": 0.052736274898052216, "learning_rate": 4.463837638376384e-05, "loss": 0.0006919488310813904, "step": 195120 }, { "epoch": 55.38745387453874, "grad_norm": 0.2375338226556778, "learning_rate": 4.463553789384048e-05, "loss": 0.0026282791048288347, "step": 195130 }, { "epoch": 55.390292364462105, "grad_norm": 0.22425179183483124, "learning_rate": 4.4632699403917115e-05, "loss": 0.0011659577488899232, "step": 195140 }, { "epoch": 55.39313085438547, "grad_norm": 0.01393402274698019, "learning_rate": 4.4629860913993756e-05, "loss": 0.0003966502845287323, "step": 195150 }, { "epoch": 55.395969344308824, "grad_norm": 0.13200625777244568, "learning_rate": 4.46270224240704e-05, "loss": 0.0005212925374507904, "step": 195160 }, { "epoch": 55.39880783423219, "grad_norm": 0.018576575443148613, "learning_rate": 4.462418393414703e-05, "loss": 0.002523546852171421, "step": 195170 }, { "epoch": 55.40164632415555, "grad_norm": 9.146350860595703, "learning_rate": 4.462134544422368e-05, "loss": 0.0023242756724357605, "step": 195180 }, { "epoch": 55.40448481407891, "grad_norm": 0.1979316622018814, "learning_rate": 4.4618506954300315e-05, "loss": 0.0006057566031813621, "step": 195190 }, { "epoch": 55.40732330400227, "grad_norm": 1.0347381830215454, "learning_rate": 4.461566846437695e-05, "loss": 0.00044256746768951417, "step": 195200 }, { "epoch": 55.41016179392563, "grad_norm": 0.09901831299066544, "learning_rate": 4.461282997445359e-05, "loss": 0.0003788786008954048, "step": 195210 }, { "epoch": 55.413000283848994, "grad_norm": 0.03397703915834427, "learning_rate": 4.460999148453023e-05, "loss": 0.0013886306434869767, "step": 195220 }, { "epoch": 55.41583877377235, "grad_norm": 0.08982089906930923, "learning_rate": 4.4607152994606874e-05, "loss": 0.002927272766828537, "step": 195230 }, { "epoch": 55.41867726369571, "grad_norm": 0.1762295663356781, "learning_rate": 4.460431450468351e-05, "loss": 0.001263965666294098, "step": 195240 }, { "epoch": 55.421515753619076, "grad_norm": 0.04802100732922554, "learning_rate": 4.460147601476015e-05, "loss": 0.0007280142977833748, "step": 195250 }, { "epoch": 55.42435424354244, "grad_norm": 0.1878032684326172, "learning_rate": 4.459863752483679e-05, "loss": 0.0025816164910793305, "step": 195260 }, { "epoch": 55.427192733465795, "grad_norm": 0.13638068735599518, "learning_rate": 4.4595799034913426e-05, "loss": 0.0004608534276485443, "step": 195270 }, { "epoch": 55.43003122338916, "grad_norm": 0.1042189747095108, "learning_rate": 4.459296054499007e-05, "loss": 0.0004773736000061035, "step": 195280 }, { "epoch": 55.43286971331252, "grad_norm": 0.12997114658355713, "learning_rate": 4.459012205506671e-05, "loss": 0.0004634244367480278, "step": 195290 }, { "epoch": 55.435708203235876, "grad_norm": 0.06837407499551773, "learning_rate": 4.458728356514334e-05, "loss": 0.0003552177920937538, "step": 195300 }, { "epoch": 55.43854669315924, "grad_norm": 0.04790256917476654, "learning_rate": 4.4584445075219984e-05, "loss": 0.0008546538650989532, "step": 195310 }, { "epoch": 55.4413851830826, "grad_norm": 0.016681192442774773, "learning_rate": 4.4581606585296626e-05, "loss": 0.00011992193758487701, "step": 195320 }, { "epoch": 55.44422367300596, "grad_norm": 0.2294277399778366, "learning_rate": 4.457876809537326e-05, "loss": 0.0011796841397881507, "step": 195330 }, { "epoch": 55.44706216292932, "grad_norm": 0.286893755197525, "learning_rate": 4.45759296054499e-05, "loss": 0.0004792273044586182, "step": 195340 }, { "epoch": 55.44990065285268, "grad_norm": 0.0750778466463089, "learning_rate": 4.457309111552654e-05, "loss": 0.0007407324388623238, "step": 195350 }, { "epoch": 55.452739142776046, "grad_norm": 0.06433068215847015, "learning_rate": 4.4570252625603184e-05, "loss": 0.002376294508576393, "step": 195360 }, { "epoch": 55.4555776326994, "grad_norm": 0.3422260880470276, "learning_rate": 4.456741413567982e-05, "loss": 0.001585122011601925, "step": 195370 }, { "epoch": 55.458416122622765, "grad_norm": 0.06209409236907959, "learning_rate": 4.456457564575646e-05, "loss": 0.0007416512817144394, "step": 195380 }, { "epoch": 55.46125461254613, "grad_norm": 0.16264092922210693, "learning_rate": 4.45617371558331e-05, "loss": 0.0009405048564076424, "step": 195390 }, { "epoch": 55.464093102469484, "grad_norm": 0.36454451084136963, "learning_rate": 4.4558898665909736e-05, "loss": 0.0013256259262561798, "step": 195400 }, { "epoch": 55.46693159239285, "grad_norm": 0.42974647879600525, "learning_rate": 4.455606017598638e-05, "loss": 0.001060732640326023, "step": 195410 }, { "epoch": 55.46977008231621, "grad_norm": 0.0345328226685524, "learning_rate": 4.455322168606302e-05, "loss": 0.0026641532778739927, "step": 195420 }, { "epoch": 55.472608572239565, "grad_norm": 0.053902532905340195, "learning_rate": 4.4550383196139654e-05, "loss": 0.0015611572191119194, "step": 195430 }, { "epoch": 55.47544706216293, "grad_norm": 0.01404232531785965, "learning_rate": 4.4547544706216295e-05, "loss": 0.012929515540599823, "step": 195440 }, { "epoch": 55.47828555208629, "grad_norm": 0.021717660129070282, "learning_rate": 4.4544706216292936e-05, "loss": 0.0013281747698783875, "step": 195450 }, { "epoch": 55.481124042009654, "grad_norm": 0.16588552296161652, "learning_rate": 4.454186772636957e-05, "loss": 0.0003172192722558975, "step": 195460 }, { "epoch": 55.48396253193301, "grad_norm": 0.036945052444934845, "learning_rate": 4.453902923644621e-05, "loss": 0.0009128941223025322, "step": 195470 }, { "epoch": 55.48680102185637, "grad_norm": 0.11791518330574036, "learning_rate": 4.4536190746522854e-05, "loss": 0.005113641545176506, "step": 195480 }, { "epoch": 55.489639511779735, "grad_norm": 0.03821057826280594, "learning_rate": 4.4533352256599495e-05, "loss": 0.0004328705370426178, "step": 195490 }, { "epoch": 55.49247800170309, "grad_norm": 0.05291134864091873, "learning_rate": 4.453051376667613e-05, "loss": 0.0021774593740701677, "step": 195500 }, { "epoch": 55.49247800170309, "eval_accuracy": 0.9793984866789598, "eval_loss": 0.0788162350654602, "eval_runtime": 33.1417, "eval_samples_per_second": 474.538, "eval_steps_per_second": 7.423, "step": 195500 }, { "epoch": 55.495316491626454, "grad_norm": 0.140462264418602, "learning_rate": 4.4527675276752764e-05, "loss": 0.0003875315189361572, "step": 195510 }, { "epoch": 55.49815498154982, "grad_norm": 0.19697502255439758, "learning_rate": 4.452483678682941e-05, "loss": 0.0008436206728219986, "step": 195520 }, { "epoch": 55.50099347147318, "grad_norm": 0.8182134032249451, "learning_rate": 4.452199829690605e-05, "loss": 0.0004978477954864502, "step": 195530 }, { "epoch": 55.503831961396536, "grad_norm": 0.10129676759243011, "learning_rate": 4.451915980698269e-05, "loss": 0.0003723142668604851, "step": 195540 }, { "epoch": 55.5066704513199, "grad_norm": 0.13134445250034332, "learning_rate": 4.451632131705933e-05, "loss": 0.0011487668380141258, "step": 195550 }, { "epoch": 55.50950894124326, "grad_norm": 0.24510473012924194, "learning_rate": 4.4513482827135964e-05, "loss": 0.00030811242759227754, "step": 195560 }, { "epoch": 55.51234743116662, "grad_norm": 0.010434734635055065, "learning_rate": 4.4510644337212606e-05, "loss": 0.00041885655373334884, "step": 195570 }, { "epoch": 55.51518592108998, "grad_norm": 0.1905076801776886, "learning_rate": 4.450780584728925e-05, "loss": 0.0018813129514455794, "step": 195580 }, { "epoch": 55.51802441101334, "grad_norm": 0.07967260479927063, "learning_rate": 4.450496735736588e-05, "loss": 0.0002949539572000504, "step": 195590 }, { "epoch": 55.5208629009367, "grad_norm": 0.45000386238098145, "learning_rate": 4.450212886744252e-05, "loss": 0.004608586803078652, "step": 195600 }, { "epoch": 55.52370139086006, "grad_norm": 0.02075824700295925, "learning_rate": 4.449929037751916e-05, "loss": 0.0011772202327847482, "step": 195610 }, { "epoch": 55.526539880783425, "grad_norm": 0.02408762089908123, "learning_rate": 4.44964518875958e-05, "loss": 0.00045991577208042144, "step": 195620 }, { "epoch": 55.52937837070679, "grad_norm": 2.340108871459961, "learning_rate": 4.449361339767244e-05, "loss": 0.0025185955688357352, "step": 195630 }, { "epoch": 55.53221686063014, "grad_norm": 1.4925473928451538, "learning_rate": 4.4490774907749075e-05, "loss": 0.0008068062365055084, "step": 195640 }, { "epoch": 55.535055350553506, "grad_norm": 0.8110709190368652, "learning_rate": 4.448793641782572e-05, "loss": 0.0019405078142881393, "step": 195650 }, { "epoch": 55.53789384047687, "grad_norm": 0.12303122133016586, "learning_rate": 4.448509792790236e-05, "loss": 0.004039919748902321, "step": 195660 }, { "epoch": 55.540732330400225, "grad_norm": 0.08504562079906464, "learning_rate": 4.448225943797899e-05, "loss": 0.0009208496659994126, "step": 195670 }, { "epoch": 55.54357082032359, "grad_norm": 0.13055868446826935, "learning_rate": 4.447942094805564e-05, "loss": 0.00845632255077362, "step": 195680 }, { "epoch": 55.54640931024695, "grad_norm": 0.4772072732448578, "learning_rate": 4.4476582458132275e-05, "loss": 0.0010243121534585953, "step": 195690 }, { "epoch": 55.549247800170306, "grad_norm": 0.03264050930738449, "learning_rate": 4.447374396820892e-05, "loss": 0.0011041749268770218, "step": 195700 }, { "epoch": 55.55208629009367, "grad_norm": 0.07846280932426453, "learning_rate": 4.447090547828555e-05, "loss": 0.003601345047354698, "step": 195710 }, { "epoch": 55.55492478001703, "grad_norm": 0.06509603559970856, "learning_rate": 4.446806698836219e-05, "loss": 0.0007311167195439338, "step": 195720 }, { "epoch": 55.557763269940395, "grad_norm": 0.31171146035194397, "learning_rate": 4.4465228498438834e-05, "loss": 0.0019745094701647758, "step": 195730 }, { "epoch": 55.56060175986375, "grad_norm": 1.0746601819992065, "learning_rate": 4.446239000851547e-05, "loss": 0.0036881938576698303, "step": 195740 }, { "epoch": 55.563440249787114, "grad_norm": 0.11764934659004211, "learning_rate": 4.445955151859211e-05, "loss": 0.0004766654223203659, "step": 195750 }, { "epoch": 55.56627873971048, "grad_norm": 0.14857521653175354, "learning_rate": 4.445671302866875e-05, "loss": 0.0025995437055826185, "step": 195760 }, { "epoch": 55.56911722963383, "grad_norm": 0.06457062065601349, "learning_rate": 4.4453874538745386e-05, "loss": 0.00040244702249765397, "step": 195770 }, { "epoch": 55.571955719557195, "grad_norm": 0.024196013808250427, "learning_rate": 4.4451036048822034e-05, "loss": 0.0012528656050562858, "step": 195780 }, { "epoch": 55.57479420948056, "grad_norm": 0.01978454180061817, "learning_rate": 4.444819755889867e-05, "loss": 0.0004911383613944054, "step": 195790 }, { "epoch": 55.577632699403914, "grad_norm": 0.086908720433712, "learning_rate": 4.44453590689753e-05, "loss": 0.0003069400787353516, "step": 195800 }, { "epoch": 55.58047118932728, "grad_norm": 0.281536340713501, "learning_rate": 4.4442520579051945e-05, "loss": 0.0014177361503243447, "step": 195810 }, { "epoch": 55.58330967925064, "grad_norm": 0.33803147077560425, "learning_rate": 4.4439682089128586e-05, "loss": 0.0014886047691106796, "step": 195820 }, { "epoch": 55.586148169174, "grad_norm": 1.5873678922653198, "learning_rate": 4.443684359920523e-05, "loss": 0.0013248478993773461, "step": 195830 }, { "epoch": 55.58898665909736, "grad_norm": 0.1931331306695938, "learning_rate": 4.443400510928186e-05, "loss": 0.0005571950227022171, "step": 195840 }, { "epoch": 55.59182514902072, "grad_norm": 0.04527309164404869, "learning_rate": 4.44311666193585e-05, "loss": 0.0014125628396868706, "step": 195850 }, { "epoch": 55.594663638944084, "grad_norm": 0.29715847969055176, "learning_rate": 4.4428328129435145e-05, "loss": 0.0007061680778861045, "step": 195860 }, { "epoch": 55.59750212886744, "grad_norm": 0.032203949987888336, "learning_rate": 4.442548963951178e-05, "loss": 0.006834743916988373, "step": 195870 }, { "epoch": 55.6003406187908, "grad_norm": 0.03830215334892273, "learning_rate": 4.442265114958842e-05, "loss": 0.0006267936900258064, "step": 195880 }, { "epoch": 55.603179108714166, "grad_norm": 0.04029150679707527, "learning_rate": 4.441981265966506e-05, "loss": 0.0007859796285629272, "step": 195890 }, { "epoch": 55.60601759863752, "grad_norm": 0.5850456357002258, "learning_rate": 4.44169741697417e-05, "loss": 0.001617724820971489, "step": 195900 }, { "epoch": 55.608856088560884, "grad_norm": 0.040380753576755524, "learning_rate": 4.441413567981834e-05, "loss": 0.0007476085796952247, "step": 195910 }, { "epoch": 55.61169457848425, "grad_norm": 0.06166137754917145, "learning_rate": 4.441129718989498e-05, "loss": 0.0021175406873226166, "step": 195920 }, { "epoch": 55.61453306840761, "grad_norm": 2.293567419052124, "learning_rate": 4.4408458699971614e-05, "loss": 0.0009004078805446625, "step": 195930 }, { "epoch": 55.617371558330966, "grad_norm": 0.12644106149673462, "learning_rate": 4.4405620210048255e-05, "loss": 0.0004668345674872398, "step": 195940 }, { "epoch": 55.62021004825433, "grad_norm": 0.05381820723414421, "learning_rate": 4.44027817201249e-05, "loss": 0.00041090063750743867, "step": 195950 }, { "epoch": 55.62304853817769, "grad_norm": 0.6979149580001831, "learning_rate": 4.439994323020154e-05, "loss": 0.0011342067271471023, "step": 195960 }, { "epoch": 55.62588702810105, "grad_norm": 0.27880677580833435, "learning_rate": 4.439710474027817e-05, "loss": 0.0010289128869771958, "step": 195970 }, { "epoch": 55.62872551802441, "grad_norm": 0.13811397552490234, "learning_rate": 4.4394266250354814e-05, "loss": 0.0014708327129483224, "step": 195980 }, { "epoch": 55.63156400794777, "grad_norm": 0.1676754653453827, "learning_rate": 4.4391427760431456e-05, "loss": 0.000462646409869194, "step": 195990 }, { "epoch": 55.634402497871136, "grad_norm": 4.822806358337402, "learning_rate": 4.438858927050809e-05, "loss": 0.0012267749756574632, "step": 196000 }, { "epoch": 55.634402497871136, "eval_accuracy": 0.9785718827494119, "eval_loss": 0.07978913933038712, "eval_runtime": 32.4524, "eval_samples_per_second": 484.617, "eval_steps_per_second": 7.58, "step": 196000 }, { "epoch": 55.63724098779449, "grad_norm": 2.371077060699463, "learning_rate": 4.438575078058473e-05, "loss": 0.0020863503217697144, "step": 196010 }, { "epoch": 55.640079477717855, "grad_norm": 1.2471134662628174, "learning_rate": 4.438291229066137e-05, "loss": 0.0003457244485616684, "step": 196020 }, { "epoch": 55.64291796764122, "grad_norm": 0.007532291579991579, "learning_rate": 4.438007380073801e-05, "loss": 0.0007181014865636825, "step": 196030 }, { "epoch": 55.64575645756457, "grad_norm": 0.29052087664604187, "learning_rate": 4.437723531081465e-05, "loss": 0.01364491879940033, "step": 196040 }, { "epoch": 55.648594947487936, "grad_norm": 0.010884094052016735, "learning_rate": 4.437439682089129e-05, "loss": 0.00047542229294776915, "step": 196050 }, { "epoch": 55.6514334374113, "grad_norm": 0.3816167116165161, "learning_rate": 4.4371558330967925e-05, "loss": 0.0005589909851551056, "step": 196060 }, { "epoch": 55.654271927334655, "grad_norm": 0.14847809076309204, "learning_rate": 4.4368719841044566e-05, "loss": 0.0003341909497976303, "step": 196070 }, { "epoch": 55.65711041725802, "grad_norm": 3.6622815132141113, "learning_rate": 4.436588135112121e-05, "loss": 0.0018750831484794616, "step": 196080 }, { "epoch": 55.65994890718138, "grad_norm": 0.8110861778259277, "learning_rate": 4.436304286119784e-05, "loss": 0.00030072927474975584, "step": 196090 }, { "epoch": 55.66278739710474, "grad_norm": 0.14410029351711273, "learning_rate": 4.4360204371274484e-05, "loss": 0.0006238007918000221, "step": 196100 }, { "epoch": 55.6656258870281, "grad_norm": 11.876084327697754, "learning_rate": 4.435736588135112e-05, "loss": 0.003476811945438385, "step": 196110 }, { "epoch": 55.66846437695146, "grad_norm": 0.4048847258090973, "learning_rate": 4.4354527391427766e-05, "loss": 0.001298365369439125, "step": 196120 }, { "epoch": 55.671302866874825, "grad_norm": 0.8168792724609375, "learning_rate": 4.43516889015044e-05, "loss": 0.0018630454316735268, "step": 196130 }, { "epoch": 55.67414135679818, "grad_norm": 0.10523020476102829, "learning_rate": 4.4348850411581035e-05, "loss": 0.000249406136572361, "step": 196140 }, { "epoch": 55.676979846721544, "grad_norm": 0.2033424824476242, "learning_rate": 4.4346011921657684e-05, "loss": 0.0004924353212118149, "step": 196150 }, { "epoch": 55.67981833664491, "grad_norm": 0.08681827038526535, "learning_rate": 4.434317343173432e-05, "loss": 0.0001948842778801918, "step": 196160 }, { "epoch": 55.68265682656826, "grad_norm": 0.06687960773706436, "learning_rate": 4.434033494181096e-05, "loss": 0.0017615793272852897, "step": 196170 }, { "epoch": 55.685495316491625, "grad_norm": 0.032955352216959, "learning_rate": 4.43374964518876e-05, "loss": 0.0008970245718955994, "step": 196180 }, { "epoch": 55.68833380641499, "grad_norm": 0.0464039146900177, "learning_rate": 4.4334657961964236e-05, "loss": 0.0003467181697487831, "step": 196190 }, { "epoch": 55.69117229633835, "grad_norm": 0.05231625959277153, "learning_rate": 4.433181947204088e-05, "loss": 0.0006382651627063751, "step": 196200 }, { "epoch": 55.69401078626171, "grad_norm": 1.9588552713394165, "learning_rate": 4.432898098211752e-05, "loss": 0.00047942418605089187, "step": 196210 }, { "epoch": 55.69684927618507, "grad_norm": 4.796819686889648, "learning_rate": 4.432614249219415e-05, "loss": 0.0009731125086545944, "step": 196220 }, { "epoch": 55.69968776610843, "grad_norm": 0.08818718045949936, "learning_rate": 4.4323304002270794e-05, "loss": 0.00042299237102270124, "step": 196230 }, { "epoch": 55.70252625603179, "grad_norm": 0.016414295881986618, "learning_rate": 4.432046551234743e-05, "loss": 0.0005264440551400185, "step": 196240 }, { "epoch": 55.70536474595515, "grad_norm": 0.019471850246191025, "learning_rate": 4.431762702242408e-05, "loss": 0.00061210747808218, "step": 196250 }, { "epoch": 55.708203235878514, "grad_norm": 0.0701521560549736, "learning_rate": 4.431478853250071e-05, "loss": 0.00022021960467100143, "step": 196260 }, { "epoch": 55.71104172580187, "grad_norm": 0.20904599130153656, "learning_rate": 4.4311950042577346e-05, "loss": 0.00030242409557104113, "step": 196270 }, { "epoch": 55.71388021572523, "grad_norm": 0.01986934058368206, "learning_rate": 4.4309111552653994e-05, "loss": 0.00042535196989774705, "step": 196280 }, { "epoch": 55.716718705648596, "grad_norm": 0.0640273466706276, "learning_rate": 4.430627306273063e-05, "loss": 0.00015320442616939545, "step": 196290 }, { "epoch": 55.71955719557196, "grad_norm": 0.022599680349230766, "learning_rate": 4.430343457280727e-05, "loss": 0.00022713989019393921, "step": 196300 }, { "epoch": 55.722395685495314, "grad_norm": 1.0642094612121582, "learning_rate": 4.430059608288391e-05, "loss": 0.00043539702892303467, "step": 196310 }, { "epoch": 55.72523417541868, "grad_norm": 0.016858529299497604, "learning_rate": 4.4297757592960546e-05, "loss": 0.003183409571647644, "step": 196320 }, { "epoch": 55.72807266534204, "grad_norm": 0.11536920070648193, "learning_rate": 4.429491910303719e-05, "loss": 0.0002772163599729538, "step": 196330 }, { "epoch": 55.730911155265396, "grad_norm": 0.1606159508228302, "learning_rate": 4.429208061311382e-05, "loss": 0.00040016863495111464, "step": 196340 }, { "epoch": 55.73374964518876, "grad_norm": 0.022690167650580406, "learning_rate": 4.4289242123190464e-05, "loss": 0.0018143236637115478, "step": 196350 }, { "epoch": 55.73658813511212, "grad_norm": 0.055691879242658615, "learning_rate": 4.4286403633267105e-05, "loss": 0.00038042869418859483, "step": 196360 }, { "epoch": 55.739426625035485, "grad_norm": 0.442757248878479, "learning_rate": 4.428356514334374e-05, "loss": 0.00044394098222255707, "step": 196370 }, { "epoch": 55.74226511495884, "grad_norm": 0.02711881510913372, "learning_rate": 4.428072665342039e-05, "loss": 0.00020918566733598709, "step": 196380 }, { "epoch": 55.7451036048822, "grad_norm": 0.06608392298221588, "learning_rate": 4.427788816349702e-05, "loss": 0.0002472247928380966, "step": 196390 }, { "epoch": 55.747942094805566, "grad_norm": 0.13745005428791046, "learning_rate": 4.427504967357366e-05, "loss": 0.00017805453389883042, "step": 196400 }, { "epoch": 55.75078058472892, "grad_norm": 0.20907828211784363, "learning_rate": 4.4272211183650305e-05, "loss": 0.0005539601668715477, "step": 196410 }, { "epoch": 55.753619074652285, "grad_norm": 0.2176695317029953, "learning_rate": 4.426937269372694e-05, "loss": 0.0008491484448313713, "step": 196420 }, { "epoch": 55.75645756457565, "grad_norm": 0.23949824273586273, "learning_rate": 4.426653420380358e-05, "loss": 0.0002777740359306335, "step": 196430 }, { "epoch": 55.759296054499, "grad_norm": 0.015139023773372173, "learning_rate": 4.4263695713880216e-05, "loss": 0.0005654899403452873, "step": 196440 }, { "epoch": 55.762134544422366, "grad_norm": 0.24645769596099854, "learning_rate": 4.426085722395686e-05, "loss": 0.00032842159271240234, "step": 196450 }, { "epoch": 55.76497303434573, "grad_norm": 0.09163224697113037, "learning_rate": 4.42580187340335e-05, "loss": 0.002380048669874668, "step": 196460 }, { "epoch": 55.76781152426909, "grad_norm": 10.132923126220703, "learning_rate": 4.425518024411013e-05, "loss": 0.0020694958046078684, "step": 196470 }, { "epoch": 55.77065001419245, "grad_norm": 0.006881595589220524, "learning_rate": 4.4252341754186774e-05, "loss": 0.008043007552623748, "step": 196480 }, { "epoch": 55.77348850411581, "grad_norm": 0.03419148549437523, "learning_rate": 4.4249503264263416e-05, "loss": 0.001072332076728344, "step": 196490 }, { "epoch": 55.776326994039174, "grad_norm": 4.8492302894592285, "learning_rate": 4.424666477434005e-05, "loss": 0.001530216820538044, "step": 196500 }, { "epoch": 55.776326994039174, "eval_accuracy": 0.9759013162077955, "eval_loss": 0.08591056615114212, "eval_runtime": 32.8386, "eval_samples_per_second": 478.918, "eval_steps_per_second": 7.491, "step": 196500 }, { "epoch": 55.77916548396253, "grad_norm": 0.02104274183511734, "learning_rate": 4.424382628441669e-05, "loss": 0.00043503046035766604, "step": 196510 }, { "epoch": 55.78200397388589, "grad_norm": 0.1139107421040535, "learning_rate": 4.424098779449333e-05, "loss": 0.0008360268548130989, "step": 196520 }, { "epoch": 55.784842463809255, "grad_norm": 0.03754192590713501, "learning_rate": 4.423814930456997e-05, "loss": 0.0023066187277436256, "step": 196530 }, { "epoch": 55.78768095373261, "grad_norm": 0.013354557566344738, "learning_rate": 4.423531081464661e-05, "loss": 0.0019367378205060958, "step": 196540 }, { "epoch": 55.790519443655974, "grad_norm": 0.06813370436429977, "learning_rate": 4.423247232472325e-05, "loss": 0.0004153234884142876, "step": 196550 }, { "epoch": 55.79335793357934, "grad_norm": 0.11764467507600784, "learning_rate": 4.4229633834799885e-05, "loss": 0.0009926460683345796, "step": 196560 }, { "epoch": 55.7961964235027, "grad_norm": 0.02853783220052719, "learning_rate": 4.4226795344876526e-05, "loss": 0.0013534115627408029, "step": 196570 }, { "epoch": 55.799034913426055, "grad_norm": 0.03399305418133736, "learning_rate": 4.422395685495317e-05, "loss": 0.0002983277663588524, "step": 196580 }, { "epoch": 55.80187340334942, "grad_norm": 0.12297548353672028, "learning_rate": 4.422111836502981e-05, "loss": 0.0005769422277808189, "step": 196590 }, { "epoch": 55.80471189327278, "grad_norm": 0.01821609027683735, "learning_rate": 4.4218279875106444e-05, "loss": 0.00021558105945587158, "step": 196600 }, { "epoch": 55.80755038319614, "grad_norm": 0.030883856117725372, "learning_rate": 4.4215441385183085e-05, "loss": 0.00024609584361314776, "step": 196610 }, { "epoch": 55.8103888731195, "grad_norm": 0.024584682658314705, "learning_rate": 4.4212602895259727e-05, "loss": 0.0008576789870858193, "step": 196620 }, { "epoch": 55.81322736304286, "grad_norm": 0.09661012887954712, "learning_rate": 4.420976440533636e-05, "loss": 0.0009856108576059342, "step": 196630 }, { "epoch": 55.81606585296622, "grad_norm": 0.4234319031238556, "learning_rate": 4.4206925915413e-05, "loss": 0.00028709862381219863, "step": 196640 }, { "epoch": 55.81890434288958, "grad_norm": 0.14795376360416412, "learning_rate": 4.4204087425489644e-05, "loss": 0.0003234105184674263, "step": 196650 }, { "epoch": 55.821742832812944, "grad_norm": 0.2211301028728485, "learning_rate": 4.420124893556628e-05, "loss": 0.00029030777513980863, "step": 196660 }, { "epoch": 55.82458132273631, "grad_norm": 0.013068323023617268, "learning_rate": 4.419841044564292e-05, "loss": 0.00011935140937566758, "step": 196670 }, { "epoch": 55.82741981265966, "grad_norm": 0.15816088020801544, "learning_rate": 4.419557195571956e-05, "loss": 0.00017476733773946762, "step": 196680 }, { "epoch": 55.830258302583026, "grad_norm": 0.025062914937734604, "learning_rate": 4.4192733465796196e-05, "loss": 0.0009407781064510346, "step": 196690 }, { "epoch": 55.83309679250639, "grad_norm": 0.0777682363986969, "learning_rate": 4.418989497587284e-05, "loss": 0.0006402276456356048, "step": 196700 }, { "epoch": 55.835935282429745, "grad_norm": 0.04425423592329025, "learning_rate": 4.418705648594948e-05, "loss": 0.0003429219126701355, "step": 196710 }, { "epoch": 55.83877377235311, "grad_norm": 0.12589485943317413, "learning_rate": 4.418421799602612e-05, "loss": 0.000830291211605072, "step": 196720 }, { "epoch": 55.84161226227647, "grad_norm": 0.8923065066337585, "learning_rate": 4.4181379506102755e-05, "loss": 0.010044597089290619, "step": 196730 }, { "epoch": 55.84445075219983, "grad_norm": 0.05249821022152901, "learning_rate": 4.417854101617939e-05, "loss": 0.0013560689985752106, "step": 196740 }, { "epoch": 55.84728924212319, "grad_norm": 0.2111930549144745, "learning_rate": 4.417570252625604e-05, "loss": 0.0028815098106861115, "step": 196750 }, { "epoch": 55.85012773204655, "grad_norm": 0.699023962020874, "learning_rate": 4.417286403633267e-05, "loss": 0.0004907786846160888, "step": 196760 }, { "epoch": 55.852966221969915, "grad_norm": 0.30411437153816223, "learning_rate": 4.417002554640931e-05, "loss": 0.0019480813294649123, "step": 196770 }, { "epoch": 55.85580471189327, "grad_norm": 0.07168100029230118, "learning_rate": 4.4167187056485955e-05, "loss": 0.00045349579304456713, "step": 196780 }, { "epoch": 55.85864320181663, "grad_norm": 1.0561796426773071, "learning_rate": 4.416434856656259e-05, "loss": 0.0006963502615690232, "step": 196790 }, { "epoch": 55.861481691739996, "grad_norm": 0.014141416177153587, "learning_rate": 4.416151007663923e-05, "loss": 0.00199742317199707, "step": 196800 }, { "epoch": 55.86432018166335, "grad_norm": 0.04862400144338608, "learning_rate": 4.415867158671587e-05, "loss": 0.008266513794660568, "step": 196810 }, { "epoch": 55.867158671586715, "grad_norm": 0.6778897643089294, "learning_rate": 4.415583309679251e-05, "loss": 0.0005776094272732735, "step": 196820 }, { "epoch": 55.86999716151008, "grad_norm": 0.06471648812294006, "learning_rate": 4.415299460686915e-05, "loss": 0.0004810810089111328, "step": 196830 }, { "epoch": 55.87283565143344, "grad_norm": 0.0660003051161766, "learning_rate": 4.415015611694578e-05, "loss": 0.0006744973361492157, "step": 196840 }, { "epoch": 55.8756741413568, "grad_norm": 0.007100454065948725, "learning_rate": 4.414731762702243e-05, "loss": 0.00044870078563690184, "step": 196850 }, { "epoch": 55.87851263128016, "grad_norm": 8.078669548034668, "learning_rate": 4.4144479137099065e-05, "loss": 0.003866002708673477, "step": 196860 }, { "epoch": 55.88135112120352, "grad_norm": 0.08319126069545746, "learning_rate": 4.41416406471757e-05, "loss": 0.0008420303463935852, "step": 196870 }, { "epoch": 55.88418961112688, "grad_norm": 0.6044951677322388, "learning_rate": 4.413880215725235e-05, "loss": 0.0011138007044792174, "step": 196880 }, { "epoch": 55.88702810105024, "grad_norm": 0.2160164713859558, "learning_rate": 4.413596366732898e-05, "loss": 0.0006604541093111038, "step": 196890 }, { "epoch": 55.889866590973604, "grad_norm": 0.16837632656097412, "learning_rate": 4.4133125177405624e-05, "loss": 0.00035883933305740355, "step": 196900 }, { "epoch": 55.89270508089696, "grad_norm": 0.05623887851834297, "learning_rate": 4.4130286687482265e-05, "loss": 0.00036455150693655015, "step": 196910 }, { "epoch": 55.89554357082032, "grad_norm": 0.03406241536140442, "learning_rate": 4.41274481975589e-05, "loss": 0.0021186094731092454, "step": 196920 }, { "epoch": 55.898382060743685, "grad_norm": 0.0315786749124527, "learning_rate": 4.412460970763554e-05, "loss": 0.0005835970863699913, "step": 196930 }, { "epoch": 55.90122055066705, "grad_norm": 0.01840806193649769, "learning_rate": 4.4121771217712176e-05, "loss": 0.00031728055328130724, "step": 196940 }, { "epoch": 55.904059040590404, "grad_norm": 0.030644332990050316, "learning_rate": 4.411893272778882e-05, "loss": 0.0006682487204670906, "step": 196950 }, { "epoch": 55.90689753051377, "grad_norm": 0.053842999041080475, "learning_rate": 4.411609423786546e-05, "loss": 0.0009479392319917679, "step": 196960 }, { "epoch": 55.90973602043713, "grad_norm": 0.37311267852783203, "learning_rate": 4.411325574794209e-05, "loss": 0.0005013581365346908, "step": 196970 }, { "epoch": 55.912574510360486, "grad_norm": 0.2571651339530945, "learning_rate": 4.4110417258018735e-05, "loss": 0.0016391843557357789, "step": 196980 }, { "epoch": 55.91541300028385, "grad_norm": 3.8961353302001953, "learning_rate": 4.4107578768095376e-05, "loss": 0.003627689927816391, "step": 196990 }, { "epoch": 55.91825149020721, "grad_norm": 1.1127028465270996, "learning_rate": 4.410474027817201e-05, "loss": 0.0006626248359680176, "step": 197000 }, { "epoch": 55.91825149020721, "eval_accuracy": 0.9755198067018503, "eval_loss": 0.09028699994087219, "eval_runtime": 32.8866, "eval_samples_per_second": 478.22, "eval_steps_per_second": 7.48, "step": 197000 }, { "epoch": 55.92108998013057, "grad_norm": 0.07078777253627777, "learning_rate": 4.410190178824866e-05, "loss": 0.002096804790198803, "step": 197010 }, { "epoch": 55.92392847005393, "grad_norm": 0.029537471011281013, "learning_rate": 4.4099063298325293e-05, "loss": 0.000508381798863411, "step": 197020 }, { "epoch": 55.92676695997729, "grad_norm": 1.0426149368286133, "learning_rate": 4.409622480840193e-05, "loss": 0.001748926192522049, "step": 197030 }, { "epoch": 55.929605449900656, "grad_norm": 3.2299537658691406, "learning_rate": 4.409338631847857e-05, "loss": 0.004475123807787895, "step": 197040 }, { "epoch": 55.93244393982401, "grad_norm": 0.4356624186038971, "learning_rate": 4.409054782855521e-05, "loss": 0.002441550977528095, "step": 197050 }, { "epoch": 55.935282429747375, "grad_norm": 1.0316020250320435, "learning_rate": 4.408770933863185e-05, "loss": 0.002776379510760307, "step": 197060 }, { "epoch": 55.93812091967074, "grad_norm": 0.03970561549067497, "learning_rate": 4.408487084870849e-05, "loss": 0.0020995175465941427, "step": 197070 }, { "epoch": 55.94095940959409, "grad_norm": 5.274291515350342, "learning_rate": 4.408203235878513e-05, "loss": 0.0029045039787888526, "step": 197080 }, { "epoch": 55.943797899517456, "grad_norm": 1.0549286603927612, "learning_rate": 4.407919386886177e-05, "loss": 0.0016000311821699142, "step": 197090 }, { "epoch": 55.94663638944082, "grad_norm": 1.7141069173812866, "learning_rate": 4.4076355378938404e-05, "loss": 0.001901523768901825, "step": 197100 }, { "epoch": 55.949474879364175, "grad_norm": 0.2701321542263031, "learning_rate": 4.4073516889015046e-05, "loss": 0.0027799053117632867, "step": 197110 }, { "epoch": 55.95231336928754, "grad_norm": 0.32178449630737305, "learning_rate": 4.407067839909169e-05, "loss": 0.009899512678384782, "step": 197120 }, { "epoch": 55.9551518592109, "grad_norm": 0.05778244882822037, "learning_rate": 4.406783990916832e-05, "loss": 0.001419794373214245, "step": 197130 }, { "epoch": 55.95799034913426, "grad_norm": 0.06016255170106888, "learning_rate": 4.406500141924496e-05, "loss": 0.0010527152568101882, "step": 197140 }, { "epoch": 55.96082883905762, "grad_norm": 0.681505560874939, "learning_rate": 4.4062162929321604e-05, "loss": 0.0016550857573747635, "step": 197150 }, { "epoch": 55.96366732898098, "grad_norm": 0.17727383971214294, "learning_rate": 4.405932443939824e-05, "loss": 0.008316299319267273, "step": 197160 }, { "epoch": 55.966505818904345, "grad_norm": 0.15485012531280518, "learning_rate": 4.405648594947488e-05, "loss": 0.0011253084987401963, "step": 197170 }, { "epoch": 55.9693443088277, "grad_norm": 0.037993598729372025, "learning_rate": 4.405364745955152e-05, "loss": 0.0003286227583885193, "step": 197180 }, { "epoch": 55.972182798751064, "grad_norm": 0.0142594575881958, "learning_rate": 4.405080896962816e-05, "loss": 0.0006614217534661293, "step": 197190 }, { "epoch": 55.97502128867443, "grad_norm": 0.18776586651802063, "learning_rate": 4.40479704797048e-05, "loss": 0.00041862428188323977, "step": 197200 }, { "epoch": 55.97785977859779, "grad_norm": 0.11794696003198624, "learning_rate": 4.404513198978144e-05, "loss": 0.0006279835477471351, "step": 197210 }, { "epoch": 55.980698268521145, "grad_norm": 0.014981154352426529, "learning_rate": 4.404229349985808e-05, "loss": 0.0003104716539382935, "step": 197220 }, { "epoch": 55.98353675844451, "grad_norm": 0.04261564463376999, "learning_rate": 4.4039455009934715e-05, "loss": 0.00048018284142017367, "step": 197230 }, { "epoch": 55.98637524836787, "grad_norm": 0.13542304933071136, "learning_rate": 4.4036616520011356e-05, "loss": 0.00023575909435749053, "step": 197240 }, { "epoch": 55.98921373829123, "grad_norm": 0.017865128815174103, "learning_rate": 4.4033778030088e-05, "loss": 0.00027904212474823, "step": 197250 }, { "epoch": 55.99205222821459, "grad_norm": 0.04700678586959839, "learning_rate": 4.403093954016463e-05, "loss": 0.0004254782572388649, "step": 197260 }, { "epoch": 55.99489071813795, "grad_norm": 0.3910329043865204, "learning_rate": 4.4028101050241274e-05, "loss": 0.0003100203350186348, "step": 197270 }, { "epoch": 55.99772920806131, "grad_norm": 0.23227888345718384, "learning_rate": 4.4025262560317915e-05, "loss": 0.00044828802347183225, "step": 197280 }, { "epoch": 56.00056769798467, "grad_norm": 0.037090808153152466, "learning_rate": 4.402242407039455e-05, "loss": 0.00034112222492694856, "step": 197290 }, { "epoch": 56.003406187908034, "grad_norm": 0.14840741455554962, "learning_rate": 4.401958558047119e-05, "loss": 0.00019460096955299379, "step": 197300 }, { "epoch": 56.0062446778314, "grad_norm": 0.0092390775680542, "learning_rate": 4.401674709054783e-05, "loss": 0.0005055587738752365, "step": 197310 }, { "epoch": 56.00908316775475, "grad_norm": 0.012530405074357986, "learning_rate": 4.4013908600624474e-05, "loss": 0.00013594739139080047, "step": 197320 }, { "epoch": 56.011921657678116, "grad_norm": 0.047808460891246796, "learning_rate": 4.401107011070111e-05, "loss": 0.000666525773704052, "step": 197330 }, { "epoch": 56.01476014760148, "grad_norm": 0.041439563035964966, "learning_rate": 4.400823162077774e-05, "loss": 0.0018152600154280663, "step": 197340 }, { "epoch": 56.017598637524834, "grad_norm": 0.159916952252388, "learning_rate": 4.400539313085439e-05, "loss": 0.0005783768370747566, "step": 197350 }, { "epoch": 56.0204371274482, "grad_norm": 0.17146021127700806, "learning_rate": 4.4002554640931026e-05, "loss": 0.0037273682653903963, "step": 197360 }, { "epoch": 56.02327561737156, "grad_norm": 0.2154400646686554, "learning_rate": 4.399971615100767e-05, "loss": 0.008866132795810699, "step": 197370 }, { "epoch": 56.026114107294916, "grad_norm": 0.06461193412542343, "learning_rate": 4.399687766108431e-05, "loss": 0.0009569931775331497, "step": 197380 }, { "epoch": 56.02895259721828, "grad_norm": 0.09547378122806549, "learning_rate": 4.399403917116094e-05, "loss": 0.011964388936758042, "step": 197390 }, { "epoch": 56.03179108714164, "grad_norm": 0.0626763328909874, "learning_rate": 4.3991200681237584e-05, "loss": 0.0013785533607006072, "step": 197400 }, { "epoch": 56.034629577065004, "grad_norm": 0.07333990186452866, "learning_rate": 4.3988362191314226e-05, "loss": 0.00428132638335228, "step": 197410 }, { "epoch": 56.03746806698836, "grad_norm": 0.014438900165259838, "learning_rate": 4.398552370139086e-05, "loss": 0.0009747935459017753, "step": 197420 }, { "epoch": 56.04030655691172, "grad_norm": 0.04271167516708374, "learning_rate": 4.39826852114675e-05, "loss": 0.00028040036559104917, "step": 197430 }, { "epoch": 56.043145046835086, "grad_norm": 0.06776389479637146, "learning_rate": 4.3979846721544136e-05, "loss": 0.000681416317820549, "step": 197440 }, { "epoch": 56.04598353675844, "grad_norm": 0.45363032817840576, "learning_rate": 4.397700823162078e-05, "loss": 0.0002904154360294342, "step": 197450 }, { "epoch": 56.048822026681805, "grad_norm": 0.1014968752861023, "learning_rate": 4.397416974169742e-05, "loss": 0.00024654120206832884, "step": 197460 }, { "epoch": 56.05166051660517, "grad_norm": 0.08723069727420807, "learning_rate": 4.3971331251774054e-05, "loss": 0.0007890606299042702, "step": 197470 }, { "epoch": 56.05449900652852, "grad_norm": 0.01609191484749317, "learning_rate": 4.39684927618507e-05, "loss": 0.0005990006029605865, "step": 197480 }, { "epoch": 56.057337496451886, "grad_norm": 0.2650815546512604, "learning_rate": 4.3965654271927336e-05, "loss": 0.00029516369104385374, "step": 197490 }, { "epoch": 56.06017598637525, "grad_norm": 0.007899067364633083, "learning_rate": 4.396281578200397e-05, "loss": 0.0005149470642209053, "step": 197500 }, { "epoch": 56.06017598637525, "eval_accuracy": 0.9790805620906721, "eval_loss": 0.07877887040376663, "eval_runtime": 33.0609, "eval_samples_per_second": 475.698, "eval_steps_per_second": 7.441, "step": 197500 }, { "epoch": 56.06301447629861, "grad_norm": 0.07078234851360321, "learning_rate": 4.395997729208062e-05, "loss": 0.00019044410437345504, "step": 197510 }, { "epoch": 56.06585296622197, "grad_norm": 0.7675188183784485, "learning_rate": 4.3957138802157254e-05, "loss": 0.0002797111868858337, "step": 197520 }, { "epoch": 56.06869145614533, "grad_norm": 0.04986502602696419, "learning_rate": 4.3954300312233895e-05, "loss": 0.00024943500757217406, "step": 197530 }, { "epoch": 56.071529946068694, "grad_norm": 0.07227253913879395, "learning_rate": 4.3951461822310537e-05, "loss": 0.0005524942651391029, "step": 197540 }, { "epoch": 56.07436843599205, "grad_norm": 0.0565667487680912, "learning_rate": 4.394862333238717e-05, "loss": 0.0002444203943014145, "step": 197550 }, { "epoch": 56.07720692591541, "grad_norm": 2.75298810005188, "learning_rate": 4.394578484246381e-05, "loss": 0.0004672294482588768, "step": 197560 }, { "epoch": 56.080045415838775, "grad_norm": 0.009504813700914383, "learning_rate": 4.394294635254045e-05, "loss": 0.00012306235730648042, "step": 197570 }, { "epoch": 56.08288390576214, "grad_norm": 0.05454844981431961, "learning_rate": 4.394010786261709e-05, "loss": 0.0011475438252091408, "step": 197580 }, { "epoch": 56.085722395685494, "grad_norm": 0.00970033835619688, "learning_rate": 4.393726937269373e-05, "loss": 0.0003916572779417038, "step": 197590 }, { "epoch": 56.08856088560886, "grad_norm": 0.06024151295423508, "learning_rate": 4.3934430882770364e-05, "loss": 0.000473402813076973, "step": 197600 }, { "epoch": 56.09139937553222, "grad_norm": 0.09335778653621674, "learning_rate": 4.393159239284701e-05, "loss": 0.0005736978724598885, "step": 197610 }, { "epoch": 56.094237865455575, "grad_norm": 0.512845516204834, "learning_rate": 4.392875390292365e-05, "loss": 0.0012324657291173935, "step": 197620 }, { "epoch": 56.09707635537894, "grad_norm": 0.6239861249923706, "learning_rate": 4.392591541300028e-05, "loss": 0.009652135521173477, "step": 197630 }, { "epoch": 56.0999148453023, "grad_norm": 0.030514217913150787, "learning_rate": 4.392307692307693e-05, "loss": 0.0006255961954593659, "step": 197640 }, { "epoch": 56.10275333522566, "grad_norm": 3.043515682220459, "learning_rate": 4.3920238433153565e-05, "loss": 0.000696626678109169, "step": 197650 }, { "epoch": 56.10559182514902, "grad_norm": 0.016939260065555573, "learning_rate": 4.3917399943230206e-05, "loss": 0.0012533944100141525, "step": 197660 }, { "epoch": 56.10843031507238, "grad_norm": 0.34762042760849, "learning_rate": 4.391456145330684e-05, "loss": 0.0004999421536922455, "step": 197670 }, { "epoch": 56.111268804995746, "grad_norm": 0.03120344877243042, "learning_rate": 4.391172296338348e-05, "loss": 0.0007635621353983879, "step": 197680 }, { "epoch": 56.1141072949191, "grad_norm": 0.06392840296030045, "learning_rate": 4.390888447346012e-05, "loss": 0.005988557636737823, "step": 197690 }, { "epoch": 56.116945784842464, "grad_norm": 0.03505518659949303, "learning_rate": 4.390604598353676e-05, "loss": 0.0008239742368459701, "step": 197700 }, { "epoch": 56.11978427476583, "grad_norm": 0.035669561475515366, "learning_rate": 4.39032074936134e-05, "loss": 0.0005952177569270134, "step": 197710 }, { "epoch": 56.12262276468918, "grad_norm": 0.47885778546333313, "learning_rate": 4.390036900369004e-05, "loss": 0.0005559653043746948, "step": 197720 }, { "epoch": 56.125461254612546, "grad_norm": 0.1748822182416916, "learning_rate": 4.3897530513766675e-05, "loss": 0.0010783966630697251, "step": 197730 }, { "epoch": 56.12829974453591, "grad_norm": 0.20833677053451538, "learning_rate": 4.389469202384332e-05, "loss": 0.001283513940870762, "step": 197740 }, { "epoch": 56.131138234459264, "grad_norm": 4.170480251312256, "learning_rate": 4.389185353391996e-05, "loss": 0.0017899546772241592, "step": 197750 }, { "epoch": 56.13397672438263, "grad_norm": 4.9609832763671875, "learning_rate": 4.388901504399659e-05, "loss": 0.008066914975643158, "step": 197760 }, { "epoch": 56.13681521430599, "grad_norm": 0.1354832798242569, "learning_rate": 4.3886176554073234e-05, "loss": 0.0026313146576285363, "step": 197770 }, { "epoch": 56.13965370422935, "grad_norm": 2.690324306488037, "learning_rate": 4.3883338064149875e-05, "loss": 0.0008557170629501342, "step": 197780 }, { "epoch": 56.14249219415271, "grad_norm": 0.33090153336524963, "learning_rate": 4.388049957422652e-05, "loss": 0.0046662587672472, "step": 197790 }, { "epoch": 56.14533068407607, "grad_norm": 1.0361905097961426, "learning_rate": 4.387766108430315e-05, "loss": 0.000522649846971035, "step": 197800 }, { "epoch": 56.148169173999435, "grad_norm": 0.07533556967973709, "learning_rate": 4.387482259437979e-05, "loss": 0.004646455124020577, "step": 197810 }, { "epoch": 56.15100766392279, "grad_norm": 1.1133232116699219, "learning_rate": 4.3871984104456434e-05, "loss": 0.00034068115055561064, "step": 197820 }, { "epoch": 56.15384615384615, "grad_norm": 0.051488231867551804, "learning_rate": 4.386914561453307e-05, "loss": 0.00017730072140693666, "step": 197830 }, { "epoch": 56.156684643769516, "grad_norm": 0.01392693817615509, "learning_rate": 4.386630712460971e-05, "loss": 0.0013605743646621705, "step": 197840 }, { "epoch": 56.15952313369287, "grad_norm": 0.18626351654529572, "learning_rate": 4.386346863468635e-05, "loss": 0.00041651967912912367, "step": 197850 }, { "epoch": 56.162361623616235, "grad_norm": 0.3668014705181122, "learning_rate": 4.3860630144762986e-05, "loss": 0.004006946459412575, "step": 197860 }, { "epoch": 56.1652001135396, "grad_norm": 4.950279235839844, "learning_rate": 4.385779165483963e-05, "loss": 0.0015882330015301704, "step": 197870 }, { "epoch": 56.16803860346296, "grad_norm": 1.7250604629516602, "learning_rate": 4.385495316491627e-05, "loss": 0.005562891811132431, "step": 197880 }, { "epoch": 56.170877093386316, "grad_norm": 0.20169413089752197, "learning_rate": 4.38521146749929e-05, "loss": 0.0005310537293553352, "step": 197890 }, { "epoch": 56.17371558330968, "grad_norm": 2.4441425800323486, "learning_rate": 4.3849276185069545e-05, "loss": 0.008076748251914978, "step": 197900 }, { "epoch": 56.17655407323304, "grad_norm": 1.2797824144363403, "learning_rate": 4.3846437695146186e-05, "loss": 0.009489230811595917, "step": 197910 }, { "epoch": 56.1793925631564, "grad_norm": 1.0756442546844482, "learning_rate": 4.384359920522282e-05, "loss": 0.019038066267967224, "step": 197920 }, { "epoch": 56.18223105307976, "grad_norm": 2.0704245567321777, "learning_rate": 4.384076071529946e-05, "loss": 0.0006803514435887337, "step": 197930 }, { "epoch": 56.185069543003124, "grad_norm": 1.1507623195648193, "learning_rate": 4.3837922225376103e-05, "loss": 0.005391133576631546, "step": 197940 }, { "epoch": 56.18790803292649, "grad_norm": 0.4097041189670563, "learning_rate": 4.3835083735452745e-05, "loss": 0.011469721794128418, "step": 197950 }, { "epoch": 56.19074652284984, "grad_norm": 2.59089994430542, "learning_rate": 4.383224524552938e-05, "loss": 0.015905682742595673, "step": 197960 }, { "epoch": 56.193585012773205, "grad_norm": 1.6597341299057007, "learning_rate": 4.3829406755606014e-05, "loss": 0.00253769401460886, "step": 197970 }, { "epoch": 56.19642350269657, "grad_norm": 0.030487125739455223, "learning_rate": 4.382656826568266e-05, "loss": 0.0017853746190667152, "step": 197980 }, { "epoch": 56.199261992619924, "grad_norm": 0.08982520550489426, "learning_rate": 4.38237297757593e-05, "loss": 0.000512753427028656, "step": 197990 }, { "epoch": 56.20210048254329, "grad_norm": 0.14178965985774994, "learning_rate": 4.382089128583594e-05, "loss": 0.006114903837442398, "step": 198000 }, { "epoch": 56.20210048254329, "eval_accuracy": 0.9775545240668914, "eval_loss": 0.077818863093853, "eval_runtime": 32.4909, "eval_samples_per_second": 484.043, "eval_steps_per_second": 7.571, "step": 198000 }, { "epoch": 56.20493897246665, "grad_norm": 0.0845697894692421, "learning_rate": 4.381805279591258e-05, "loss": 0.0002902230247855186, "step": 198010 }, { "epoch": 56.207777462390005, "grad_norm": 0.22844667732715607, "learning_rate": 4.3815214305989214e-05, "loss": 0.00033297352492809293, "step": 198020 }, { "epoch": 56.21061595231337, "grad_norm": 0.012572943232953548, "learning_rate": 4.3812375816065855e-05, "loss": 0.0010914696380496026, "step": 198030 }, { "epoch": 56.21345444223673, "grad_norm": 0.5784156322479248, "learning_rate": 4.38095373261425e-05, "loss": 0.0011450149118900298, "step": 198040 }, { "epoch": 56.216292932160094, "grad_norm": 0.5785497426986694, "learning_rate": 4.380669883621913e-05, "loss": 0.000695529393851757, "step": 198050 }, { "epoch": 56.21913142208345, "grad_norm": 0.109353207051754, "learning_rate": 4.380386034629577e-05, "loss": 0.0005987774580717087, "step": 198060 }, { "epoch": 56.22196991200681, "grad_norm": 0.16899922490119934, "learning_rate": 4.380102185637241e-05, "loss": 0.0011895840987563133, "step": 198070 }, { "epoch": 56.224808401930176, "grad_norm": 0.644934356212616, "learning_rate": 4.3798183366449056e-05, "loss": 0.0005254877731204032, "step": 198080 }, { "epoch": 56.22764689185353, "grad_norm": 0.15639841556549072, "learning_rate": 4.379534487652569e-05, "loss": 0.0020797932520508766, "step": 198090 }, { "epoch": 56.230485381776894, "grad_norm": 0.015220246277749538, "learning_rate": 4.3792506386602325e-05, "loss": 0.0020483529195189478, "step": 198100 }, { "epoch": 56.23332387170026, "grad_norm": 0.031398188322782516, "learning_rate": 4.378966789667897e-05, "loss": 0.0006619745865464211, "step": 198110 }, { "epoch": 56.23616236162361, "grad_norm": 0.12131484597921371, "learning_rate": 4.378682940675561e-05, "loss": 0.00025120191276073456, "step": 198120 }, { "epoch": 56.239000851546976, "grad_norm": 0.4681147038936615, "learning_rate": 4.378399091683225e-05, "loss": 0.0012213705107569695, "step": 198130 }, { "epoch": 56.24183934147034, "grad_norm": 0.4163227379322052, "learning_rate": 4.378115242690889e-05, "loss": 0.0019706455990672112, "step": 198140 }, { "epoch": 56.2446778313937, "grad_norm": 0.037470411509275436, "learning_rate": 4.3778313936985525e-05, "loss": 0.000494806095957756, "step": 198150 }, { "epoch": 56.24751632131706, "grad_norm": 0.2693958878517151, "learning_rate": 4.3775475447062166e-05, "loss": 0.0010548725724220276, "step": 198160 }, { "epoch": 56.25035481124042, "grad_norm": 0.11869361251592636, "learning_rate": 4.37726369571388e-05, "loss": 0.0007648164406418801, "step": 198170 }, { "epoch": 56.25319330116378, "grad_norm": 0.21156053245067596, "learning_rate": 4.376979846721544e-05, "loss": 0.001613084226846695, "step": 198180 }, { "epoch": 56.25603179108714, "grad_norm": 12.185402870178223, "learning_rate": 4.3766959977292084e-05, "loss": 0.009113604575395584, "step": 198190 }, { "epoch": 56.2588702810105, "grad_norm": 0.05877240002155304, "learning_rate": 4.376412148736872e-05, "loss": 0.0006514696404337883, "step": 198200 }, { "epoch": 56.261708770933865, "grad_norm": 0.06642419099807739, "learning_rate": 4.3761282997445366e-05, "loss": 0.0006647750735282898, "step": 198210 }, { "epoch": 56.26454726085722, "grad_norm": 0.1845230609178543, "learning_rate": 4.3758444507522e-05, "loss": 0.0009336881339550018, "step": 198220 }, { "epoch": 56.26738575078058, "grad_norm": 1.1860766410827637, "learning_rate": 4.3755606017598636e-05, "loss": 0.0005762943997979164, "step": 198230 }, { "epoch": 56.270224240703946, "grad_norm": 0.033912964165210724, "learning_rate": 4.3752767527675284e-05, "loss": 0.0007745284587144851, "step": 198240 }, { "epoch": 56.27306273062731, "grad_norm": 0.18883812427520752, "learning_rate": 4.374992903775192e-05, "loss": 0.0006298128515481949, "step": 198250 }, { "epoch": 56.275901220550665, "grad_norm": 1.4490346908569336, "learning_rate": 4.374709054782856e-05, "loss": 0.004904014617204666, "step": 198260 }, { "epoch": 56.27873971047403, "grad_norm": 0.2459600269794464, "learning_rate": 4.3744252057905194e-05, "loss": 0.0005243731662631035, "step": 198270 }, { "epoch": 56.28157820039739, "grad_norm": 0.563421368598938, "learning_rate": 4.3741413567981836e-05, "loss": 0.00029081404209136964, "step": 198280 }, { "epoch": 56.28441669032075, "grad_norm": 0.3818355202674866, "learning_rate": 4.373857507805848e-05, "loss": 0.0006073202937841415, "step": 198290 }, { "epoch": 56.28725518024411, "grad_norm": 0.028666572645306587, "learning_rate": 4.373573658813511e-05, "loss": 0.0002596151083707809, "step": 198300 }, { "epoch": 56.29009367016747, "grad_norm": 0.1813509315252304, "learning_rate": 4.373289809821175e-05, "loss": 0.00020192544907331466, "step": 198310 }, { "epoch": 56.29293216009083, "grad_norm": 0.04363854601979256, "learning_rate": 4.373034345728073e-05, "loss": 0.00372762531042099, "step": 198320 }, { "epoch": 56.29577065001419, "grad_norm": 0.013831849209964275, "learning_rate": 4.372750496735737e-05, "loss": 0.0031982794404029845, "step": 198330 }, { "epoch": 56.298609139937554, "grad_norm": 0.015881000086665154, "learning_rate": 4.372466647743401e-05, "loss": 0.00013125278055667878, "step": 198340 }, { "epoch": 56.30144762986092, "grad_norm": 0.07827562093734741, "learning_rate": 4.3721827987510644e-05, "loss": 0.0012825321406126021, "step": 198350 }, { "epoch": 56.30428611978427, "grad_norm": 0.0406169667840004, "learning_rate": 4.3718989497587285e-05, "loss": 0.0017355794087052345, "step": 198360 }, { "epoch": 56.307124609707635, "grad_norm": 0.030913464725017548, "learning_rate": 4.3716151007663926e-05, "loss": 0.00036715492606163023, "step": 198370 }, { "epoch": 56.309963099631, "grad_norm": 0.007096961606293917, "learning_rate": 4.371331251774056e-05, "loss": 0.0002389693632721901, "step": 198380 }, { "epoch": 56.312801589554354, "grad_norm": 0.021570827811956406, "learning_rate": 4.37104740278172e-05, "loss": 0.00025997795164585115, "step": 198390 }, { "epoch": 56.31564007947772, "grad_norm": 0.01946287788450718, "learning_rate": 4.3707635537893844e-05, "loss": 0.000519716925919056, "step": 198400 }, { "epoch": 56.31847856940108, "grad_norm": 0.08321772515773773, "learning_rate": 4.370479704797048e-05, "loss": 0.0026102373376488686, "step": 198410 }, { "epoch": 56.32131705932444, "grad_norm": 0.0069947256706655025, "learning_rate": 4.3701958558047126e-05, "loss": 0.000380752794444561, "step": 198420 }, { "epoch": 56.3241555492478, "grad_norm": 14.97995376586914, "learning_rate": 4.369912006812376e-05, "loss": 0.007968942821025848, "step": 198430 }, { "epoch": 56.32699403917116, "grad_norm": 5.838681697845459, "learning_rate": 4.3696281578200396e-05, "loss": 0.001706031896173954, "step": 198440 }, { "epoch": 56.329832529094524, "grad_norm": 1.4455469846725464, "learning_rate": 4.369344308827704e-05, "loss": 0.006653010100126267, "step": 198450 }, { "epoch": 56.33267101901788, "grad_norm": 0.3108550012111664, "learning_rate": 4.369060459835368e-05, "loss": 0.00085301473736763, "step": 198460 }, { "epoch": 56.33550950894124, "grad_norm": 0.05176917836070061, "learning_rate": 4.368776610843032e-05, "loss": 0.0037717752158641813, "step": 198470 }, { "epoch": 56.338347998864606, "grad_norm": 0.10225865989923477, "learning_rate": 4.3684927618506954e-05, "loss": 0.0005568321794271469, "step": 198480 }, { "epoch": 56.34118648878796, "grad_norm": 0.0787891075015068, "learning_rate": 4.3682089128583596e-05, "loss": 0.001842343993484974, "step": 198490 }, { "epoch": 56.344024978711325, "grad_norm": 0.19564880430698395, "learning_rate": 4.367925063866024e-05, "loss": 0.003161030262708664, "step": 198500 }, { "epoch": 56.344024978711325, "eval_accuracy": 0.9762828257137407, "eval_loss": 0.08471360057592392, "eval_runtime": 32.197, "eval_samples_per_second": 488.462, "eval_steps_per_second": 7.64, "step": 198500 }, { "epoch": 56.34686346863469, "grad_norm": 0.07728838920593262, "learning_rate": 4.367641214873687e-05, "loss": 0.0004705877974629402, "step": 198510 }, { "epoch": 56.34970195855805, "grad_norm": 0.06463629752397537, "learning_rate": 4.367357365881351e-05, "loss": 0.0003051610663533211, "step": 198520 }, { "epoch": 56.352540448481406, "grad_norm": 0.017057890072464943, "learning_rate": 4.3670735168890154e-05, "loss": 0.0003836583346128464, "step": 198530 }, { "epoch": 56.35537893840477, "grad_norm": 0.0918227881193161, "learning_rate": 4.366789667896679e-05, "loss": 0.000265682116150856, "step": 198540 }, { "epoch": 56.35821742832813, "grad_norm": 0.028482750058174133, "learning_rate": 4.366505818904343e-05, "loss": 0.000638226605951786, "step": 198550 }, { "epoch": 56.36105591825149, "grad_norm": 0.06451809406280518, "learning_rate": 4.366221969912007e-05, "loss": 0.0007457753643393516, "step": 198560 }, { "epoch": 56.36389440817485, "grad_norm": 1.1632802486419678, "learning_rate": 4.3659381209196706e-05, "loss": 0.0024611489847302438, "step": 198570 }, { "epoch": 56.36673289809821, "grad_norm": 0.0964711382985115, "learning_rate": 4.365654271927335e-05, "loss": 0.0007909022271633149, "step": 198580 }, { "epoch": 56.36957138802157, "grad_norm": 0.019427409395575523, "learning_rate": 4.365370422934999e-05, "loss": 0.002803533896803856, "step": 198590 }, { "epoch": 56.37240987794493, "grad_norm": 0.14352138340473175, "learning_rate": 4.365086573942663e-05, "loss": 0.00416020043194294, "step": 198600 }, { "epoch": 56.375248367868295, "grad_norm": 0.67116379737854, "learning_rate": 4.3648027249503265e-05, "loss": 0.0006471769884228706, "step": 198610 }, { "epoch": 56.37808685779166, "grad_norm": 1.7457098960876465, "learning_rate": 4.3645188759579906e-05, "loss": 0.0010544246062636376, "step": 198620 }, { "epoch": 56.380925347715014, "grad_norm": 0.05199697986245155, "learning_rate": 4.364235026965655e-05, "loss": 0.0003349499776959419, "step": 198630 }, { "epoch": 56.38376383763838, "grad_norm": 3.4960880279541016, "learning_rate": 4.363951177973318e-05, "loss": 0.0023002691566944124, "step": 198640 }, { "epoch": 56.38660232756174, "grad_norm": 0.13748858869075775, "learning_rate": 4.3636673289809824e-05, "loss": 0.0025214117020368578, "step": 198650 }, { "epoch": 56.389440817485095, "grad_norm": 0.39394262433052063, "learning_rate": 4.3633834799886465e-05, "loss": 0.0012639863416552544, "step": 198660 }, { "epoch": 56.39227930740846, "grad_norm": 0.25414419174194336, "learning_rate": 4.36309963099631e-05, "loss": 0.00042229443788528443, "step": 198670 }, { "epoch": 56.39511779733182, "grad_norm": 0.7672843337059021, "learning_rate": 4.362815782003974e-05, "loss": 0.0007474036887288093, "step": 198680 }, { "epoch": 56.39795628725518, "grad_norm": 0.12842409312725067, "learning_rate": 4.362531933011638e-05, "loss": 0.004470654204487801, "step": 198690 }, { "epoch": 56.40079477717854, "grad_norm": 0.03979675471782684, "learning_rate": 4.362248084019302e-05, "loss": 0.0007620995864272117, "step": 198700 }, { "epoch": 56.4036332671019, "grad_norm": 0.0909903272986412, "learning_rate": 4.361964235026966e-05, "loss": 0.0024743080139160156, "step": 198710 }, { "epoch": 56.406471757025265, "grad_norm": 0.7290894985198975, "learning_rate": 4.36168038603463e-05, "loss": 0.0004583677276968956, "step": 198720 }, { "epoch": 56.40931024694862, "grad_norm": 0.4180063009262085, "learning_rate": 4.361396537042294e-05, "loss": 0.0011193234473466872, "step": 198730 }, { "epoch": 56.412148736871984, "grad_norm": 0.038974978029727936, "learning_rate": 4.3611126880499576e-05, "loss": 0.00048462022095918655, "step": 198740 }, { "epoch": 56.41498722679535, "grad_norm": 0.015640880912542343, "learning_rate": 4.360828839057621e-05, "loss": 0.0013642387464642524, "step": 198750 }, { "epoch": 56.4178257167187, "grad_norm": 0.033592142164707184, "learning_rate": 4.360544990065286e-05, "loss": 0.0003052284941077232, "step": 198760 }, { "epoch": 56.420664206642066, "grad_norm": 0.02572907879948616, "learning_rate": 4.360261141072949e-05, "loss": 0.0004195353016257286, "step": 198770 }, { "epoch": 56.42350269656543, "grad_norm": 2.2455687522888184, "learning_rate": 4.3599772920806135e-05, "loss": 0.005290890857577324, "step": 198780 }, { "epoch": 56.42634118648879, "grad_norm": 0.17791533470153809, "learning_rate": 4.3596934430882776e-05, "loss": 0.000258168950676918, "step": 198790 }, { "epoch": 56.42917967641215, "grad_norm": 0.022287752479314804, "learning_rate": 4.359409594095941e-05, "loss": 0.004058560356497765, "step": 198800 }, { "epoch": 56.43201816633551, "grad_norm": 0.16986031830310822, "learning_rate": 4.359125745103605e-05, "loss": 0.0016647623851895332, "step": 198810 }, { "epoch": 56.43485665625887, "grad_norm": 0.186801478266716, "learning_rate": 4.358841896111269e-05, "loss": 0.0004996253177523613, "step": 198820 }, { "epoch": 56.43769514618223, "grad_norm": 0.953207790851593, "learning_rate": 4.358558047118933e-05, "loss": 0.0003852887079119682, "step": 198830 }, { "epoch": 56.44053363610559, "grad_norm": 0.25107598304748535, "learning_rate": 4.358274198126597e-05, "loss": 0.0014645962044596672, "step": 198840 }, { "epoch": 56.443372126028954, "grad_norm": 0.06009405106306076, "learning_rate": 4.3579903491342604e-05, "loss": 0.0005159026011824607, "step": 198850 }, { "epoch": 56.44621061595231, "grad_norm": 0.42033523321151733, "learning_rate": 4.3577065001419245e-05, "loss": 0.00023478940129280091, "step": 198860 }, { "epoch": 56.44904910587567, "grad_norm": 0.03515841066837311, "learning_rate": 4.3574226511495887e-05, "loss": 0.00023357272148132323, "step": 198870 }, { "epoch": 56.451887595799036, "grad_norm": 0.1380787193775177, "learning_rate": 4.357138802157252e-05, "loss": 0.002481667883694172, "step": 198880 }, { "epoch": 56.4547260857224, "grad_norm": 0.06295204907655716, "learning_rate": 4.356854953164917e-05, "loss": 0.00037338715046644213, "step": 198890 }, { "epoch": 56.457564575645755, "grad_norm": 0.6336563229560852, "learning_rate": 4.3565711041725804e-05, "loss": 0.0007837843149900436, "step": 198900 }, { "epoch": 56.46040306556912, "grad_norm": 0.04138362780213356, "learning_rate": 4.356287255180244e-05, "loss": 0.0031766965985298157, "step": 198910 }, { "epoch": 56.46324155549248, "grad_norm": 0.1436455249786377, "learning_rate": 4.356003406187909e-05, "loss": 0.0003342380747199059, "step": 198920 }, { "epoch": 56.466080045415836, "grad_norm": 0.5750896334648132, "learning_rate": 4.355719557195572e-05, "loss": 0.0007151016965508461, "step": 198930 }, { "epoch": 56.4689185353392, "grad_norm": 0.11192185431718826, "learning_rate": 4.355435708203236e-05, "loss": 0.0023063011467456818, "step": 198940 }, { "epoch": 56.47175702526256, "grad_norm": 0.08794848620891571, "learning_rate": 4.3551518592109e-05, "loss": 0.0008930442854762077, "step": 198950 }, { "epoch": 56.47459551518592, "grad_norm": 0.32268255949020386, "learning_rate": 4.354868010218564e-05, "loss": 0.0010062240064144135, "step": 198960 }, { "epoch": 56.47743400510928, "grad_norm": 0.07041940838098526, "learning_rate": 4.354584161226228e-05, "loss": 0.00022476539015769958, "step": 198970 }, { "epoch": 56.480272495032644, "grad_norm": 0.402192085981369, "learning_rate": 4.3543003122338915e-05, "loss": 0.0008492724969983101, "step": 198980 }, { "epoch": 56.48311098495601, "grad_norm": 0.12153196334838867, "learning_rate": 4.3540164632415556e-05, "loss": 0.001067104935646057, "step": 198990 }, { "epoch": 56.48594947487936, "grad_norm": 0.7671191096305847, "learning_rate": 4.35373261424922e-05, "loss": 0.0003671528771519661, "step": 199000 }, { "epoch": 56.48594947487936, "eval_accuracy": 0.9766643352196859, "eval_loss": 0.07846841216087341, "eval_runtime": 33.4429, "eval_samples_per_second": 470.264, "eval_steps_per_second": 7.356, "step": 199000 }, { "epoch": 56.488787964802725, "grad_norm": 0.029213303700089455, "learning_rate": 4.353448765256883e-05, "loss": 0.0007071943953633308, "step": 199010 }, { "epoch": 56.49162645472609, "grad_norm": 0.057255640625953674, "learning_rate": 4.353164916264548e-05, "loss": 0.0015198281034827232, "step": 199020 }, { "epoch": 56.494464944649444, "grad_norm": 0.2156606912612915, "learning_rate": 4.3528810672722115e-05, "loss": 0.0020401576533913613, "step": 199030 }, { "epoch": 56.49730343457281, "grad_norm": 0.0633658692240715, "learning_rate": 4.352597218279875e-05, "loss": 0.0009381350129842759, "step": 199040 }, { "epoch": 56.50014192449617, "grad_norm": 0.12863902747631073, "learning_rate": 4.352313369287539e-05, "loss": 0.0039426635950803755, "step": 199050 }, { "epoch": 56.502980414419525, "grad_norm": 0.03906010463833809, "learning_rate": 4.352029520295203e-05, "loss": 0.0007244830951094628, "step": 199060 }, { "epoch": 56.50581890434289, "grad_norm": 0.08160391449928284, "learning_rate": 4.3517456713028673e-05, "loss": 0.0007255004718899726, "step": 199070 }, { "epoch": 56.50865739426625, "grad_norm": 0.0738561674952507, "learning_rate": 4.351461822310531e-05, "loss": 0.0009003078565001487, "step": 199080 }, { "epoch": 56.511495884189614, "grad_norm": 0.03525705635547638, "learning_rate": 4.351177973318195e-05, "loss": 0.0002653932198882103, "step": 199090 }, { "epoch": 56.51433437411297, "grad_norm": 0.35569605231285095, "learning_rate": 4.350894124325859e-05, "loss": 0.0006025351583957672, "step": 199100 }, { "epoch": 56.51717286403633, "grad_norm": 0.06256183236837387, "learning_rate": 4.3506102753335225e-05, "loss": 0.0005217483267188072, "step": 199110 }, { "epoch": 56.520011353959696, "grad_norm": 0.4175226092338562, "learning_rate": 4.350326426341187e-05, "loss": 0.00043813586235046386, "step": 199120 }, { "epoch": 56.52284984388305, "grad_norm": 1.0195633172988892, "learning_rate": 4.350042577348851e-05, "loss": 0.0005567774176597595, "step": 199130 }, { "epoch": 56.525688333806414, "grad_norm": 0.01752062700688839, "learning_rate": 4.349758728356514e-05, "loss": 0.0005378371104598045, "step": 199140 }, { "epoch": 56.52852682372978, "grad_norm": 9.5982027053833, "learning_rate": 4.3494748793641784e-05, "loss": 0.0048179224133491514, "step": 199150 }, { "epoch": 56.53136531365314, "grad_norm": 0.3466532826423645, "learning_rate": 4.3491910303718425e-05, "loss": 0.004235567152500152, "step": 199160 }, { "epoch": 56.534203803576496, "grad_norm": 0.027063734829425812, "learning_rate": 4.348907181379506e-05, "loss": 0.0003608273342251778, "step": 199170 }, { "epoch": 56.53704229349986, "grad_norm": 0.034100525081157684, "learning_rate": 4.34862333238717e-05, "loss": 0.0005609307438135148, "step": 199180 }, { "epoch": 56.53988078342322, "grad_norm": 0.17738543450832367, "learning_rate": 4.348339483394834e-05, "loss": 0.00014857780188322068, "step": 199190 }, { "epoch": 56.54271927334658, "grad_norm": 0.0913369208574295, "learning_rate": 4.348055634402498e-05, "loss": 0.00027710162103176115, "step": 199200 }, { "epoch": 56.54555776326994, "grad_norm": 0.05409266799688339, "learning_rate": 4.347771785410162e-05, "loss": 0.000291801430284977, "step": 199210 }, { "epoch": 56.5483962531933, "grad_norm": 0.47202062606811523, "learning_rate": 4.347487936417826e-05, "loss": 0.0002573749050498009, "step": 199220 }, { "epoch": 56.55123474311666, "grad_norm": 0.11567140370607376, "learning_rate": 4.34720408742549e-05, "loss": 0.000920984148979187, "step": 199230 }, { "epoch": 56.55407323304002, "grad_norm": 0.024555141106247902, "learning_rate": 4.3469202384331536e-05, "loss": 0.0001989012584090233, "step": 199240 }, { "epoch": 56.556911722963385, "grad_norm": 0.015964647755026817, "learning_rate": 4.346636389440818e-05, "loss": 0.00015730056911706926, "step": 199250 }, { "epoch": 56.55975021288675, "grad_norm": 0.0557699017226696, "learning_rate": 4.346352540448482e-05, "loss": 0.0006092982366681099, "step": 199260 }, { "epoch": 56.5625887028101, "grad_norm": 0.005719055887311697, "learning_rate": 4.3460686914561453e-05, "loss": 0.00035339519381523133, "step": 199270 }, { "epoch": 56.565427192733466, "grad_norm": 0.19153249263763428, "learning_rate": 4.3457848424638095e-05, "loss": 0.00018258262425661088, "step": 199280 }, { "epoch": 56.56826568265683, "grad_norm": 0.027012955397367477, "learning_rate": 4.3455009934714736e-05, "loss": 0.00018149204552173615, "step": 199290 }, { "epoch": 56.571104172580185, "grad_norm": 0.1535273641347885, "learning_rate": 4.345217144479137e-05, "loss": 0.00018101446330547332, "step": 199300 }, { "epoch": 56.57394266250355, "grad_norm": 0.07806742191314697, "learning_rate": 4.344933295486801e-05, "loss": 0.0002159474417567253, "step": 199310 }, { "epoch": 56.57678115242691, "grad_norm": 0.01677834242582321, "learning_rate": 4.3446494464944654e-05, "loss": 0.00019921697676181794, "step": 199320 }, { "epoch": 56.579619642350266, "grad_norm": 0.04106127843260765, "learning_rate": 4.344365597502129e-05, "loss": 0.0007447617128491402, "step": 199330 }, { "epoch": 56.58245813227363, "grad_norm": 0.0706578865647316, "learning_rate": 4.344081748509793e-05, "loss": 0.0010051688179373742, "step": 199340 }, { "epoch": 56.58529662219699, "grad_norm": 0.01734188012778759, "learning_rate": 4.343797899517457e-05, "loss": 0.00011336542665958404, "step": 199350 }, { "epoch": 56.588135112120355, "grad_norm": 0.015083505772054195, "learning_rate": 4.343514050525121e-05, "loss": 0.0003255097195506096, "step": 199360 }, { "epoch": 56.59097360204371, "grad_norm": 2.137889862060547, "learning_rate": 4.343230201532785e-05, "loss": 0.0004946982488036155, "step": 199370 }, { "epoch": 56.593812091967074, "grad_norm": 0.016169564798474312, "learning_rate": 4.342946352540448e-05, "loss": 0.000572783686220646, "step": 199380 }, { "epoch": 56.59665058189044, "grad_norm": 0.5501602292060852, "learning_rate": 4.342662503548113e-05, "loss": 0.00048679988831281664, "step": 199390 }, { "epoch": 56.59948907181379, "grad_norm": 0.14745880663394928, "learning_rate": 4.3423786545557764e-05, "loss": 0.0005479119718074799, "step": 199400 }, { "epoch": 56.602327561737155, "grad_norm": 0.7646116614341736, "learning_rate": 4.3420948055634406e-05, "loss": 0.0012604082003235817, "step": 199410 }, { "epoch": 56.60516605166052, "grad_norm": 0.004354759585112333, "learning_rate": 4.341810956571105e-05, "loss": 0.0003527868539094925, "step": 199420 }, { "epoch": 56.608004541583874, "grad_norm": 0.20170891284942627, "learning_rate": 4.341527107578768e-05, "loss": 0.0021626021713018416, "step": 199430 }, { "epoch": 56.61084303150724, "grad_norm": 0.026509013026952744, "learning_rate": 4.341243258586432e-05, "loss": 0.002615862712264061, "step": 199440 }, { "epoch": 56.6136815214306, "grad_norm": 0.02309305965900421, "learning_rate": 4.3409594095940964e-05, "loss": 0.0012685371562838554, "step": 199450 }, { "epoch": 56.61652001135396, "grad_norm": 0.023291172459721565, "learning_rate": 4.34067556060176e-05, "loss": 0.00025542806833982467, "step": 199460 }, { "epoch": 56.61935850127732, "grad_norm": 0.13455131649971008, "learning_rate": 4.340391711609424e-05, "loss": 0.0027252862229943274, "step": 199470 }, { "epoch": 56.62219699120068, "grad_norm": 0.09099557995796204, "learning_rate": 4.3401078626170875e-05, "loss": 0.0004426902160048485, "step": 199480 }, { "epoch": 56.625035481124044, "grad_norm": 0.04469424486160278, "learning_rate": 4.339824013624752e-05, "loss": 0.0003223340958356857, "step": 199490 }, { "epoch": 56.6278739710474, "grad_norm": 0.12165403366088867, "learning_rate": 4.339540164632416e-05, "loss": 0.0011931302025914193, "step": 199500 }, { "epoch": 56.6278739710474, "eval_accuracy": 0.9774909391492338, "eval_loss": 0.07720094174146652, "eval_runtime": 32.4595, "eval_samples_per_second": 484.512, "eval_steps_per_second": 7.579, "step": 199500 }, { "epoch": 56.63071246097076, "grad_norm": 0.16904130578041077, "learning_rate": 4.339256315640079e-05, "loss": 0.0004886258393526077, "step": 199510 }, { "epoch": 56.633550950894126, "grad_norm": 0.231682687997818, "learning_rate": 4.338972466647744e-05, "loss": 0.0004755785688757896, "step": 199520 }, { "epoch": 56.63638944081748, "grad_norm": 0.034110113978385925, "learning_rate": 4.3386886176554075e-05, "loss": 0.0012620704248547554, "step": 199530 }, { "epoch": 56.639227930740844, "grad_norm": 0.01764194667339325, "learning_rate": 4.3384047686630716e-05, "loss": 0.0003511270508170128, "step": 199540 }, { "epoch": 56.64206642066421, "grad_norm": 2.2723305225372314, "learning_rate": 4.338120919670736e-05, "loss": 0.0005339279770851136, "step": 199550 }, { "epoch": 56.64490491058757, "grad_norm": 0.06426084041595459, "learning_rate": 4.337837070678399e-05, "loss": 0.0001961858943104744, "step": 199560 }, { "epoch": 56.647743400510926, "grad_norm": 1.8229585886001587, "learning_rate": 4.3375532216860634e-05, "loss": 0.000505877286195755, "step": 199570 }, { "epoch": 56.65058189043429, "grad_norm": 0.05769103020429611, "learning_rate": 4.337269372693727e-05, "loss": 0.00017693303525447845, "step": 199580 }, { "epoch": 56.65342038035765, "grad_norm": 0.046078965067863464, "learning_rate": 4.336985523701391e-05, "loss": 0.00042354799807071687, "step": 199590 }, { "epoch": 56.65625887028101, "grad_norm": 0.11833233386278152, "learning_rate": 4.336701674709055e-05, "loss": 0.0015283428132534028, "step": 199600 }, { "epoch": 56.65909736020437, "grad_norm": 0.11072896420955658, "learning_rate": 4.3364178257167186e-05, "loss": 0.02097795307636261, "step": 199610 }, { "epoch": 56.66193585012773, "grad_norm": 0.570492684841156, "learning_rate": 4.336133976724383e-05, "loss": 0.00035146847367286684, "step": 199620 }, { "epoch": 56.664774340051096, "grad_norm": 0.5610129237174988, "learning_rate": 4.335850127732047e-05, "loss": 0.001219264790415764, "step": 199630 }, { "epoch": 56.66761282997445, "grad_norm": 0.1917383074760437, "learning_rate": 4.33556627873971e-05, "loss": 0.00019885897636413575, "step": 199640 }, { "epoch": 56.670451319897815, "grad_norm": 1.015963077545166, "learning_rate": 4.335282429747375e-05, "loss": 0.0007009373977780343, "step": 199650 }, { "epoch": 56.67328980982118, "grad_norm": 0.11489671468734741, "learning_rate": 4.3349985807550386e-05, "loss": 0.006462021172046662, "step": 199660 }, { "epoch": 56.67612829974453, "grad_norm": 0.9260806441307068, "learning_rate": 4.334714731762702e-05, "loss": 0.0013763448223471642, "step": 199670 }, { "epoch": 56.678966789667896, "grad_norm": 0.09579630941152573, "learning_rate": 4.334430882770366e-05, "loss": 0.000954856164753437, "step": 199680 }, { "epoch": 56.68180527959126, "grad_norm": 1.9144452810287476, "learning_rate": 4.33414703377803e-05, "loss": 0.0013401705771684646, "step": 199690 }, { "epoch": 56.684643769514615, "grad_norm": 0.011504733003675938, "learning_rate": 4.3338631847856945e-05, "loss": 0.000724194198846817, "step": 199700 }, { "epoch": 56.68748225943798, "grad_norm": 0.017461175099015236, "learning_rate": 4.333579335793358e-05, "loss": 0.0017755566164851188, "step": 199710 }, { "epoch": 56.69032074936134, "grad_norm": 0.056614961475133896, "learning_rate": 4.333295486801022e-05, "loss": 0.0012316390872001647, "step": 199720 }, { "epoch": 56.693159239284704, "grad_norm": 0.33073684573173523, "learning_rate": 4.333011637808686e-05, "loss": 0.001095731183886528, "step": 199730 }, { "epoch": 56.69599772920806, "grad_norm": 0.9772460460662842, "learning_rate": 4.3327277888163496e-05, "loss": 0.00040018819272518157, "step": 199740 }, { "epoch": 56.69883621913142, "grad_norm": 0.013589735142886639, "learning_rate": 4.332443939824014e-05, "loss": 0.0023385629057884215, "step": 199750 }, { "epoch": 56.701674709054785, "grad_norm": 0.10904175788164139, "learning_rate": 4.332160090831678e-05, "loss": 0.0005422079935669899, "step": 199760 }, { "epoch": 56.70451319897814, "grad_norm": 0.739193856716156, "learning_rate": 4.3318762418393414e-05, "loss": 0.0004178471863269806, "step": 199770 }, { "epoch": 56.707351688901504, "grad_norm": 0.10070403665304184, "learning_rate": 4.3315923928470055e-05, "loss": 0.007056562602519989, "step": 199780 }, { "epoch": 56.71019017882487, "grad_norm": 0.07318787276744843, "learning_rate": 4.3313085438546697e-05, "loss": 0.0062316469848155975, "step": 199790 }, { "epoch": 56.71302866874822, "grad_norm": 2.2195827960968018, "learning_rate": 4.331024694862333e-05, "loss": 0.00043890681117773054, "step": 199800 }, { "epoch": 56.715867158671585, "grad_norm": 0.6832600235939026, "learning_rate": 4.330740845869997e-05, "loss": 0.0004683392122387886, "step": 199810 }, { "epoch": 56.71870564859495, "grad_norm": 0.07580214738845825, "learning_rate": 4.3304569968776614e-05, "loss": 0.0014487143605947494, "step": 199820 }, { "epoch": 56.72154413851831, "grad_norm": 7.889248371124268, "learning_rate": 4.3301731478853255e-05, "loss": 0.0019018324092030525, "step": 199830 }, { "epoch": 56.72438262844167, "grad_norm": 0.2305252104997635, "learning_rate": 4.329889298892989e-05, "loss": 0.0014726314693689346, "step": 199840 }, { "epoch": 56.72722111836503, "grad_norm": 9.918656349182129, "learning_rate": 4.329605449900653e-05, "loss": 0.0029454467818140983, "step": 199850 }, { "epoch": 56.73005960828839, "grad_norm": 0.42369577288627625, "learning_rate": 4.329321600908317e-05, "loss": 0.005766977369785309, "step": 199860 }, { "epoch": 56.73289809821175, "grad_norm": 15.997648239135742, "learning_rate": 4.329037751915981e-05, "loss": 0.016415253281593323, "step": 199870 }, { "epoch": 56.73573658813511, "grad_norm": 0.0924142524600029, "learning_rate": 4.328753902923645e-05, "loss": 0.004418928176164627, "step": 199880 }, { "epoch": 56.738575078058474, "grad_norm": 5.531737327575684, "learning_rate": 4.328470053931309e-05, "loss": 0.0034918226301670074, "step": 199890 }, { "epoch": 56.74141356798184, "grad_norm": 0.2198348045349121, "learning_rate": 4.3281862049389725e-05, "loss": 0.014966781437397002, "step": 199900 }, { "epoch": 56.74425205790519, "grad_norm": 0.9527679085731506, "learning_rate": 4.3279023559466366e-05, "loss": 0.0010680094361305236, "step": 199910 }, { "epoch": 56.747090547828556, "grad_norm": 0.026231449097394943, "learning_rate": 4.327618506954301e-05, "loss": 0.005332134664058685, "step": 199920 }, { "epoch": 56.74992903775192, "grad_norm": 0.017704149708151817, "learning_rate": 4.327334657961964e-05, "loss": 0.004073721542954445, "step": 199930 }, { "epoch": 56.752767527675275, "grad_norm": 0.2289051115512848, "learning_rate": 4.327050808969628e-05, "loss": 0.0037243306636810304, "step": 199940 }, { "epoch": 56.75560601759864, "grad_norm": 0.07778113335371017, "learning_rate": 4.3267669599772925e-05, "loss": 0.007437411695718765, "step": 199950 }, { "epoch": 56.758444507522, "grad_norm": 0.018980856984853745, "learning_rate": 4.3264831109849566e-05, "loss": 0.00034308005124330523, "step": 199960 }, { "epoch": 56.761282997445356, "grad_norm": 3.053487777709961, "learning_rate": 4.32619926199262e-05, "loss": 0.0012752771377563476, "step": 199970 }, { "epoch": 56.76412148736872, "grad_norm": 0.05718189477920532, "learning_rate": 4.3259154130002835e-05, "loss": 0.001780446618795395, "step": 199980 }, { "epoch": 56.76695997729208, "grad_norm": 1.373928427696228, "learning_rate": 4.325631564007948e-05, "loss": 0.0015583224594593048, "step": 199990 }, { "epoch": 56.769798467215445, "grad_norm": 0.31937354803085327, "learning_rate": 4.325347715015612e-05, "loss": 0.00027578268200159075, "step": 200000 }, { "epoch": 56.769798467215445, "eval_accuracy": 0.9774273542315762, "eval_loss": 0.08635704219341278, "eval_runtime": 32.9425, "eval_samples_per_second": 477.407, "eval_steps_per_second": 7.468, "step": 200000 }, { "epoch": 56.7726369571388, "grad_norm": 0.052973486483097076, "learning_rate": 4.325063866023276e-05, "loss": 0.00818576142191887, "step": 200010 }, { "epoch": 56.77547544706216, "grad_norm": 0.23988540470600128, "learning_rate": 4.32478001703094e-05, "loss": 0.0007290612906217575, "step": 200020 }, { "epoch": 56.778313936985526, "grad_norm": 2.4585306644439697, "learning_rate": 4.3244961680386035e-05, "loss": 0.0039919808506965635, "step": 200030 }, { "epoch": 56.78115242690888, "grad_norm": 0.6307926774024963, "learning_rate": 4.324212319046268e-05, "loss": 0.0008143294602632522, "step": 200040 }, { "epoch": 56.783990916832245, "grad_norm": 0.22681139409542084, "learning_rate": 4.323928470053932e-05, "loss": 0.0012881381437182427, "step": 200050 }, { "epoch": 56.78682940675561, "grad_norm": 0.08073797076940536, "learning_rate": 4.323644621061595e-05, "loss": 0.004340262711048126, "step": 200060 }, { "epoch": 56.789667896678964, "grad_norm": 0.1734338104724884, "learning_rate": 4.3233607720692594e-05, "loss": 0.004114207997918129, "step": 200070 }, { "epoch": 56.79250638660233, "grad_norm": 0.016746170818805695, "learning_rate": 4.323076923076923e-05, "loss": 0.006683717668056488, "step": 200080 }, { "epoch": 56.79534487652569, "grad_norm": 0.603496789932251, "learning_rate": 4.322793074084587e-05, "loss": 0.003788972645998001, "step": 200090 }, { "epoch": 56.79818336644905, "grad_norm": 0.023494048044085503, "learning_rate": 4.322509225092251e-05, "loss": 0.0005455933511257172, "step": 200100 }, { "epoch": 56.80102185637241, "grad_norm": 0.38556423783302307, "learning_rate": 4.3222253760999146e-05, "loss": 0.000386604480445385, "step": 200110 }, { "epoch": 56.80386034629577, "grad_norm": 0.0393877774477005, "learning_rate": 4.3219415271075794e-05, "loss": 0.00021758172661066056, "step": 200120 }, { "epoch": 56.806698836219134, "grad_norm": 0.789720892906189, "learning_rate": 4.321657678115243e-05, "loss": 0.000470663420855999, "step": 200130 }, { "epoch": 56.80953732614249, "grad_norm": 0.389004647731781, "learning_rate": 4.321373829122906e-05, "loss": 0.005303002148866654, "step": 200140 }, { "epoch": 56.81237581606585, "grad_norm": 13.555556297302246, "learning_rate": 4.321089980130571e-05, "loss": 0.002979358844459057, "step": 200150 }, { "epoch": 56.815214305989215, "grad_norm": 0.19053852558135986, "learning_rate": 4.3208061311382346e-05, "loss": 0.0008209770545363426, "step": 200160 }, { "epoch": 56.81805279591257, "grad_norm": 0.27862349152565, "learning_rate": 4.320522282145899e-05, "loss": 0.0013585850596427918, "step": 200170 }, { "epoch": 56.820891285835934, "grad_norm": 0.11936115473508835, "learning_rate": 4.320238433153562e-05, "loss": 0.00029610954225063325, "step": 200180 }, { "epoch": 56.8237297757593, "grad_norm": 0.040944699198007584, "learning_rate": 4.3199545841612263e-05, "loss": 0.0006252303719520569, "step": 200190 }, { "epoch": 56.82656826568266, "grad_norm": 0.04554248973727226, "learning_rate": 4.3196707351688905e-05, "loss": 0.0005840733647346497, "step": 200200 }, { "epoch": 56.829406755606016, "grad_norm": 0.006429123226553202, "learning_rate": 4.319386886176554e-05, "loss": 0.0003351595252752304, "step": 200210 }, { "epoch": 56.83224524552938, "grad_norm": 0.3482067584991455, "learning_rate": 4.319103037184218e-05, "loss": 0.0005381667986512184, "step": 200220 }, { "epoch": 56.83508373545274, "grad_norm": 0.1282515525817871, "learning_rate": 4.318819188191882e-05, "loss": 0.0007099047303199768, "step": 200230 }, { "epoch": 56.8379222253761, "grad_norm": 0.06444639712572098, "learning_rate": 4.318535339199546e-05, "loss": 0.0006049908697605133, "step": 200240 }, { "epoch": 56.84076071529946, "grad_norm": 0.05016118288040161, "learning_rate": 4.3182514902072105e-05, "loss": 0.0002855706959962845, "step": 200250 }, { "epoch": 56.84359920522282, "grad_norm": 0.02094232477247715, "learning_rate": 4.317967641214874e-05, "loss": 0.0004952836781740189, "step": 200260 }, { "epoch": 56.84643769514618, "grad_norm": 0.14093153178691864, "learning_rate": 4.3176837922225374e-05, "loss": 0.0003348285332322121, "step": 200270 }, { "epoch": 56.84927618506954, "grad_norm": 0.05609801784157753, "learning_rate": 4.3173999432302015e-05, "loss": 0.0003192594274878502, "step": 200280 }, { "epoch": 56.852114674992904, "grad_norm": 0.18074922263622284, "learning_rate": 4.317116094237866e-05, "loss": 0.0002816483378410339, "step": 200290 }, { "epoch": 56.85495316491627, "grad_norm": 0.2897770404815674, "learning_rate": 4.31683224524553e-05, "loss": 0.00039952434599399567, "step": 200300 }, { "epoch": 56.85779165483962, "grad_norm": 0.7893989682197571, "learning_rate": 4.316548396253193e-05, "loss": 0.000849335826933384, "step": 200310 }, { "epoch": 56.860630144762986, "grad_norm": 0.15595951676368713, "learning_rate": 4.3162645472608574e-05, "loss": 0.001363135688006878, "step": 200320 }, { "epoch": 56.86346863468635, "grad_norm": 0.05356869474053383, "learning_rate": 4.3159806982685216e-05, "loss": 0.00030534714460372925, "step": 200330 }, { "epoch": 56.866307124609705, "grad_norm": 0.03867045044898987, "learning_rate": 4.315696849276185e-05, "loss": 0.00021122097969055175, "step": 200340 }, { "epoch": 56.86914561453307, "grad_norm": 1.7470285892486572, "learning_rate": 4.315413000283849e-05, "loss": 0.0006336702033877372, "step": 200350 }, { "epoch": 56.87198410445643, "grad_norm": 0.09334969520568848, "learning_rate": 4.315129151291513e-05, "loss": 0.002667706459760666, "step": 200360 }, { "epoch": 56.87482259437979, "grad_norm": 0.4083535373210907, "learning_rate": 4.314845302299177e-05, "loss": 0.0003425450995564461, "step": 200370 }, { "epoch": 56.87766108430315, "grad_norm": 0.01971421390771866, "learning_rate": 4.314561453306841e-05, "loss": 0.00040315445512533187, "step": 200380 }, { "epoch": 56.88049957422651, "grad_norm": 0.03132257238030434, "learning_rate": 4.314305989213738e-05, "loss": 0.002739547565579414, "step": 200390 }, { "epoch": 56.883338064149875, "grad_norm": 0.059551358222961426, "learning_rate": 4.3140221402214023e-05, "loss": 0.0033661499619483948, "step": 200400 }, { "epoch": 56.88617655407323, "grad_norm": 0.03028663620352745, "learning_rate": 4.3137382912290665e-05, "loss": 0.0003389192745089531, "step": 200410 }, { "epoch": 56.889015043996594, "grad_norm": 0.07753496617078781, "learning_rate": 4.31345444223673e-05, "loss": 0.004858072847127914, "step": 200420 }, { "epoch": 56.89185353391996, "grad_norm": 0.16182976961135864, "learning_rate": 4.313170593244395e-05, "loss": 0.0037418119609355927, "step": 200430 }, { "epoch": 56.89469202384331, "grad_norm": 3.6472718715667725, "learning_rate": 4.312886744252058e-05, "loss": 0.0014647191390395164, "step": 200440 }, { "epoch": 56.897530513766675, "grad_norm": 0.02206544578075409, "learning_rate": 4.312602895259722e-05, "loss": 0.0009286265820264816, "step": 200450 }, { "epoch": 56.90036900369004, "grad_norm": 0.11763380467891693, "learning_rate": 4.312319046267386e-05, "loss": 0.0002564597874879837, "step": 200460 }, { "epoch": 56.9032074936134, "grad_norm": 0.013742645271122456, "learning_rate": 4.31203519727505e-05, "loss": 0.0012595893815159798, "step": 200470 }, { "epoch": 56.90604598353676, "grad_norm": 0.06837963312864304, "learning_rate": 4.311751348282714e-05, "loss": 0.0005041493102908134, "step": 200480 }, { "epoch": 56.90888447346012, "grad_norm": 0.008963258937001228, "learning_rate": 4.3114674992903775e-05, "loss": 0.0005401270464062691, "step": 200490 }, { "epoch": 56.91172296338348, "grad_norm": 0.28664571046829224, "learning_rate": 4.311183650298042e-05, "loss": 0.000853918120265007, "step": 200500 }, { "epoch": 56.91172296338348, "eval_accuracy": 0.9788898073376995, "eval_loss": 0.07930737733840942, "eval_runtime": 32.9932, "eval_samples_per_second": 476.674, "eval_steps_per_second": 7.456, "step": 200500 }, { "epoch": 56.91456145330684, "grad_norm": 0.13170455396175385, "learning_rate": 4.310899801305706e-05, "loss": 0.001414143294095993, "step": 200510 }, { "epoch": 56.9173999432302, "grad_norm": 0.01512229349464178, "learning_rate": 4.310615952313369e-05, "loss": 0.00031428057700395585, "step": 200520 }, { "epoch": 56.920238433153564, "grad_norm": 0.028068216517567635, "learning_rate": 4.3103321033210334e-05, "loss": 0.0004225457087159157, "step": 200530 }, { "epoch": 56.92307692307692, "grad_norm": 0.2232406735420227, "learning_rate": 4.3100482543286976e-05, "loss": 0.004478540644049645, "step": 200540 }, { "epoch": 56.92591541300028, "grad_norm": 0.03122646175324917, "learning_rate": 4.309764405336361e-05, "loss": 0.005822714045643806, "step": 200550 }, { "epoch": 56.928753902923646, "grad_norm": 0.016302771866321564, "learning_rate": 4.309480556344025e-05, "loss": 0.002021363191306591, "step": 200560 }, { "epoch": 56.93159239284701, "grad_norm": 1.2137556076049805, "learning_rate": 4.309196707351689e-05, "loss": 0.0020448042079806326, "step": 200570 }, { "epoch": 56.934430882770364, "grad_norm": 1.5950778722763062, "learning_rate": 4.308912858359353e-05, "loss": 0.0023507248610258103, "step": 200580 }, { "epoch": 56.93726937269373, "grad_norm": 0.34476718306541443, "learning_rate": 4.308629009367017e-05, "loss": 0.001139584556221962, "step": 200590 }, { "epoch": 56.94010786261709, "grad_norm": 0.21791072189807892, "learning_rate": 4.308345160374681e-05, "loss": 0.003195559233427048, "step": 200600 }, { "epoch": 56.942946352540446, "grad_norm": 0.05086489021778107, "learning_rate": 4.3080613113823445e-05, "loss": 0.0014512546360492707, "step": 200610 }, { "epoch": 56.94578484246381, "grad_norm": 0.4523414373397827, "learning_rate": 4.3077774623900086e-05, "loss": 0.0006383338943123818, "step": 200620 }, { "epoch": 56.94862333238717, "grad_norm": 0.4168773293495178, "learning_rate": 4.307493613397673e-05, "loss": 0.0019120490178465843, "step": 200630 }, { "epoch": 56.95146182231053, "grad_norm": 0.05980618670582771, "learning_rate": 4.307209764405337e-05, "loss": 0.000257771834731102, "step": 200640 }, { "epoch": 56.95430031223389, "grad_norm": 0.038323335349559784, "learning_rate": 4.3069259154130004e-05, "loss": 0.0017015635967254639, "step": 200650 }, { "epoch": 56.95713880215725, "grad_norm": 4.837734699249268, "learning_rate": 4.306642066420664e-05, "loss": 0.001287921704351902, "step": 200660 }, { "epoch": 56.959977292080616, "grad_norm": 4.7121262550354, "learning_rate": 4.3063582174283286e-05, "loss": 0.0025889866054058077, "step": 200670 }, { "epoch": 56.96281578200397, "grad_norm": 0.3785896897315979, "learning_rate": 4.306074368435992e-05, "loss": 0.0009185686707496643, "step": 200680 }, { "epoch": 56.965654271927335, "grad_norm": 0.25577312707901, "learning_rate": 4.305790519443656e-05, "loss": 0.000325273722410202, "step": 200690 }, { "epoch": 56.9684927618507, "grad_norm": 0.7346470952033997, "learning_rate": 4.3055066704513204e-05, "loss": 0.0011619523167610169, "step": 200700 }, { "epoch": 56.97133125177405, "grad_norm": 0.16928669810295105, "learning_rate": 4.305222821458984e-05, "loss": 0.004123948514461517, "step": 200710 }, { "epoch": 56.974169741697416, "grad_norm": 0.2655104994773865, "learning_rate": 4.304938972466648e-05, "loss": 0.0011900918558239936, "step": 200720 }, { "epoch": 56.97700823162078, "grad_norm": 0.04529326409101486, "learning_rate": 4.304655123474312e-05, "loss": 0.00018385909497737886, "step": 200730 }, { "epoch": 56.97984672154414, "grad_norm": 8.621896743774414, "learning_rate": 4.3043712744819756e-05, "loss": 0.007953740656375885, "step": 200740 }, { "epoch": 56.9826852114675, "grad_norm": 0.07254883646965027, "learning_rate": 4.30408742548964e-05, "loss": 0.005233626812696457, "step": 200750 }, { "epoch": 56.98552370139086, "grad_norm": 0.03222733736038208, "learning_rate": 4.303803576497303e-05, "loss": 0.0007836554199457168, "step": 200760 }, { "epoch": 56.98836219131422, "grad_norm": 0.03387417644262314, "learning_rate": 4.303519727504968e-05, "loss": 0.003844743221998215, "step": 200770 }, { "epoch": 56.99120068123758, "grad_norm": 0.08525165170431137, "learning_rate": 4.3032358785126314e-05, "loss": 0.0006162222474813461, "step": 200780 }, { "epoch": 56.99403917116094, "grad_norm": 1.7434321641921997, "learning_rate": 4.302952029520295e-05, "loss": 0.00262595359236002, "step": 200790 }, { "epoch": 56.996877661084305, "grad_norm": 0.011591685935854912, "learning_rate": 4.30266818052796e-05, "loss": 0.007482858002185821, "step": 200800 }, { "epoch": 56.99971615100766, "grad_norm": 3.297243118286133, "learning_rate": 4.302384331535623e-05, "loss": 0.007903494685888291, "step": 200810 }, { "epoch": 57.002554640931024, "grad_norm": 0.40686121582984924, "learning_rate": 4.302100482543287e-05, "loss": 0.0005969660356640816, "step": 200820 }, { "epoch": 57.00539313085439, "grad_norm": 0.05055893957614899, "learning_rate": 4.3018166335509514e-05, "loss": 0.0006559722125530243, "step": 200830 }, { "epoch": 57.00823162077775, "grad_norm": 0.0697258785367012, "learning_rate": 4.301532784558615e-05, "loss": 0.0008485350757837295, "step": 200840 }, { "epoch": 57.011070110701105, "grad_norm": 0.021078651770949364, "learning_rate": 4.301248935566279e-05, "loss": 0.0029240841045975684, "step": 200850 }, { "epoch": 57.01390860062447, "grad_norm": 0.13679464161396027, "learning_rate": 4.3009650865739425e-05, "loss": 0.00034329015761613846, "step": 200860 }, { "epoch": 57.01674709054783, "grad_norm": 0.03644420579075813, "learning_rate": 4.3006812375816066e-05, "loss": 0.00019503328949213027, "step": 200870 }, { "epoch": 57.01958558047119, "grad_norm": 0.5464492440223694, "learning_rate": 4.300397388589271e-05, "loss": 0.0007966605946421624, "step": 200880 }, { "epoch": 57.02242407039455, "grad_norm": 0.11623863130807877, "learning_rate": 4.300113539596934e-05, "loss": 0.0004833795130252838, "step": 200890 }, { "epoch": 57.02526256031791, "grad_norm": 0.15781408548355103, "learning_rate": 4.299829690604599e-05, "loss": 0.00019017811864614488, "step": 200900 }, { "epoch": 57.02810105024127, "grad_norm": 0.028663834556937218, "learning_rate": 4.2995458416122625e-05, "loss": 0.0007589267566800118, "step": 200910 }, { "epoch": 57.03093954016463, "grad_norm": 0.04607165977358818, "learning_rate": 4.299261992619926e-05, "loss": 0.0003463162109255791, "step": 200920 }, { "epoch": 57.033778030087994, "grad_norm": 0.010459305718541145, "learning_rate": 4.298978143627591e-05, "loss": 0.00038949735462665556, "step": 200930 }, { "epoch": 57.03661652001136, "grad_norm": 0.018429765477776527, "learning_rate": 4.298694294635254e-05, "loss": 0.002263453043997288, "step": 200940 }, { "epoch": 57.03945500993471, "grad_norm": 0.02073507197201252, "learning_rate": 4.2984104456429184e-05, "loss": 0.0035685524344444275, "step": 200950 }, { "epoch": 57.042293499858076, "grad_norm": 0.11708080023527145, "learning_rate": 4.2981265966505825e-05, "loss": 0.00032347720116376877, "step": 200960 }, { "epoch": 57.04513198978144, "grad_norm": 0.4139687418937683, "learning_rate": 4.297842747658246e-05, "loss": 0.0005678731948137283, "step": 200970 }, { "epoch": 57.047970479704794, "grad_norm": 0.2454933226108551, "learning_rate": 4.29755889866591e-05, "loss": 0.00016815606504678727, "step": 200980 }, { "epoch": 57.05080896962816, "grad_norm": 0.03844744712114334, "learning_rate": 4.2972750496735736e-05, "loss": 0.0005626164376735688, "step": 200990 }, { "epoch": 57.05364745955152, "grad_norm": 0.05151917785406113, "learning_rate": 4.296991200681238e-05, "loss": 0.00024006012827157975, "step": 201000 }, { "epoch": 57.05364745955152, "eval_accuracy": 0.9773001843962612, "eval_loss": 0.08056609332561493, "eval_runtime": 32.6569, "eval_samples_per_second": 481.583, "eval_steps_per_second": 7.533, "step": 201000 }, { "epoch": 57.056485949474876, "grad_norm": 0.035329461097717285, "learning_rate": 4.296707351688902e-05, "loss": 0.0002843949943780899, "step": 201010 }, { "epoch": 57.05932443939824, "grad_norm": 0.07585558295249939, "learning_rate": 4.296423502696565e-05, "loss": 0.0009139340370893478, "step": 201020 }, { "epoch": 57.0621629293216, "grad_norm": 0.02679772675037384, "learning_rate": 4.2961396537042295e-05, "loss": 0.0007927954196929931, "step": 201030 }, { "epoch": 57.065001419244965, "grad_norm": 0.5146026015281677, "learning_rate": 4.2958558047118936e-05, "loss": 0.0006412042304873467, "step": 201040 }, { "epoch": 57.06783990916832, "grad_norm": 0.010220753960311413, "learning_rate": 4.295571955719557e-05, "loss": 0.00019828248769044877, "step": 201050 }, { "epoch": 57.07067839909168, "grad_norm": 0.10936890542507172, "learning_rate": 4.295288106727222e-05, "loss": 0.001105501502752304, "step": 201060 }, { "epoch": 57.073516889015046, "grad_norm": 0.4911111891269684, "learning_rate": 4.295004257734885e-05, "loss": 0.0003148248419165611, "step": 201070 }, { "epoch": 57.0763553789384, "grad_norm": 0.07574953138828278, "learning_rate": 4.294720408742549e-05, "loss": 0.0006164440885186196, "step": 201080 }, { "epoch": 57.079193868861765, "grad_norm": 0.03353596851229668, "learning_rate": 4.294436559750213e-05, "loss": 0.0006126653403043747, "step": 201090 }, { "epoch": 57.08203235878513, "grad_norm": 0.1112908199429512, "learning_rate": 4.294152710757877e-05, "loss": 0.0003598678857088089, "step": 201100 }, { "epoch": 57.08487084870849, "grad_norm": 0.2912595868110657, "learning_rate": 4.293868861765541e-05, "loss": 0.0004725232720375061, "step": 201110 }, { "epoch": 57.087709338631846, "grad_norm": 0.04724462702870369, "learning_rate": 4.2935850127732047e-05, "loss": 0.0014183890074491502, "step": 201120 }, { "epoch": 57.09054782855521, "grad_norm": 0.4213326573371887, "learning_rate": 4.293301163780869e-05, "loss": 0.000235789455473423, "step": 201130 }, { "epoch": 57.09338631847857, "grad_norm": 0.12265964597463608, "learning_rate": 4.293017314788533e-05, "loss": 0.00023546479642391204, "step": 201140 }, { "epoch": 57.09622480840193, "grad_norm": 0.23238897323608398, "learning_rate": 4.2927334657961964e-05, "loss": 0.00024346671998500823, "step": 201150 }, { "epoch": 57.09906329832529, "grad_norm": 0.0033123402390629053, "learning_rate": 4.2924496168038605e-05, "loss": 0.00014940686523914337, "step": 201160 }, { "epoch": 57.101901788248654, "grad_norm": 0.024003108963370323, "learning_rate": 4.292165767811525e-05, "loss": 0.0008556239306926727, "step": 201170 }, { "epoch": 57.10474027817201, "grad_norm": 0.08149160444736481, "learning_rate": 4.291881918819188e-05, "loss": 0.0004599355161190033, "step": 201180 }, { "epoch": 57.10757876809537, "grad_norm": 0.28362658619880676, "learning_rate": 4.291598069826852e-05, "loss": 0.0013384733349084855, "step": 201190 }, { "epoch": 57.110417258018735, "grad_norm": 0.754981517791748, "learning_rate": 4.2913142208345164e-05, "loss": 0.005869158729910851, "step": 201200 }, { "epoch": 57.1132557479421, "grad_norm": 0.16299238801002502, "learning_rate": 4.29103037184218e-05, "loss": 0.0004938431084156036, "step": 201210 }, { "epoch": 57.116094237865454, "grad_norm": 0.14035063982009888, "learning_rate": 4.290746522849844e-05, "loss": 0.000969882495701313, "step": 201220 }, { "epoch": 57.11893272778882, "grad_norm": 0.031112737953662872, "learning_rate": 4.290462673857508e-05, "loss": 0.0004536926746368408, "step": 201230 }, { "epoch": 57.12177121771218, "grad_norm": 0.19260765612125397, "learning_rate": 4.290178824865172e-05, "loss": 0.0005556778982281685, "step": 201240 }, { "epoch": 57.124609707635535, "grad_norm": 0.040867771953344345, "learning_rate": 4.289894975872836e-05, "loss": 0.00042935218662023546, "step": 201250 }, { "epoch": 57.1274481975589, "grad_norm": 0.0331200510263443, "learning_rate": 4.2896111268805e-05, "loss": 0.0009248403832316399, "step": 201260 }, { "epoch": 57.13028668748226, "grad_norm": 0.014002885669469833, "learning_rate": 4.289327277888164e-05, "loss": 0.0006891258060932159, "step": 201270 }, { "epoch": 57.13312517740562, "grad_norm": 0.021625259891152382, "learning_rate": 4.2890434288958275e-05, "loss": 0.0003234881907701492, "step": 201280 }, { "epoch": 57.13596366732898, "grad_norm": 0.19820933043956757, "learning_rate": 4.2887595799034916e-05, "loss": 0.0005907393991947174, "step": 201290 }, { "epoch": 57.13880215725234, "grad_norm": 0.3163262605667114, "learning_rate": 4.288475730911156e-05, "loss": 0.02052401453256607, "step": 201300 }, { "epoch": 57.141640647175706, "grad_norm": 0.9325040578842163, "learning_rate": 4.288191881918819e-05, "loss": 0.0026011385023593904, "step": 201310 }, { "epoch": 57.14447913709906, "grad_norm": 0.01992279477417469, "learning_rate": 4.2879080329264833e-05, "loss": 0.00027755871415138247, "step": 201320 }, { "epoch": 57.147317627022424, "grad_norm": 0.010596634820103645, "learning_rate": 4.2876241839341475e-05, "loss": 0.0016797471791505814, "step": 201330 }, { "epoch": 57.15015611694579, "grad_norm": 0.06688288599252701, "learning_rate": 4.287340334941811e-05, "loss": 0.0006846962496638298, "step": 201340 }, { "epoch": 57.15299460686914, "grad_norm": 0.7527656555175781, "learning_rate": 4.287056485949475e-05, "loss": 0.0031741395592689513, "step": 201350 }, { "epoch": 57.155833096792506, "grad_norm": 0.0603465735912323, "learning_rate": 4.286772636957139e-05, "loss": 0.00030042510479688644, "step": 201360 }, { "epoch": 57.15867158671587, "grad_norm": 0.019975975155830383, "learning_rate": 4.2864887879648034e-05, "loss": 0.005129379779100418, "step": 201370 }, { "epoch": 57.161510076639225, "grad_norm": 0.10040336847305298, "learning_rate": 4.286204938972467e-05, "loss": 0.0016369117423892022, "step": 201380 }, { "epoch": 57.16434856656259, "grad_norm": 7.092808246612549, "learning_rate": 4.28592108998013e-05, "loss": 0.0024403642863035204, "step": 201390 }, { "epoch": 57.16718705648595, "grad_norm": 0.021059805527329445, "learning_rate": 4.285637240987795e-05, "loss": 0.0003399364650249481, "step": 201400 }, { "epoch": 57.17002554640931, "grad_norm": 0.27285119891166687, "learning_rate": 4.2853533919954585e-05, "loss": 0.001135636866092682, "step": 201410 }, { "epoch": 57.17286403633267, "grad_norm": 0.21088802814483643, "learning_rate": 4.285069543003123e-05, "loss": 0.0054770026355981825, "step": 201420 }, { "epoch": 57.17570252625603, "grad_norm": 1.2385860681533813, "learning_rate": 4.284785694010787e-05, "loss": 0.005344578251242638, "step": 201430 }, { "epoch": 57.178541016179395, "grad_norm": 0.08587493002414703, "learning_rate": 4.28450184501845e-05, "loss": 0.002549692802131176, "step": 201440 }, { "epoch": 57.18137950610275, "grad_norm": 0.008264529518783092, "learning_rate": 4.2842179960261144e-05, "loss": 0.0005351455882191658, "step": 201450 }, { "epoch": 57.18421799602611, "grad_norm": 1.494195580482483, "learning_rate": 4.2839341470337786e-05, "loss": 0.0011081697419285774, "step": 201460 }, { "epoch": 57.187056485949476, "grad_norm": 0.017095206305384636, "learning_rate": 4.283650298041442e-05, "loss": 0.00044147055596113207, "step": 201470 }, { "epoch": 57.18989497587283, "grad_norm": 0.09560857713222504, "learning_rate": 4.283366449049106e-05, "loss": 0.003907162696123123, "step": 201480 }, { "epoch": 57.192733465796195, "grad_norm": 1.5653682947158813, "learning_rate": 4.2830826000567696e-05, "loss": 0.0010096924379467964, "step": 201490 }, { "epoch": 57.19557195571956, "grad_norm": 0.8284834027290344, "learning_rate": 4.282798751064434e-05, "loss": 0.00022936519235372543, "step": 201500 }, { "epoch": 57.19557195571956, "eval_accuracy": 0.9786990525847269, "eval_loss": 0.07789654284715652, "eval_runtime": 32.7108, "eval_samples_per_second": 480.789, "eval_steps_per_second": 7.52, "step": 201500 }, { "epoch": 57.19841044564292, "grad_norm": 1.8554421663284302, "learning_rate": 4.282514902072098e-05, "loss": 0.0018603634089231491, "step": 201510 }, { "epoch": 57.20124893556628, "grad_norm": 0.04776168614625931, "learning_rate": 4.2822310530797613e-05, "loss": 0.00028951317071914675, "step": 201520 }, { "epoch": 57.20408742548964, "grad_norm": 1.7256097793579102, "learning_rate": 4.281947204087426e-05, "loss": 0.001122703403234482, "step": 201530 }, { "epoch": 57.206925915413, "grad_norm": 0.0360376238822937, "learning_rate": 4.2816633550950896e-05, "loss": 0.00030745286494493484, "step": 201540 }, { "epoch": 57.20976440533636, "grad_norm": 0.0067825173027813435, "learning_rate": 4.281379506102753e-05, "loss": 0.0004044413566589355, "step": 201550 }, { "epoch": 57.21260289525972, "grad_norm": 0.20355620980262756, "learning_rate": 4.281095657110418e-05, "loss": 0.004194860160350799, "step": 201560 }, { "epoch": 57.215441385183084, "grad_norm": 0.48657962679862976, "learning_rate": 4.2808118081180814e-05, "loss": 0.0028144998475909235, "step": 201570 }, { "epoch": 57.21827987510645, "grad_norm": 2.9200165271759033, "learning_rate": 4.2805279591257455e-05, "loss": 0.004096401855349541, "step": 201580 }, { "epoch": 57.2211183650298, "grad_norm": 0.883172333240509, "learning_rate": 4.280244110133409e-05, "loss": 0.0019150709733366967, "step": 201590 }, { "epoch": 57.223956854953165, "grad_norm": 0.06598073989152908, "learning_rate": 4.279960261141073e-05, "loss": 0.0006409918889403344, "step": 201600 }, { "epoch": 57.22679534487653, "grad_norm": 0.11473830789327621, "learning_rate": 4.279676412148737e-05, "loss": 0.0017858995124697685, "step": 201610 }, { "epoch": 57.229633834799884, "grad_norm": 0.18643957376480103, "learning_rate": 4.279392563156401e-05, "loss": 0.00033036954700946806, "step": 201620 }, { "epoch": 57.23247232472325, "grad_norm": 0.09944051504135132, "learning_rate": 4.279108714164065e-05, "loss": 0.002861417457461357, "step": 201630 }, { "epoch": 57.23531081464661, "grad_norm": 0.5344258546829224, "learning_rate": 4.278824865171729e-05, "loss": 0.004497134312987328, "step": 201640 }, { "epoch": 57.238149304569966, "grad_norm": 0.03843424841761589, "learning_rate": 4.2785410161793924e-05, "loss": 0.002250286191701889, "step": 201650 }, { "epoch": 57.24098779449333, "grad_norm": 0.04939184710383415, "learning_rate": 4.278257167187057e-05, "loss": 0.0022885726764798166, "step": 201660 }, { "epoch": 57.24382628441669, "grad_norm": 1.6738266944885254, "learning_rate": 4.277973318194721e-05, "loss": 0.0015710202977061271, "step": 201670 }, { "epoch": 57.246664774340054, "grad_norm": 0.3871611952781677, "learning_rate": 4.277689469202384e-05, "loss": 0.005441091954708099, "step": 201680 }, { "epoch": 57.24950326426341, "grad_norm": 0.45166873931884766, "learning_rate": 4.277405620210048e-05, "loss": 0.0013358691707253457, "step": 201690 }, { "epoch": 57.25234175418677, "grad_norm": 0.36165645718574524, "learning_rate": 4.2771217712177124e-05, "loss": 0.0012647081166505814, "step": 201700 }, { "epoch": 57.255180244110136, "grad_norm": 12.646797180175781, "learning_rate": 4.2768379222253766e-05, "loss": 0.004315158724784851, "step": 201710 }, { "epoch": 57.25801873403349, "grad_norm": 0.5485801100730896, "learning_rate": 4.27655407323304e-05, "loss": 0.0031424075365066527, "step": 201720 }, { "epoch": 57.260857223956855, "grad_norm": 0.07665881514549255, "learning_rate": 4.276270224240704e-05, "loss": 0.0008447878062725067, "step": 201730 }, { "epoch": 57.26369571388022, "grad_norm": 0.026540782302618027, "learning_rate": 4.275986375248368e-05, "loss": 0.004280924052000046, "step": 201740 }, { "epoch": 57.26653420380357, "grad_norm": 0.06902874261140823, "learning_rate": 4.275702526256032e-05, "loss": 0.0004971720278263092, "step": 201750 }, { "epoch": 57.269372693726936, "grad_norm": 0.19614951312541962, "learning_rate": 4.275418677263696e-05, "loss": 0.0007437849417328835, "step": 201760 }, { "epoch": 57.2722111836503, "grad_norm": 0.017647884786128998, "learning_rate": 4.27513482827136e-05, "loss": 0.00023087374866008757, "step": 201770 }, { "epoch": 57.27504967357366, "grad_norm": 0.1867164522409439, "learning_rate": 4.2748509792790235e-05, "loss": 0.0006678322330117226, "step": 201780 }, { "epoch": 57.27788816349702, "grad_norm": 0.9830272793769836, "learning_rate": 4.2745671302866876e-05, "loss": 0.0005666259676218033, "step": 201790 }, { "epoch": 57.28072665342038, "grad_norm": 0.6811372637748718, "learning_rate": 4.274283281294352e-05, "loss": 0.0006288418546319008, "step": 201800 }, { "epoch": 57.28356514334374, "grad_norm": 0.03200864419341087, "learning_rate": 4.273999432302015e-05, "loss": 0.00039253849536180496, "step": 201810 }, { "epoch": 57.2864036332671, "grad_norm": 0.16454167664051056, "learning_rate": 4.2737155833096794e-05, "loss": 0.00019156821072101592, "step": 201820 }, { "epoch": 57.28924212319046, "grad_norm": 2.3114898204803467, "learning_rate": 4.2734317343173435e-05, "loss": 0.002325908653438091, "step": 201830 }, { "epoch": 57.292080613113825, "grad_norm": 3.676750659942627, "learning_rate": 4.2731478853250076e-05, "loss": 0.0007697217166423798, "step": 201840 }, { "epoch": 57.29491910303718, "grad_norm": 0.062299538403749466, "learning_rate": 4.272864036332671e-05, "loss": 0.0003022855147719383, "step": 201850 }, { "epoch": 57.297757592960544, "grad_norm": 5.26702356338501, "learning_rate": 4.272580187340335e-05, "loss": 0.0010319046676158905, "step": 201860 }, { "epoch": 57.30059608288391, "grad_norm": 0.010554893873631954, "learning_rate": 4.2722963383479994e-05, "loss": 0.0012429360300302505, "step": 201870 }, { "epoch": 57.30343457280727, "grad_norm": 0.15709945559501648, "learning_rate": 4.272012489355663e-05, "loss": 0.0036340445280075074, "step": 201880 }, { "epoch": 57.306273062730625, "grad_norm": 0.09297211468219757, "learning_rate": 4.271728640363327e-05, "loss": 0.0012716976925730705, "step": 201890 }, { "epoch": 57.30911155265399, "grad_norm": 0.2547995150089264, "learning_rate": 4.271444791370991e-05, "loss": 0.0037591468542814256, "step": 201900 }, { "epoch": 57.31195004257735, "grad_norm": 0.02291746996343136, "learning_rate": 4.2711609423786546e-05, "loss": 0.001446639932692051, "step": 201910 }, { "epoch": 57.31478853250071, "grad_norm": 0.1732814759016037, "learning_rate": 4.270877093386319e-05, "loss": 0.00046481769531965257, "step": 201920 }, { "epoch": 57.31762702242407, "grad_norm": 11.757360458374023, "learning_rate": 4.270593244393983e-05, "loss": 0.0018617138266563416, "step": 201930 }, { "epoch": 57.32046551234743, "grad_norm": 0.05320831760764122, "learning_rate": 4.270309395401646e-05, "loss": 0.00013310648500919342, "step": 201940 }, { "epoch": 57.323304002270795, "grad_norm": 5.967705726623535, "learning_rate": 4.2700255464093104e-05, "loss": 0.0008825855329632759, "step": 201950 }, { "epoch": 57.32614249219415, "grad_norm": 0.16669918596744537, "learning_rate": 4.2697416974169746e-05, "loss": 0.00038497764617204664, "step": 201960 }, { "epoch": 57.328980982117514, "grad_norm": 0.13413560390472412, "learning_rate": 4.269457848424638e-05, "loss": 0.0013362009078264237, "step": 201970 }, { "epoch": 57.33181947204088, "grad_norm": 0.010523278266191483, "learning_rate": 4.269173999432302e-05, "loss": 0.00022161733359098433, "step": 201980 }, { "epoch": 57.33465796196423, "grad_norm": 0.22267121076583862, "learning_rate": 4.2688901504399656e-05, "loss": 0.0005072617903351783, "step": 201990 }, { "epoch": 57.337496451887596, "grad_norm": 0.030805744230747223, "learning_rate": 4.2686063014476305e-05, "loss": 0.0009692342951893807, "step": 202000 }, { "epoch": 57.337496451887596, "eval_accuracy": 0.9809245247027405, "eval_loss": 0.07391611486673355, "eval_runtime": 32.1412, "eval_samples_per_second": 489.31, "eval_steps_per_second": 7.654, "step": 202000 }, { "epoch": 57.34033494181096, "grad_norm": 0.07943688333034515, "learning_rate": 4.268322452455294e-05, "loss": 0.00026950743049383166, "step": 202010 }, { "epoch": 57.343173431734314, "grad_norm": 0.048843540251255035, "learning_rate": 4.2680386034629574e-05, "loss": 0.0017082693055272102, "step": 202020 }, { "epoch": 57.34601192165768, "grad_norm": 0.03189486265182495, "learning_rate": 4.267754754470622e-05, "loss": 0.0003001859411597252, "step": 202030 }, { "epoch": 57.34885041158104, "grad_norm": 0.15821634232997894, "learning_rate": 4.2674709054782857e-05, "loss": 0.0006166325882077217, "step": 202040 }, { "epoch": 57.3516889015044, "grad_norm": 0.8173258900642395, "learning_rate": 4.26718705648595e-05, "loss": 0.002323352359235287, "step": 202050 }, { "epoch": 57.35452739142776, "grad_norm": 0.026219304651021957, "learning_rate": 4.266903207493614e-05, "loss": 0.0017041206359863282, "step": 202060 }, { "epoch": 57.35736588135112, "grad_norm": 0.04855605587363243, "learning_rate": 4.2666193585012774e-05, "loss": 0.0004995286464691163, "step": 202070 }, { "epoch": 57.360204371274484, "grad_norm": 0.7108160853385925, "learning_rate": 4.2663355095089415e-05, "loss": 0.00028795972466468813, "step": 202080 }, { "epoch": 57.36304286119784, "grad_norm": 0.3911873400211334, "learning_rate": 4.266051660516605e-05, "loss": 0.0010342035442590714, "step": 202090 }, { "epoch": 57.3658813511212, "grad_norm": 0.22070525586605072, "learning_rate": 4.265767811524269e-05, "loss": 0.001557663269340992, "step": 202100 }, { "epoch": 57.368719841044566, "grad_norm": 0.1147550493478775, "learning_rate": 4.265483962531933e-05, "loss": 0.0018103081732988358, "step": 202110 }, { "epoch": 57.37155833096792, "grad_norm": 0.029643462970852852, "learning_rate": 4.265200113539597e-05, "loss": 0.00048581007868051527, "step": 202120 }, { "epoch": 57.374396820891285, "grad_norm": 0.014745309948921204, "learning_rate": 4.2649162645472615e-05, "loss": 0.001324942521750927, "step": 202130 }, { "epoch": 57.37723531081465, "grad_norm": 0.23990780115127563, "learning_rate": 4.264632415554925e-05, "loss": 0.00026526432484388354, "step": 202140 }, { "epoch": 57.38007380073801, "grad_norm": 0.02432987093925476, "learning_rate": 4.2643485665625885e-05, "loss": 0.0018613256514072418, "step": 202150 }, { "epoch": 57.382912290661366, "grad_norm": 0.15802666544914246, "learning_rate": 4.264064717570253e-05, "loss": 0.0003154255449771881, "step": 202160 }, { "epoch": 57.38575078058473, "grad_norm": 0.1496344655752182, "learning_rate": 4.263780868577917e-05, "loss": 0.000443795882165432, "step": 202170 }, { "epoch": 57.38858927050809, "grad_norm": 0.21130171418190002, "learning_rate": 4.263497019585581e-05, "loss": 0.00035362783819437027, "step": 202180 }, { "epoch": 57.39142776043145, "grad_norm": 14.03533935546875, "learning_rate": 4.263213170593244e-05, "loss": 0.006137214973568917, "step": 202190 }, { "epoch": 57.39426625035481, "grad_norm": 0.10509560257196426, "learning_rate": 4.2629293216009085e-05, "loss": 0.00046892613172531126, "step": 202200 }, { "epoch": 57.39710474027817, "grad_norm": 0.1754673570394516, "learning_rate": 4.2626454726085726e-05, "loss": 0.0007714690640568733, "step": 202210 }, { "epoch": 57.39994323020153, "grad_norm": 2.183049440383911, "learning_rate": 4.262361623616236e-05, "loss": 0.0019114887341856956, "step": 202220 }, { "epoch": 57.40278172012489, "grad_norm": 0.33251047134399414, "learning_rate": 4.2620777746239e-05, "loss": 0.0008828723803162575, "step": 202230 }, { "epoch": 57.405620210048255, "grad_norm": 0.3166636526584625, "learning_rate": 4.261793925631564e-05, "loss": 0.0011380380019545555, "step": 202240 }, { "epoch": 57.40845869997162, "grad_norm": 0.1497737467288971, "learning_rate": 4.261510076639228e-05, "loss": 0.005512848496437073, "step": 202250 }, { "epoch": 57.411297189894974, "grad_norm": 0.022029288113117218, "learning_rate": 4.2612262276468926e-05, "loss": 0.0009461095556616783, "step": 202260 }, { "epoch": 57.41413567981834, "grad_norm": 1.2176225185394287, "learning_rate": 4.260942378654556e-05, "loss": 0.0016103994101285934, "step": 202270 }, { "epoch": 57.4169741697417, "grad_norm": 3.631279945373535, "learning_rate": 4.2606585296622195e-05, "loss": 0.002469648979604244, "step": 202280 }, { "epoch": 57.419812659665055, "grad_norm": 0.30400773882865906, "learning_rate": 4.2603746806698843e-05, "loss": 0.0025425424799323084, "step": 202290 }, { "epoch": 57.42265114958842, "grad_norm": 0.62889564037323, "learning_rate": 4.260090831677548e-05, "loss": 0.0006898233667016029, "step": 202300 }, { "epoch": 57.42548963951178, "grad_norm": 5.211442470550537, "learning_rate": 4.259806982685212e-05, "loss": 0.0012510219588875771, "step": 202310 }, { "epoch": 57.428328129435144, "grad_norm": 1.352169394493103, "learning_rate": 4.2595231336928754e-05, "loss": 0.0021158624440431595, "step": 202320 }, { "epoch": 57.4311666193585, "grad_norm": 0.5961002111434937, "learning_rate": 4.2592392847005395e-05, "loss": 0.0006390294060111046, "step": 202330 }, { "epoch": 57.43400510928186, "grad_norm": 0.18688765168190002, "learning_rate": 4.258955435708204e-05, "loss": 0.0005671286955475808, "step": 202340 }, { "epoch": 57.436843599205226, "grad_norm": 0.47930917143821716, "learning_rate": 4.258671586715867e-05, "loss": 0.000659513846039772, "step": 202350 }, { "epoch": 57.43968208912858, "grad_norm": 19.706541061401367, "learning_rate": 4.258387737723531e-05, "loss": 0.003128024563193321, "step": 202360 }, { "epoch": 57.442520579051944, "grad_norm": 1.855851411819458, "learning_rate": 4.2581038887311954e-05, "loss": 0.00028920676559209826, "step": 202370 }, { "epoch": 57.44535906897531, "grad_norm": 0.10312853753566742, "learning_rate": 4.257820039738859e-05, "loss": 0.0003955261781811714, "step": 202380 }, { "epoch": 57.44819755889866, "grad_norm": 0.013278954662382603, "learning_rate": 4.257536190746523e-05, "loss": 0.0009759977459907532, "step": 202390 }, { "epoch": 57.451036048822026, "grad_norm": 0.006509707774966955, "learning_rate": 4.257252341754187e-05, "loss": 0.00046918764710426333, "step": 202400 }, { "epoch": 57.45387453874539, "grad_norm": 0.22760948538780212, "learning_rate": 4.2569684927618506e-05, "loss": 0.002006787434220314, "step": 202410 }, { "epoch": 57.45671302866875, "grad_norm": 1.296704888343811, "learning_rate": 4.256684643769515e-05, "loss": 0.0034874189645051957, "step": 202420 }, { "epoch": 57.45955151859211, "grad_norm": 2.190035104751587, "learning_rate": 4.256400794777179e-05, "loss": 0.0036304362118244173, "step": 202430 }, { "epoch": 57.46239000851547, "grad_norm": 0.009228305891156197, "learning_rate": 4.2561169457848423e-05, "loss": 0.0006612570956349373, "step": 202440 }, { "epoch": 57.46522849843883, "grad_norm": 0.12334689497947693, "learning_rate": 4.2558614816917403e-05, "loss": 0.011062853038311005, "step": 202450 }, { "epoch": 57.46806698836219, "grad_norm": 0.08039363473653793, "learning_rate": 4.255577632699404e-05, "loss": 0.0007342813536524772, "step": 202460 }, { "epoch": 57.47090547828555, "grad_norm": 0.370477557182312, "learning_rate": 4.255293783707068e-05, "loss": 0.0006167536601424217, "step": 202470 }, { "epoch": 57.473743968208915, "grad_norm": 0.1946653574705124, "learning_rate": 4.255009934714732e-05, "loss": 0.0016571998596191407, "step": 202480 }, { "epoch": 57.47658245813227, "grad_norm": 0.06228036805987358, "learning_rate": 4.2547260857223955e-05, "loss": 0.0005370384082198143, "step": 202490 }, { "epoch": 57.47942094805563, "grad_norm": 0.023676464334130287, "learning_rate": 4.25444223673006e-05, "loss": 0.0004003720358014107, "step": 202500 }, { "epoch": 57.47942094805563, "eval_accuracy": 0.9772365994786036, "eval_loss": 0.0778462290763855, "eval_runtime": 32.0481, "eval_samples_per_second": 490.731, "eval_steps_per_second": 7.676, "step": 202500 }, { "epoch": 57.482259437978996, "grad_norm": 0.42736175656318665, "learning_rate": 4.254158387737724e-05, "loss": 0.0013916702941060067, "step": 202510 }, { "epoch": 57.48509792790236, "grad_norm": 0.1252487748861313, "learning_rate": 4.253874538745388e-05, "loss": 0.001075398176908493, "step": 202520 }, { "epoch": 57.487936417825715, "grad_norm": 0.20824357867240906, "learning_rate": 4.2535906897530514e-05, "loss": 0.0006401171907782554, "step": 202530 }, { "epoch": 57.49077490774908, "grad_norm": 0.1311708688735962, "learning_rate": 4.2533068407607155e-05, "loss": 0.0008078237995505333, "step": 202540 }, { "epoch": 57.49361339767244, "grad_norm": 0.01980063132941723, "learning_rate": 4.25302299176838e-05, "loss": 0.001834871806204319, "step": 202550 }, { "epoch": 57.496451887595796, "grad_norm": 0.15550297498703003, "learning_rate": 4.252739142776043e-05, "loss": 0.0002281099557876587, "step": 202560 }, { "epoch": 57.49929037751916, "grad_norm": 0.06680907309055328, "learning_rate": 4.252455293783707e-05, "loss": 0.004640765488147736, "step": 202570 }, { "epoch": 57.50212886744252, "grad_norm": 0.21559618413448334, "learning_rate": 4.2521714447913714e-05, "loss": 0.00037467554211616515, "step": 202580 }, { "epoch": 57.50496735736588, "grad_norm": 0.025715401396155357, "learning_rate": 4.251887595799035e-05, "loss": 0.00044564176350831987, "step": 202590 }, { "epoch": 57.50780584728924, "grad_norm": 0.3162256181240082, "learning_rate": 4.251603746806699e-05, "loss": 0.0005505038425326347, "step": 202600 }, { "epoch": 57.510644337212604, "grad_norm": 0.7635800838470459, "learning_rate": 4.251319897814363e-05, "loss": 0.0008844781666994094, "step": 202610 }, { "epoch": 57.51348282713597, "grad_norm": 0.005411970894783735, "learning_rate": 4.2510360488220266e-05, "loss": 0.000229620561003685, "step": 202620 }, { "epoch": 57.51632131705932, "grad_norm": 0.03309378772974014, "learning_rate": 4.250752199829691e-05, "loss": 0.00036442819982767104, "step": 202630 }, { "epoch": 57.519159806982685, "grad_norm": 0.023250920698046684, "learning_rate": 4.250468350837355e-05, "loss": 0.0031995609402656554, "step": 202640 }, { "epoch": 57.52199829690605, "grad_norm": 15.291648864746094, "learning_rate": 4.250184501845019e-05, "loss": 0.005511532723903656, "step": 202650 }, { "epoch": 57.524836786829404, "grad_norm": 0.10993843525648117, "learning_rate": 4.2499006528526825e-05, "loss": 0.0018990088254213333, "step": 202660 }, { "epoch": 57.52767527675277, "grad_norm": 2.188270330429077, "learning_rate": 4.2496168038603466e-05, "loss": 0.0013174111023545264, "step": 202670 }, { "epoch": 57.53051376667613, "grad_norm": 0.108439140021801, "learning_rate": 4.249332954868011e-05, "loss": 0.00425332672894001, "step": 202680 }, { "epoch": 57.533352256599485, "grad_norm": 0.04370484501123428, "learning_rate": 4.249049105875674e-05, "loss": 0.00023379530757665633, "step": 202690 }, { "epoch": 57.53619074652285, "grad_norm": 0.017790384590625763, "learning_rate": 4.2487652568833384e-05, "loss": 0.001262430101633072, "step": 202700 }, { "epoch": 57.53902923644621, "grad_norm": 0.05395805835723877, "learning_rate": 4.2484814078910025e-05, "loss": 0.0012628158554434776, "step": 202710 }, { "epoch": 57.541867726369574, "grad_norm": 1.0747252702713013, "learning_rate": 4.248197558898666e-05, "loss": 0.0020679650828242303, "step": 202720 }, { "epoch": 57.54470621629293, "grad_norm": 0.4443657696247101, "learning_rate": 4.24791370990633e-05, "loss": 0.0008516723290085792, "step": 202730 }, { "epoch": 57.54754470621629, "grad_norm": 7.216020107269287, "learning_rate": 4.247629860913994e-05, "loss": 0.003615783154964447, "step": 202740 }, { "epoch": 57.550383196139656, "grad_norm": 0.061488863080739975, "learning_rate": 4.247346011921658e-05, "loss": 0.0009046025574207306, "step": 202750 }, { "epoch": 57.55322168606301, "grad_norm": 1.0289504528045654, "learning_rate": 4.247062162929322e-05, "loss": 0.0007604986429214477, "step": 202760 }, { "epoch": 57.556060175986374, "grad_norm": 0.08776504546403885, "learning_rate": 4.246778313936986e-05, "loss": 0.007427095621824265, "step": 202770 }, { "epoch": 57.55889866590974, "grad_norm": 0.09619253873825073, "learning_rate": 4.2464944649446494e-05, "loss": 0.0010243801400065422, "step": 202780 }, { "epoch": 57.5617371558331, "grad_norm": 0.11272318661212921, "learning_rate": 4.2462106159523136e-05, "loss": 0.0013272235170006751, "step": 202790 }, { "epoch": 57.564575645756456, "grad_norm": 0.06961038708686829, "learning_rate": 4.245926766959977e-05, "loss": 0.001326056569814682, "step": 202800 }, { "epoch": 57.56741413567982, "grad_norm": 1.0135842561721802, "learning_rate": 4.245642917967642e-05, "loss": 0.0036885514855384827, "step": 202810 }, { "epoch": 57.57025262560318, "grad_norm": 0.0341981016099453, "learning_rate": 4.245359068975305e-05, "loss": 0.000358176976442337, "step": 202820 }, { "epoch": 57.57309111552654, "grad_norm": 0.03228076174855232, "learning_rate": 4.245075219982969e-05, "loss": 0.0005840960890054703, "step": 202830 }, { "epoch": 57.5759296054499, "grad_norm": 0.7097041010856628, "learning_rate": 4.2447913709906336e-05, "loss": 0.00025296267122030256, "step": 202840 }, { "epoch": 57.57876809537326, "grad_norm": 0.28014692664146423, "learning_rate": 4.244507521998297e-05, "loss": 0.000554264523088932, "step": 202850 }, { "epoch": 57.58160658529662, "grad_norm": 0.07063359767198563, "learning_rate": 4.244223673005961e-05, "loss": 0.0003155924379825592, "step": 202860 }, { "epoch": 57.58444507521998, "grad_norm": 0.10980930924415588, "learning_rate": 4.243939824013625e-05, "loss": 0.010909478366374969, "step": 202870 }, { "epoch": 57.587283565143345, "grad_norm": 0.030080821365118027, "learning_rate": 4.243655975021289e-05, "loss": 0.0005289612337946892, "step": 202880 }, { "epoch": 57.59012205506671, "grad_norm": 0.9363241195678711, "learning_rate": 4.243372126028953e-05, "loss": 0.0012692077085375785, "step": 202890 }, { "epoch": 57.59296054499006, "grad_norm": 0.9792172908782959, "learning_rate": 4.2430882770366164e-05, "loss": 0.005339708179235458, "step": 202900 }, { "epoch": 57.595799034913426, "grad_norm": 0.3905777633190155, "learning_rate": 4.2428044280442805e-05, "loss": 0.007431463152170181, "step": 202910 }, { "epoch": 57.59863752483679, "grad_norm": 1.0960655212402344, "learning_rate": 4.2425205790519446e-05, "loss": 0.0017599966377019882, "step": 202920 }, { "epoch": 57.601476014760145, "grad_norm": 13.955649375915527, "learning_rate": 4.242236730059608e-05, "loss": 0.006590297818183899, "step": 202930 }, { "epoch": 57.60431450468351, "grad_norm": 0.15392546355724335, "learning_rate": 4.241952881067273e-05, "loss": 0.0009449355304241181, "step": 202940 }, { "epoch": 57.60715299460687, "grad_norm": 0.05764875188469887, "learning_rate": 4.2416690320749364e-05, "loss": 0.0004433572292327881, "step": 202950 }, { "epoch": 57.60999148453023, "grad_norm": 1.4319692850112915, "learning_rate": 4.2413851830826e-05, "loss": 0.002892743796110153, "step": 202960 }, { "epoch": 57.61282997445359, "grad_norm": 0.14103437960147858, "learning_rate": 4.2411013340902646e-05, "loss": 0.00029717758297920227, "step": 202970 }, { "epoch": 57.61566846437695, "grad_norm": 0.15540529787540436, "learning_rate": 4.240817485097928e-05, "loss": 0.002683236077427864, "step": 202980 }, { "epoch": 57.618506954300315, "grad_norm": 2.6307213306427, "learning_rate": 4.240533636105592e-05, "loss": 0.0008884282782673836, "step": 202990 }, { "epoch": 57.62134544422367, "grad_norm": 1.4305577278137207, "learning_rate": 4.240249787113256e-05, "loss": 0.0009783884510397912, "step": 203000 }, { "epoch": 57.62134544422367, "eval_accuracy": 0.9762192407960831, "eval_loss": 0.0908336341381073, "eval_runtime": 32.5069, "eval_samples_per_second": 483.805, "eval_steps_per_second": 7.568, "step": 203000 }, { "epoch": 57.624183934147034, "grad_norm": 0.8108060956001282, "learning_rate": 4.23996593812092e-05, "loss": 0.003211665153503418, "step": 203010 }, { "epoch": 57.6270224240704, "grad_norm": 0.9907627105712891, "learning_rate": 4.239682089128584e-05, "loss": 0.0004545176401734352, "step": 203020 }, { "epoch": 57.62986091399375, "grad_norm": 0.7625811100006104, "learning_rate": 4.2393982401362474e-05, "loss": 0.00041805803775787356, "step": 203030 }, { "epoch": 57.632699403917115, "grad_norm": 0.3649080991744995, "learning_rate": 4.2391143911439116e-05, "loss": 0.0007667779922485351, "step": 203040 }, { "epoch": 57.63553789384048, "grad_norm": 1.927728533744812, "learning_rate": 4.238830542151576e-05, "loss": 0.003935283422470093, "step": 203050 }, { "epoch": 57.63837638376384, "grad_norm": 0.07767146080732346, "learning_rate": 4.238546693159239e-05, "loss": 0.0009949265047907829, "step": 203060 }, { "epoch": 57.6412148736872, "grad_norm": 2.8301377296447754, "learning_rate": 4.238262844166904e-05, "loss": 0.0012938981875777245, "step": 203070 }, { "epoch": 57.64405336361056, "grad_norm": 0.0549924261868, "learning_rate": 4.2379789951745674e-05, "loss": 0.0010164132341742515, "step": 203080 }, { "epoch": 57.64689185353392, "grad_norm": 0.11862672120332718, "learning_rate": 4.237695146182231e-05, "loss": 0.00030298028141260145, "step": 203090 }, { "epoch": 57.64973034345728, "grad_norm": 0.5455009341239929, "learning_rate": 4.237411297189895e-05, "loss": 0.0003002457320690155, "step": 203100 }, { "epoch": 57.65256883338064, "grad_norm": 4.040154933929443, "learning_rate": 4.237127448197559e-05, "loss": 0.0009656190872192383, "step": 203110 }, { "epoch": 57.655407323304004, "grad_norm": 0.021062998101115227, "learning_rate": 4.236843599205223e-05, "loss": 0.0003586918115615845, "step": 203120 }, { "epoch": 57.65824581322736, "grad_norm": 0.15249450504779816, "learning_rate": 4.236559750212887e-05, "loss": 0.00026847999542951585, "step": 203130 }, { "epoch": 57.66108430315072, "grad_norm": 0.021003063768148422, "learning_rate": 4.236275901220551e-05, "loss": 0.000362345390021801, "step": 203140 }, { "epoch": 57.663922793074086, "grad_norm": 0.012804241850972176, "learning_rate": 4.235992052228215e-05, "loss": 0.0003417976200580597, "step": 203150 }, { "epoch": 57.66676128299745, "grad_norm": 0.12278220057487488, "learning_rate": 4.2357082032358785e-05, "loss": 0.000321282260119915, "step": 203160 }, { "epoch": 57.669599772920805, "grad_norm": 0.3321686089038849, "learning_rate": 4.2354243542435427e-05, "loss": 0.00035560932010412214, "step": 203170 }, { "epoch": 57.67243826284417, "grad_norm": 0.012518245726823807, "learning_rate": 4.235140505251207e-05, "loss": 0.0002537654712796211, "step": 203180 }, { "epoch": 57.67527675276753, "grad_norm": 0.02378888800740242, "learning_rate": 4.23485665625887e-05, "loss": 0.0003287248313426971, "step": 203190 }, { "epoch": 57.678115242690886, "grad_norm": 0.013889407739043236, "learning_rate": 4.2345728072665344e-05, "loss": 0.0002557728439569473, "step": 203200 }, { "epoch": 57.68095373261425, "grad_norm": 0.025578226894140244, "learning_rate": 4.2342889582741985e-05, "loss": 0.0002968860790133476, "step": 203210 }, { "epoch": 57.68379222253761, "grad_norm": 0.024410929530858994, "learning_rate": 4.234005109281862e-05, "loss": 8.724108338356018e-05, "step": 203220 }, { "epoch": 57.68663071246097, "grad_norm": 0.06054743751883507, "learning_rate": 4.233721260289526e-05, "loss": 0.0007057260721921921, "step": 203230 }, { "epoch": 57.68946920238433, "grad_norm": 0.00766676664352417, "learning_rate": 4.23343741129719e-05, "loss": 0.0007407240569591522, "step": 203240 }, { "epoch": 57.69230769230769, "grad_norm": 0.007530054543167353, "learning_rate": 4.233153562304854e-05, "loss": 0.0003676220774650574, "step": 203250 }, { "epoch": 57.695146182231056, "grad_norm": 0.023047449067234993, "learning_rate": 4.232869713312518e-05, "loss": 0.00044903326779603957, "step": 203260 }, { "epoch": 57.69798467215441, "grad_norm": 0.07136601209640503, "learning_rate": 4.232585864320182e-05, "loss": 0.0002299761399626732, "step": 203270 }, { "epoch": 57.700823162077775, "grad_norm": 0.12068890780210495, "learning_rate": 4.232302015327846e-05, "loss": 0.0002027789130806923, "step": 203280 }, { "epoch": 57.70366165200114, "grad_norm": 0.031840648502111435, "learning_rate": 4.2320181663355096e-05, "loss": 0.0006800269708037377, "step": 203290 }, { "epoch": 57.706500141924494, "grad_norm": 0.020992863923311234, "learning_rate": 4.231734317343173e-05, "loss": 0.0005277404561638832, "step": 203300 }, { "epoch": 57.70933863184786, "grad_norm": 0.1057388037443161, "learning_rate": 4.231450468350838e-05, "loss": 0.0006167959421873092, "step": 203310 }, { "epoch": 57.71217712177122, "grad_norm": 0.03239342197775841, "learning_rate": 4.231166619358501e-05, "loss": 0.0004502894356846809, "step": 203320 }, { "epoch": 57.715015611694575, "grad_norm": 0.020041802898049355, "learning_rate": 4.2308827703661655e-05, "loss": 0.0004637433215975761, "step": 203330 }, { "epoch": 57.71785410161794, "grad_norm": 0.021435806527733803, "learning_rate": 4.2305989213738296e-05, "loss": 0.0014048116281628609, "step": 203340 }, { "epoch": 57.7206925915413, "grad_norm": 0.039801742881536484, "learning_rate": 4.230315072381493e-05, "loss": 0.0005146607756614685, "step": 203350 }, { "epoch": 57.723531081464664, "grad_norm": 0.6433714032173157, "learning_rate": 4.230031223389157e-05, "loss": 0.0005228357389569282, "step": 203360 }, { "epoch": 57.72636957138802, "grad_norm": 0.03833135962486267, "learning_rate": 4.229747374396821e-05, "loss": 0.0030122546479105948, "step": 203370 }, { "epoch": 57.72920806131138, "grad_norm": 0.06501774489879608, "learning_rate": 4.229463525404485e-05, "loss": 0.00021413061767816544, "step": 203380 }, { "epoch": 57.732046551234745, "grad_norm": 0.018152276054024696, "learning_rate": 4.229179676412149e-05, "loss": 0.0058472417294979095, "step": 203390 }, { "epoch": 57.7348850411581, "grad_norm": 0.151711568236351, "learning_rate": 4.2288958274198124e-05, "loss": 0.0020419908687472343, "step": 203400 }, { "epoch": 57.737723531081464, "grad_norm": 0.15659219026565552, "learning_rate": 4.228611978427477e-05, "loss": 0.001192198693752289, "step": 203410 }, { "epoch": 57.74056202100483, "grad_norm": 0.3061080873012543, "learning_rate": 4.228328129435141e-05, "loss": 0.0012196313589811325, "step": 203420 }, { "epoch": 57.74340051092818, "grad_norm": 1.2730869054794312, "learning_rate": 4.228044280442804e-05, "loss": 0.0008027389645576477, "step": 203430 }, { "epoch": 57.746239000851546, "grad_norm": 1.0387301445007324, "learning_rate": 4.227760431450469e-05, "loss": 0.0015052201226353646, "step": 203440 }, { "epoch": 57.74907749077491, "grad_norm": 0.4488503932952881, "learning_rate": 4.2274765824581324e-05, "loss": 0.0006218140944838524, "step": 203450 }, { "epoch": 57.75191598069827, "grad_norm": 8.58390998840332, "learning_rate": 4.2271927334657965e-05, "loss": 0.007500927150249481, "step": 203460 }, { "epoch": 57.75475447062163, "grad_norm": 0.5092456340789795, "learning_rate": 4.226908884473461e-05, "loss": 0.0009272841736674309, "step": 203470 }, { "epoch": 57.75759296054499, "grad_norm": 3.9844167232513428, "learning_rate": 4.226625035481124e-05, "loss": 0.0033251769840717317, "step": 203480 }, { "epoch": 57.76043145046835, "grad_norm": 20.932180404663086, "learning_rate": 4.226341186488788e-05, "loss": 0.015054890513420105, "step": 203490 }, { "epoch": 57.76326994039171, "grad_norm": 0.021061381325125694, "learning_rate": 4.226057337496452e-05, "loss": 0.0001088099554181099, "step": 203500 }, { "epoch": 57.76326994039171, "eval_accuracy": 0.9788262224200419, "eval_loss": 0.0806448757648468, "eval_runtime": 32.6241, "eval_samples_per_second": 482.068, "eval_steps_per_second": 7.54, "step": 203500 }, { "epoch": 57.76610843031507, "grad_norm": 0.1386331170797348, "learning_rate": 4.225773488504116e-05, "loss": 0.0008573323488235473, "step": 203510 }, { "epoch": 57.768946920238434, "grad_norm": 0.03176489472389221, "learning_rate": 4.22548963951178e-05, "loss": 0.0014109503477811812, "step": 203520 }, { "epoch": 57.7717854101618, "grad_norm": 0.0408705472946167, "learning_rate": 4.2252057905194435e-05, "loss": 0.001288481242954731, "step": 203530 }, { "epoch": 57.77462390008515, "grad_norm": 0.8526514172554016, "learning_rate": 4.224921941527108e-05, "loss": 0.0005118295550346375, "step": 203540 }, { "epoch": 57.777462390008516, "grad_norm": 1.2944879531860352, "learning_rate": 4.224638092534772e-05, "loss": 0.0012841032817959785, "step": 203550 }, { "epoch": 57.78030087993188, "grad_norm": 0.05061417445540428, "learning_rate": 4.224354243542435e-05, "loss": 0.0022646715864539147, "step": 203560 }, { "epoch": 57.783139369855235, "grad_norm": 0.28999507427215576, "learning_rate": 4.2240703945501e-05, "loss": 0.0009400840848684311, "step": 203570 }, { "epoch": 57.7859778597786, "grad_norm": 0.7716155648231506, "learning_rate": 4.2237865455577635e-05, "loss": 0.0007371071726083756, "step": 203580 }, { "epoch": 57.78881634970196, "grad_norm": 0.18266406655311584, "learning_rate": 4.2235026965654276e-05, "loss": 0.0003968212753534317, "step": 203590 }, { "epoch": 57.791654839625316, "grad_norm": 0.24841676652431488, "learning_rate": 4.223218847573091e-05, "loss": 0.0010218365117907525, "step": 203600 }, { "epoch": 57.79449332954868, "grad_norm": 0.23521652817726135, "learning_rate": 4.222934998580755e-05, "loss": 0.0007314315065741539, "step": 203610 }, { "epoch": 57.79733181947204, "grad_norm": 0.04392537474632263, "learning_rate": 4.2226511495884194e-05, "loss": 0.0026745861396193504, "step": 203620 }, { "epoch": 57.800170309395405, "grad_norm": 0.055736612528562546, "learning_rate": 4.222367300596083e-05, "loss": 0.002558356150984764, "step": 203630 }, { "epoch": 57.80300879931876, "grad_norm": 0.07415959239006042, "learning_rate": 4.222083451603747e-05, "loss": 0.0007418647408485412, "step": 203640 }, { "epoch": 57.805847289242124, "grad_norm": 0.02574933134019375, "learning_rate": 4.221799602611411e-05, "loss": 0.00044418778270483017, "step": 203650 }, { "epoch": 57.80868577916549, "grad_norm": 0.10044330358505249, "learning_rate": 4.2215157536190745e-05, "loss": 0.0009741054847836495, "step": 203660 }, { "epoch": 57.81152426908884, "grad_norm": 0.05653214082121849, "learning_rate": 4.221231904626739e-05, "loss": 0.0054633978754282, "step": 203670 }, { "epoch": 57.814362759012205, "grad_norm": 0.057849444448947906, "learning_rate": 4.220948055634403e-05, "loss": 0.0003483844920992851, "step": 203680 }, { "epoch": 57.81720124893557, "grad_norm": 0.14867746829986572, "learning_rate": 4.220664206642066e-05, "loss": 0.0003938857465982437, "step": 203690 }, { "epoch": 57.820039738858924, "grad_norm": 0.13294005393981934, "learning_rate": 4.2203803576497304e-05, "loss": 0.00025340262800455093, "step": 203700 }, { "epoch": 57.82287822878229, "grad_norm": 0.18580500781536102, "learning_rate": 4.2200965086573946e-05, "loss": 0.0013450739905238151, "step": 203710 }, { "epoch": 57.82571671870565, "grad_norm": 0.02587280422449112, "learning_rate": 4.219812659665058e-05, "loss": 0.0014976449310779572, "step": 203720 }, { "epoch": 57.82855520862901, "grad_norm": 0.05808544531464577, "learning_rate": 4.219528810672722e-05, "loss": 0.00041355248540639875, "step": 203730 }, { "epoch": 57.83139369855237, "grad_norm": 0.03316033259034157, "learning_rate": 4.219244961680386e-05, "loss": 0.0016160726547241211, "step": 203740 }, { "epoch": 57.83423218847573, "grad_norm": 0.32668185234069824, "learning_rate": 4.2189611126880504e-05, "loss": 0.0006963161751627922, "step": 203750 }, { "epoch": 57.837070678399094, "grad_norm": 0.32159045338630676, "learning_rate": 4.218677263695714e-05, "loss": 0.0003088017925620079, "step": 203760 }, { "epoch": 57.83990916832245, "grad_norm": 0.5166965126991272, "learning_rate": 4.218393414703378e-05, "loss": 0.0005923550575971604, "step": 203770 }, { "epoch": 57.84274765824581, "grad_norm": 0.04078520089387894, "learning_rate": 4.218109565711042e-05, "loss": 0.00023098234087228774, "step": 203780 }, { "epoch": 57.845586148169176, "grad_norm": 0.32828110456466675, "learning_rate": 4.2178257167187056e-05, "loss": 0.00035635977983474734, "step": 203790 }, { "epoch": 57.84842463809253, "grad_norm": 0.3432689905166626, "learning_rate": 4.21754186772637e-05, "loss": 0.0008433733135461807, "step": 203800 }, { "epoch": 57.851263128015894, "grad_norm": 0.04531968757510185, "learning_rate": 4.217258018734034e-05, "loss": 0.0003129448741674423, "step": 203810 }, { "epoch": 57.85410161793926, "grad_norm": 0.5039620995521545, "learning_rate": 4.2169741697416974e-05, "loss": 0.0013726573437452317, "step": 203820 }, { "epoch": 57.85694010786262, "grad_norm": 0.08485621213912964, "learning_rate": 4.2166903207493615e-05, "loss": 0.00023926272988319396, "step": 203830 }, { "epoch": 57.859778597785976, "grad_norm": 0.04285060614347458, "learning_rate": 4.2164064717570256e-05, "loss": 0.0004269793629646301, "step": 203840 }, { "epoch": 57.86261708770934, "grad_norm": 0.1039428561925888, "learning_rate": 4.216122622764689e-05, "loss": 0.0036132253706455232, "step": 203850 }, { "epoch": 57.8654555776327, "grad_norm": 0.5054091215133667, "learning_rate": 4.215838773772353e-05, "loss": 0.0037370655685663224, "step": 203860 }, { "epoch": 57.86829406755606, "grad_norm": 0.2088550329208374, "learning_rate": 4.2155549247800174e-05, "loss": 0.00022067539393901824, "step": 203870 }, { "epoch": 57.87113255747942, "grad_norm": 0.12185687571763992, "learning_rate": 4.2152710757876815e-05, "loss": 0.0004766346886754036, "step": 203880 }, { "epoch": 57.87397104740278, "grad_norm": 0.03746906295418739, "learning_rate": 4.214987226795345e-05, "loss": 0.0011947164312005043, "step": 203890 }, { "epoch": 57.876809537326146, "grad_norm": 0.22051361203193665, "learning_rate": 4.214703377803009e-05, "loss": 0.0002687947824597359, "step": 203900 }, { "epoch": 57.8796480272495, "grad_norm": 0.05351918935775757, "learning_rate": 4.214419528810673e-05, "loss": 0.00018464382737874984, "step": 203910 }, { "epoch": 57.882486517172865, "grad_norm": 0.028821086511015892, "learning_rate": 4.214135679818337e-05, "loss": 0.00046849120408296583, "step": 203920 }, { "epoch": 57.88532500709623, "grad_norm": 0.04851937294006348, "learning_rate": 4.213851830826001e-05, "loss": 0.000472315214574337, "step": 203930 }, { "epoch": 57.88816349701958, "grad_norm": 0.7905162572860718, "learning_rate": 4.213567981833665e-05, "loss": 0.0007357167080044747, "step": 203940 }, { "epoch": 57.891001986942946, "grad_norm": 0.6250839233398438, "learning_rate": 4.2132841328413284e-05, "loss": 0.0011721968650817872, "step": 203950 }, { "epoch": 57.89384047686631, "grad_norm": 4.825534343719482, "learning_rate": 4.2130002838489926e-05, "loss": 0.001409224234521389, "step": 203960 }, { "epoch": 57.896678966789665, "grad_norm": 0.22183270752429962, "learning_rate": 4.212716434856657e-05, "loss": 0.0003189507871866226, "step": 203970 }, { "epoch": 57.89951745671303, "grad_norm": 0.08644049614667892, "learning_rate": 4.21243258586432e-05, "loss": 0.0004677772521972656, "step": 203980 }, { "epoch": 57.90235594663639, "grad_norm": 0.02203330397605896, "learning_rate": 4.212148736871984e-05, "loss": 0.0003841409459710121, "step": 203990 }, { "epoch": 57.90519443655975, "grad_norm": 0.589742124080658, "learning_rate": 4.2118648878796484e-05, "loss": 0.00018324479460716248, "step": 204000 }, { "epoch": 57.90519443655975, "eval_accuracy": 0.9803522604438227, "eval_loss": 0.07444480806589127, "eval_runtime": 32.9807, "eval_samples_per_second": 476.854, "eval_steps_per_second": 7.459, "step": 204000 }, { "epoch": 57.90803292648311, "grad_norm": 0.010767682455480099, "learning_rate": 4.2115810388873126e-05, "loss": 0.0005605537444353104, "step": 204010 }, { "epoch": 57.91087141640647, "grad_norm": 0.09171885251998901, "learning_rate": 4.211297189894976e-05, "loss": 0.0002513719722628593, "step": 204020 }, { "epoch": 57.913709906329835, "grad_norm": 0.05096288397908211, "learning_rate": 4.2110133409026395e-05, "loss": 0.0004306025803089142, "step": 204030 }, { "epoch": 57.91654839625319, "grad_norm": 0.03095061145722866, "learning_rate": 4.210729491910304e-05, "loss": 0.00045613143593072893, "step": 204040 }, { "epoch": 57.919386886176554, "grad_norm": 0.29259613156318665, "learning_rate": 4.210445642917968e-05, "loss": 0.00023514945060014724, "step": 204050 }, { "epoch": 57.92222537609992, "grad_norm": 0.027359485626220703, "learning_rate": 4.210161793925632e-05, "loss": 0.00021498017013072969, "step": 204060 }, { "epoch": 57.92506386602327, "grad_norm": 0.021599385887384415, "learning_rate": 4.209877944933296e-05, "loss": 0.00017031989991664887, "step": 204070 }, { "epoch": 57.927902355946635, "grad_norm": 0.02814623713493347, "learning_rate": 4.2095940959409595e-05, "loss": 0.00016027409583330154, "step": 204080 }, { "epoch": 57.93074084587, "grad_norm": 0.006065940484404564, "learning_rate": 4.2093102469486236e-05, "loss": 0.00018275640904903413, "step": 204090 }, { "epoch": 57.93357933579336, "grad_norm": 0.05336625501513481, "learning_rate": 4.209026397956288e-05, "loss": 0.001657491736114025, "step": 204100 }, { "epoch": 57.93641782571672, "grad_norm": 0.14144258201122284, "learning_rate": 4.208742548963951e-05, "loss": 0.0003083301708102226, "step": 204110 }, { "epoch": 57.93925631564008, "grad_norm": 0.5792751908302307, "learning_rate": 4.2084586999716154e-05, "loss": 0.0003181589767336845, "step": 204120 }, { "epoch": 57.94209480556344, "grad_norm": 0.11705828458070755, "learning_rate": 4.208174850979279e-05, "loss": 0.00027374550700187683, "step": 204130 }, { "epoch": 57.9449332954868, "grad_norm": 0.12909115850925446, "learning_rate": 4.207891001986943e-05, "loss": 0.0009355930611491204, "step": 204140 }, { "epoch": 57.94777178541016, "grad_norm": 0.03992854803800583, "learning_rate": 4.207607152994607e-05, "loss": 0.004155614227056503, "step": 204150 }, { "epoch": 57.950610275333524, "grad_norm": 0.07783089578151703, "learning_rate": 4.2073233040022706e-05, "loss": 0.000640903040766716, "step": 204160 }, { "epoch": 57.95344876525688, "grad_norm": 0.11804375052452087, "learning_rate": 4.2070394550099354e-05, "loss": 0.00016187839210033417, "step": 204170 }, { "epoch": 57.95628725518024, "grad_norm": 0.3922516405582428, "learning_rate": 4.206755606017599e-05, "loss": 0.00034200269728899, "step": 204180 }, { "epoch": 57.959125745103606, "grad_norm": 0.05401405692100525, "learning_rate": 4.206471757025262e-05, "loss": 0.0001503054052591324, "step": 204190 }, { "epoch": 57.96196423502697, "grad_norm": 0.024301636964082718, "learning_rate": 4.206187908032927e-05, "loss": 0.00017855167388916015, "step": 204200 }, { "epoch": 57.964802724950324, "grad_norm": 0.010897928848862648, "learning_rate": 4.2059040590405906e-05, "loss": 0.00013245213776826858, "step": 204210 }, { "epoch": 57.96764121487369, "grad_norm": 0.35928940773010254, "learning_rate": 4.205620210048255e-05, "loss": 0.00038835816085338595, "step": 204220 }, { "epoch": 57.97047970479705, "grad_norm": 0.22269287705421448, "learning_rate": 4.205336361055918e-05, "loss": 0.000232057087123394, "step": 204230 }, { "epoch": 57.973318194720406, "grad_norm": 0.017344897612929344, "learning_rate": 4.205052512063582e-05, "loss": 0.00034070927649736403, "step": 204240 }, { "epoch": 57.97615668464377, "grad_norm": 0.03685707971453667, "learning_rate": 4.2047686630712465e-05, "loss": 0.0016710491850972175, "step": 204250 }, { "epoch": 57.97899517456713, "grad_norm": 0.022251730784773827, "learning_rate": 4.20448481407891e-05, "loss": 0.0005398474633693696, "step": 204260 }, { "epoch": 57.981833664490495, "grad_norm": 0.4273233115673065, "learning_rate": 4.204200965086574e-05, "loss": 0.000507097877562046, "step": 204270 }, { "epoch": 57.98467215441385, "grad_norm": 0.10712866485118866, "learning_rate": 4.203917116094238e-05, "loss": 0.0003894565626978874, "step": 204280 }, { "epoch": 57.98751064433721, "grad_norm": 0.14284949004650116, "learning_rate": 4.2036332671019017e-05, "loss": 0.00029521267861127854, "step": 204290 }, { "epoch": 57.990349134260576, "grad_norm": 0.06746077537536621, "learning_rate": 4.2033494181095665e-05, "loss": 0.0011087391525506974, "step": 204300 }, { "epoch": 57.99318762418393, "grad_norm": 0.030009908601641655, "learning_rate": 4.20306556911723e-05, "loss": 0.0005026973783969879, "step": 204310 }, { "epoch": 57.996026114107295, "grad_norm": 0.24859710037708282, "learning_rate": 4.2027817201248934e-05, "loss": 0.0002466266974806786, "step": 204320 }, { "epoch": 57.99886460403066, "grad_norm": 0.15011797845363617, "learning_rate": 4.2024978711325575e-05, "loss": 0.00021987874060869217, "step": 204330 }, { "epoch": 58.00170309395401, "grad_norm": 0.17467911541461945, "learning_rate": 4.202214022140222e-05, "loss": 0.00043325275182723997, "step": 204340 }, { "epoch": 58.004541583877376, "grad_norm": 0.021950123831629753, "learning_rate": 4.201930173147886e-05, "loss": 0.00016518235206604005, "step": 204350 }, { "epoch": 58.00738007380074, "grad_norm": 0.005392394959926605, "learning_rate": 4.201646324155549e-05, "loss": 0.0002975683659315109, "step": 204360 }, { "epoch": 58.0102185637241, "grad_norm": 0.7367399334907532, "learning_rate": 4.2013624751632134e-05, "loss": 0.00039396863430738447, "step": 204370 }, { "epoch": 58.01305705364746, "grad_norm": 2.967148542404175, "learning_rate": 4.2010786261708775e-05, "loss": 0.000678195059299469, "step": 204380 }, { "epoch": 58.01589554357082, "grad_norm": 0.06893350183963776, "learning_rate": 4.200794777178541e-05, "loss": 0.0004196334630250931, "step": 204390 }, { "epoch": 58.018734033494184, "grad_norm": 0.018050355836749077, "learning_rate": 4.200510928186205e-05, "loss": 0.0002667076885700226, "step": 204400 }, { "epoch": 58.02157252341754, "grad_norm": 0.020561840385198593, "learning_rate": 4.200227079193869e-05, "loss": 0.00018485207110643386, "step": 204410 }, { "epoch": 58.0244110133409, "grad_norm": 0.028353439643979073, "learning_rate": 4.199943230201533e-05, "loss": 0.0006221201270818711, "step": 204420 }, { "epoch": 58.027249503264265, "grad_norm": 0.05734207108616829, "learning_rate": 4.199659381209197e-05, "loss": 0.0006554437801241875, "step": 204430 }, { "epoch": 58.03008799318762, "grad_norm": 0.29836517572402954, "learning_rate": 4.199375532216861e-05, "loss": 0.0018628882244229317, "step": 204440 }, { "epoch": 58.032926483110984, "grad_norm": 0.016395173966884613, "learning_rate": 4.1990916832245245e-05, "loss": 0.0003655984997749329, "step": 204450 }, { "epoch": 58.03576497303435, "grad_norm": 0.13461102545261383, "learning_rate": 4.1988078342321886e-05, "loss": 0.0028987009078264236, "step": 204460 }, { "epoch": 58.03860346295771, "grad_norm": 0.06809957325458527, "learning_rate": 4.198523985239853e-05, "loss": 0.0012411508709192277, "step": 204470 }, { "epoch": 58.041441952881065, "grad_norm": 0.1384178102016449, "learning_rate": 4.198240136247517e-05, "loss": 0.0004287479445338249, "step": 204480 }, { "epoch": 58.04428044280443, "grad_norm": 0.10587082803249359, "learning_rate": 4.19795628725518e-05, "loss": 0.0002389878034591675, "step": 204490 }, { "epoch": 58.04711893272779, "grad_norm": 0.015492012724280357, "learning_rate": 4.1976724382628445e-05, "loss": 0.00026868730783462523, "step": 204500 }, { "epoch": 58.04711893272779, "eval_accuracy": 0.9804794302791378, "eval_loss": 0.07528763264417648, "eval_runtime": 32.8393, "eval_samples_per_second": 478.908, "eval_steps_per_second": 7.491, "step": 204500 }, { "epoch": 58.04995742265115, "grad_norm": 0.13865184783935547, "learning_rate": 4.1973885892705086e-05, "loss": 0.00037951022386550903, "step": 204510 }, { "epoch": 58.05279591257451, "grad_norm": 0.057282522320747375, "learning_rate": 4.197104740278172e-05, "loss": 0.0030770041048526764, "step": 204520 }, { "epoch": 58.05563440249787, "grad_norm": 0.16568686068058014, "learning_rate": 4.196820891285836e-05, "loss": 0.0013439642265439034, "step": 204530 }, { "epoch": 58.05847289242123, "grad_norm": 1.1781235933303833, "learning_rate": 4.1965370422935003e-05, "loss": 0.0007149338722229004, "step": 204540 }, { "epoch": 58.06131138234459, "grad_norm": 0.016168765723705292, "learning_rate": 4.196253193301164e-05, "loss": 0.000862187147140503, "step": 204550 }, { "epoch": 58.064149872267954, "grad_norm": 0.13774068653583527, "learning_rate": 4.195969344308828e-05, "loss": 0.0015854593366384506, "step": 204560 }, { "epoch": 58.06698836219132, "grad_norm": 0.108001209795475, "learning_rate": 4.195685495316492e-05, "loss": 0.00246198084205389, "step": 204570 }, { "epoch": 58.06982685211467, "grad_norm": 0.7792887091636658, "learning_rate": 4.1954016463241555e-05, "loss": 0.0006846090778708458, "step": 204580 }, { "epoch": 58.072665342038036, "grad_norm": 0.18153995275497437, "learning_rate": 4.1951461822310535e-05, "loss": 0.014819648861885071, "step": 204590 }, { "epoch": 58.0755038319614, "grad_norm": 0.06315401941537857, "learning_rate": 4.194862333238717e-05, "loss": 0.0008628163486719131, "step": 204600 }, { "epoch": 58.078342321884755, "grad_norm": 0.06517153233289719, "learning_rate": 4.194578484246381e-05, "loss": 0.000557304359972477, "step": 204610 }, { "epoch": 58.08118081180812, "grad_norm": 0.015062338672578335, "learning_rate": 4.194294635254045e-05, "loss": 0.0001638738438487053, "step": 204620 }, { "epoch": 58.08401930173148, "grad_norm": 0.061194922775030136, "learning_rate": 4.194010786261709e-05, "loss": 0.000349043495953083, "step": 204630 }, { "epoch": 58.086857791654836, "grad_norm": 0.01200471818447113, "learning_rate": 4.193726937269373e-05, "loss": 0.00016806256026029586, "step": 204640 }, { "epoch": 58.0896962815782, "grad_norm": 0.03657468035817146, "learning_rate": 4.193443088277037e-05, "loss": 0.00013396386057138444, "step": 204650 }, { "epoch": 58.09253477150156, "grad_norm": 0.47877076268196106, "learning_rate": 4.1931592392847005e-05, "loss": 0.0006796017289161683, "step": 204660 }, { "epoch": 58.095373261424925, "grad_norm": 0.008521954528987408, "learning_rate": 4.1928753902923646e-05, "loss": 0.00039926283061504365, "step": 204670 }, { "epoch": 58.09821175134828, "grad_norm": 0.013571600429713726, "learning_rate": 4.192591541300029e-05, "loss": 0.00044177062809467314, "step": 204680 }, { "epoch": 58.10105024127164, "grad_norm": 0.04802403971552849, "learning_rate": 4.192307692307693e-05, "loss": 0.0005490142852067947, "step": 204690 }, { "epoch": 58.103888731195006, "grad_norm": 0.2598487138748169, "learning_rate": 4.192023843315356e-05, "loss": 0.0001978311687707901, "step": 204700 }, { "epoch": 58.10672722111836, "grad_norm": 2.142902374267578, "learning_rate": 4.19173999432302e-05, "loss": 0.0009505357593297958, "step": 204710 }, { "epoch": 58.109565711041725, "grad_norm": 0.08950124680995941, "learning_rate": 4.1914561453306846e-05, "loss": 0.002841956354677677, "step": 204720 }, { "epoch": 58.11240420096509, "grad_norm": 0.6896215677261353, "learning_rate": 4.191172296338348e-05, "loss": 0.003446846827864647, "step": 204730 }, { "epoch": 58.11524269088845, "grad_norm": 0.09965222328901291, "learning_rate": 4.190888447346012e-05, "loss": 0.0014880875125527382, "step": 204740 }, { "epoch": 58.11808118081181, "grad_norm": 5.069331645965576, "learning_rate": 4.1906045983536763e-05, "loss": 0.008741626888513565, "step": 204750 }, { "epoch": 58.12091967073517, "grad_norm": 0.04064833000302315, "learning_rate": 4.19032074936134e-05, "loss": 0.0046473868191242215, "step": 204760 }, { "epoch": 58.12375816065853, "grad_norm": 2.1111674308776855, "learning_rate": 4.190036900369004e-05, "loss": 0.0010314088314771653, "step": 204770 }, { "epoch": 58.12659665058189, "grad_norm": 0.07537281513214111, "learning_rate": 4.189753051376668e-05, "loss": 0.002217673510313034, "step": 204780 }, { "epoch": 58.12943514050525, "grad_norm": 0.17979152500629425, "learning_rate": 4.1894692023843315e-05, "loss": 0.000832880474627018, "step": 204790 }, { "epoch": 58.132273630428614, "grad_norm": 0.05780456215143204, "learning_rate": 4.189185353391996e-05, "loss": 0.00038910023868083955, "step": 204800 }, { "epoch": 58.13511212035197, "grad_norm": 0.2759091258049011, "learning_rate": 4.188901504399659e-05, "loss": 0.0022787053138017656, "step": 204810 }, { "epoch": 58.13795061027533, "grad_norm": 1.2614411115646362, "learning_rate": 4.188617655407324e-05, "loss": 0.0036401398479938506, "step": 204820 }, { "epoch": 58.140789100198695, "grad_norm": 0.0448504202067852, "learning_rate": 4.1883338064149874e-05, "loss": 0.0014636456966400147, "step": 204830 }, { "epoch": 58.14362759012206, "grad_norm": 0.16074614226818085, "learning_rate": 4.188049957422651e-05, "loss": 0.0004017755389213562, "step": 204840 }, { "epoch": 58.146466080045414, "grad_norm": 0.5667877197265625, "learning_rate": 4.187766108430316e-05, "loss": 0.009945553541183472, "step": 204850 }, { "epoch": 58.14930456996878, "grad_norm": 0.09318536520004272, "learning_rate": 4.187482259437979e-05, "loss": 0.0011715713888406753, "step": 204860 }, { "epoch": 58.15214305989214, "grad_norm": 0.01850898191332817, "learning_rate": 4.187198410445643e-05, "loss": 0.0014135394245386123, "step": 204870 }, { "epoch": 58.154981549815496, "grad_norm": 0.018807750195264816, "learning_rate": 4.1869145614533074e-05, "loss": 0.00040181931108236314, "step": 204880 }, { "epoch": 58.15782003973886, "grad_norm": 11.1284761428833, "learning_rate": 4.186630712460971e-05, "loss": 0.004034848138689995, "step": 204890 }, { "epoch": 58.16065852966222, "grad_norm": 9.102705001831055, "learning_rate": 4.186346863468635e-05, "loss": 0.0016865378245711326, "step": 204900 }, { "epoch": 58.16349701958558, "grad_norm": 0.05338349565863609, "learning_rate": 4.1860630144762985e-05, "loss": 0.00025546513497829435, "step": 204910 }, { "epoch": 58.16633550950894, "grad_norm": 0.553736686706543, "learning_rate": 4.1857791654839626e-05, "loss": 0.0015936607494950294, "step": 204920 }, { "epoch": 58.1691739994323, "grad_norm": 6.255552291870117, "learning_rate": 4.185495316491627e-05, "loss": 0.0011901333928108215, "step": 204930 }, { "epoch": 58.172012489355666, "grad_norm": 1.2497267723083496, "learning_rate": 4.18521146749929e-05, "loss": 0.005077730864286423, "step": 204940 }, { "epoch": 58.17485097927902, "grad_norm": 2.7479703426361084, "learning_rate": 4.184927618506955e-05, "loss": 0.0009032720699906349, "step": 204950 }, { "epoch": 58.177689469202384, "grad_norm": 0.018021713942289352, "learning_rate": 4.1846437695146185e-05, "loss": 0.0004354001954197884, "step": 204960 }, { "epoch": 58.18052795912575, "grad_norm": 20.678144454956055, "learning_rate": 4.184359920522282e-05, "loss": 0.013803592324256897, "step": 204970 }, { "epoch": 58.1833664490491, "grad_norm": 0.168352410197258, "learning_rate": 4.184076071529947e-05, "loss": 0.008954645693302154, "step": 204980 }, { "epoch": 58.186204938972466, "grad_norm": 0.26497358083724976, "learning_rate": 4.18379222253761e-05, "loss": 0.0010895712301135063, "step": 204990 }, { "epoch": 58.18904342889583, "grad_norm": 0.031940143555402756, "learning_rate": 4.1835083735452744e-05, "loss": 0.0008628876879811287, "step": 205000 }, { "epoch": 58.18904342889583, "eval_accuracy": 0.9774273542315762, "eval_loss": 0.08469057828187943, "eval_runtime": 32.1376, "eval_samples_per_second": 489.364, "eval_steps_per_second": 7.655, "step": 205000 }, { "epoch": 58.191881918819185, "grad_norm": 0.5235498547554016, "learning_rate": 4.183224524552938e-05, "loss": 0.0013462038710713386, "step": 205010 }, { "epoch": 58.19472040874255, "grad_norm": 0.0942637175321579, "learning_rate": 4.182940675560602e-05, "loss": 0.0012478357180953025, "step": 205020 }, { "epoch": 58.19755889866591, "grad_norm": 0.21433603763580322, "learning_rate": 4.182656826568266e-05, "loss": 0.0010499902069568633, "step": 205030 }, { "epoch": 58.20039738858927, "grad_norm": 0.6372508406639099, "learning_rate": 4.1823729775759296e-05, "loss": 0.0014020510017871857, "step": 205040 }, { "epoch": 58.20323587851263, "grad_norm": 0.018086139112710953, "learning_rate": 4.182089128583594e-05, "loss": 0.00048404969274997713, "step": 205050 }, { "epoch": 58.20607436843599, "grad_norm": 0.1767113208770752, "learning_rate": 4.181805279591258e-05, "loss": 0.0003276534378528595, "step": 205060 }, { "epoch": 58.208912858359355, "grad_norm": 0.021839821711182594, "learning_rate": 4.181521430598921e-05, "loss": 0.0010433599352836608, "step": 205070 }, { "epoch": 58.21175134828271, "grad_norm": 1.037894368171692, "learning_rate": 4.1812375816065854e-05, "loss": 0.0004463814198970795, "step": 205080 }, { "epoch": 58.214589838206074, "grad_norm": 0.01604326069355011, "learning_rate": 4.1809537326142496e-05, "loss": 0.0005635779350996017, "step": 205090 }, { "epoch": 58.21742832812944, "grad_norm": 0.3372109830379486, "learning_rate": 4.180669883621913e-05, "loss": 0.0005124708637595177, "step": 205100 }, { "epoch": 58.2202668180528, "grad_norm": 0.35462090373039246, "learning_rate": 4.180386034629577e-05, "loss": 0.0010896431282162665, "step": 205110 }, { "epoch": 58.223105307976155, "grad_norm": 0.028064021840691566, "learning_rate": 4.180102185637241e-05, "loss": 0.003514605015516281, "step": 205120 }, { "epoch": 58.22594379789952, "grad_norm": 2.936357259750366, "learning_rate": 4.179818336644905e-05, "loss": 0.0016153039410710334, "step": 205130 }, { "epoch": 58.22878228782288, "grad_norm": 0.06186690554022789, "learning_rate": 4.179534487652569e-05, "loss": 0.0038416236639022827, "step": 205140 }, { "epoch": 58.23162077774624, "grad_norm": 0.152574822306633, "learning_rate": 4.179250638660233e-05, "loss": 0.00029898025095462797, "step": 205150 }, { "epoch": 58.2344592676696, "grad_norm": 0.05094078555703163, "learning_rate": 4.178966789667897e-05, "loss": 0.0002130495384335518, "step": 205160 }, { "epoch": 58.23729775759296, "grad_norm": 0.11865977197885513, "learning_rate": 4.1786829406755606e-05, "loss": 0.0003287043422460556, "step": 205170 }, { "epoch": 58.24013624751632, "grad_norm": 0.8597666621208191, "learning_rate": 4.178399091683225e-05, "loss": 0.0008318416774272918, "step": 205180 }, { "epoch": 58.24297473743968, "grad_norm": 0.016284247860312462, "learning_rate": 4.178115242690889e-05, "loss": 0.00032555852085351943, "step": 205190 }, { "epoch": 58.245813227363044, "grad_norm": 0.019429286941885948, "learning_rate": 4.1778313936985524e-05, "loss": 0.00021120253950357437, "step": 205200 }, { "epoch": 58.24865171728641, "grad_norm": 0.01344375405460596, "learning_rate": 4.1775475447062165e-05, "loss": 0.0006610799580812454, "step": 205210 }, { "epoch": 58.25149020720976, "grad_norm": 0.02961040660738945, "learning_rate": 4.1772636957138806e-05, "loss": 0.0002518216148018837, "step": 205220 }, { "epoch": 58.254328697133126, "grad_norm": 0.09081673622131348, "learning_rate": 4.176979846721544e-05, "loss": 0.00035782288759946825, "step": 205230 }, { "epoch": 58.25716718705649, "grad_norm": 0.09560229629278183, "learning_rate": 4.176695997729208e-05, "loss": 0.00015184488147497178, "step": 205240 }, { "epoch": 58.260005676979844, "grad_norm": 0.14729832112789154, "learning_rate": 4.1764121487368724e-05, "loss": 0.0004842573776841164, "step": 205250 }, { "epoch": 58.26284416690321, "grad_norm": 2.0042402744293213, "learning_rate": 4.176128299744536e-05, "loss": 0.0006337942555546761, "step": 205260 }, { "epoch": 58.26568265682657, "grad_norm": 0.011314044706523418, "learning_rate": 4.1758444507522e-05, "loss": 0.0039548899978399275, "step": 205270 }, { "epoch": 58.268521146749926, "grad_norm": 3.559664487838745, "learning_rate": 4.175560601759864e-05, "loss": 0.0007200537249445916, "step": 205280 }, { "epoch": 58.27135963667329, "grad_norm": 0.11831052601337433, "learning_rate": 4.175276752767528e-05, "loss": 0.00027854051440954206, "step": 205290 }, { "epoch": 58.27419812659665, "grad_norm": 0.0453171543776989, "learning_rate": 4.174992903775192e-05, "loss": 0.0001888323575258255, "step": 205300 }, { "epoch": 58.277036616520014, "grad_norm": 0.044095445424318314, "learning_rate": 4.174709054782855e-05, "loss": 0.00043348148465156556, "step": 205310 }, { "epoch": 58.27987510644337, "grad_norm": 0.19925419986248016, "learning_rate": 4.17442520579052e-05, "loss": 0.0024681663140654563, "step": 205320 }, { "epoch": 58.28271359636673, "grad_norm": 0.03999914228916168, "learning_rate": 4.1741413567981834e-05, "loss": 0.00012504421174526216, "step": 205330 }, { "epoch": 58.285552086290096, "grad_norm": 7.908804416656494, "learning_rate": 4.1738575078058476e-05, "loss": 0.0012836307287216187, "step": 205340 }, { "epoch": 58.28839057621345, "grad_norm": 0.24238896369934082, "learning_rate": 4.173573658813512e-05, "loss": 0.00035486966371536257, "step": 205350 }, { "epoch": 58.291229066136815, "grad_norm": 1.3363550901412964, "learning_rate": 4.173289809821175e-05, "loss": 0.00031757280230522157, "step": 205360 }, { "epoch": 58.29406755606018, "grad_norm": 0.18619368970394135, "learning_rate": 4.173005960828839e-05, "loss": 0.00023713260889053346, "step": 205370 }, { "epoch": 58.29690604598353, "grad_norm": 0.1024603545665741, "learning_rate": 4.1727221118365035e-05, "loss": 0.0004784869030117989, "step": 205380 }, { "epoch": 58.299744535906896, "grad_norm": 0.03788735717535019, "learning_rate": 4.172438262844167e-05, "loss": 0.00025156624615192414, "step": 205390 }, { "epoch": 58.30258302583026, "grad_norm": 0.14736196398735046, "learning_rate": 4.172154413851831e-05, "loss": 0.00027147121727466583, "step": 205400 }, { "epoch": 58.30542151575362, "grad_norm": 0.012785141356289387, "learning_rate": 4.1718705648594945e-05, "loss": 0.00024437569081783297, "step": 205410 }, { "epoch": 58.30826000567698, "grad_norm": 0.0817706435918808, "learning_rate": 4.1715867158671587e-05, "loss": 0.0004734601825475693, "step": 205420 }, { "epoch": 58.31109849560034, "grad_norm": 0.06922005861997604, "learning_rate": 4.171302866874823e-05, "loss": 0.004749102890491486, "step": 205430 }, { "epoch": 58.3139369855237, "grad_norm": 2.3305904865264893, "learning_rate": 4.171019017882486e-05, "loss": 0.0017081452533602715, "step": 205440 }, { "epoch": 58.31677547544706, "grad_norm": 0.13234132528305054, "learning_rate": 4.170735168890151e-05, "loss": 0.00035175904631614687, "step": 205450 }, { "epoch": 58.31961396537042, "grad_norm": 0.42978307604789734, "learning_rate": 4.1704513198978145e-05, "loss": 0.0003889555111527443, "step": 205460 }, { "epoch": 58.322452455293785, "grad_norm": 0.9656371474266052, "learning_rate": 4.1701674709054787e-05, "loss": 0.00044166650623083114, "step": 205470 }, { "epoch": 58.32529094521715, "grad_norm": 0.11956121027469635, "learning_rate": 4.169883621913143e-05, "loss": 0.0003294184803962708, "step": 205480 }, { "epoch": 58.328129435140504, "grad_norm": 0.5444433093070984, "learning_rate": 4.169599772920806e-05, "loss": 0.0005915256217122078, "step": 205490 }, { "epoch": 58.33096792506387, "grad_norm": 0.10612577944993973, "learning_rate": 4.1693159239284704e-05, "loss": 0.00018099993467330934, "step": 205500 }, { "epoch": 58.33096792506387, "eval_accuracy": 0.9802250906085077, "eval_loss": 0.0739494189620018, "eval_runtime": 32.3586, "eval_samples_per_second": 486.023, "eval_steps_per_second": 7.602, "step": 205500 }, { "epoch": 58.33380641498723, "grad_norm": 0.11806458979845047, "learning_rate": 4.169032074936134e-05, "loss": 0.0002736002206802368, "step": 205510 }, { "epoch": 58.336644904910585, "grad_norm": 0.055456411093473434, "learning_rate": 4.168748225943798e-05, "loss": 0.0002252276986837387, "step": 205520 }, { "epoch": 58.33948339483395, "grad_norm": 0.028017638251185417, "learning_rate": 4.168464376951462e-05, "loss": 0.0003796648234128952, "step": 205530 }, { "epoch": 58.34232188475731, "grad_norm": 0.022682668641209602, "learning_rate": 4.1681805279591256e-05, "loss": 0.00015981532633304595, "step": 205540 }, { "epoch": 58.34516037468067, "grad_norm": 0.13965179026126862, "learning_rate": 4.16789667896679e-05, "loss": 0.0004375981166958809, "step": 205550 }, { "epoch": 58.34799886460403, "grad_norm": 0.03178500384092331, "learning_rate": 4.167612829974454e-05, "loss": 0.0001471705734729767, "step": 205560 }, { "epoch": 58.35083735452739, "grad_norm": 0.10785835981369019, "learning_rate": 4.167328980982117e-05, "loss": 0.0002951962873339653, "step": 205570 }, { "epoch": 58.353675844450756, "grad_norm": 0.06760025024414062, "learning_rate": 4.167045131989782e-05, "loss": 0.0003511713817715645, "step": 205580 }, { "epoch": 58.35651433437411, "grad_norm": 0.029207304120063782, "learning_rate": 4.1667612829974456e-05, "loss": 0.00028275251388549806, "step": 205590 }, { "epoch": 58.359352824297474, "grad_norm": 0.022507071495056152, "learning_rate": 4.166477434005109e-05, "loss": 0.0007560320198535919, "step": 205600 }, { "epoch": 58.36219131422084, "grad_norm": 0.035710081458091736, "learning_rate": 4.166193585012774e-05, "loss": 0.0005072861909866333, "step": 205610 }, { "epoch": 58.36502980414419, "grad_norm": 0.13027210533618927, "learning_rate": 4.165909736020437e-05, "loss": 0.001174384169280529, "step": 205620 }, { "epoch": 58.367868294067556, "grad_norm": 0.012850498780608177, "learning_rate": 4.1656258870281015e-05, "loss": 0.0013997212052345275, "step": 205630 }, { "epoch": 58.37070678399092, "grad_norm": 0.0756896585226059, "learning_rate": 4.165342038035765e-05, "loss": 0.0005036361515522003, "step": 205640 }, { "epoch": 58.373545273914274, "grad_norm": 0.032071687281131744, "learning_rate": 4.165058189043429e-05, "loss": 0.0006042823195457458, "step": 205650 }, { "epoch": 58.37638376383764, "grad_norm": 0.15187151730060577, "learning_rate": 4.164774340051093e-05, "loss": 0.0006724942475557327, "step": 205660 }, { "epoch": 58.379222253761, "grad_norm": 3.1528849601745605, "learning_rate": 4.164490491058757e-05, "loss": 0.0008101312443614006, "step": 205670 }, { "epoch": 58.38206074368436, "grad_norm": 4.844971179962158, "learning_rate": 4.164206642066421e-05, "loss": 0.0017693787813186646, "step": 205680 }, { "epoch": 58.38489923360772, "grad_norm": 0.1397276371717453, "learning_rate": 4.163922793074085e-05, "loss": 0.00033976081758737566, "step": 205690 }, { "epoch": 58.38773772353108, "grad_norm": 0.10120905935764313, "learning_rate": 4.1636389440817484e-05, "loss": 0.0020681345835328103, "step": 205700 }, { "epoch": 58.390576213454445, "grad_norm": 0.04216722398996353, "learning_rate": 4.163355095089413e-05, "loss": 0.0001390049234032631, "step": 205710 }, { "epoch": 58.3934147033778, "grad_norm": 0.3242674171924591, "learning_rate": 4.163071246097077e-05, "loss": 0.00020927637815475463, "step": 205720 }, { "epoch": 58.39625319330116, "grad_norm": 0.007905598729848862, "learning_rate": 4.16278739710474e-05, "loss": 0.00011495891958475113, "step": 205730 }, { "epoch": 58.399091683224526, "grad_norm": 3.496248722076416, "learning_rate": 4.162503548112404e-05, "loss": 0.0007126154378056526, "step": 205740 }, { "epoch": 58.40193017314788, "grad_norm": 4.174905776977539, "learning_rate": 4.1622196991200684e-05, "loss": 0.0008264940232038497, "step": 205750 }, { "epoch": 58.404768663071245, "grad_norm": 2.5469467639923096, "learning_rate": 4.1619358501277325e-05, "loss": 0.0030883658677339555, "step": 205760 }, { "epoch": 58.40760715299461, "grad_norm": 0.8082329630851746, "learning_rate": 4.161652001135396e-05, "loss": 0.00023912861943244934, "step": 205770 }, { "epoch": 58.41044564291797, "grad_norm": 0.10769607126712799, "learning_rate": 4.16136815214306e-05, "loss": 0.0013112364336848258, "step": 205780 }, { "epoch": 58.413284132841326, "grad_norm": 0.06562838703393936, "learning_rate": 4.161084303150724e-05, "loss": 0.00045734159648418425, "step": 205790 }, { "epoch": 58.41612262276469, "grad_norm": 0.0805530771613121, "learning_rate": 4.160800454158388e-05, "loss": 0.00018837898969650268, "step": 205800 }, { "epoch": 58.41896111268805, "grad_norm": 0.6682500243186951, "learning_rate": 4.160516605166052e-05, "loss": 0.00035232454538345335, "step": 205810 }, { "epoch": 58.42179960261141, "grad_norm": 0.05829198285937309, "learning_rate": 4.160232756173716e-05, "loss": 0.00018559452146291733, "step": 205820 }, { "epoch": 58.42463809253477, "grad_norm": 0.01971971057355404, "learning_rate": 4.1599489071813795e-05, "loss": 0.003474518656730652, "step": 205830 }, { "epoch": 58.427476582458134, "grad_norm": 0.05474422872066498, "learning_rate": 4.1596650581890436e-05, "loss": 0.0005648896098136902, "step": 205840 }, { "epoch": 58.43031507238149, "grad_norm": 0.0166428592056036, "learning_rate": 4.159381209196708e-05, "loss": 0.00039085801690816877, "step": 205850 }, { "epoch": 58.43315356230485, "grad_norm": 0.21890917420387268, "learning_rate": 4.159097360204371e-05, "loss": 0.0002089180052280426, "step": 205860 }, { "epoch": 58.435992052228215, "grad_norm": 0.011787100695073605, "learning_rate": 4.1588135112120354e-05, "loss": 0.00021103490144014359, "step": 205870 }, { "epoch": 58.43883054215158, "grad_norm": 0.003612253349274397, "learning_rate": 4.1585296622196995e-05, "loss": 0.0001603182405233383, "step": 205880 }, { "epoch": 58.441669032074934, "grad_norm": 0.6743913888931274, "learning_rate": 4.158245813227363e-05, "loss": 0.0005043154582381248, "step": 205890 }, { "epoch": 58.4445075219983, "grad_norm": 0.3677307069301605, "learning_rate": 4.157961964235027e-05, "loss": 0.0008349336683750152, "step": 205900 }, { "epoch": 58.44734601192166, "grad_norm": 0.049873169511556625, "learning_rate": 4.157678115242691e-05, "loss": 0.0002532351762056351, "step": 205910 }, { "epoch": 58.450184501845015, "grad_norm": 0.04382982850074768, "learning_rate": 4.1573942662503554e-05, "loss": 0.0004124777391552925, "step": 205920 }, { "epoch": 58.45302299176838, "grad_norm": 0.07203242927789688, "learning_rate": 4.157110417258019e-05, "loss": 0.00010791867971420288, "step": 205930 }, { "epoch": 58.45586148169174, "grad_norm": 0.1392454355955124, "learning_rate": 4.156826568265683e-05, "loss": 0.005626077204942704, "step": 205940 }, { "epoch": 58.458699971615104, "grad_norm": 0.06474582850933075, "learning_rate": 4.156542719273347e-05, "loss": 0.0004236608743667603, "step": 205950 }, { "epoch": 58.46153846153846, "grad_norm": 0.0070573994889855385, "learning_rate": 4.1562588702810106e-05, "loss": 0.0006157249212265014, "step": 205960 }, { "epoch": 58.46437695146182, "grad_norm": 0.012061268091201782, "learning_rate": 4.155975021288675e-05, "loss": 0.00033986270427703856, "step": 205970 }, { "epoch": 58.467215441385186, "grad_norm": 0.023371854797005653, "learning_rate": 4.155691172296339e-05, "loss": 0.0008606761693954468, "step": 205980 }, { "epoch": 58.47005393130854, "grad_norm": 7.298279762268066, "learning_rate": 4.155407323304002e-05, "loss": 0.00149534922093153, "step": 205990 }, { "epoch": 58.472892421231904, "grad_norm": 4.245069980621338, "learning_rate": 4.1551234743116664e-05, "loss": 0.001015925221145153, "step": 206000 }, { "epoch": 58.472892421231904, "eval_accuracy": 0.9803522604438227, "eval_loss": 0.07817874848842621, "eval_runtime": 32.6958, "eval_samples_per_second": 481.01, "eval_steps_per_second": 7.524, "step": 206000 }, { "epoch": 58.47573091115527, "grad_norm": 0.031011363491415977, "learning_rate": 4.1548396253193306e-05, "loss": 0.0005143677815794944, "step": 206010 }, { "epoch": 58.47856940107862, "grad_norm": 0.17171825468540192, "learning_rate": 4.154555776326994e-05, "loss": 0.000588729977607727, "step": 206020 }, { "epoch": 58.481407891001986, "grad_norm": 0.2657344937324524, "learning_rate": 4.154271927334658e-05, "loss": 0.00047618672251701353, "step": 206030 }, { "epoch": 58.48424638092535, "grad_norm": 0.01347420085221529, "learning_rate": 4.1539880783423216e-05, "loss": 0.0002003578469157219, "step": 206040 }, { "epoch": 58.48708487084871, "grad_norm": 0.01635129004716873, "learning_rate": 4.1537042293499864e-05, "loss": 0.00023457687348127365, "step": 206050 }, { "epoch": 58.48992336077207, "grad_norm": 0.07479766756296158, "learning_rate": 4.15342038035765e-05, "loss": 0.00024643857032060625, "step": 206060 }, { "epoch": 58.49276185069543, "grad_norm": 1.0512486696243286, "learning_rate": 4.1531365313653134e-05, "loss": 0.013239280879497528, "step": 206070 }, { "epoch": 58.49560034061879, "grad_norm": 0.030059413984417915, "learning_rate": 4.152852682372978e-05, "loss": 0.0011805953457951547, "step": 206080 }, { "epoch": 58.49843883054215, "grad_norm": 0.0244869627058506, "learning_rate": 4.1525688333806416e-05, "loss": 0.002078399434685707, "step": 206090 }, { "epoch": 58.50127732046551, "grad_norm": 0.2963099181652069, "learning_rate": 4.152284984388306e-05, "loss": 0.005688820779323578, "step": 206100 }, { "epoch": 58.504115810388875, "grad_norm": 0.01306278258562088, "learning_rate": 4.15200113539597e-05, "loss": 0.00016800854355096816, "step": 206110 }, { "epoch": 58.50695430031223, "grad_norm": 0.024720774963498116, "learning_rate": 4.1517172864036334e-05, "loss": 0.00040089040994644164, "step": 206120 }, { "epoch": 58.50979279023559, "grad_norm": 0.06448198854923248, "learning_rate": 4.1514334374112975e-05, "loss": 0.00034239459782838824, "step": 206130 }, { "epoch": 58.512631280158956, "grad_norm": 0.26408785581588745, "learning_rate": 4.151149588418961e-05, "loss": 0.0008199870586395264, "step": 206140 }, { "epoch": 58.51546977008232, "grad_norm": 0.04131561890244484, "learning_rate": 4.150865739426625e-05, "loss": 0.00023290663957595826, "step": 206150 }, { "epoch": 58.518308260005675, "grad_norm": 0.02171259932219982, "learning_rate": 4.150581890434289e-05, "loss": 0.00013242699205875396, "step": 206160 }, { "epoch": 58.52114674992904, "grad_norm": 0.06444291770458221, "learning_rate": 4.150298041441953e-05, "loss": 0.0002375718206167221, "step": 206170 }, { "epoch": 58.5239852398524, "grad_norm": 0.0379272997379303, "learning_rate": 4.1500141924496175e-05, "loss": 0.0002611646428704262, "step": 206180 }, { "epoch": 58.52682372977576, "grad_norm": 0.03548593819141388, "learning_rate": 4.149730343457281e-05, "loss": 0.00016426313668489457, "step": 206190 }, { "epoch": 58.52966221969912, "grad_norm": 0.08589109778404236, "learning_rate": 4.1494464944649444e-05, "loss": 0.0004239514470100403, "step": 206200 }, { "epoch": 58.53250070962248, "grad_norm": 0.5127556324005127, "learning_rate": 4.149162645472609e-05, "loss": 0.0003514504060149193, "step": 206210 }, { "epoch": 58.535339199545845, "grad_norm": 0.42057403922080994, "learning_rate": 4.148878796480273e-05, "loss": 0.001057792454957962, "step": 206220 }, { "epoch": 58.5381776894692, "grad_norm": 0.15266260504722595, "learning_rate": 4.148594947487937e-05, "loss": 0.00032053161412477494, "step": 206230 }, { "epoch": 58.541016179392564, "grad_norm": 0.06631021201610565, "learning_rate": 4.1483110984956e-05, "loss": 0.0017901424318552018, "step": 206240 }, { "epoch": 58.54385466931593, "grad_norm": 0.16495361924171448, "learning_rate": 4.1480272495032644e-05, "loss": 0.00021556876599788667, "step": 206250 }, { "epoch": 58.54669315923928, "grad_norm": 0.27234265208244324, "learning_rate": 4.1477434005109286e-05, "loss": 0.00028309207409620284, "step": 206260 }, { "epoch": 58.549531649162645, "grad_norm": 0.02861521951854229, "learning_rate": 4.147459551518592e-05, "loss": 0.00024026613682508468, "step": 206270 }, { "epoch": 58.55237013908601, "grad_norm": 0.06954305619001389, "learning_rate": 4.147175702526256e-05, "loss": 0.0005324840545654297, "step": 206280 }, { "epoch": 58.555208629009364, "grad_norm": 0.07461442053318024, "learning_rate": 4.14689185353392e-05, "loss": 0.000377296656370163, "step": 206290 }, { "epoch": 58.55804711893273, "grad_norm": 0.19013510644435883, "learning_rate": 4.146608004541584e-05, "loss": 0.00018071476370096207, "step": 206300 }, { "epoch": 58.56088560885609, "grad_norm": 0.0616844967007637, "learning_rate": 4.146324155549248e-05, "loss": 0.00022306963801383973, "step": 206310 }, { "epoch": 58.56372409877945, "grad_norm": 0.00797318946570158, "learning_rate": 4.146040306556912e-05, "loss": 0.000749894417822361, "step": 206320 }, { "epoch": 58.56656258870281, "grad_norm": 4.22609281539917, "learning_rate": 4.1457564575645755e-05, "loss": 0.0006284125149250031, "step": 206330 }, { "epoch": 58.56940107862617, "grad_norm": 0.03797655552625656, "learning_rate": 4.1454726085722396e-05, "loss": 0.00021169427782297134, "step": 206340 }, { "epoch": 58.572239568549534, "grad_norm": 0.06143837049603462, "learning_rate": 4.145188759579904e-05, "loss": 0.001446019299328327, "step": 206350 }, { "epoch": 58.57507805847289, "grad_norm": 12.631304740905762, "learning_rate": 4.144904910587567e-05, "loss": 0.003256729245185852, "step": 206360 }, { "epoch": 58.57791654839625, "grad_norm": 0.3235250413417816, "learning_rate": 4.1446210615952314e-05, "loss": 0.0018879910930991173, "step": 206370 }, { "epoch": 58.580755038319616, "grad_norm": 0.03419598191976547, "learning_rate": 4.1443372126028955e-05, "loss": 0.0009353000670671463, "step": 206380 }, { "epoch": 58.58359352824297, "grad_norm": 0.04226769879460335, "learning_rate": 4.1440533636105597e-05, "loss": 0.0005440801382064819, "step": 206390 }, { "epoch": 58.586432018166335, "grad_norm": 0.029666220769286156, "learning_rate": 4.143769514618223e-05, "loss": 0.001789804734289646, "step": 206400 }, { "epoch": 58.5892705080897, "grad_norm": 0.5707429051399231, "learning_rate": 4.143485665625887e-05, "loss": 0.0008113857358694077, "step": 206410 }, { "epoch": 58.59210899801306, "grad_norm": 0.03844762220978737, "learning_rate": 4.1432018166335514e-05, "loss": 0.00048819109797477723, "step": 206420 }, { "epoch": 58.594947487936416, "grad_norm": 1.8750592470169067, "learning_rate": 4.142917967641215e-05, "loss": 0.004617907106876373, "step": 206430 }, { "epoch": 58.59778597785978, "grad_norm": 3.6845645904541016, "learning_rate": 4.142634118648879e-05, "loss": 0.0020926138386130333, "step": 206440 }, { "epoch": 58.60062446778314, "grad_norm": 0.3527293801307678, "learning_rate": 4.142350269656543e-05, "loss": 0.0016814867034554482, "step": 206450 }, { "epoch": 58.6034629577065, "grad_norm": 0.49289071559906006, "learning_rate": 4.1420664206642066e-05, "loss": 0.0007773702964186668, "step": 206460 }, { "epoch": 58.60630144762986, "grad_norm": 0.5054125189781189, "learning_rate": 4.141782571671871e-05, "loss": 0.0024444730952382088, "step": 206470 }, { "epoch": 58.60913993755322, "grad_norm": 0.2093491554260254, "learning_rate": 4.141527107578768e-05, "loss": 0.008703501522541046, "step": 206480 }, { "epoch": 58.61197842747658, "grad_norm": 0.044851645827293396, "learning_rate": 4.141243258586432e-05, "loss": 0.001154966838657856, "step": 206490 }, { "epoch": 58.61481691739994, "grad_norm": 0.12276975810527802, "learning_rate": 4.140959409594096e-05, "loss": 0.0002062743529677391, "step": 206500 }, { "epoch": 58.61481691739994, "eval_accuracy": 0.9754562217841928, "eval_loss": 0.09584749490022659, "eval_runtime": 33.2464, "eval_samples_per_second": 473.043, "eval_steps_per_second": 7.399, "step": 206500 }, { "epoch": 58.617655407323305, "grad_norm": 0.6599223017692566, "learning_rate": 4.14067556060176e-05, "loss": 0.0009128043428063392, "step": 206510 }, { "epoch": 58.62049389724667, "grad_norm": 0.08371642231941223, "learning_rate": 4.140391711609424e-05, "loss": 0.0018699953332543373, "step": 206520 }, { "epoch": 58.623332387170024, "grad_norm": 0.1008375883102417, "learning_rate": 4.140107862617088e-05, "loss": 0.002126074209809303, "step": 206530 }, { "epoch": 58.62617087709339, "grad_norm": 0.07552443444728851, "learning_rate": 4.1398240136247515e-05, "loss": 0.0011645831167697907, "step": 206540 }, { "epoch": 58.62900936701675, "grad_norm": 0.17458155751228333, "learning_rate": 4.1395401646324156e-05, "loss": 0.0004091557115316391, "step": 206550 }, { "epoch": 58.631847856940105, "grad_norm": 2.5185909271240234, "learning_rate": 4.13925631564008e-05, "loss": 0.0011567596346139907, "step": 206560 }, { "epoch": 58.63468634686347, "grad_norm": 0.3780214190483093, "learning_rate": 4.138972466647744e-05, "loss": 0.0012123353779315948, "step": 206570 }, { "epoch": 58.63752483678683, "grad_norm": 0.23872458934783936, "learning_rate": 4.1386886176554074e-05, "loss": 0.00043185334652662277, "step": 206580 }, { "epoch": 58.64036332671019, "grad_norm": 0.057355623692274094, "learning_rate": 4.1384047686630715e-05, "loss": 0.0009125780314207077, "step": 206590 }, { "epoch": 58.64320181663355, "grad_norm": 0.19321228563785553, "learning_rate": 4.1381209196707357e-05, "loss": 0.001856425404548645, "step": 206600 }, { "epoch": 58.64604030655691, "grad_norm": 4.119048118591309, "learning_rate": 4.137837070678399e-05, "loss": 0.003102562204003334, "step": 206610 }, { "epoch": 58.648878796480275, "grad_norm": 0.23236839473247528, "learning_rate": 4.137553221686063e-05, "loss": 0.0013714956119656562, "step": 206620 }, { "epoch": 58.65171728640363, "grad_norm": 0.09808182716369629, "learning_rate": 4.1372693726937274e-05, "loss": 0.0005684921517968178, "step": 206630 }, { "epoch": 58.654555776326994, "grad_norm": 0.29362431168556213, "learning_rate": 4.136985523701391e-05, "loss": 0.0020220573991537095, "step": 206640 }, { "epoch": 58.65739426625036, "grad_norm": 0.3885773718357086, "learning_rate": 4.136701674709055e-05, "loss": 0.001153378374874592, "step": 206650 }, { "epoch": 58.66023275617371, "grad_norm": 0.18055403232574463, "learning_rate": 4.136417825716719e-05, "loss": 0.0007753573358058929, "step": 206660 }, { "epoch": 58.663071246097076, "grad_norm": 0.009069940075278282, "learning_rate": 4.1361339767243826e-05, "loss": 0.0004912793636322021, "step": 206670 }, { "epoch": 58.66590973602044, "grad_norm": 0.103178471326828, "learning_rate": 4.135850127732047e-05, "loss": 0.001505832001566887, "step": 206680 }, { "epoch": 58.6687482259438, "grad_norm": 0.06005262956023216, "learning_rate": 4.135566278739711e-05, "loss": 0.00049851443618536, "step": 206690 }, { "epoch": 58.67158671586716, "grad_norm": 0.020493220537900925, "learning_rate": 4.135282429747375e-05, "loss": 0.00025725141167640684, "step": 206700 }, { "epoch": 58.67442520579052, "grad_norm": 0.03973439708352089, "learning_rate": 4.1349985807550385e-05, "loss": 0.0012383138760924338, "step": 206710 }, { "epoch": 58.67726369571388, "grad_norm": 0.2591654062271118, "learning_rate": 4.134714731762702e-05, "loss": 0.00027418825775384905, "step": 206720 }, { "epoch": 58.68010218563724, "grad_norm": 9.220125198364258, "learning_rate": 4.134430882770367e-05, "loss": 0.002757217362523079, "step": 206730 }, { "epoch": 58.6829406755606, "grad_norm": 2.395211696624756, "learning_rate": 4.13414703377803e-05, "loss": 0.0012048156931996346, "step": 206740 }, { "epoch": 58.685779165483964, "grad_norm": 0.02052822895348072, "learning_rate": 4.133863184785694e-05, "loss": 0.0004437889903783798, "step": 206750 }, { "epoch": 58.68861765540732, "grad_norm": 0.3433036804199219, "learning_rate": 4.1335793357933585e-05, "loss": 0.0005371766164898872, "step": 206760 }, { "epoch": 58.69145614533068, "grad_norm": 0.16759438812732697, "learning_rate": 4.133295486801022e-05, "loss": 0.0006929744035005569, "step": 206770 }, { "epoch": 58.694294635254046, "grad_norm": 0.10425325483083725, "learning_rate": 4.133011637808686e-05, "loss": 0.0022274473682045936, "step": 206780 }, { "epoch": 58.69713312517741, "grad_norm": 0.00831339880824089, "learning_rate": 4.13272778881635e-05, "loss": 0.005134113878011703, "step": 206790 }, { "epoch": 58.699971615100765, "grad_norm": 0.04423481225967407, "learning_rate": 4.132443939824014e-05, "loss": 0.00044206548482179643, "step": 206800 }, { "epoch": 58.70281010502413, "grad_norm": 0.406277060508728, "learning_rate": 4.132160090831678e-05, "loss": 0.000527748093008995, "step": 206810 }, { "epoch": 58.70564859494749, "grad_norm": 0.041793614625930786, "learning_rate": 4.131876241839341e-05, "loss": 0.00043745264410972596, "step": 206820 }, { "epoch": 58.708487084870846, "grad_norm": 0.015254014171659946, "learning_rate": 4.1315923928470054e-05, "loss": 0.0012932082638144494, "step": 206830 }, { "epoch": 58.71132557479421, "grad_norm": 0.6041697859764099, "learning_rate": 4.1313085438546695e-05, "loss": 0.0015688037499785423, "step": 206840 }, { "epoch": 58.71416406471757, "grad_norm": 0.26576244831085205, "learning_rate": 4.131024694862333e-05, "loss": 0.0005255449563264847, "step": 206850 }, { "epoch": 58.71700255464093, "grad_norm": 0.015513017773628235, "learning_rate": 4.130740845869998e-05, "loss": 0.0026957720518112184, "step": 206860 }, { "epoch": 58.71984104456429, "grad_norm": 0.3130224645137787, "learning_rate": 4.130456996877661e-05, "loss": 0.000860651209950447, "step": 206870 }, { "epoch": 58.72267953448765, "grad_norm": 20.887805938720703, "learning_rate": 4.130173147885325e-05, "loss": 0.007472198456525803, "step": 206880 }, { "epoch": 58.725518024411016, "grad_norm": 0.009199848398566246, "learning_rate": 4.1298892988929895e-05, "loss": 0.00017117243260145187, "step": 206890 }, { "epoch": 58.72835651433437, "grad_norm": 0.8444786071777344, "learning_rate": 4.129605449900653e-05, "loss": 0.0017740720883011817, "step": 206900 }, { "epoch": 58.731195004257735, "grad_norm": 0.15847627818584442, "learning_rate": 4.129321600908317e-05, "loss": 0.0009329678490757942, "step": 206910 }, { "epoch": 58.7340334941811, "grad_norm": 1.2502888441085815, "learning_rate": 4.1290377519159806e-05, "loss": 0.0013886304572224618, "step": 206920 }, { "epoch": 58.736871984104454, "grad_norm": 0.5318707823753357, "learning_rate": 4.128753902923645e-05, "loss": 0.0006603360176086426, "step": 206930 }, { "epoch": 58.73971047402782, "grad_norm": 0.04900744557380676, "learning_rate": 4.128470053931309e-05, "loss": 0.0002483395859599113, "step": 206940 }, { "epoch": 58.74254896395118, "grad_norm": 0.031023385003209114, "learning_rate": 4.128186204938972e-05, "loss": 0.0005301982164382935, "step": 206950 }, { "epoch": 58.745387453874535, "grad_norm": 0.02497422695159912, "learning_rate": 4.1279023559466365e-05, "loss": 0.0007561206817626953, "step": 206960 }, { "epoch": 58.7482259437979, "grad_norm": 0.026071587577462196, "learning_rate": 4.1276185069543006e-05, "loss": 0.0022153478115797044, "step": 206970 }, { "epoch": 58.75106443372126, "grad_norm": 0.07425837963819504, "learning_rate": 4.127334657961964e-05, "loss": 0.0007734159007668495, "step": 206980 }, { "epoch": 58.753902923644624, "grad_norm": 0.03809712454676628, "learning_rate": 4.127050808969629e-05, "loss": 0.004260948672890663, "step": 206990 }, { "epoch": 58.75674141356798, "grad_norm": 0.059499625116586685, "learning_rate": 4.1267669599772923e-05, "loss": 0.0008134795352816582, "step": 207000 }, { "epoch": 58.75674141356798, "eval_accuracy": 0.9771730145609462, "eval_loss": 0.0834764838218689, "eval_runtime": 32.8295, "eval_samples_per_second": 479.051, "eval_steps_per_second": 7.493, "step": 207000 }, { "epoch": 58.75957990349134, "grad_norm": 0.20077790319919586, "learning_rate": 4.126483110984956e-05, "loss": 0.0015691665932536126, "step": 207010 }, { "epoch": 58.762418393414706, "grad_norm": 0.02881387621164322, "learning_rate": 4.12619926199262e-05, "loss": 0.0011094870045781136, "step": 207020 }, { "epoch": 58.76525688333806, "grad_norm": 1.3382675647735596, "learning_rate": 4.125915413000284e-05, "loss": 0.001234646886587143, "step": 207030 }, { "epoch": 58.768095373261424, "grad_norm": 0.3627988398075104, "learning_rate": 4.125631564007948e-05, "loss": 0.0021222662180662155, "step": 207040 }, { "epoch": 58.77093386318479, "grad_norm": 0.3301372230052948, "learning_rate": 4.125347715015612e-05, "loss": 0.004397808387875557, "step": 207050 }, { "epoch": 58.77377235310815, "grad_norm": 0.03294415399432182, "learning_rate": 4.125063866023276e-05, "loss": 0.007307543605566025, "step": 207060 }, { "epoch": 58.776610843031506, "grad_norm": 0.003059299662709236, "learning_rate": 4.12478001703094e-05, "loss": 0.0007802657783031464, "step": 207070 }, { "epoch": 58.77944933295487, "grad_norm": 0.13260625302791595, "learning_rate": 4.1244961680386034e-05, "loss": 0.0011958673596382142, "step": 207080 }, { "epoch": 58.78228782287823, "grad_norm": 2.005964756011963, "learning_rate": 4.1242123190462676e-05, "loss": 0.0012364739552140236, "step": 207090 }, { "epoch": 58.78512631280159, "grad_norm": 1.0927907228469849, "learning_rate": 4.123928470053932e-05, "loss": 0.0010178759694099426, "step": 207100 }, { "epoch": 58.78796480272495, "grad_norm": 0.13922160863876343, "learning_rate": 4.123644621061595e-05, "loss": 0.0034227706491947174, "step": 207110 }, { "epoch": 58.79080329264831, "grad_norm": 0.060910664498806, "learning_rate": 4.123360772069259e-05, "loss": 0.006601770967245102, "step": 207120 }, { "epoch": 58.79364178257167, "grad_norm": 0.7579424381256104, "learning_rate": 4.1230769230769234e-05, "loss": 0.011407683789730071, "step": 207130 }, { "epoch": 58.79648027249503, "grad_norm": 0.058132000267505646, "learning_rate": 4.122793074084587e-05, "loss": 0.0010198086500167846, "step": 207140 }, { "epoch": 58.799318762418395, "grad_norm": 1.208670735359192, "learning_rate": 4.122509225092251e-05, "loss": 0.00250982828438282, "step": 207150 }, { "epoch": 58.80215725234176, "grad_norm": 1.1116269826889038, "learning_rate": 4.122225376099915e-05, "loss": 0.0016339613124728204, "step": 207160 }, { "epoch": 58.80499574226511, "grad_norm": 0.011944854632019997, "learning_rate": 4.121941527107579e-05, "loss": 0.0005118245258927345, "step": 207170 }, { "epoch": 58.807834232188476, "grad_norm": 1.5985960960388184, "learning_rate": 4.121657678115243e-05, "loss": 0.0010832170024514197, "step": 207180 }, { "epoch": 58.81067272211184, "grad_norm": 0.36284491419792175, "learning_rate": 4.121373829122907e-05, "loss": 0.0016591813415288924, "step": 207190 }, { "epoch": 58.813511212035195, "grad_norm": 0.016494128853082657, "learning_rate": 4.121089980130571e-05, "loss": 0.0008052796125411988, "step": 207200 }, { "epoch": 58.81634970195856, "grad_norm": 0.12156101316213608, "learning_rate": 4.1208061311382345e-05, "loss": 0.0009025674313306809, "step": 207210 }, { "epoch": 58.81918819188192, "grad_norm": 0.10179951786994934, "learning_rate": 4.1205222821458986e-05, "loss": 0.00030388925224542616, "step": 207220 }, { "epoch": 58.822026681805276, "grad_norm": 0.017110340297222137, "learning_rate": 4.120238433153563e-05, "loss": 0.0003111656755208969, "step": 207230 }, { "epoch": 58.82486517172864, "grad_norm": 0.06375014781951904, "learning_rate": 4.119954584161226e-05, "loss": 0.0002698222175240517, "step": 207240 }, { "epoch": 58.827703661652, "grad_norm": 0.017609983682632446, "learning_rate": 4.1196707351688904e-05, "loss": 0.00033736247569322587, "step": 207250 }, { "epoch": 58.830542151575365, "grad_norm": 0.7699373960494995, "learning_rate": 4.1193868861765545e-05, "loss": 0.0032134436070919035, "step": 207260 }, { "epoch": 58.83338064149872, "grad_norm": 0.15301477909088135, "learning_rate": 4.119103037184218e-05, "loss": 0.0004494383931159973, "step": 207270 }, { "epoch": 58.836219131422084, "grad_norm": 0.30342134833335876, "learning_rate": 4.118819188191882e-05, "loss": 0.00041185654699802397, "step": 207280 }, { "epoch": 58.83905762134545, "grad_norm": 0.05601118877530098, "learning_rate": 4.118535339199546e-05, "loss": 0.0006584875285625458, "step": 207290 }, { "epoch": 58.8418961112688, "grad_norm": 0.03268035501241684, "learning_rate": 4.11825149020721e-05, "loss": 0.0011105112731456756, "step": 207300 }, { "epoch": 58.844734601192165, "grad_norm": 0.4537128508090973, "learning_rate": 4.117967641214874e-05, "loss": 0.006664318591356277, "step": 207310 }, { "epoch": 58.84757309111553, "grad_norm": 0.08298700302839279, "learning_rate": 4.117683792222538e-05, "loss": 0.0033212848007678985, "step": 207320 }, { "epoch": 58.850411581038884, "grad_norm": 0.01407330110669136, "learning_rate": 4.117399943230202e-05, "loss": 0.0068441122770309445, "step": 207330 }, { "epoch": 58.85325007096225, "grad_norm": 7.157983779907227, "learning_rate": 4.1171160942378656e-05, "loss": 0.003793060779571533, "step": 207340 }, { "epoch": 58.85608856088561, "grad_norm": 0.24867653846740723, "learning_rate": 4.116832245245529e-05, "loss": 0.0012763502076268197, "step": 207350 }, { "epoch": 58.85892705080897, "grad_norm": 0.0388803631067276, "learning_rate": 4.116548396253194e-05, "loss": 0.0005054570734500885, "step": 207360 }, { "epoch": 58.86176554073233, "grad_norm": 0.15159553289413452, "learning_rate": 4.116264547260857e-05, "loss": 0.0015129465609788895, "step": 207370 }, { "epoch": 58.86460403065569, "grad_norm": 0.06402606517076492, "learning_rate": 4.1159806982685214e-05, "loss": 0.0016532553359866141, "step": 207380 }, { "epoch": 58.867442520579054, "grad_norm": 0.054266732186079025, "learning_rate": 4.1156968492761856e-05, "loss": 0.0004450865089893341, "step": 207390 }, { "epoch": 58.87028101050241, "grad_norm": 0.16818787157535553, "learning_rate": 4.115413000283849e-05, "loss": 0.0009078368544578552, "step": 207400 }, { "epoch": 58.87311950042577, "grad_norm": 0.09061696380376816, "learning_rate": 4.115129151291513e-05, "loss": 0.002191256172955036, "step": 207410 }, { "epoch": 58.875957990349136, "grad_norm": 0.19953051209449768, "learning_rate": 4.114845302299177e-05, "loss": 0.000559241883456707, "step": 207420 }, { "epoch": 58.8787964802725, "grad_norm": 0.12617988884449005, "learning_rate": 4.114561453306841e-05, "loss": 0.0008060453459620476, "step": 207430 }, { "epoch": 58.881634970195854, "grad_norm": 0.010966803878545761, "learning_rate": 4.114277604314505e-05, "loss": 0.0015103500336408615, "step": 207440 }, { "epoch": 58.88447346011922, "grad_norm": 2.135359525680542, "learning_rate": 4.1139937553221684e-05, "loss": 0.0007600663229823112, "step": 207450 }, { "epoch": 58.88731195004258, "grad_norm": 0.04405699670314789, "learning_rate": 4.113709906329833e-05, "loss": 0.0029303736984729765, "step": 207460 }, { "epoch": 58.890150439965936, "grad_norm": 16.221555709838867, "learning_rate": 4.1134260573374966e-05, "loss": 0.003999515995383262, "step": 207470 }, { "epoch": 58.8929889298893, "grad_norm": 0.43182840943336487, "learning_rate": 4.11314220834516e-05, "loss": 0.00041981544345617296, "step": 207480 }, { "epoch": 58.89582741981266, "grad_norm": 0.04110600799322128, "learning_rate": 4.112858359352825e-05, "loss": 0.0004885423928499222, "step": 207490 }, { "epoch": 58.89866590973602, "grad_norm": 0.6182302832603455, "learning_rate": 4.1125745103604884e-05, "loss": 0.0003967374563217163, "step": 207500 }, { "epoch": 58.89866590973602, "eval_accuracy": 0.9788262224200419, "eval_loss": 0.07703840732574463, "eval_runtime": 33.065, "eval_samples_per_second": 475.639, "eval_steps_per_second": 7.44, "step": 207500 }, { "epoch": 58.90150439965938, "grad_norm": 0.040107447654008865, "learning_rate": 4.1122906613681525e-05, "loss": 0.00020810551941394806, "step": 207510 }, { "epoch": 58.90434288958274, "grad_norm": 0.030618177726864815, "learning_rate": 4.1120068123758167e-05, "loss": 0.0006165483966469765, "step": 207520 }, { "epoch": 58.907181379506106, "grad_norm": 0.04630030691623688, "learning_rate": 4.11172296338348e-05, "loss": 0.00020969510078430175, "step": 207530 }, { "epoch": 58.91001986942946, "grad_norm": 0.11447747051715851, "learning_rate": 4.111439114391144e-05, "loss": 0.00016018860042095185, "step": 207540 }, { "epoch": 58.912858359352825, "grad_norm": 0.008581391535699368, "learning_rate": 4.111155265398808e-05, "loss": 0.00017898213118314744, "step": 207550 }, { "epoch": 58.91569684927619, "grad_norm": 0.501482367515564, "learning_rate": 4.110871416406472e-05, "loss": 0.00027753394097089766, "step": 207560 }, { "epoch": 58.91853533919954, "grad_norm": 0.46124380826950073, "learning_rate": 4.110587567414136e-05, "loss": 0.0009124785661697388, "step": 207570 }, { "epoch": 58.921373829122906, "grad_norm": 0.7475137114524841, "learning_rate": 4.1103037184217994e-05, "loss": 0.001150272972881794, "step": 207580 }, { "epoch": 58.92421231904627, "grad_norm": 1.1510826349258423, "learning_rate": 4.110019869429464e-05, "loss": 0.0015948090702295303, "step": 207590 }, { "epoch": 58.927050808969625, "grad_norm": 3.4067542552948, "learning_rate": 4.109736020437128e-05, "loss": 0.010500933229923248, "step": 207600 }, { "epoch": 58.92988929889299, "grad_norm": 0.27619796991348267, "learning_rate": 4.109452171444791e-05, "loss": 0.0005762262269854545, "step": 207610 }, { "epoch": 58.93272778881635, "grad_norm": 10.329214096069336, "learning_rate": 4.109168322452456e-05, "loss": 0.0017792237922549248, "step": 207620 }, { "epoch": 58.935566278739714, "grad_norm": 0.510172963142395, "learning_rate": 4.1088844734601195e-05, "loss": 0.001700356788933277, "step": 207630 }, { "epoch": 58.93840476866307, "grad_norm": 0.007970583625137806, "learning_rate": 4.1086006244677836e-05, "loss": 0.0006431018933653832, "step": 207640 }, { "epoch": 58.94124325858643, "grad_norm": 0.055422693490982056, "learning_rate": 4.108316775475447e-05, "loss": 0.00013470035046339034, "step": 207650 }, { "epoch": 58.944081748509795, "grad_norm": 0.2304726243019104, "learning_rate": 4.108032926483111e-05, "loss": 0.00023025237023830414, "step": 207660 }, { "epoch": 58.94692023843315, "grad_norm": 0.19219844043254852, "learning_rate": 4.107749077490775e-05, "loss": 0.00028163082897663115, "step": 207670 }, { "epoch": 58.949758728356514, "grad_norm": 0.022780882194638252, "learning_rate": 4.107465228498439e-05, "loss": 0.0003609348088502884, "step": 207680 }, { "epoch": 58.95259721827988, "grad_norm": 0.07804735004901886, "learning_rate": 4.107181379506103e-05, "loss": 0.002425245754420757, "step": 207690 }, { "epoch": 58.95543570820323, "grad_norm": 0.21282333135604858, "learning_rate": 4.106897530513767e-05, "loss": 0.0003826828673481941, "step": 207700 }, { "epoch": 58.958274198126595, "grad_norm": 0.12351103872060776, "learning_rate": 4.1066136815214305e-05, "loss": 0.0003309847787022591, "step": 207710 }, { "epoch": 58.96111268804996, "grad_norm": 0.02407628484070301, "learning_rate": 4.1063298325290947e-05, "loss": 0.0009531006217002869, "step": 207720 }, { "epoch": 58.96395117797332, "grad_norm": 0.026386631652712822, "learning_rate": 4.106045983536759e-05, "loss": 0.00043917614966630935, "step": 207730 }, { "epoch": 58.96678966789668, "grad_norm": 0.3386162519454956, "learning_rate": 4.105762134544422e-05, "loss": 0.0007970765233039856, "step": 207740 }, { "epoch": 58.96962815782004, "grad_norm": 2.255995988845825, "learning_rate": 4.1054782855520864e-05, "loss": 0.0011912908405065537, "step": 207750 }, { "epoch": 58.9724666477434, "grad_norm": 0.018130864948034286, "learning_rate": 4.1051944365597505e-05, "loss": 0.0003508782014250755, "step": 207760 }, { "epoch": 58.97530513766676, "grad_norm": 0.11074202507734299, "learning_rate": 4.104910587567414e-05, "loss": 0.0004644874483346939, "step": 207770 }, { "epoch": 58.97814362759012, "grad_norm": 0.11638941615819931, "learning_rate": 4.104626738575078e-05, "loss": 0.0005673544481396675, "step": 207780 }, { "epoch": 58.980982117513484, "grad_norm": 0.05714520439505577, "learning_rate": 4.104342889582742e-05, "loss": 0.0037725768983364106, "step": 207790 }, { "epoch": 58.98382060743684, "grad_norm": 0.28198277950286865, "learning_rate": 4.1040590405904064e-05, "loss": 0.0009155942127108574, "step": 207800 }, { "epoch": 58.9866590973602, "grad_norm": 0.04862555116415024, "learning_rate": 4.10377519159807e-05, "loss": 0.00025427136570215224, "step": 207810 }, { "epoch": 58.989497587283566, "grad_norm": 0.623757541179657, "learning_rate": 4.103491342605734e-05, "loss": 0.0008239630609750747, "step": 207820 }, { "epoch": 58.99233607720693, "grad_norm": 0.049276720732450485, "learning_rate": 4.103207493613398e-05, "loss": 0.0006139809265732765, "step": 207830 }, { "epoch": 58.995174567130285, "grad_norm": 0.34119123220443726, "learning_rate": 4.1029236446210616e-05, "loss": 0.0004357393831014633, "step": 207840 }, { "epoch": 58.99801305705365, "grad_norm": 0.02481074258685112, "learning_rate": 4.102639795628726e-05, "loss": 0.00015672259032726288, "step": 207850 }, { "epoch": 59.00085154697701, "grad_norm": 0.02579890750348568, "learning_rate": 4.10235594663639e-05, "loss": 0.0004602399654686451, "step": 207860 }, { "epoch": 59.003690036900366, "grad_norm": 0.44813501834869385, "learning_rate": 4.102072097644053e-05, "loss": 0.000399518758058548, "step": 207870 }, { "epoch": 59.00652852682373, "grad_norm": 0.021876564249396324, "learning_rate": 4.1017882486517175e-05, "loss": 0.00020244549959897996, "step": 207880 }, { "epoch": 59.00936701674709, "grad_norm": 0.01540295034646988, "learning_rate": 4.1015043996593816e-05, "loss": 0.0017710911110043527, "step": 207890 }, { "epoch": 59.012205506670455, "grad_norm": 0.4906509518623352, "learning_rate": 4.101220550667045e-05, "loss": 0.00041702501475811006, "step": 207900 }, { "epoch": 59.01504399659381, "grad_norm": 0.012258261442184448, "learning_rate": 4.100936701674709e-05, "loss": 0.0006076676771044731, "step": 207910 }, { "epoch": 59.01788248651717, "grad_norm": 0.1737445890903473, "learning_rate": 4.1006528526823733e-05, "loss": 0.0004614373669028282, "step": 207920 }, { "epoch": 59.020720976440536, "grad_norm": 0.009206118062138557, "learning_rate": 4.1003690036900375e-05, "loss": 0.0003240685909986496, "step": 207930 }, { "epoch": 59.02355946636389, "grad_norm": 0.06124449521303177, "learning_rate": 4.100085154697701e-05, "loss": 0.006179551780223847, "step": 207940 }, { "epoch": 59.026397956287255, "grad_norm": 0.04354212433099747, "learning_rate": 4.0998013057053644e-05, "loss": 0.0024033065885305403, "step": 207950 }, { "epoch": 59.02923644621062, "grad_norm": 0.04144112765789032, "learning_rate": 4.099517456713029e-05, "loss": 0.0004903672263026238, "step": 207960 }, { "epoch": 59.032074936133974, "grad_norm": 0.08474123477935791, "learning_rate": 4.099233607720693e-05, "loss": 0.0013550233095884323, "step": 207970 }, { "epoch": 59.03491342605734, "grad_norm": 0.1835555136203766, "learning_rate": 4.098949758728357e-05, "loss": 0.0008627686649560929, "step": 207980 }, { "epoch": 59.0377519159807, "grad_norm": 7.0196533203125, "learning_rate": 4.098665909736021e-05, "loss": 0.003278651461005211, "step": 207990 }, { "epoch": 59.04059040590406, "grad_norm": 7.421067714691162, "learning_rate": 4.0983820607436844e-05, "loss": 0.004430201277136803, "step": 208000 }, { "epoch": 59.04059040590406, "eval_accuracy": 0.9776816939022064, "eval_loss": 0.0822572410106659, "eval_runtime": 32.2419, "eval_samples_per_second": 487.781, "eval_steps_per_second": 7.63, "step": 208000 }, { "epoch": 59.04342889582742, "grad_norm": 8.766963958740234, "learning_rate": 4.0980982117513485e-05, "loss": 0.0015285126864910126, "step": 208010 }, { "epoch": 59.04626738575078, "grad_norm": 1.4790773391723633, "learning_rate": 4.097814362759013e-05, "loss": 0.0004593798890709877, "step": 208020 }, { "epoch": 59.049105875674144, "grad_norm": 0.006965484004467726, "learning_rate": 4.097530513766676e-05, "loss": 0.0006050080060958863, "step": 208030 }, { "epoch": 59.0519443655975, "grad_norm": 10.768735885620117, "learning_rate": 4.09724666477434e-05, "loss": 0.00208019707351923, "step": 208040 }, { "epoch": 59.05478285552086, "grad_norm": 0.7936661243438721, "learning_rate": 4.096962815782004e-05, "loss": 0.0005237067118287087, "step": 208050 }, { "epoch": 59.057621345444225, "grad_norm": 0.2742912173271179, "learning_rate": 4.0966789667896686e-05, "loss": 0.0008844098076224328, "step": 208060 }, { "epoch": 59.06045983536758, "grad_norm": 0.09872126579284668, "learning_rate": 4.096395117797332e-05, "loss": 0.0004022387787699699, "step": 208070 }, { "epoch": 59.063298325290944, "grad_norm": 0.17566470801830292, "learning_rate": 4.0961112688049955e-05, "loss": 0.0033900637179613115, "step": 208080 }, { "epoch": 59.06613681521431, "grad_norm": 0.29065167903900146, "learning_rate": 4.09582741981266e-05, "loss": 0.0002166997641324997, "step": 208090 }, { "epoch": 59.06897530513767, "grad_norm": 0.059227947145700455, "learning_rate": 4.095543570820324e-05, "loss": 0.0015466881915926934, "step": 208100 }, { "epoch": 59.071813795061026, "grad_norm": 0.11734378337860107, "learning_rate": 4.095259721827988e-05, "loss": 0.0003778437152504921, "step": 208110 }, { "epoch": 59.07465228498439, "grad_norm": 1.2174646854400635, "learning_rate": 4.094975872835652e-05, "loss": 0.00027200598269701005, "step": 208120 }, { "epoch": 59.07749077490775, "grad_norm": 7.156342029571533, "learning_rate": 4.0946920238433155e-05, "loss": 0.0010393979027867317, "step": 208130 }, { "epoch": 59.08032926483111, "grad_norm": 0.04469839110970497, "learning_rate": 4.0944081748509796e-05, "loss": 0.000218113511800766, "step": 208140 }, { "epoch": 59.08316775475447, "grad_norm": 0.015037835575640202, "learning_rate": 4.094124325858643e-05, "loss": 0.0016883708536624908, "step": 208150 }, { "epoch": 59.08600624467783, "grad_norm": 0.024703199043869972, "learning_rate": 4.093840476866307e-05, "loss": 0.00028570666909217836, "step": 208160 }, { "epoch": 59.08884473460119, "grad_norm": 0.07027429342269897, "learning_rate": 4.0935566278739714e-05, "loss": 0.0036558333784341814, "step": 208170 }, { "epoch": 59.09168322452455, "grad_norm": 0.0906556248664856, "learning_rate": 4.093272778881635e-05, "loss": 0.00019251257181167601, "step": 208180 }, { "epoch": 59.094521714447914, "grad_norm": 0.08480474352836609, "learning_rate": 4.092988929889299e-05, "loss": 0.0002675136551260948, "step": 208190 }, { "epoch": 59.09736020437128, "grad_norm": 0.2063034176826477, "learning_rate": 4.092705080896963e-05, "loss": 0.0002925315871834755, "step": 208200 }, { "epoch": 59.10019869429463, "grad_norm": 0.06712369620800018, "learning_rate": 4.0924212319046266e-05, "loss": 0.004868596792221069, "step": 208210 }, { "epoch": 59.103037184217996, "grad_norm": 0.08728727698326111, "learning_rate": 4.0921373829122914e-05, "loss": 0.0023691341280937194, "step": 208220 }, { "epoch": 59.10587567414136, "grad_norm": 0.3144252300262451, "learning_rate": 4.091853533919955e-05, "loss": 0.00021848194301128389, "step": 208230 }, { "epoch": 59.108714164064715, "grad_norm": 0.0677976906299591, "learning_rate": 4.091569684927618e-05, "loss": 0.00472303219139576, "step": 208240 }, { "epoch": 59.11155265398808, "grad_norm": 0.5081467032432556, "learning_rate": 4.0912858359352824e-05, "loss": 0.01782566010951996, "step": 208250 }, { "epoch": 59.11439114391144, "grad_norm": 0.10976193100214005, "learning_rate": 4.0910019869429466e-05, "loss": 0.0005334103479981422, "step": 208260 }, { "epoch": 59.1172296338348, "grad_norm": 0.02519877627491951, "learning_rate": 4.090718137950611e-05, "loss": 0.0018647167831659316, "step": 208270 }, { "epoch": 59.12006812375816, "grad_norm": 13.728065490722656, "learning_rate": 4.090434288958274e-05, "loss": 0.0034889984875917436, "step": 208280 }, { "epoch": 59.12290661368152, "grad_norm": 0.4617295563220978, "learning_rate": 4.090150439965938e-05, "loss": 0.0001879347488284111, "step": 208290 }, { "epoch": 59.125745103604885, "grad_norm": 0.04866599664092064, "learning_rate": 4.0898665909736024e-05, "loss": 0.0016242315992712974, "step": 208300 }, { "epoch": 59.12858359352824, "grad_norm": 0.0794895738363266, "learning_rate": 4.089582741981266e-05, "loss": 0.005256699025630951, "step": 208310 }, { "epoch": 59.131422083451604, "grad_norm": 8.032265663146973, "learning_rate": 4.08929889298893e-05, "loss": 0.0014704780653119088, "step": 208320 }, { "epoch": 59.134260573374966, "grad_norm": 15.124927520751953, "learning_rate": 4.089015043996594e-05, "loss": 0.005256590247154236, "step": 208330 }, { "epoch": 59.13709906329832, "grad_norm": 0.014676358550786972, "learning_rate": 4.0887311950042576e-05, "loss": 0.001370241679251194, "step": 208340 }, { "epoch": 59.139937553221685, "grad_norm": 0.016551930457353592, "learning_rate": 4.088447346011922e-05, "loss": 0.0004966679960489273, "step": 208350 }, { "epoch": 59.14277604314505, "grad_norm": 7.34307861328125, "learning_rate": 4.088163497019586e-05, "loss": 0.02061762511730194, "step": 208360 }, { "epoch": 59.14561453306841, "grad_norm": 0.07635661214590073, "learning_rate": 4.0878796480272494e-05, "loss": 0.00036470629274845125, "step": 208370 }, { "epoch": 59.14845302299177, "grad_norm": 0.3257458209991455, "learning_rate": 4.0875957990349135e-05, "loss": 0.0023926762863993646, "step": 208380 }, { "epoch": 59.15129151291513, "grad_norm": 0.11409034579992294, "learning_rate": 4.0873119500425776e-05, "loss": 0.00026971213519573213, "step": 208390 }, { "epoch": 59.15413000283849, "grad_norm": 0.2311750054359436, "learning_rate": 4.087028101050242e-05, "loss": 0.002975240163505077, "step": 208400 }, { "epoch": 59.15696849276185, "grad_norm": 0.018550384789705276, "learning_rate": 4.086744252057905e-05, "loss": 0.0003716014325618744, "step": 208410 }, { "epoch": 59.15980698268521, "grad_norm": 0.6655106544494629, "learning_rate": 4.0864604030655694e-05, "loss": 0.0007350720465183258, "step": 208420 }, { "epoch": 59.162645472608574, "grad_norm": 0.19258610904216766, "learning_rate": 4.0861765540732335e-05, "loss": 0.0004562605172395706, "step": 208430 }, { "epoch": 59.16548396253193, "grad_norm": 1.0891563892364502, "learning_rate": 4.085892705080897e-05, "loss": 0.0005592022091150284, "step": 208440 }, { "epoch": 59.16832245245529, "grad_norm": 0.27627140283584595, "learning_rate": 4.085608856088561e-05, "loss": 0.0005465507507324219, "step": 208450 }, { "epoch": 59.171160942378656, "grad_norm": 0.02419949881732464, "learning_rate": 4.085325007096225e-05, "loss": 0.002882329374551773, "step": 208460 }, { "epoch": 59.17399943230202, "grad_norm": 0.07663603127002716, "learning_rate": 4.085041158103889e-05, "loss": 0.0009726541116833687, "step": 208470 }, { "epoch": 59.176837922225374, "grad_norm": 0.11414188891649246, "learning_rate": 4.084757309111553e-05, "loss": 0.00023626722395420074, "step": 208480 }, { "epoch": 59.17967641214874, "grad_norm": 0.11437925696372986, "learning_rate": 4.084473460119217e-05, "loss": 0.002907714806497097, "step": 208490 }, { "epoch": 59.1825149020721, "grad_norm": 4.730220317840576, "learning_rate": 4.0841896111268804e-05, "loss": 0.0012253113090991974, "step": 208500 }, { "epoch": 59.1825149020721, "eval_accuracy": 0.9784447129140967, "eval_loss": 0.07730703055858612, "eval_runtime": 32.709, "eval_samples_per_second": 480.815, "eval_steps_per_second": 7.521, "step": 208500 }, { "epoch": 59.185353391995456, "grad_norm": 0.08481720834970474, "learning_rate": 4.0839057621345446e-05, "loss": 0.0003444690257310867, "step": 208510 }, { "epoch": 59.18819188191882, "grad_norm": 0.47381699085235596, "learning_rate": 4.083621913142209e-05, "loss": 0.00035023875534534454, "step": 208520 }, { "epoch": 59.19103037184218, "grad_norm": 0.05361727252602577, "learning_rate": 4.083338064149873e-05, "loss": 0.0005422268062829971, "step": 208530 }, { "epoch": 59.19386886176554, "grad_norm": 0.401660293340683, "learning_rate": 4.083054215157536e-05, "loss": 0.0010594256222248078, "step": 208540 }, { "epoch": 59.1967073516889, "grad_norm": 0.4858897626399994, "learning_rate": 4.0827703661652e-05, "loss": 0.00082086231559515, "step": 208550 }, { "epoch": 59.19954584161226, "grad_norm": 0.38886117935180664, "learning_rate": 4.0824865171728646e-05, "loss": 0.0008142786100506783, "step": 208560 }, { "epoch": 59.202384331535626, "grad_norm": 0.08062991499900818, "learning_rate": 4.082202668180528e-05, "loss": 0.00039433352649211886, "step": 208570 }, { "epoch": 59.20522282145898, "grad_norm": 0.020408710464835167, "learning_rate": 4.081918819188192e-05, "loss": 0.00301948357373476, "step": 208580 }, { "epoch": 59.208061311382345, "grad_norm": 0.07235443592071533, "learning_rate": 4.081634970195856e-05, "loss": 0.00036010369658470156, "step": 208590 }, { "epoch": 59.21089980130571, "grad_norm": 0.07540994137525558, "learning_rate": 4.08135112120352e-05, "loss": 0.0006806753575801849, "step": 208600 }, { "epoch": 59.21373829122906, "grad_norm": 1.5043736696243286, "learning_rate": 4.081067272211184e-05, "loss": 0.0006596093997359275, "step": 208610 }, { "epoch": 59.216576781152426, "grad_norm": 0.3405483365058899, "learning_rate": 4.080783423218848e-05, "loss": 0.0007652267813682556, "step": 208620 }, { "epoch": 59.21941527107579, "grad_norm": 0.18182450532913208, "learning_rate": 4.0804995742265115e-05, "loss": 0.0022856274619698523, "step": 208630 }, { "epoch": 59.22225376099915, "grad_norm": 0.14827436208724976, "learning_rate": 4.0802157252341757e-05, "loss": 0.0002925468608736992, "step": 208640 }, { "epoch": 59.22509225092251, "grad_norm": 0.0358758345246315, "learning_rate": 4.07993187624184e-05, "loss": 0.0009020799770951271, "step": 208650 }, { "epoch": 59.22793074084587, "grad_norm": 0.032166801393032074, "learning_rate": 4.079648027249503e-05, "loss": 0.0005771357566118241, "step": 208660 }, { "epoch": 59.23076923076923, "grad_norm": 0.18071867525577545, "learning_rate": 4.0793641782571674e-05, "loss": 0.000900956615805626, "step": 208670 }, { "epoch": 59.23360772069259, "grad_norm": 5.668272018432617, "learning_rate": 4.079080329264831e-05, "loss": 0.0012646080926060677, "step": 208680 }, { "epoch": 59.23644621061595, "grad_norm": 0.0711614117026329, "learning_rate": 4.078796480272496e-05, "loss": 0.00026070233434438705, "step": 208690 }, { "epoch": 59.239284700539315, "grad_norm": 0.08946266770362854, "learning_rate": 4.078512631280159e-05, "loss": 0.0004927709698677063, "step": 208700 }, { "epoch": 59.24212319046267, "grad_norm": 0.03241831809282303, "learning_rate": 4.0782287822878226e-05, "loss": 0.00018174443393945695, "step": 208710 }, { "epoch": 59.244961680386034, "grad_norm": 0.049535248428583145, "learning_rate": 4.0779449332954874e-05, "loss": 0.0003244033083319664, "step": 208720 }, { "epoch": 59.2478001703094, "grad_norm": 0.07869278639554977, "learning_rate": 4.077661084303151e-05, "loss": 0.00038533341139554975, "step": 208730 }, { "epoch": 59.25063866023276, "grad_norm": 0.017420105636119843, "learning_rate": 4.077377235310815e-05, "loss": 0.00015230514109134674, "step": 208740 }, { "epoch": 59.253477150156115, "grad_norm": 0.11073766648769379, "learning_rate": 4.077093386318479e-05, "loss": 0.0003903353586792946, "step": 208750 }, { "epoch": 59.25631564007948, "grad_norm": 0.8112896680831909, "learning_rate": 4.0768095373261426e-05, "loss": 0.0008689681068062783, "step": 208760 }, { "epoch": 59.25915413000284, "grad_norm": 0.06953651458024979, "learning_rate": 4.076525688333807e-05, "loss": 0.0003301454707980156, "step": 208770 }, { "epoch": 59.2619926199262, "grad_norm": 0.00554344616830349, "learning_rate": 4.07624183934147e-05, "loss": 0.000978374108672142, "step": 208780 }, { "epoch": 59.26483110984956, "grad_norm": 0.04279300570487976, "learning_rate": 4.075957990349134e-05, "loss": 0.00013325214385986328, "step": 208790 }, { "epoch": 59.26766959977292, "grad_norm": 0.11866388469934464, "learning_rate": 4.0756741413567985e-05, "loss": 0.0020523719489574433, "step": 208800 }, { "epoch": 59.27050808969628, "grad_norm": 0.08551599085330963, "learning_rate": 4.075390292364462e-05, "loss": 0.0012163668870925903, "step": 208810 }, { "epoch": 59.27334657961964, "grad_norm": 0.5311707258224487, "learning_rate": 4.075106443372127e-05, "loss": 0.01011124551296234, "step": 208820 }, { "epoch": 59.276185069543004, "grad_norm": 0.018165912479162216, "learning_rate": 4.07482259437979e-05, "loss": 0.00016736853867769242, "step": 208830 }, { "epoch": 59.27902355946637, "grad_norm": 0.1225375235080719, "learning_rate": 4.074538745387454e-05, "loss": 0.00044635478407144545, "step": 208840 }, { "epoch": 59.28186204938972, "grad_norm": 0.012719024904072285, "learning_rate": 4.0742548963951185e-05, "loss": 0.00028905365616083145, "step": 208850 }, { "epoch": 59.284700539313086, "grad_norm": 0.09274417906999588, "learning_rate": 4.073971047402782e-05, "loss": 0.0001270987093448639, "step": 208860 }, { "epoch": 59.28753902923645, "grad_norm": 0.046678781509399414, "learning_rate": 4.073687198410446e-05, "loss": 0.00027902983129024506, "step": 208870 }, { "epoch": 59.290377519159804, "grad_norm": 0.13408954441547394, "learning_rate": 4.0734033494181095e-05, "loss": 0.0002171434462070465, "step": 208880 }, { "epoch": 59.29321600908317, "grad_norm": 0.015426598489284515, "learning_rate": 4.073119500425774e-05, "loss": 0.00027035679668188097, "step": 208890 }, { "epoch": 59.29605449900653, "grad_norm": 0.2719776928424835, "learning_rate": 4.072835651433438e-05, "loss": 0.0003261376172304153, "step": 208900 }, { "epoch": 59.298892988929886, "grad_norm": 0.06029966101050377, "learning_rate": 4.072551802441101e-05, "loss": 0.001478903368115425, "step": 208910 }, { "epoch": 59.30173147885325, "grad_norm": 0.01664654165506363, "learning_rate": 4.0722679534487654e-05, "loss": 0.001374727301299572, "step": 208920 }, { "epoch": 59.30456996877661, "grad_norm": 0.04944661632180214, "learning_rate": 4.0719841044564295e-05, "loss": 0.005882376432418823, "step": 208930 }, { "epoch": 59.307408458699975, "grad_norm": 0.4178543984889984, "learning_rate": 4.071700255464093e-05, "loss": 0.0002276284620165825, "step": 208940 }, { "epoch": 59.31024694862333, "grad_norm": 0.34803563356399536, "learning_rate": 4.071416406471758e-05, "loss": 0.0006628014147281647, "step": 208950 }, { "epoch": 59.31308543854669, "grad_norm": 0.06936804950237274, "learning_rate": 4.071132557479421e-05, "loss": 0.00011117346584796906, "step": 208960 }, { "epoch": 59.315923928470056, "grad_norm": 0.10818737000226974, "learning_rate": 4.070848708487085e-05, "loss": 0.0011971769854426384, "step": 208970 }, { "epoch": 59.31876241839341, "grad_norm": 0.0684070959687233, "learning_rate": 4.070564859494749e-05, "loss": 0.0035743337124586106, "step": 208980 }, { "epoch": 59.321600908316775, "grad_norm": 0.1310119777917862, "learning_rate": 4.070281010502413e-05, "loss": 0.0003977134823799133, "step": 208990 }, { "epoch": 59.32443939824014, "grad_norm": 16.986696243286133, "learning_rate": 4.069997161510077e-05, "loss": 0.004254086315631867, "step": 209000 }, { "epoch": 59.32443939824014, "eval_accuracy": 0.9785718827494119, "eval_loss": 0.07572280615568161, "eval_runtime": 32.2164, "eval_samples_per_second": 488.167, "eval_steps_per_second": 7.636, "step": 209000 }, { "epoch": 59.32727788816349, "grad_norm": 4.5801496505737305, "learning_rate": 4.0697133125177406e-05, "loss": 0.0011519081890583038, "step": 209010 }, { "epoch": 59.330116378086856, "grad_norm": 0.07664173096418381, "learning_rate": 4.069429463525405e-05, "loss": 0.0027889978140592576, "step": 209020 }, { "epoch": 59.33295486801022, "grad_norm": 0.4735420346260071, "learning_rate": 4.069145614533069e-05, "loss": 0.000784255750477314, "step": 209030 }, { "epoch": 59.33579335793358, "grad_norm": 0.030794506892561913, "learning_rate": 4.0688617655407323e-05, "loss": 0.0007298711687326431, "step": 209040 }, { "epoch": 59.33863184785694, "grad_norm": 0.05999572575092316, "learning_rate": 4.0685779165483965e-05, "loss": 0.0011223534122109414, "step": 209050 }, { "epoch": 59.3414703377803, "grad_norm": 0.3529675602912903, "learning_rate": 4.0682940675560606e-05, "loss": 0.0014237245544791222, "step": 209060 }, { "epoch": 59.344308827703664, "grad_norm": 0.12415836751461029, "learning_rate": 4.068010218563724e-05, "loss": 0.0009725471958518028, "step": 209070 }, { "epoch": 59.34714731762702, "grad_norm": 0.02911434695124626, "learning_rate": 4.067726369571388e-05, "loss": 0.00031318534165620805, "step": 209080 }, { "epoch": 59.34998580755038, "grad_norm": 0.29892754554748535, "learning_rate": 4.0674425205790524e-05, "loss": 0.000219133123755455, "step": 209090 }, { "epoch": 59.352824297473745, "grad_norm": 0.05873022601008415, "learning_rate": 4.067158671586716e-05, "loss": 0.00021644551306962967, "step": 209100 }, { "epoch": 59.35566278739711, "grad_norm": 0.0941539853811264, "learning_rate": 4.06687482259438e-05, "loss": 0.00045733116567134855, "step": 209110 }, { "epoch": 59.358501277320464, "grad_norm": 0.032827965915203094, "learning_rate": 4.066590973602044e-05, "loss": 0.00032626911997795104, "step": 209120 }, { "epoch": 59.36133976724383, "grad_norm": 0.0531652569770813, "learning_rate": 4.0663071246097076e-05, "loss": 0.00023807883262634278, "step": 209130 }, { "epoch": 59.36417825716719, "grad_norm": 0.23717254400253296, "learning_rate": 4.066023275617372e-05, "loss": 0.0005705766379833221, "step": 209140 }, { "epoch": 59.367016747090545, "grad_norm": 0.04272928088903427, "learning_rate": 4.065739426625036e-05, "loss": 0.00021221786737442018, "step": 209150 }, { "epoch": 59.36985523701391, "grad_norm": 0.03711260110139847, "learning_rate": 4.0654555776327e-05, "loss": 0.0003359878435730934, "step": 209160 }, { "epoch": 59.37269372693727, "grad_norm": 0.06563179939985275, "learning_rate": 4.0651717286403634e-05, "loss": 0.0002058323472738266, "step": 209170 }, { "epoch": 59.37553221686063, "grad_norm": 0.08368227630853653, "learning_rate": 4.064887879648027e-05, "loss": 0.0001309594139456749, "step": 209180 }, { "epoch": 59.37837070678399, "grad_norm": 0.1590418964624405, "learning_rate": 4.064604030655692e-05, "loss": 0.0012128213420510292, "step": 209190 }, { "epoch": 59.38120919670735, "grad_norm": 0.010934002697467804, "learning_rate": 4.064320181663355e-05, "loss": 0.0003749147057533264, "step": 209200 }, { "epoch": 59.384047686630716, "grad_norm": 0.16526152193546295, "learning_rate": 4.064036332671019e-05, "loss": 0.00020508281886577606, "step": 209210 }, { "epoch": 59.38688617655407, "grad_norm": 0.20335642993450165, "learning_rate": 4.0637524836786834e-05, "loss": 0.0005382472649216652, "step": 209220 }, { "epoch": 59.389724666477434, "grad_norm": 0.03179871663451195, "learning_rate": 4.063468634686347e-05, "loss": 0.0005037881433963776, "step": 209230 }, { "epoch": 59.3925631564008, "grad_norm": 0.02179662324488163, "learning_rate": 4.063184785694011e-05, "loss": 0.001602536253631115, "step": 209240 }, { "epoch": 59.39540164632415, "grad_norm": 0.011978219263255596, "learning_rate": 4.062900936701675e-05, "loss": 0.001953318156301975, "step": 209250 }, { "epoch": 59.398240136247516, "grad_norm": 0.9310401678085327, "learning_rate": 4.0626170877093386e-05, "loss": 0.0006333401426672936, "step": 209260 }, { "epoch": 59.40107862617088, "grad_norm": 0.09759822487831116, "learning_rate": 4.062333238717003e-05, "loss": 0.00022952873259782792, "step": 209270 }, { "epoch": 59.403917116094235, "grad_norm": 0.02207164838910103, "learning_rate": 4.062049389724666e-05, "loss": 0.000393638014793396, "step": 209280 }, { "epoch": 59.4067556060176, "grad_norm": 0.01912384107708931, "learning_rate": 4.061765540732331e-05, "loss": 0.00027714092284440993, "step": 209290 }, { "epoch": 59.40959409594096, "grad_norm": 0.10842636227607727, "learning_rate": 4.0614816917399945e-05, "loss": 0.0004163352772593498, "step": 209300 }, { "epoch": 59.41243258586432, "grad_norm": 0.029367420822381973, "learning_rate": 4.061197842747658e-05, "loss": 0.0003536500036716461, "step": 209310 }, { "epoch": 59.41527107578768, "grad_norm": 0.016597745940089226, "learning_rate": 4.060913993755323e-05, "loss": 0.000644204206764698, "step": 209320 }, { "epoch": 59.41810956571104, "grad_norm": 10.22087574005127, "learning_rate": 4.060630144762986e-05, "loss": 0.00272232536226511, "step": 209330 }, { "epoch": 59.420948055634405, "grad_norm": 0.04013590142130852, "learning_rate": 4.0603462957706504e-05, "loss": 0.0004520803689956665, "step": 209340 }, { "epoch": 59.42378654555776, "grad_norm": 0.11514192819595337, "learning_rate": 4.0600624467783145e-05, "loss": 0.0005468053743243218, "step": 209350 }, { "epoch": 59.42662503548112, "grad_norm": 0.020102227106690407, "learning_rate": 4.059778597785978e-05, "loss": 0.00038982946425676347, "step": 209360 }, { "epoch": 59.429463525404486, "grad_norm": 0.025421474128961563, "learning_rate": 4.059494748793642e-05, "loss": 0.0003222128376364708, "step": 209370 }, { "epoch": 59.43230201532784, "grad_norm": 0.04584435746073723, "learning_rate": 4.0592108998013056e-05, "loss": 0.00035425964742898943, "step": 209380 }, { "epoch": 59.435140505251205, "grad_norm": 0.10768184065818787, "learning_rate": 4.05892705080897e-05, "loss": 0.000529603473842144, "step": 209390 }, { "epoch": 59.43797899517457, "grad_norm": 0.12898315489292145, "learning_rate": 4.058643201816634e-05, "loss": 0.00023846402764320372, "step": 209400 }, { "epoch": 59.44081748509793, "grad_norm": 0.18818816542625427, "learning_rate": 4.058359352824297e-05, "loss": 0.0008022658526897431, "step": 209410 }, { "epoch": 59.44365597502129, "grad_norm": 0.018970981240272522, "learning_rate": 4.0580755038319614e-05, "loss": 0.0002594944089651108, "step": 209420 }, { "epoch": 59.44649446494465, "grad_norm": 0.92731112241745, "learning_rate": 4.0577916548396256e-05, "loss": 0.00035805404186248777, "step": 209430 }, { "epoch": 59.44933295486801, "grad_norm": 0.0064618997275829315, "learning_rate": 4.057507805847289e-05, "loss": 0.0002789141610264778, "step": 209440 }, { "epoch": 59.45217144479137, "grad_norm": 0.12312639504671097, "learning_rate": 4.057223956854954e-05, "loss": 0.000753352977335453, "step": 209450 }, { "epoch": 59.45500993471473, "grad_norm": 0.024005092680454254, "learning_rate": 4.056940107862617e-05, "loss": 0.00020345598459243775, "step": 209460 }, { "epoch": 59.457848424638094, "grad_norm": 0.04824695736169815, "learning_rate": 4.0566562588702814e-05, "loss": 0.0020585261285305024, "step": 209470 }, { "epoch": 59.46068691456146, "grad_norm": 0.02838551625609398, "learning_rate": 4.056372409877945e-05, "loss": 0.000652468018233776, "step": 209480 }, { "epoch": 59.46352540448481, "grad_norm": 0.36792969703674316, "learning_rate": 4.056088560885609e-05, "loss": 0.00019667893648147582, "step": 209490 }, { "epoch": 59.466363894408175, "grad_norm": 0.034894924610853195, "learning_rate": 4.055804711893273e-05, "loss": 0.0005311084911227226, "step": 209500 }, { "epoch": 59.466363894408175, "eval_accuracy": 0.9791441470083296, "eval_loss": 0.0780511349439621, "eval_runtime": 32.3685, "eval_samples_per_second": 485.874, "eval_steps_per_second": 7.6, "step": 209500 }, { "epoch": 59.46920238433154, "grad_norm": 0.006686811335384846, "learning_rate": 4.0555208629009366e-05, "loss": 0.0003284519538283348, "step": 209510 }, { "epoch": 59.472040874254894, "grad_norm": 0.037137459963560104, "learning_rate": 4.055237013908601e-05, "loss": 0.002166958898305893, "step": 209520 }, { "epoch": 59.47487936417826, "grad_norm": 1.5821013450622559, "learning_rate": 4.054953164916265e-05, "loss": 0.0006052663549780846, "step": 209530 }, { "epoch": 59.47771785410162, "grad_norm": 0.11672259867191315, "learning_rate": 4.0546693159239284e-05, "loss": 0.000413600355386734, "step": 209540 }, { "epoch": 59.480556344024976, "grad_norm": 0.037047822028398514, "learning_rate": 4.0543854669315925e-05, "loss": 0.0001835683360695839, "step": 209550 }, { "epoch": 59.48339483394834, "grad_norm": 0.17093174159526825, "learning_rate": 4.0541016179392567e-05, "loss": 0.00030646361410617827, "step": 209560 }, { "epoch": 59.4862333238717, "grad_norm": 0.05653679370880127, "learning_rate": 4.05381776894692e-05, "loss": 0.0003068855032324791, "step": 209570 }, { "epoch": 59.489071813795064, "grad_norm": 8.30449390411377, "learning_rate": 4.053533919954584e-05, "loss": 0.0016902435570955276, "step": 209580 }, { "epoch": 59.49191030371842, "grad_norm": 0.3221970200538635, "learning_rate": 4.0532500709622484e-05, "loss": 0.0003624701872467995, "step": 209590 }, { "epoch": 59.49474879364178, "grad_norm": 0.03315882384777069, "learning_rate": 4.052966221969912e-05, "loss": 0.0019966138526797293, "step": 209600 }, { "epoch": 59.497587283565146, "grad_norm": 0.05506691709160805, "learning_rate": 4.052682372977576e-05, "loss": 0.00032933969050645827, "step": 209610 }, { "epoch": 59.5004257734885, "grad_norm": 0.10939708352088928, "learning_rate": 4.05239852398524e-05, "loss": 0.0012741835787892341, "step": 209620 }, { "epoch": 59.503264263411864, "grad_norm": 0.532612144947052, "learning_rate": 4.052114674992904e-05, "loss": 0.0015688519924879074, "step": 209630 }, { "epoch": 59.50610275333523, "grad_norm": 0.19323134422302246, "learning_rate": 4.051830826000568e-05, "loss": 0.0005793161690235138, "step": 209640 }, { "epoch": 59.50894124325858, "grad_norm": 0.06315860897302628, "learning_rate": 4.051546977008232e-05, "loss": 0.00038368292152881624, "step": 209650 }, { "epoch": 59.511779733181946, "grad_norm": 0.03336186707019806, "learning_rate": 4.051263128015896e-05, "loss": 0.0004970654845237732, "step": 209660 }, { "epoch": 59.51461822310531, "grad_norm": 0.06280676275491714, "learning_rate": 4.0509792790235595e-05, "loss": 0.0005774416029453277, "step": 209670 }, { "epoch": 59.51745671302867, "grad_norm": 0.10459858924150467, "learning_rate": 4.0506954300312236e-05, "loss": 0.004001777619123459, "step": 209680 }, { "epoch": 59.52029520295203, "grad_norm": 0.17402468621730804, "learning_rate": 4.050411581038888e-05, "loss": 0.0036673620343208313, "step": 209690 }, { "epoch": 59.52313369287539, "grad_norm": 0.21374420821666718, "learning_rate": 4.050127732046551e-05, "loss": 0.0003277307376265526, "step": 209700 }, { "epoch": 59.52597218279875, "grad_norm": 0.01814146898686886, "learning_rate": 4.049843883054215e-05, "loss": 0.0007129870355129242, "step": 209710 }, { "epoch": 59.52881067272211, "grad_norm": 0.10264556109905243, "learning_rate": 4.0495600340618795e-05, "loss": 0.0006165146827697754, "step": 209720 }, { "epoch": 59.53164916264547, "grad_norm": 0.034034572541713715, "learning_rate": 4.049276185069543e-05, "loss": 0.0002610733732581139, "step": 209730 }, { "epoch": 59.534487652568835, "grad_norm": 0.005010182037949562, "learning_rate": 4.048992336077207e-05, "loss": 0.0014965349808335304, "step": 209740 }, { "epoch": 59.53732614249219, "grad_norm": 0.09571391344070435, "learning_rate": 4.048708487084871e-05, "loss": 0.0007876871153712272, "step": 209750 }, { "epoch": 59.540164632415554, "grad_norm": 0.0985109955072403, "learning_rate": 4.048424638092535e-05, "loss": 0.0006372373551130295, "step": 209760 }, { "epoch": 59.54300312233892, "grad_norm": 0.13740719854831696, "learning_rate": 4.048140789100199e-05, "loss": 0.0007777223363518715, "step": 209770 }, { "epoch": 59.54584161226228, "grad_norm": 0.2751544713973999, "learning_rate": 4.047856940107862e-05, "loss": 0.0005603168159723282, "step": 209780 }, { "epoch": 59.548680102185635, "grad_norm": 0.05920284241437912, "learning_rate": 4.047573091115527e-05, "loss": 0.0023684268817305565, "step": 209790 }, { "epoch": 59.551518592109, "grad_norm": 0.006170699838548899, "learning_rate": 4.0472892421231905e-05, "loss": 0.0005256429314613343, "step": 209800 }, { "epoch": 59.55435708203236, "grad_norm": 0.33179283142089844, "learning_rate": 4.047005393130855e-05, "loss": 0.0015065357089042663, "step": 209810 }, { "epoch": 59.55719557195572, "grad_norm": 0.03181219846010208, "learning_rate": 4.046721544138519e-05, "loss": 0.0002704611048102379, "step": 209820 }, { "epoch": 59.56003406187908, "grad_norm": 0.060679949820041656, "learning_rate": 4.046437695146182e-05, "loss": 0.0006097139790654183, "step": 209830 }, { "epoch": 59.56287255180244, "grad_norm": 0.16131196916103363, "learning_rate": 4.0461538461538464e-05, "loss": 0.0023860838264226913, "step": 209840 }, { "epoch": 59.565711041725805, "grad_norm": 0.02322583645582199, "learning_rate": 4.0458699971615105e-05, "loss": 0.0022280106320977213, "step": 209850 }, { "epoch": 59.56854953164916, "grad_norm": 0.014256380498409271, "learning_rate": 4.045586148169174e-05, "loss": 0.0007122134789824486, "step": 209860 }, { "epoch": 59.571388021572524, "grad_norm": 0.11740683019161224, "learning_rate": 4.045302299176838e-05, "loss": 0.0016997901722788811, "step": 209870 }, { "epoch": 59.57422651149589, "grad_norm": 0.017211854457855225, "learning_rate": 4.0450184501845016e-05, "loss": 0.0005740350112318993, "step": 209880 }, { "epoch": 59.57706500141924, "grad_norm": 0.017044732347130775, "learning_rate": 4.044734601192166e-05, "loss": 0.0004944240674376488, "step": 209890 }, { "epoch": 59.579903491342606, "grad_norm": 0.04875865578651428, "learning_rate": 4.04445075219983e-05, "loss": 0.00034034717828035355, "step": 209900 }, { "epoch": 59.58274198126597, "grad_norm": 0.12790019810199738, "learning_rate": 4.044166903207493e-05, "loss": 0.0005253316834568977, "step": 209910 }, { "epoch": 59.585580471189324, "grad_norm": 0.6637232303619385, "learning_rate": 4.043883054215158e-05, "loss": 0.0008790219202637672, "step": 209920 }, { "epoch": 59.58841896111269, "grad_norm": 0.032912835478782654, "learning_rate": 4.0435992052228216e-05, "loss": 0.0020432541146874427, "step": 209930 }, { "epoch": 59.59125745103605, "grad_norm": 0.09318677335977554, "learning_rate": 4.043315356230486e-05, "loss": 0.0016679927706718446, "step": 209940 }, { "epoch": 59.59409594095941, "grad_norm": 0.016086770221590996, "learning_rate": 4.04303150723815e-05, "loss": 0.0019808027893304823, "step": 209950 }, { "epoch": 59.59693443088277, "grad_norm": 0.2881268560886383, "learning_rate": 4.0427476582458133e-05, "loss": 0.00036832373589277265, "step": 209960 }, { "epoch": 59.59977292080613, "grad_norm": 0.45400920510292053, "learning_rate": 4.042492194152711e-05, "loss": 0.008103539049625397, "step": 209970 }, { "epoch": 59.602611410729494, "grad_norm": 0.04531880468130112, "learning_rate": 4.042208345160375e-05, "loss": 0.00017920136451721191, "step": 209980 }, { "epoch": 59.60544990065285, "grad_norm": 1.6649457216262817, "learning_rate": 4.041924496168038e-05, "loss": 0.0029488641768693922, "step": 209990 }, { "epoch": 59.60828839057621, "grad_norm": 0.12997424602508545, "learning_rate": 4.041640647175703e-05, "loss": 0.0008970633149147034, "step": 210000 }, { "epoch": 59.60828839057621, "eval_accuracy": 0.9756469765371654, "eval_loss": 0.08912095427513123, "eval_runtime": 32.2349, "eval_samples_per_second": 487.887, "eval_steps_per_second": 7.631, "step": 210000 }, { "epoch": 59.611126880499576, "grad_norm": 0.0691925436258316, "learning_rate": 4.0413567981833665e-05, "loss": 0.003855973482131958, "step": 210010 }, { "epoch": 59.61396537042293, "grad_norm": 0.017711127176880836, "learning_rate": 4.041072949191031e-05, "loss": 0.0008747655898332596, "step": 210020 }, { "epoch": 59.616803860346295, "grad_norm": 1.0098774433135986, "learning_rate": 4.040789100198695e-05, "loss": 0.0004560330882668495, "step": 210030 }, { "epoch": 59.61964235026966, "grad_norm": 0.010808572173118591, "learning_rate": 4.040505251206358e-05, "loss": 0.0019031204283237457, "step": 210040 }, { "epoch": 59.62248084019302, "grad_norm": 0.05235874280333519, "learning_rate": 4.0402214022140224e-05, "loss": 0.0012681057676672935, "step": 210050 }, { "epoch": 59.625319330116376, "grad_norm": 0.6612359285354614, "learning_rate": 4.039937553221686e-05, "loss": 0.006590881943702697, "step": 210060 }, { "epoch": 59.62815782003974, "grad_norm": 0.020638419315218925, "learning_rate": 4.03965370422935e-05, "loss": 0.0025346560403704645, "step": 210070 }, { "epoch": 59.6309963099631, "grad_norm": 0.036871735006570816, "learning_rate": 4.039369855237014e-05, "loss": 0.0002831108868122101, "step": 210080 }, { "epoch": 59.63383479988646, "grad_norm": 0.5298957824707031, "learning_rate": 4.0390860062446776e-05, "loss": 0.0028772229328751565, "step": 210090 }, { "epoch": 59.63667328980982, "grad_norm": 0.01628963276743889, "learning_rate": 4.0388021572523424e-05, "loss": 0.00022240150719881057, "step": 210100 }, { "epoch": 59.63951177973318, "grad_norm": 0.004757504444569349, "learning_rate": 4.038518308260006e-05, "loss": 0.001880733110010624, "step": 210110 }, { "epoch": 59.64235026965654, "grad_norm": 0.09656154364347458, "learning_rate": 4.038234459267669e-05, "loss": 0.0008432568982243537, "step": 210120 }, { "epoch": 59.6451887595799, "grad_norm": 0.05068759247660637, "learning_rate": 4.037950610275334e-05, "loss": 0.007057788968086243, "step": 210130 }, { "epoch": 59.648027249503265, "grad_norm": 0.013869618065655231, "learning_rate": 4.0376667612829976e-05, "loss": 0.0023025652393698693, "step": 210140 }, { "epoch": 59.65086573942663, "grad_norm": 0.009449547156691551, "learning_rate": 4.037382912290662e-05, "loss": 0.0009074300527572632, "step": 210150 }, { "epoch": 59.653704229349984, "grad_norm": 0.3935542702674866, "learning_rate": 4.037099063298325e-05, "loss": 0.0011463796719908715, "step": 210160 }, { "epoch": 59.65654271927335, "grad_norm": 9.317618370056152, "learning_rate": 4.0368152143059893e-05, "loss": 0.002867347747087479, "step": 210170 }, { "epoch": 59.65938120919671, "grad_norm": 0.0292881578207016, "learning_rate": 4.0365313653136535e-05, "loss": 0.009246638417243958, "step": 210180 }, { "epoch": 59.662219699120065, "grad_norm": 3.4665985107421875, "learning_rate": 4.036247516321317e-05, "loss": 0.011939964443445205, "step": 210190 }, { "epoch": 59.66505818904343, "grad_norm": 1.0699975490570068, "learning_rate": 4.035963667328981e-05, "loss": 0.0005017891526222229, "step": 210200 }, { "epoch": 59.66789667896679, "grad_norm": 5.382299423217773, "learning_rate": 4.035679818336645e-05, "loss": 0.0055031668394804, "step": 210210 }, { "epoch": 59.67073516889015, "grad_norm": 0.012563210912048817, "learning_rate": 4.035395969344309e-05, "loss": 0.0008338011801242828, "step": 210220 }, { "epoch": 59.67357365881351, "grad_norm": 0.09791925549507141, "learning_rate": 4.0351121203519735e-05, "loss": 0.0018773911520838737, "step": 210230 }, { "epoch": 59.67641214873687, "grad_norm": 0.14343303442001343, "learning_rate": 4.034828271359637e-05, "loss": 0.0007456514984369278, "step": 210240 }, { "epoch": 59.679250638660235, "grad_norm": 0.024527471512556076, "learning_rate": 4.0345444223673004e-05, "loss": 0.0003636350855231285, "step": 210250 }, { "epoch": 59.68208912858359, "grad_norm": 0.013003482483327389, "learning_rate": 4.0342605733749645e-05, "loss": 0.00018302742391824723, "step": 210260 }, { "epoch": 59.684927618506954, "grad_norm": 2.3842275142669678, "learning_rate": 4.033976724382629e-05, "loss": 0.00068411435931921, "step": 210270 }, { "epoch": 59.68776610843032, "grad_norm": 0.21631591022014618, "learning_rate": 4.033692875390293e-05, "loss": 0.0005968298763036727, "step": 210280 }, { "epoch": 59.69060459835367, "grad_norm": 0.06462179124355316, "learning_rate": 4.033409026397956e-05, "loss": 0.0010957717895507812, "step": 210290 }, { "epoch": 59.693443088277036, "grad_norm": 0.09878084808588028, "learning_rate": 4.0331251774056204e-05, "loss": 0.002641037665307522, "step": 210300 }, { "epoch": 59.6962815782004, "grad_norm": 0.01770390197634697, "learning_rate": 4.0328413284132846e-05, "loss": 0.0013707032427191733, "step": 210310 }, { "epoch": 59.69912006812376, "grad_norm": 0.013016712851822376, "learning_rate": 4.032557479420948e-05, "loss": 0.0003132253885269165, "step": 210320 }, { "epoch": 59.70195855804712, "grad_norm": 0.05980139598250389, "learning_rate": 4.032273630428612e-05, "loss": 0.0006172595545649529, "step": 210330 }, { "epoch": 59.70479704797048, "grad_norm": 0.21739649772644043, "learning_rate": 4.031989781436276e-05, "loss": 0.0006977930665016174, "step": 210340 }, { "epoch": 59.70763553789384, "grad_norm": 0.08454285562038422, "learning_rate": 4.03170593244394e-05, "loss": 0.0003020014613866806, "step": 210350 }, { "epoch": 59.7104740278172, "grad_norm": 0.6004785299301147, "learning_rate": 4.031422083451604e-05, "loss": 0.000492185726761818, "step": 210360 }, { "epoch": 59.71331251774056, "grad_norm": 0.09345073252916336, "learning_rate": 4.031138234459268e-05, "loss": 0.00031666383147239686, "step": 210370 }, { "epoch": 59.716151007663925, "grad_norm": 0.017778825014829636, "learning_rate": 4.0308543854669315e-05, "loss": 0.00031419284641742706, "step": 210380 }, { "epoch": 59.71898949758728, "grad_norm": 0.08248747140169144, "learning_rate": 4.0305705364745956e-05, "loss": 0.0005164919421076775, "step": 210390 }, { "epoch": 59.72182798751064, "grad_norm": 2.7587473392486572, "learning_rate": 4.03028668748226e-05, "loss": 0.0025609491392970087, "step": 210400 }, { "epoch": 59.724666477434006, "grad_norm": 0.03173038363456726, "learning_rate": 4.030002838489923e-05, "loss": 0.00046058837324380875, "step": 210410 }, { "epoch": 59.72750496735737, "grad_norm": 0.16959813237190247, "learning_rate": 4.0297189894975874e-05, "loss": 0.003878995031118393, "step": 210420 }, { "epoch": 59.730343457280725, "grad_norm": 0.2533380687236786, "learning_rate": 4.0294351405052515e-05, "loss": 0.00029757600277662276, "step": 210430 }, { "epoch": 59.73318194720409, "grad_norm": 1.6967889070510864, "learning_rate": 4.0291512915129156e-05, "loss": 0.003287537395954132, "step": 210440 }, { "epoch": 59.73602043712745, "grad_norm": 0.01348136831074953, "learning_rate": 4.028867442520579e-05, "loss": 0.003794805705547333, "step": 210450 }, { "epoch": 59.738858927050806, "grad_norm": 1.0062379837036133, "learning_rate": 4.028583593528243e-05, "loss": 0.0009785156697034835, "step": 210460 }, { "epoch": 59.74169741697417, "grad_norm": 1.1179393529891968, "learning_rate": 4.0282997445359074e-05, "loss": 0.0007082734256982804, "step": 210470 }, { "epoch": 59.74453590689753, "grad_norm": 0.07823655754327774, "learning_rate": 4.028015895543571e-05, "loss": 0.0005363060161471366, "step": 210480 }, { "epoch": 59.74737439682089, "grad_norm": 0.1506243795156479, "learning_rate": 4.027732046551235e-05, "loss": 0.0011738160625100135, "step": 210490 }, { "epoch": 59.75021288674425, "grad_norm": 0.2562336325645447, "learning_rate": 4.027448197558899e-05, "loss": 0.0036632973700761796, "step": 210500 }, { "epoch": 59.75021288674425, "eval_accuracy": 0.9784447129140967, "eval_loss": 0.0848691314458847, "eval_runtime": 32.42, "eval_samples_per_second": 485.102, "eval_steps_per_second": 7.588, "step": 210500 }, { "epoch": 59.753051376667614, "grad_norm": 0.6554681658744812, "learning_rate": 4.0271643485665626e-05, "loss": 0.0009761733934283256, "step": 210510 }, { "epoch": 59.75588986659098, "grad_norm": 0.3382176458835602, "learning_rate": 4.026880499574227e-05, "loss": 0.0010560821741819383, "step": 210520 }, { "epoch": 59.75872835651433, "grad_norm": 0.6081188321113586, "learning_rate": 4.026596650581891e-05, "loss": 0.0015516530722379685, "step": 210530 }, { "epoch": 59.761566846437695, "grad_norm": 0.11982458084821701, "learning_rate": 4.026312801589554e-05, "loss": 0.0031986549496650697, "step": 210540 }, { "epoch": 59.76440533636106, "grad_norm": 0.08546780794858932, "learning_rate": 4.0260289525972184e-05, "loss": 0.0005486521869897843, "step": 210550 }, { "epoch": 59.767243826284414, "grad_norm": 0.04678124189376831, "learning_rate": 4.0257451036048826e-05, "loss": 0.001836995780467987, "step": 210560 }, { "epoch": 59.77008231620778, "grad_norm": 0.13658803701400757, "learning_rate": 4.025461254612547e-05, "loss": 0.0005544567480683327, "step": 210570 }, { "epoch": 59.77292080613114, "grad_norm": 0.1765584796667099, "learning_rate": 4.02517740562021e-05, "loss": 0.003435543179512024, "step": 210580 }, { "epoch": 59.7757592960545, "grad_norm": 0.08655203878879547, "learning_rate": 4.0248935566278736e-05, "loss": 0.011410191655158997, "step": 210590 }, { "epoch": 59.77859778597786, "grad_norm": 0.1530972123146057, "learning_rate": 4.0246097076355384e-05, "loss": 0.003193509578704834, "step": 210600 }, { "epoch": 59.78143627590122, "grad_norm": 0.10806639492511749, "learning_rate": 4.024325858643202e-05, "loss": 0.0005395375192165375, "step": 210610 }, { "epoch": 59.784274765824584, "grad_norm": 0.33152273297309875, "learning_rate": 4.024042009650866e-05, "loss": 0.0003270072862505913, "step": 210620 }, { "epoch": 59.78711325574794, "grad_norm": 0.007902678102254868, "learning_rate": 4.02375816065853e-05, "loss": 0.00020964201539754867, "step": 210630 }, { "epoch": 59.7899517456713, "grad_norm": 0.16649530827999115, "learning_rate": 4.0234743116661936e-05, "loss": 0.0004693400114774704, "step": 210640 }, { "epoch": 59.792790235594666, "grad_norm": 0.19708706438541412, "learning_rate": 4.023190462673858e-05, "loss": 0.0005341922864317894, "step": 210650 }, { "epoch": 59.79562872551802, "grad_norm": 0.0457807257771492, "learning_rate": 4.022906613681522e-05, "loss": 0.0014165516942739487, "step": 210660 }, { "epoch": 59.798467215441384, "grad_norm": 0.036932822316884995, "learning_rate": 4.0226227646891854e-05, "loss": 0.00040470268577337266, "step": 210670 }, { "epoch": 59.80130570536475, "grad_norm": 0.13668781518936157, "learning_rate": 4.0223389156968495e-05, "loss": 0.00048705916851758956, "step": 210680 }, { "epoch": 59.80414419528811, "grad_norm": 0.16861586272716522, "learning_rate": 4.022055066704513e-05, "loss": 0.003967031836509705, "step": 210690 }, { "epoch": 59.806982685211466, "grad_norm": 0.23143552243709564, "learning_rate": 4.021771217712178e-05, "loss": 0.00048358254134655, "step": 210700 }, { "epoch": 59.80982117513483, "grad_norm": 0.2588290572166443, "learning_rate": 4.021487368719841e-05, "loss": 0.0012872416526079177, "step": 210710 }, { "epoch": 59.81265966505819, "grad_norm": 0.010121159255504608, "learning_rate": 4.021203519727505e-05, "loss": 0.004198487102985382, "step": 210720 }, { "epoch": 59.81549815498155, "grad_norm": 0.1647483855485916, "learning_rate": 4.0209196707351695e-05, "loss": 0.0010204460471868516, "step": 210730 }, { "epoch": 59.81833664490491, "grad_norm": 0.09879885613918304, "learning_rate": 4.020635821742833e-05, "loss": 0.0009374627843499184, "step": 210740 }, { "epoch": 59.82117513482827, "grad_norm": 0.11048033833503723, "learning_rate": 4.020351972750497e-05, "loss": 0.0010093601420521736, "step": 210750 }, { "epoch": 59.82401362475163, "grad_norm": 0.035544995218515396, "learning_rate": 4.020068123758161e-05, "loss": 0.0022141106426715853, "step": 210760 }, { "epoch": 59.82685211467499, "grad_norm": 0.558935821056366, "learning_rate": 4.019784274765825e-05, "loss": 0.0016254760324954987, "step": 210770 }, { "epoch": 59.829690604598355, "grad_norm": 0.10218647122383118, "learning_rate": 4.019500425773489e-05, "loss": 0.00023770444095134736, "step": 210780 }, { "epoch": 59.83252909452172, "grad_norm": 0.024288896471261978, "learning_rate": 4.019216576781152e-05, "loss": 0.0006508735939860343, "step": 210790 }, { "epoch": 59.83536758444507, "grad_norm": 0.05534564331173897, "learning_rate": 4.0189327277888165e-05, "loss": 0.0007842393592000007, "step": 210800 }, { "epoch": 59.838206074368436, "grad_norm": 0.05127452686429024, "learning_rate": 4.0186488787964806e-05, "loss": 0.0006188103929162025, "step": 210810 }, { "epoch": 59.8410445642918, "grad_norm": 0.21261249482631683, "learning_rate": 4.018365029804144e-05, "loss": 0.0010247547179460526, "step": 210820 }, { "epoch": 59.843883054215155, "grad_norm": 0.016320256516337395, "learning_rate": 4.018081180811808e-05, "loss": 0.00053226538002491, "step": 210830 }, { "epoch": 59.84672154413852, "grad_norm": 0.074388287961483, "learning_rate": 4.017797331819472e-05, "loss": 0.005699943006038666, "step": 210840 }, { "epoch": 59.84956003406188, "grad_norm": 0.04246428608894348, "learning_rate": 4.017513482827136e-05, "loss": 0.0010278725996613503, "step": 210850 }, { "epoch": 59.85239852398524, "grad_norm": 0.015400438569486141, "learning_rate": 4.0172296338348006e-05, "loss": 0.00028403401374816896, "step": 210860 }, { "epoch": 59.8552370139086, "grad_norm": 0.2638717591762543, "learning_rate": 4.016945784842464e-05, "loss": 0.0018944313749670982, "step": 210870 }, { "epoch": 59.85807550383196, "grad_norm": 0.004116884898394346, "learning_rate": 4.0166619358501275e-05, "loss": 0.00039854831993579865, "step": 210880 }, { "epoch": 59.860913993755325, "grad_norm": 0.0444093756377697, "learning_rate": 4.0163780868577917e-05, "loss": 0.00032896660268306733, "step": 210890 }, { "epoch": 59.86375248367868, "grad_norm": 0.040541838854551315, "learning_rate": 4.016094237865456e-05, "loss": 0.0003893911838531494, "step": 210900 }, { "epoch": 59.866590973602044, "grad_norm": 0.026285527274012566, "learning_rate": 4.01581038887312e-05, "loss": 0.00041197557002305987, "step": 210910 }, { "epoch": 59.86942946352541, "grad_norm": 0.17691224813461304, "learning_rate": 4.0155265398807834e-05, "loss": 0.00020665060728788375, "step": 210920 }, { "epoch": 59.87226795344876, "grad_norm": 0.0331229530274868, "learning_rate": 4.0152426908884475e-05, "loss": 0.00011796709150075912, "step": 210930 }, { "epoch": 59.875106443372125, "grad_norm": 0.030056696385145187, "learning_rate": 4.014958841896112e-05, "loss": 0.0006768664345145226, "step": 210940 }, { "epoch": 59.87794493329549, "grad_norm": 0.15968599915504456, "learning_rate": 4.014674992903775e-05, "loss": 0.0002116737887263298, "step": 210950 }, { "epoch": 59.880783423218844, "grad_norm": 0.021295493468642235, "learning_rate": 4.014391143911439e-05, "loss": 0.0007686126977205276, "step": 210960 }, { "epoch": 59.88362191314221, "grad_norm": 0.3816732168197632, "learning_rate": 4.0141072949191034e-05, "loss": 0.0002393105998635292, "step": 210970 }, { "epoch": 59.88646040306557, "grad_norm": 0.012521645054221153, "learning_rate": 4.013823445926767e-05, "loss": 0.00029872506856918333, "step": 210980 }, { "epoch": 59.88929889298893, "grad_norm": 0.0838887020945549, "learning_rate": 4.013539596934431e-05, "loss": 0.0003717290237545967, "step": 210990 }, { "epoch": 59.89213738291229, "grad_norm": 0.0840463861823082, "learning_rate": 4.013255747942095e-05, "loss": 0.00020768344402313232, "step": 211000 }, { "epoch": 59.89213738291229, "eval_accuracy": 0.979779996184905, "eval_loss": 0.07591086626052856, "eval_runtime": 32.9449, "eval_samples_per_second": 477.372, "eval_steps_per_second": 7.467, "step": 211000 }, { "epoch": 59.89497587283565, "grad_norm": 0.07254587858915329, "learning_rate": 4.0129718989497586e-05, "loss": 0.00016259998083114623, "step": 211010 }, { "epoch": 59.897814362759014, "grad_norm": 0.007282676640897989, "learning_rate": 4.012688049957423e-05, "loss": 0.00018723011016845702, "step": 211020 }, { "epoch": 59.90065285268237, "grad_norm": 0.03641302138566971, "learning_rate": 4.012404200965087e-05, "loss": 0.0009120993316173553, "step": 211030 }, { "epoch": 59.90349134260573, "grad_norm": 0.029335318133234978, "learning_rate": 4.012120351972751e-05, "loss": 0.0012496639043092727, "step": 211040 }, { "epoch": 59.906329832529096, "grad_norm": 3.793973922729492, "learning_rate": 4.0118365029804145e-05, "loss": 0.00088198222219944, "step": 211050 }, { "epoch": 59.90916832245246, "grad_norm": 0.031840045005083084, "learning_rate": 4.0115526539880786e-05, "loss": 0.001798710972070694, "step": 211060 }, { "epoch": 59.912006812375814, "grad_norm": 0.03873513266444206, "learning_rate": 4.011268804995743e-05, "loss": 0.0021045539528131486, "step": 211070 }, { "epoch": 59.91484530229918, "grad_norm": 0.019491327926516533, "learning_rate": 4.010984956003406e-05, "loss": 0.0003326389938592911, "step": 211080 }, { "epoch": 59.91768379222254, "grad_norm": 0.033514540642499924, "learning_rate": 4.0107011070110703e-05, "loss": 0.0003185467794537544, "step": 211090 }, { "epoch": 59.920522282145896, "grad_norm": 0.06798397749662399, "learning_rate": 4.0104172580187345e-05, "loss": 0.00032975375652313235, "step": 211100 }, { "epoch": 59.92336077206926, "grad_norm": 0.013308022171258926, "learning_rate": 4.010133409026398e-05, "loss": 0.00027086157351732256, "step": 211110 }, { "epoch": 59.92619926199262, "grad_norm": 15.317023277282715, "learning_rate": 4.009849560034062e-05, "loss": 0.004647216200828553, "step": 211120 }, { "epoch": 59.92903775191598, "grad_norm": 0.0389261357486248, "learning_rate": 4.009565711041726e-05, "loss": 0.0007181994616985321, "step": 211130 }, { "epoch": 59.93187624183934, "grad_norm": 0.03205104172229767, "learning_rate": 4.00928186204939e-05, "loss": 0.007881271839141845, "step": 211140 }, { "epoch": 59.9347147317627, "grad_norm": 0.013270176015794277, "learning_rate": 4.008998013057054e-05, "loss": 0.0005778089165687561, "step": 211150 }, { "epoch": 59.937553221686066, "grad_norm": 0.1708674281835556, "learning_rate": 4.008714164064718e-05, "loss": 0.000528215616941452, "step": 211160 }, { "epoch": 59.94039171160942, "grad_norm": 0.010576403699815273, "learning_rate": 4.008430315072382e-05, "loss": 0.002294227108359337, "step": 211170 }, { "epoch": 59.943230201532785, "grad_norm": 0.21540026366710663, "learning_rate": 4.0081464660800455e-05, "loss": 0.001164194382727146, "step": 211180 }, { "epoch": 59.94606869145615, "grad_norm": 0.04402341693639755, "learning_rate": 4.007862617087709e-05, "loss": 0.0010401466861367226, "step": 211190 }, { "epoch": 59.948907181379504, "grad_norm": 0.03376477211713791, "learning_rate": 4.007578768095374e-05, "loss": 0.000520293414592743, "step": 211200 }, { "epoch": 59.95174567130287, "grad_norm": 0.2946440875530243, "learning_rate": 4.007294919103037e-05, "loss": 0.0001273093745112419, "step": 211210 }, { "epoch": 59.95458416122623, "grad_norm": 0.06882165372371674, "learning_rate": 4.0070110701107014e-05, "loss": 0.0007314592599868775, "step": 211220 }, { "epoch": 59.957422651149585, "grad_norm": 1.1934900283813477, "learning_rate": 4.0067272211183656e-05, "loss": 0.0006105324253439903, "step": 211230 }, { "epoch": 59.96026114107295, "grad_norm": 0.0524231381714344, "learning_rate": 4.006443372126029e-05, "loss": 0.0005541209131479264, "step": 211240 }, { "epoch": 59.96309963099631, "grad_norm": 0.023173538967967033, "learning_rate": 4.006159523133693e-05, "loss": 0.002034512721002102, "step": 211250 }, { "epoch": 59.965938120919674, "grad_norm": 0.13710714876651764, "learning_rate": 4.005875674141357e-05, "loss": 0.0002684036269783974, "step": 211260 }, { "epoch": 59.96877661084303, "grad_norm": 0.03873835876584053, "learning_rate": 4.005591825149021e-05, "loss": 0.00039734169840812685, "step": 211270 }, { "epoch": 59.97161510076639, "grad_norm": 0.04001837223768234, "learning_rate": 4.005307976156685e-05, "loss": 0.00018114298582077025, "step": 211280 }, { "epoch": 59.974453590689755, "grad_norm": 0.013866518624126911, "learning_rate": 4.0050241271643483e-05, "loss": 0.000873916782438755, "step": 211290 }, { "epoch": 59.97729208061311, "grad_norm": 0.041815295815467834, "learning_rate": 4.0047402781720125e-05, "loss": 0.00014318842440843582, "step": 211300 }, { "epoch": 59.980130570536474, "grad_norm": 0.898971438407898, "learning_rate": 4.0044564291796766e-05, "loss": 0.0006424741819500923, "step": 211310 }, { "epoch": 59.98296906045984, "grad_norm": 0.01878456585109234, "learning_rate": 4.00417258018734e-05, "loss": 0.00011926684528589249, "step": 211320 }, { "epoch": 59.98580755038319, "grad_norm": 0.0746101513504982, "learning_rate": 4.003888731195005e-05, "loss": 0.004294080659747124, "step": 211330 }, { "epoch": 59.988646040306556, "grad_norm": 0.13133519887924194, "learning_rate": 4.0036048822026684e-05, "loss": 0.0015269083902239799, "step": 211340 }, { "epoch": 59.99148453022992, "grad_norm": 0.03762432560324669, "learning_rate": 4.003321033210332e-05, "loss": 0.0003268269822001457, "step": 211350 }, { "epoch": 59.99432302015328, "grad_norm": 0.06108442321419716, "learning_rate": 4.0030371842179966e-05, "loss": 0.0007362829521298409, "step": 211360 }, { "epoch": 59.99716151007664, "grad_norm": 0.0351344496011734, "learning_rate": 4.00275333522566e-05, "loss": 0.0020063603296875954, "step": 211370 }, { "epoch": 60.0, "grad_norm": 0.002290131291374564, "learning_rate": 4.002469486233324e-05, "loss": 0.00046923854388296603, "step": 211380 }, { "epoch": 60.00283848992336, "grad_norm": 0.1879780888557434, "learning_rate": 4.002185637240988e-05, "loss": 0.0007167583331465721, "step": 211390 }, { "epoch": 60.00567697984672, "grad_norm": 0.24673084914684296, "learning_rate": 4.001901788248652e-05, "loss": 0.0003234041854739189, "step": 211400 }, { "epoch": 60.00851546977008, "grad_norm": 0.5992982983589172, "learning_rate": 4.001617939256316e-05, "loss": 0.00522833839058876, "step": 211410 }, { "epoch": 60.011353959693444, "grad_norm": 3.305943489074707, "learning_rate": 4.0013340902639794e-05, "loss": 0.0008543869480490685, "step": 211420 }, { "epoch": 60.01419244961681, "grad_norm": 6.160771369934082, "learning_rate": 4.0010502412716436e-05, "loss": 0.0008202288299798965, "step": 211430 }, { "epoch": 60.01703093954016, "grad_norm": 0.9627863168716431, "learning_rate": 4.000766392279308e-05, "loss": 0.0002718629315495491, "step": 211440 }, { "epoch": 60.019869429463526, "grad_norm": 0.021657360717654228, "learning_rate": 4.000482543286971e-05, "loss": 0.0003370434045791626, "step": 211450 }, { "epoch": 60.02270791938689, "grad_norm": 0.025875987485051155, "learning_rate": 4.000198694294636e-05, "loss": 0.0037871208041906357, "step": 211460 }, { "epoch": 60.025546409310245, "grad_norm": 0.021952837705612183, "learning_rate": 3.9999148453022994e-05, "loss": 0.0014201078563928604, "step": 211470 }, { "epoch": 60.02838489923361, "grad_norm": 0.6657183766365051, "learning_rate": 3.999630996309963e-05, "loss": 0.00039950571954250336, "step": 211480 }, { "epoch": 60.03122338915697, "grad_norm": 0.03816501051187515, "learning_rate": 3.999347147317627e-05, "loss": 0.0010640479624271393, "step": 211490 }, { "epoch": 60.034061879080326, "grad_norm": 0.018671464174985886, "learning_rate": 3.999063298325291e-05, "loss": 0.0004991693422198296, "step": 211500 }, { "epoch": 60.034061879080326, "eval_accuracy": 0.9791441470083296, "eval_loss": 0.0776624083518982, "eval_runtime": 32.9764, "eval_samples_per_second": 476.917, "eval_steps_per_second": 7.46, "step": 211500 }, { "epoch": 60.03690036900369, "grad_norm": 0.020353300496935844, "learning_rate": 3.998779449332955e-05, "loss": 0.002411297336220741, "step": 211510 }, { "epoch": 60.03973885892705, "grad_norm": 0.11460176855325699, "learning_rate": 3.998495600340619e-05, "loss": 0.0033170025795698165, "step": 211520 }, { "epoch": 60.042577348850415, "grad_norm": 0.028718817979097366, "learning_rate": 3.998211751348283e-05, "loss": 0.00036693159490823745, "step": 211530 }, { "epoch": 60.04541583877377, "grad_norm": 0.03270211070775986, "learning_rate": 3.997927902355947e-05, "loss": 0.0003207683563232422, "step": 211540 }, { "epoch": 60.04825432869713, "grad_norm": 0.18885424733161926, "learning_rate": 3.9976440533636105e-05, "loss": 0.00022667460143566132, "step": 211550 }, { "epoch": 60.051092818620496, "grad_norm": 0.04822346195578575, "learning_rate": 3.9973602043712746e-05, "loss": 0.00021669473499059678, "step": 211560 }, { "epoch": 60.05393130854385, "grad_norm": 0.0725894421339035, "learning_rate": 3.997076355378939e-05, "loss": 0.00016036089509725572, "step": 211570 }, { "epoch": 60.056769798467215, "grad_norm": 0.03381437063217163, "learning_rate": 3.996792506386602e-05, "loss": 0.0001748962327837944, "step": 211580 }, { "epoch": 60.05960828839058, "grad_norm": 0.002026122761890292, "learning_rate": 3.9965086573942664e-05, "loss": 0.0001487504690885544, "step": 211590 }, { "epoch": 60.062446778313934, "grad_norm": 0.04309766739606857, "learning_rate": 3.9962248084019305e-05, "loss": 0.0001339823007583618, "step": 211600 }, { "epoch": 60.0652852682373, "grad_norm": 0.015863489359617233, "learning_rate": 3.995940959409594e-05, "loss": 0.00028341971337795255, "step": 211610 }, { "epoch": 60.06812375816066, "grad_norm": 0.0060238041914999485, "learning_rate": 3.995657110417258e-05, "loss": 0.00032542645931243896, "step": 211620 }, { "epoch": 60.07096224808402, "grad_norm": 0.03819098323583603, "learning_rate": 3.995373261424922e-05, "loss": 0.00019183997064828873, "step": 211630 }, { "epoch": 60.07380073800738, "grad_norm": 2.3974475860595703, "learning_rate": 3.9950894124325864e-05, "loss": 0.0005217483267188072, "step": 211640 }, { "epoch": 60.07663922793074, "grad_norm": 0.04059465602040291, "learning_rate": 3.99480556344025e-05, "loss": 0.0011557983234524727, "step": 211650 }, { "epoch": 60.079477717854104, "grad_norm": 0.1702825278043747, "learning_rate": 3.994521714447914e-05, "loss": 0.00036404523998498914, "step": 211660 }, { "epoch": 60.08231620777746, "grad_norm": 0.06325136870145798, "learning_rate": 3.994237865455578e-05, "loss": 0.0010485131293535232, "step": 211670 }, { "epoch": 60.08515469770082, "grad_norm": 0.5279908776283264, "learning_rate": 3.9939540164632416e-05, "loss": 0.0001712080091238022, "step": 211680 }, { "epoch": 60.087993187624186, "grad_norm": 0.012116272002458572, "learning_rate": 3.993670167470906e-05, "loss": 0.0006543787196278573, "step": 211690 }, { "epoch": 60.09083167754754, "grad_norm": 0.4666938781738281, "learning_rate": 3.99338631847857e-05, "loss": 0.0004225371405482292, "step": 211700 }, { "epoch": 60.093670167470904, "grad_norm": 0.3102080225944519, "learning_rate": 3.993102469486233e-05, "loss": 0.0001693522557616234, "step": 211710 }, { "epoch": 60.09650865739427, "grad_norm": 3.2004315853118896, "learning_rate": 3.9928186204938974e-05, "loss": 0.002993980050086975, "step": 211720 }, { "epoch": 60.09934714731763, "grad_norm": 0.05361263453960419, "learning_rate": 3.9925347715015616e-05, "loss": 0.000644853338599205, "step": 211730 }, { "epoch": 60.102185637240986, "grad_norm": 2.5461912155151367, "learning_rate": 3.992250922509225e-05, "loss": 0.0009059201925992966, "step": 211740 }, { "epoch": 60.10502412716435, "grad_norm": 0.009339678101241589, "learning_rate": 3.991967073516889e-05, "loss": 0.0006791934370994568, "step": 211750 }, { "epoch": 60.10786261708771, "grad_norm": 0.5242336988449097, "learning_rate": 3.991683224524553e-05, "loss": 0.00024700555950403215, "step": 211760 }, { "epoch": 60.11070110701107, "grad_norm": 0.26593783497810364, "learning_rate": 3.991399375532217e-05, "loss": 0.00022332407534122466, "step": 211770 }, { "epoch": 60.11353959693443, "grad_norm": 0.012928388081490993, "learning_rate": 3.991115526539881e-05, "loss": 0.008300552517175675, "step": 211780 }, { "epoch": 60.11637808685779, "grad_norm": 0.023058686405420303, "learning_rate": 3.990831677547545e-05, "loss": 0.0002999361604452133, "step": 211790 }, { "epoch": 60.119216576781156, "grad_norm": 0.3228331208229065, "learning_rate": 3.990547828555209e-05, "loss": 0.0012249099090695381, "step": 211800 }, { "epoch": 60.12205506670451, "grad_norm": 0.053107134997844696, "learning_rate": 3.9902639795628727e-05, "loss": 0.00047668609768152236, "step": 211810 }, { "epoch": 60.124893556627875, "grad_norm": 0.14417718350887299, "learning_rate": 3.989980130570536e-05, "loss": 0.000939561054110527, "step": 211820 }, { "epoch": 60.12773204655124, "grad_norm": 0.06709910184144974, "learning_rate": 3.989696281578201e-05, "loss": 0.0006642637774348259, "step": 211830 }, { "epoch": 60.13057053647459, "grad_norm": 0.42655712366104126, "learning_rate": 3.9894124325858644e-05, "loss": 0.003091414086520672, "step": 211840 }, { "epoch": 60.133409026397956, "grad_norm": 0.032784584909677505, "learning_rate": 3.9891285835935285e-05, "loss": 0.003954719752073288, "step": 211850 }, { "epoch": 60.13624751632132, "grad_norm": 0.08716005086898804, "learning_rate": 3.988844734601193e-05, "loss": 0.0018747624009847641, "step": 211860 }, { "epoch": 60.139086006244675, "grad_norm": 0.15843982994556427, "learning_rate": 3.988560885608856e-05, "loss": 0.00017254594713449477, "step": 211870 }, { "epoch": 60.14192449616804, "grad_norm": 0.4905562698841095, "learning_rate": 3.98827703661652e-05, "loss": 0.0009080246090888977, "step": 211880 }, { "epoch": 60.1447629860914, "grad_norm": 0.02251243032515049, "learning_rate": 3.9879931876241844e-05, "loss": 0.0002464715391397476, "step": 211890 }, { "epoch": 60.14760147601476, "grad_norm": 0.009806018322706223, "learning_rate": 3.987709338631848e-05, "loss": 0.0007453549653291703, "step": 211900 }, { "epoch": 60.15043996593812, "grad_norm": 0.10268932580947876, "learning_rate": 3.987425489639512e-05, "loss": 0.0008415870368480682, "step": 211910 }, { "epoch": 60.15327845586148, "grad_norm": 0.15582704544067383, "learning_rate": 3.9871416406471755e-05, "loss": 0.000776963122189045, "step": 211920 }, { "epoch": 60.156116945784845, "grad_norm": 0.1932487040758133, "learning_rate": 3.98685779165484e-05, "loss": 0.008484239131212235, "step": 211930 }, { "epoch": 60.1589554357082, "grad_norm": 0.03270683065056801, "learning_rate": 3.986573942662504e-05, "loss": 0.00016201771795749665, "step": 211940 }, { "epoch": 60.161793925631564, "grad_norm": 0.45067501068115234, "learning_rate": 3.986290093670167e-05, "loss": 0.0008119748905301094, "step": 211950 }, { "epoch": 60.16463241555493, "grad_norm": 0.01790882833302021, "learning_rate": 3.986006244677832e-05, "loss": 0.00020392239093780518, "step": 211960 }, { "epoch": 60.16747090547828, "grad_norm": 0.052002277225255966, "learning_rate": 3.9857223956854955e-05, "loss": 0.0023178767412900926, "step": 211970 }, { "epoch": 60.170309395401645, "grad_norm": 0.19074980914592743, "learning_rate": 3.9854385466931596e-05, "loss": 0.0009038664400577545, "step": 211980 }, { "epoch": 60.17314788532501, "grad_norm": 20.184677124023438, "learning_rate": 3.985154697700824e-05, "loss": 0.011955204606056213, "step": 211990 }, { "epoch": 60.17598637524837, "grad_norm": 0.3196350038051605, "learning_rate": 3.984870848708487e-05, "loss": 0.0003661520779132843, "step": 212000 }, { "epoch": 60.17598637524837, "eval_accuracy": 0.9708781077128505, "eval_loss": 0.11928322166204453, "eval_runtime": 32.6167, "eval_samples_per_second": 482.176, "eval_steps_per_second": 7.542, "step": 212000 }, { "epoch": 60.17882486517173, "grad_norm": 0.02429787814617157, "learning_rate": 3.984586999716151e-05, "loss": 0.0035189088433980943, "step": 212010 }, { "epoch": 60.18166335509509, "grad_norm": 0.13881230354309082, "learning_rate": 3.984303150723815e-05, "loss": 0.0012731853872537612, "step": 212020 }, { "epoch": 60.18450184501845, "grad_norm": 0.31600329279899597, "learning_rate": 3.984019301731479e-05, "loss": 0.00039008036255836485, "step": 212030 }, { "epoch": 60.18734033494181, "grad_norm": 1.4159480333328247, "learning_rate": 3.983735452739143e-05, "loss": 0.0005129110068082809, "step": 212040 }, { "epoch": 60.19017882486517, "grad_norm": 3.4235918521881104, "learning_rate": 3.9834516037468065e-05, "loss": 0.0004810469225049019, "step": 212050 }, { "epoch": 60.193017314788534, "grad_norm": 0.04775285720825195, "learning_rate": 3.9831677547544713e-05, "loss": 0.00014351978898048402, "step": 212060 }, { "epoch": 60.19585580471189, "grad_norm": 0.20962877571582794, "learning_rate": 3.982883905762135e-05, "loss": 0.0025354975834488867, "step": 212070 }, { "epoch": 60.19869429463525, "grad_norm": 0.054245539009571075, "learning_rate": 3.982600056769798e-05, "loss": 0.0006039263680577278, "step": 212080 }, { "epoch": 60.201532784558616, "grad_norm": 0.1491963118314743, "learning_rate": 3.982316207777463e-05, "loss": 0.0005111075937747955, "step": 212090 }, { "epoch": 60.20437127448198, "grad_norm": 0.7442827820777893, "learning_rate": 3.9820323587851265e-05, "loss": 0.0005620602518320083, "step": 212100 }, { "epoch": 60.207209764405334, "grad_norm": 0.8530558347702026, "learning_rate": 3.981748509792791e-05, "loss": 0.0006948946043848992, "step": 212110 }, { "epoch": 60.2100482543287, "grad_norm": 0.0891876220703125, "learning_rate": 3.981464660800454e-05, "loss": 0.0005162809044122696, "step": 212120 }, { "epoch": 60.21288674425206, "grad_norm": 0.03871693089604378, "learning_rate": 3.981180811808118e-05, "loss": 0.00027050022035837176, "step": 212130 }, { "epoch": 60.215725234175416, "grad_norm": 0.132669135928154, "learning_rate": 3.9808969628157824e-05, "loss": 0.0005691112950444221, "step": 212140 }, { "epoch": 60.21856372409878, "grad_norm": 0.5355362892150879, "learning_rate": 3.980613113823446e-05, "loss": 0.001176760345697403, "step": 212150 }, { "epoch": 60.22140221402214, "grad_norm": 1.4414494037628174, "learning_rate": 3.98032926483111e-05, "loss": 0.000702892430126667, "step": 212160 }, { "epoch": 60.2242407039455, "grad_norm": 0.34948575496673584, "learning_rate": 3.980045415838774e-05, "loss": 0.00034031178802251816, "step": 212170 }, { "epoch": 60.22707919386886, "grad_norm": 0.18872098624706268, "learning_rate": 3.9797615668464376e-05, "loss": 0.0003749992698431015, "step": 212180 }, { "epoch": 60.22991768379222, "grad_norm": 0.1686212420463562, "learning_rate": 3.979477717854102e-05, "loss": 0.0004081733524799347, "step": 212190 }, { "epoch": 60.232756173715586, "grad_norm": 0.11145982146263123, "learning_rate": 3.979193868861766e-05, "loss": 0.0008456252515316009, "step": 212200 }, { "epoch": 60.23559466363894, "grad_norm": 0.07316011935472488, "learning_rate": 3.9789100198694293e-05, "loss": 0.0005921712145209312, "step": 212210 }, { "epoch": 60.238433153562305, "grad_norm": 0.09485489875078201, "learning_rate": 3.9786261708770935e-05, "loss": 0.002485986053943634, "step": 212220 }, { "epoch": 60.24127164348567, "grad_norm": 0.045263953506946564, "learning_rate": 3.9783423218847576e-05, "loss": 0.0005272664129734039, "step": 212230 }, { "epoch": 60.24411013340902, "grad_norm": 0.10300051420927048, "learning_rate": 3.978058472892421e-05, "loss": 0.0001920163631439209, "step": 212240 }, { "epoch": 60.246948623332386, "grad_norm": 0.13175544142723083, "learning_rate": 3.977774623900085e-05, "loss": 0.0002532050013542175, "step": 212250 }, { "epoch": 60.24978711325575, "grad_norm": 0.1971817910671234, "learning_rate": 3.9774907749077494e-05, "loss": 0.0012876838445663452, "step": 212260 }, { "epoch": 60.25262560317911, "grad_norm": 0.21744953095912933, "learning_rate": 3.9772069259154135e-05, "loss": 0.0004742663353681564, "step": 212270 }, { "epoch": 60.25546409310247, "grad_norm": 0.05233932286500931, "learning_rate": 3.976923076923077e-05, "loss": 0.013525614142417907, "step": 212280 }, { "epoch": 60.25830258302583, "grad_norm": 0.03086390532553196, "learning_rate": 3.976639227930741e-05, "loss": 0.0009335322305560112, "step": 212290 }, { "epoch": 60.261141072949194, "grad_norm": 0.015302402898669243, "learning_rate": 3.976355378938405e-05, "loss": 0.0003717370331287384, "step": 212300 }, { "epoch": 60.26397956287255, "grad_norm": 0.05667561665177345, "learning_rate": 3.976071529946069e-05, "loss": 0.005561895295977592, "step": 212310 }, { "epoch": 60.26681805279591, "grad_norm": 0.6445702314376831, "learning_rate": 3.975787680953733e-05, "loss": 0.0024014754220843316, "step": 212320 }, { "epoch": 60.269656542719275, "grad_norm": 0.5079824328422546, "learning_rate": 3.975503831961397e-05, "loss": 0.0016381965950131416, "step": 212330 }, { "epoch": 60.27249503264263, "grad_norm": 0.07116887718439102, "learning_rate": 3.9752199829690604e-05, "loss": 0.00028000157326459887, "step": 212340 }, { "epoch": 60.275333522565994, "grad_norm": 0.2740797996520996, "learning_rate": 3.9749361339767246e-05, "loss": 0.0006645273417234421, "step": 212350 }, { "epoch": 60.27817201248936, "grad_norm": 0.8726823925971985, "learning_rate": 3.974652284984389e-05, "loss": 0.0008790990337729454, "step": 212360 }, { "epoch": 60.28101050241272, "grad_norm": 0.024547360837459564, "learning_rate": 3.974368435992052e-05, "loss": 0.0012587549164891243, "step": 212370 }, { "epoch": 60.283848992336075, "grad_norm": 0.023781413212418556, "learning_rate": 3.974084586999716e-05, "loss": 0.0010516205802559853, "step": 212380 }, { "epoch": 60.28668748225944, "grad_norm": 5.040438652038574, "learning_rate": 3.9738007380073804e-05, "loss": 0.0026662316173315047, "step": 212390 }, { "epoch": 60.2895259721828, "grad_norm": 0.9050663113594055, "learning_rate": 3.9735168890150446e-05, "loss": 0.00043822582811117174, "step": 212400 }, { "epoch": 60.29236446210616, "grad_norm": 0.2255355715751648, "learning_rate": 3.973233040022708e-05, "loss": 0.0004996765404939651, "step": 212410 }, { "epoch": 60.29520295202952, "grad_norm": 0.669003963470459, "learning_rate": 3.9729491910303715e-05, "loss": 0.0008336296305060387, "step": 212420 }, { "epoch": 60.29804144195288, "grad_norm": 0.011262867599725723, "learning_rate": 3.972665342038036e-05, "loss": 0.0005226518958806991, "step": 212430 }, { "epoch": 60.30087993187624, "grad_norm": 1.670279622077942, "learning_rate": 3.9723814930457e-05, "loss": 0.0005961356684565545, "step": 212440 }, { "epoch": 60.3037184217996, "grad_norm": 0.1535409688949585, "learning_rate": 3.972097644053364e-05, "loss": 0.0008101677522063255, "step": 212450 }, { "epoch": 60.306556911722964, "grad_norm": 0.8125361800193787, "learning_rate": 3.971813795061028e-05, "loss": 0.010066947340965271, "step": 212460 }, { "epoch": 60.30939540164633, "grad_norm": 6.673725128173828, "learning_rate": 3.9715299460686915e-05, "loss": 0.0014988793060183524, "step": 212470 }, { "epoch": 60.31223389156968, "grad_norm": 0.1491834968328476, "learning_rate": 3.9712460970763556e-05, "loss": 0.0008095607161521911, "step": 212480 }, { "epoch": 60.315072381493046, "grad_norm": 0.002669596578925848, "learning_rate": 3.97096224808402e-05, "loss": 0.0015592427924275398, "step": 212490 }, { "epoch": 60.31791087141641, "grad_norm": 0.03614400699734688, "learning_rate": 3.970678399091683e-05, "loss": 0.0006926767528057098, "step": 212500 }, { "epoch": 60.31791087141641, "eval_accuracy": 0.9792713168436447, "eval_loss": 0.07853108644485474, "eval_runtime": 32.6081, "eval_samples_per_second": 482.303, "eval_steps_per_second": 7.544, "step": 212500 }, { "epoch": 60.320749361339765, "grad_norm": 0.013016685843467712, "learning_rate": 3.9703945500993474e-05, "loss": 0.00036813076585531237, "step": 212510 }, { "epoch": 60.32358785126313, "grad_norm": 0.07877673208713531, "learning_rate": 3.970110701107011e-05, "loss": 0.0037849336862564088, "step": 212520 }, { "epoch": 60.32642634118649, "grad_norm": 0.03298484534025192, "learning_rate": 3.9698268521146756e-05, "loss": 0.0012386031448841095, "step": 212530 }, { "epoch": 60.329264831109846, "grad_norm": 0.02003333531320095, "learning_rate": 3.969543003122339e-05, "loss": 0.0005031388252973556, "step": 212540 }, { "epoch": 60.33210332103321, "grad_norm": 0.060534264892339706, "learning_rate": 3.9692591541300026e-05, "loss": 0.0005312932655215264, "step": 212550 }, { "epoch": 60.33494181095657, "grad_norm": 1.5442663431167603, "learning_rate": 3.9689753051376674e-05, "loss": 0.0002979043871164322, "step": 212560 }, { "epoch": 60.337780300879935, "grad_norm": 0.818830132484436, "learning_rate": 3.968691456145331e-05, "loss": 0.0005384659394621849, "step": 212570 }, { "epoch": 60.34061879080329, "grad_norm": 0.07287649810314178, "learning_rate": 3.968407607152995e-05, "loss": 0.002614613249897957, "step": 212580 }, { "epoch": 60.34345728072665, "grad_norm": 0.25836342573165894, "learning_rate": 3.968123758160659e-05, "loss": 0.00305231511592865, "step": 212590 }, { "epoch": 60.346295770650016, "grad_norm": 1.3756757974624634, "learning_rate": 3.9678399091683226e-05, "loss": 0.00048483498394489286, "step": 212600 }, { "epoch": 60.34913426057337, "grad_norm": 9.199005126953125, "learning_rate": 3.967556060175987e-05, "loss": 0.007254482805728912, "step": 212610 }, { "epoch": 60.351972750496735, "grad_norm": 0.01796429045498371, "learning_rate": 3.96727221118365e-05, "loss": 0.008895892649888992, "step": 212620 }, { "epoch": 60.3548112404201, "grad_norm": 0.1044369786977768, "learning_rate": 3.966988362191314e-05, "loss": 0.0006351402029395103, "step": 212630 }, { "epoch": 60.35764973034346, "grad_norm": 0.49802166223526, "learning_rate": 3.9667045131989784e-05, "loss": 0.0006324104964733124, "step": 212640 }, { "epoch": 60.36048822026682, "grad_norm": 0.009968497790396214, "learning_rate": 3.966420664206642e-05, "loss": 0.00044434331357479095, "step": 212650 }, { "epoch": 60.36332671019018, "grad_norm": 0.07250282913446426, "learning_rate": 3.966136815214306e-05, "loss": 0.00029443632811307905, "step": 212660 }, { "epoch": 60.36616520011354, "grad_norm": 0.021006111055612564, "learning_rate": 3.96585296622197e-05, "loss": 0.0017744384706020356, "step": 212670 }, { "epoch": 60.3690036900369, "grad_norm": 0.11253514140844345, "learning_rate": 3.9655691172296336e-05, "loss": 0.004662338271737098, "step": 212680 }, { "epoch": 60.37184217996026, "grad_norm": 0.42191165685653687, "learning_rate": 3.9652852682372985e-05, "loss": 0.0006789162755012513, "step": 212690 }, { "epoch": 60.374680669883624, "grad_norm": 0.18644465506076813, "learning_rate": 3.965001419244962e-05, "loss": 0.00025683846324682237, "step": 212700 }, { "epoch": 60.37751915980698, "grad_norm": 0.03106045350432396, "learning_rate": 3.9647175702526254e-05, "loss": 0.0029737846925854683, "step": 212710 }, { "epoch": 60.38035764973034, "grad_norm": 0.18261921405792236, "learning_rate": 3.9644337212602895e-05, "loss": 0.00019237026572227478, "step": 212720 }, { "epoch": 60.383196139653705, "grad_norm": 0.03888605162501335, "learning_rate": 3.9641498722679536e-05, "loss": 0.000781780295073986, "step": 212730 }, { "epoch": 60.38603462957707, "grad_norm": 0.4467582702636719, "learning_rate": 3.963866023275618e-05, "loss": 0.0003185665234923363, "step": 212740 }, { "epoch": 60.388873119500424, "grad_norm": 1.0408800840377808, "learning_rate": 3.963582174283281e-05, "loss": 0.0004212265834212303, "step": 212750 }, { "epoch": 60.39171160942379, "grad_norm": 0.20046047866344452, "learning_rate": 3.9632983252909454e-05, "loss": 0.0002834515646100044, "step": 212760 }, { "epoch": 60.39455009934715, "grad_norm": 0.3705109655857086, "learning_rate": 3.9630144762986095e-05, "loss": 0.00023512355983257294, "step": 212770 }, { "epoch": 60.397388589270506, "grad_norm": 0.014628705568611622, "learning_rate": 3.962730627306273e-05, "loss": 0.00016522016376256942, "step": 212780 }, { "epoch": 60.40022707919387, "grad_norm": 0.019992034882307053, "learning_rate": 3.962446778313937e-05, "loss": 0.00013082921504974364, "step": 212790 }, { "epoch": 60.40306556911723, "grad_norm": 0.04282299801707268, "learning_rate": 3.962162929321601e-05, "loss": 0.0004411064088344574, "step": 212800 }, { "epoch": 60.40590405904059, "grad_norm": 0.03131137415766716, "learning_rate": 3.961879080329265e-05, "loss": 0.0006004448980093003, "step": 212810 }, { "epoch": 60.40874254896395, "grad_norm": 0.062434159219264984, "learning_rate": 3.961595231336929e-05, "loss": 0.00012311618775129318, "step": 212820 }, { "epoch": 60.41158103888731, "grad_norm": 0.15706795454025269, "learning_rate": 3.961311382344593e-05, "loss": 0.001236712746322155, "step": 212830 }, { "epoch": 60.414419528810676, "grad_norm": 0.1135672926902771, "learning_rate": 3.9610275333522565e-05, "loss": 0.0005148442462086678, "step": 212840 }, { "epoch": 60.41725801873403, "grad_norm": 0.00707873422652483, "learning_rate": 3.9607436843599206e-05, "loss": 0.00018717125058174134, "step": 212850 }, { "epoch": 60.420096508657394, "grad_norm": 0.0035596087109297514, "learning_rate": 3.960459835367585e-05, "loss": 0.0005172420293092728, "step": 212860 }, { "epoch": 60.42293499858076, "grad_norm": 0.025607600808143616, "learning_rate": 3.960175986375249e-05, "loss": 0.00017489343881607055, "step": 212870 }, { "epoch": 60.42577348850411, "grad_norm": 0.017175517976284027, "learning_rate": 3.959892137382912e-05, "loss": 0.00019865315407514573, "step": 212880 }, { "epoch": 60.428611978427476, "grad_norm": 1.673065423965454, "learning_rate": 3.9596082883905765e-05, "loss": 0.0025501398369669913, "step": 212890 }, { "epoch": 60.43145046835084, "grad_norm": 0.5908607244491577, "learning_rate": 3.9593244393982406e-05, "loss": 0.0002895643934607506, "step": 212900 }, { "epoch": 60.434288958274195, "grad_norm": 1.4015148878097534, "learning_rate": 3.959040590405904e-05, "loss": 0.00031952802091836927, "step": 212910 }, { "epoch": 60.43712744819756, "grad_norm": 0.00445850845426321, "learning_rate": 3.958756741413568e-05, "loss": 0.0002652568742632866, "step": 212920 }, { "epoch": 60.43996593812092, "grad_norm": 0.01709609664976597, "learning_rate": 3.958472892421232e-05, "loss": 0.0010497985407710076, "step": 212930 }, { "epoch": 60.44280442804428, "grad_norm": 0.014654036611318588, "learning_rate": 3.958189043428896e-05, "loss": 0.00033137742429971694, "step": 212940 }, { "epoch": 60.44564291796764, "grad_norm": 0.05428967997431755, "learning_rate": 3.95790519443656e-05, "loss": 0.0006460757926106453, "step": 212950 }, { "epoch": 60.448481407891, "grad_norm": 0.03374316915869713, "learning_rate": 3.957621345444224e-05, "loss": 0.00018990486860275267, "step": 212960 }, { "epoch": 60.451319897814365, "grad_norm": 0.4393336772918701, "learning_rate": 3.9573374964518875e-05, "loss": 0.00019049141556024552, "step": 212970 }, { "epoch": 60.45415838773772, "grad_norm": 0.028057726100087166, "learning_rate": 3.957053647459552e-05, "loss": 0.00011762399226427079, "step": 212980 }, { "epoch": 60.45699687766108, "grad_norm": 0.04605928808450699, "learning_rate": 3.956769798467216e-05, "loss": 0.0002554373815655708, "step": 212990 }, { "epoch": 60.459835367584446, "grad_norm": 0.008764546364545822, "learning_rate": 3.95648594947488e-05, "loss": 0.0001500586047768593, "step": 213000 }, { "epoch": 60.459835367584446, "eval_accuracy": 0.9802886755261652, "eval_loss": 0.07303733378648758, "eval_runtime": 32.0496, "eval_samples_per_second": 490.708, "eval_steps_per_second": 7.676, "step": 213000 }, { "epoch": 60.46267385750781, "grad_norm": 0.05356809124350548, "learning_rate": 3.9562021004825434e-05, "loss": 0.00010720957070589065, "step": 213010 }, { "epoch": 60.465512347431165, "grad_norm": 0.008991366252303123, "learning_rate": 3.9559182514902075e-05, "loss": 9.149257093667983e-05, "step": 213020 }, { "epoch": 60.46835083735453, "grad_norm": 0.022711465135216713, "learning_rate": 3.955634402497872e-05, "loss": 7.473956793546677e-05, "step": 213030 }, { "epoch": 60.47118932727789, "grad_norm": 0.011164833791553974, "learning_rate": 3.955350553505535e-05, "loss": 7.481146603822708e-05, "step": 213040 }, { "epoch": 60.47402781720125, "grad_norm": 0.01893482357263565, "learning_rate": 3.955066704513199e-05, "loss": 0.00011330880224704742, "step": 213050 }, { "epoch": 60.47686630712461, "grad_norm": 0.08045006543397903, "learning_rate": 3.9547828555208634e-05, "loss": 0.000163152813911438, "step": 213060 }, { "epoch": 60.47970479704797, "grad_norm": 0.08550207316875458, "learning_rate": 3.954499006528527e-05, "loss": 0.0001562533900141716, "step": 213070 }, { "epoch": 60.48254328697133, "grad_norm": 0.021030280739068985, "learning_rate": 3.954215157536191e-05, "loss": 0.00041407085955142976, "step": 213080 }, { "epoch": 60.48538177689469, "grad_norm": 0.0052907331846654415, "learning_rate": 3.953931308543855e-05, "loss": 7.603410631418228e-05, "step": 213090 }, { "epoch": 60.488220266818054, "grad_norm": 0.06101894751191139, "learning_rate": 3.9536474595515186e-05, "loss": 0.000521058402955532, "step": 213100 }, { "epoch": 60.49105875674142, "grad_norm": 0.1702054888010025, "learning_rate": 3.953363610559183e-05, "loss": 0.00023218858987092972, "step": 213110 }, { "epoch": 60.49389724666477, "grad_norm": 0.019629674032330513, "learning_rate": 3.953079761566847e-05, "loss": 0.0008514957502484321, "step": 213120 }, { "epoch": 60.496735736588136, "grad_norm": 1.5716602802276611, "learning_rate": 3.9527959125745103e-05, "loss": 0.0005959402769804001, "step": 213130 }, { "epoch": 60.4995742265115, "grad_norm": 0.01242644153535366, "learning_rate": 3.9525120635821745e-05, "loss": 0.0005464812740683556, "step": 213140 }, { "epoch": 60.502412716434854, "grad_norm": 0.2799952030181885, "learning_rate": 3.952228214589838e-05, "loss": 0.00022779665887355803, "step": 213150 }, { "epoch": 60.50525120635822, "grad_norm": 0.359395295381546, "learning_rate": 3.951944365597503e-05, "loss": 0.0024619530886411667, "step": 213160 }, { "epoch": 60.50808969628158, "grad_norm": 0.03196197748184204, "learning_rate": 3.951660516605166e-05, "loss": 0.0030092371627688406, "step": 213170 }, { "epoch": 60.510928186204936, "grad_norm": 0.0396360382437706, "learning_rate": 3.95137666761283e-05, "loss": 0.0002860262989997864, "step": 213180 }, { "epoch": 60.5137666761283, "grad_norm": 0.04495998099446297, "learning_rate": 3.9510928186204945e-05, "loss": 0.00377996489405632, "step": 213190 }, { "epoch": 60.51660516605166, "grad_norm": 0.46605342626571655, "learning_rate": 3.950808969628158e-05, "loss": 0.00023503229022026062, "step": 213200 }, { "epoch": 60.519443655975024, "grad_norm": 4.214391231536865, "learning_rate": 3.950525120635822e-05, "loss": 0.0010533083230257034, "step": 213210 }, { "epoch": 60.52228214589838, "grad_norm": 0.016126129776239395, "learning_rate": 3.950241271643486e-05, "loss": 0.0003232551738619804, "step": 213220 }, { "epoch": 60.52512063582174, "grad_norm": 0.10853899270296097, "learning_rate": 3.94995742265115e-05, "loss": 0.0015983885154128076, "step": 213230 }, { "epoch": 60.527959125745106, "grad_norm": 0.014764778316020966, "learning_rate": 3.949673573658814e-05, "loss": 0.00025718845427036285, "step": 213240 }, { "epoch": 60.53079761566846, "grad_norm": 0.3328261971473694, "learning_rate": 3.949389724666477e-05, "loss": 0.005609837919473648, "step": 213250 }, { "epoch": 60.533636105591825, "grad_norm": 0.661686360836029, "learning_rate": 3.9491058756741414e-05, "loss": 0.00042172782123088836, "step": 213260 }, { "epoch": 60.53647459551519, "grad_norm": 0.02894427813589573, "learning_rate": 3.9488220266818056e-05, "loss": 0.00015194080770015718, "step": 213270 }, { "epoch": 60.53931308543854, "grad_norm": 0.005786092486232519, "learning_rate": 3.948538177689469e-05, "loss": 0.00017364732921123504, "step": 213280 }, { "epoch": 60.542151575361906, "grad_norm": 0.5930266976356506, "learning_rate": 3.948254328697134e-05, "loss": 0.0007741134613752365, "step": 213290 }, { "epoch": 60.54499006528527, "grad_norm": 0.47336289286613464, "learning_rate": 3.947970479704797e-05, "loss": 0.00017344187945127486, "step": 213300 }, { "epoch": 60.54782855520863, "grad_norm": 0.008377163670957088, "learning_rate": 3.947686630712461e-05, "loss": 0.000228220596909523, "step": 213310 }, { "epoch": 60.55066704513199, "grad_norm": 0.08914519101381302, "learning_rate": 3.9474027817201256e-05, "loss": 0.0001415695995092392, "step": 213320 }, { "epoch": 60.55350553505535, "grad_norm": 0.22376513481140137, "learning_rate": 3.947118932727789e-05, "loss": 0.00012744441628456115, "step": 213330 }, { "epoch": 60.55634402497871, "grad_norm": 1.1880178451538086, "learning_rate": 3.946835083735453e-05, "loss": 0.0003081619739532471, "step": 213340 }, { "epoch": 60.55918251490207, "grad_norm": 0.014955968596041203, "learning_rate": 3.9465512347431166e-05, "loss": 0.00019318703562021255, "step": 213350 }, { "epoch": 60.56202100482543, "grad_norm": 0.4637303948402405, "learning_rate": 3.946267385750781e-05, "loss": 0.00038141980767250063, "step": 213360 }, { "epoch": 60.564859494748795, "grad_norm": 0.12317363172769547, "learning_rate": 3.945983536758445e-05, "loss": 0.0006155364215373992, "step": 213370 }, { "epoch": 60.56769798467215, "grad_norm": 0.06244570016860962, "learning_rate": 3.9456996877661084e-05, "loss": 0.004873473569750786, "step": 213380 }, { "epoch": 60.570536474595514, "grad_norm": 0.04374377429485321, "learning_rate": 3.9454158387737725e-05, "loss": 0.0011515742167830466, "step": 213390 }, { "epoch": 60.57337496451888, "grad_norm": 0.022432034835219383, "learning_rate": 3.9451319897814366e-05, "loss": 0.00026555471122264863, "step": 213400 }, { "epoch": 60.57621345444224, "grad_norm": 0.036803606897592545, "learning_rate": 3.9448481407891e-05, "loss": 0.0009046507999300957, "step": 213410 }, { "epoch": 60.579051944365595, "grad_norm": 0.1278969645500183, "learning_rate": 3.944564291796764e-05, "loss": 0.00033058617264032365, "step": 213420 }, { "epoch": 60.58189043428896, "grad_norm": 0.5922592282295227, "learning_rate": 3.9442804428044284e-05, "loss": 0.0006589632481336594, "step": 213430 }, { "epoch": 60.58472892421232, "grad_norm": 0.13460838794708252, "learning_rate": 3.943996593812092e-05, "loss": 0.0002545630559325218, "step": 213440 }, { "epoch": 60.58756741413568, "grad_norm": 1.6269841194152832, "learning_rate": 3.943712744819756e-05, "loss": 0.0009586678817868233, "step": 213450 }, { "epoch": 60.59040590405904, "grad_norm": 0.13093166053295135, "learning_rate": 3.94342889582742e-05, "loss": 0.0023082077503204347, "step": 213460 }, { "epoch": 60.5932443939824, "grad_norm": 0.06929690390825272, "learning_rate": 3.943145046835084e-05, "loss": 0.00014449022710323335, "step": 213470 }, { "epoch": 60.596082883905765, "grad_norm": 0.023822026327252388, "learning_rate": 3.942861197842748e-05, "loss": 0.0009205533191561699, "step": 213480 }, { "epoch": 60.59892137382912, "grad_norm": 0.3524034023284912, "learning_rate": 3.942577348850412e-05, "loss": 0.0012259647250175477, "step": 213490 }, { "epoch": 60.601759863752484, "grad_norm": 0.394624263048172, "learning_rate": 3.942293499858076e-05, "loss": 0.001963217742741108, "step": 213500 }, { "epoch": 60.601759863752484, "eval_accuracy": 0.9763464106313983, "eval_loss": 0.09106861799955368, "eval_runtime": 32.5414, "eval_samples_per_second": 483.293, "eval_steps_per_second": 7.56, "step": 213500 }, { "epoch": 60.60459835367585, "grad_norm": 0.107668936252594, "learning_rate": 3.9420096508657394e-05, "loss": 0.0006617605686187744, "step": 213510 }, { "epoch": 60.6074368435992, "grad_norm": 0.2477286458015442, "learning_rate": 3.9417258018734036e-05, "loss": 0.0013930074870586394, "step": 213520 }, { "epoch": 60.610275333522566, "grad_norm": 0.03896278515458107, "learning_rate": 3.941441952881068e-05, "loss": 0.002101821079850197, "step": 213530 }, { "epoch": 60.61311382344593, "grad_norm": 0.07699217647314072, "learning_rate": 3.941158103888731e-05, "loss": 0.0008196931332349778, "step": 213540 }, { "epoch": 60.615952313369284, "grad_norm": 0.14922674000263214, "learning_rate": 3.940874254896395e-05, "loss": 0.0009234042838215828, "step": 213550 }, { "epoch": 60.61879080329265, "grad_norm": 3.742690086364746, "learning_rate": 3.9405904059040594e-05, "loss": 0.0004824059084057808, "step": 213560 }, { "epoch": 60.62162929321601, "grad_norm": 0.022591421380639076, "learning_rate": 3.940306556911723e-05, "loss": 0.00030651278793811796, "step": 213570 }, { "epoch": 60.62446778313937, "grad_norm": 0.03171566128730774, "learning_rate": 3.940022707919387e-05, "loss": 0.0005651500076055526, "step": 213580 }, { "epoch": 60.62730627306273, "grad_norm": 0.2640872895717621, "learning_rate": 3.939738858927051e-05, "loss": 0.001060624234378338, "step": 213590 }, { "epoch": 60.63014476298609, "grad_norm": 0.21782411634922028, "learning_rate": 3.9394550099347146e-05, "loss": 0.0014341872185468674, "step": 213600 }, { "epoch": 60.632983252909455, "grad_norm": 0.1470104604959488, "learning_rate": 3.939171160942379e-05, "loss": 0.0005581220611929893, "step": 213610 }, { "epoch": 60.63582174283281, "grad_norm": 0.2718030512332916, "learning_rate": 3.938887311950043e-05, "loss": 0.0005162440240383149, "step": 213620 }, { "epoch": 60.63866023275617, "grad_norm": 0.03086242452263832, "learning_rate": 3.938603462957707e-05, "loss": 0.0006310911849141121, "step": 213630 }, { "epoch": 60.641498722679536, "grad_norm": 2.541426658630371, "learning_rate": 3.9383196139653705e-05, "loss": 0.0004966450855135917, "step": 213640 }, { "epoch": 60.64433721260289, "grad_norm": 1.334401249885559, "learning_rate": 3.938035764973034e-05, "loss": 0.0010387519374489783, "step": 213650 }, { "epoch": 60.647175702526255, "grad_norm": 0.20200476050376892, "learning_rate": 3.937751915980699e-05, "loss": 0.000436924584209919, "step": 213660 }, { "epoch": 60.65001419244962, "grad_norm": 0.03386653959751129, "learning_rate": 3.937468066988362e-05, "loss": 0.00026781000196933745, "step": 213670 }, { "epoch": 60.65285268237298, "grad_norm": 0.1357858031988144, "learning_rate": 3.9371842179960264e-05, "loss": 0.00089748315513134, "step": 213680 }, { "epoch": 60.655691172296336, "grad_norm": 0.17049528658390045, "learning_rate": 3.9369003690036905e-05, "loss": 0.0004184199497103691, "step": 213690 }, { "epoch": 60.6585296622197, "grad_norm": 0.020961789414286613, "learning_rate": 3.936616520011354e-05, "loss": 0.00025675203651189806, "step": 213700 }, { "epoch": 60.66136815214306, "grad_norm": 0.1327710896730423, "learning_rate": 3.936332671019018e-05, "loss": 0.0014912977814674378, "step": 213710 }, { "epoch": 60.66420664206642, "grad_norm": 0.02027597650885582, "learning_rate": 3.936048822026682e-05, "loss": 0.0003780985251069069, "step": 213720 }, { "epoch": 60.66704513198978, "grad_norm": 0.11193887889385223, "learning_rate": 3.935764973034346e-05, "loss": 0.0010601077228784561, "step": 213730 }, { "epoch": 60.669883621913144, "grad_norm": 0.17878660559654236, "learning_rate": 3.93548112404201e-05, "loss": 0.005622755736112595, "step": 213740 }, { "epoch": 60.67272211183651, "grad_norm": 0.18254338204860687, "learning_rate": 3.935197275049673e-05, "loss": 0.0005600804463028908, "step": 213750 }, { "epoch": 60.67556060175986, "grad_norm": 0.09618737548589706, "learning_rate": 3.934913426057338e-05, "loss": 0.0004000140354037285, "step": 213760 }, { "epoch": 60.678399091683225, "grad_norm": 0.01292925514280796, "learning_rate": 3.9346295770650016e-05, "loss": 0.0003779040649533272, "step": 213770 }, { "epoch": 60.68123758160659, "grad_norm": 0.44061172008514404, "learning_rate": 3.934345728072665e-05, "loss": 0.00024771355092525484, "step": 213780 }, { "epoch": 60.684076071529944, "grad_norm": 0.43400344252586365, "learning_rate": 3.93406187908033e-05, "loss": 0.0004198167473077774, "step": 213790 }, { "epoch": 60.68691456145331, "grad_norm": 0.7531759142875671, "learning_rate": 3.933778030087993e-05, "loss": 0.00033037513494491575, "step": 213800 }, { "epoch": 60.68975305137667, "grad_norm": 0.843131959438324, "learning_rate": 3.9334941810956575e-05, "loss": 0.0018951039761304854, "step": 213810 }, { "epoch": 60.692591541300025, "grad_norm": 0.7763177156448364, "learning_rate": 3.9332103321033216e-05, "loss": 0.015769125521183015, "step": 213820 }, { "epoch": 60.69543003122339, "grad_norm": 0.03397354483604431, "learning_rate": 3.932926483110985e-05, "loss": 0.00046726595610380173, "step": 213830 }, { "epoch": 60.69826852114675, "grad_norm": 0.02210998348891735, "learning_rate": 3.932642634118649e-05, "loss": 0.0005125179886817932, "step": 213840 }, { "epoch": 60.701107011070114, "grad_norm": 0.23833899199962616, "learning_rate": 3.9323587851263127e-05, "loss": 0.0004067091271281242, "step": 213850 }, { "epoch": 60.70394550099347, "grad_norm": 0.326462984085083, "learning_rate": 3.932074936133977e-05, "loss": 0.0007082685828208923, "step": 213860 }, { "epoch": 60.70678399091683, "grad_norm": 0.35567107796669006, "learning_rate": 3.931791087141641e-05, "loss": 0.0003374157473444939, "step": 213870 }, { "epoch": 60.709622480840196, "grad_norm": 0.09233374148607254, "learning_rate": 3.9315072381493044e-05, "loss": 0.0006197206676006317, "step": 213880 }, { "epoch": 60.71246097076355, "grad_norm": 0.0896356925368309, "learning_rate": 3.9312233891569685e-05, "loss": 0.00026288367807865144, "step": 213890 }, { "epoch": 60.715299460686914, "grad_norm": 0.1026448905467987, "learning_rate": 3.930939540164633e-05, "loss": 0.0009186679497361183, "step": 213900 }, { "epoch": 60.71813795061028, "grad_norm": 0.006846647709608078, "learning_rate": 3.930655691172296e-05, "loss": 0.00015797819942235946, "step": 213910 }, { "epoch": 60.72097644053363, "grad_norm": 0.0875532478094101, "learning_rate": 3.930371842179961e-05, "loss": 0.00025676488876342775, "step": 213920 }, { "epoch": 60.723814930456996, "grad_norm": 0.06060396507382393, "learning_rate": 3.9300879931876244e-05, "loss": 0.0005107516422867775, "step": 213930 }, { "epoch": 60.72665342038036, "grad_norm": 0.07572657614946365, "learning_rate": 3.9298041441952885e-05, "loss": 0.00024239439517259598, "step": 213940 }, { "epoch": 60.72949191030372, "grad_norm": 0.037467021495103836, "learning_rate": 3.929520295202952e-05, "loss": 0.0001464400440454483, "step": 213950 }, { "epoch": 60.73233040022708, "grad_norm": 0.02191409096121788, "learning_rate": 3.929236446210616e-05, "loss": 0.0009377025067806244, "step": 213960 }, { "epoch": 60.73516889015044, "grad_norm": 1.418107271194458, "learning_rate": 3.92895259721828e-05, "loss": 0.0009064683690667152, "step": 213970 }, { "epoch": 60.7380073800738, "grad_norm": 3.0805814266204834, "learning_rate": 3.928668748225944e-05, "loss": 0.0007204145193099975, "step": 213980 }, { "epoch": 60.74084586999716, "grad_norm": 0.22702492773532867, "learning_rate": 3.928384899233608e-05, "loss": 0.0013763925060629844, "step": 213990 }, { "epoch": 60.74368435992052, "grad_norm": 0.08493654429912567, "learning_rate": 3.928101050241272e-05, "loss": 0.00012333616614341737, "step": 214000 }, { "epoch": 60.74368435992052, "eval_accuracy": 0.9752654670312202, "eval_loss": 0.09392672032117844, "eval_runtime": 33.0374, "eval_samples_per_second": 476.037, "eval_steps_per_second": 7.446, "step": 214000 }, { "epoch": 60.746522849843885, "grad_norm": 0.056936852633953094, "learning_rate": 3.9278172012489355e-05, "loss": 0.00044082049280405047, "step": 214010 }, { "epoch": 60.74936133976724, "grad_norm": 12.9418363571167, "learning_rate": 3.9275617371558335e-05, "loss": 0.010830791294574737, "step": 214020 }, { "epoch": 60.7521998296906, "grad_norm": 0.162174791097641, "learning_rate": 3.927277888163497e-05, "loss": 0.0013185026124119758, "step": 214030 }, { "epoch": 60.755038319613966, "grad_norm": 0.09921158850193024, "learning_rate": 3.926994039171161e-05, "loss": 0.001711965724825859, "step": 214040 }, { "epoch": 60.75787680953733, "grad_norm": 0.19495490193367004, "learning_rate": 3.926710190178825e-05, "loss": 0.00107131190598011, "step": 214050 }, { "epoch": 60.760715299460685, "grad_norm": 0.11336076259613037, "learning_rate": 3.9264263411864887e-05, "loss": 0.008304493129253387, "step": 214060 }, { "epoch": 60.76355378938405, "grad_norm": 0.5726718306541443, "learning_rate": 3.926142492194153e-05, "loss": 0.0007468827068805694, "step": 214070 }, { "epoch": 60.76639227930741, "grad_norm": 0.11420349776744843, "learning_rate": 3.925858643201817e-05, "loss": 0.0028796155005693437, "step": 214080 }, { "epoch": 60.76923076923077, "grad_norm": 3.2791385650634766, "learning_rate": 3.9255747942094804e-05, "loss": 0.0009003689512610436, "step": 214090 }, { "epoch": 60.77206925915413, "grad_norm": 0.07733231037855148, "learning_rate": 3.925290945217145e-05, "loss": 0.0014156935736536979, "step": 214100 }, { "epoch": 60.77490774907749, "grad_norm": 0.016510190442204475, "learning_rate": 3.925007096224809e-05, "loss": 0.003924942389130592, "step": 214110 }, { "epoch": 60.77774623900085, "grad_norm": 0.046731382608413696, "learning_rate": 3.924723247232472e-05, "loss": 0.00037130918353796006, "step": 214120 }, { "epoch": 60.78058472892421, "grad_norm": 0.020450467243790627, "learning_rate": 3.924439398240136e-05, "loss": 0.0005197262391448021, "step": 214130 }, { "epoch": 60.783423218847574, "grad_norm": 9.076140403747559, "learning_rate": 3.9241555492478004e-05, "loss": 0.003102618455886841, "step": 214140 }, { "epoch": 60.78626170877094, "grad_norm": 0.12189380079507828, "learning_rate": 3.9238717002554645e-05, "loss": 0.000965387374162674, "step": 214150 }, { "epoch": 60.78910019869429, "grad_norm": 0.08119045197963715, "learning_rate": 3.923587851263128e-05, "loss": 0.0003419533371925354, "step": 214160 }, { "epoch": 60.791938688617655, "grad_norm": 0.13633762300014496, "learning_rate": 3.923304002270792e-05, "loss": 0.0003166003152728081, "step": 214170 }, { "epoch": 60.79477717854102, "grad_norm": 0.07971606403589249, "learning_rate": 3.923020153278456e-05, "loss": 0.0014098899438977242, "step": 214180 }, { "epoch": 60.797615668464374, "grad_norm": 0.2866664528846741, "learning_rate": 3.92273630428612e-05, "loss": 0.0008065858855843544, "step": 214190 }, { "epoch": 60.80045415838774, "grad_norm": 0.07133712619543076, "learning_rate": 3.922452455293784e-05, "loss": 0.00034293755888938906, "step": 214200 }, { "epoch": 60.8032926483111, "grad_norm": 0.15783825516700745, "learning_rate": 3.922168606301448e-05, "loss": 0.00032199863344430926, "step": 214210 }, { "epoch": 60.80613113823446, "grad_norm": 0.0934293270111084, "learning_rate": 3.9218847573091115e-05, "loss": 0.0006432369351387024, "step": 214220 }, { "epoch": 60.80896962815782, "grad_norm": 0.0640489012002945, "learning_rate": 3.9216009083167756e-05, "loss": 0.001894128881394863, "step": 214230 }, { "epoch": 60.81180811808118, "grad_norm": 6.179540634155273, "learning_rate": 3.92131705932444e-05, "loss": 0.0021935222670435906, "step": 214240 }, { "epoch": 60.814646608004544, "grad_norm": 0.187752828001976, "learning_rate": 3.921033210332103e-05, "loss": 0.0005717670544981956, "step": 214250 }, { "epoch": 60.8174850979279, "grad_norm": 0.13186945021152496, "learning_rate": 3.920749361339767e-05, "loss": 0.0011180918663740158, "step": 214260 }, { "epoch": 60.82032358785126, "grad_norm": 0.07031745463609695, "learning_rate": 3.9204655123474315e-05, "loss": 0.00028594136238098146, "step": 214270 }, { "epoch": 60.823162077774626, "grad_norm": 0.12241843342781067, "learning_rate": 3.9201816633550956e-05, "loss": 0.002047180198132992, "step": 214280 }, { "epoch": 60.82600056769798, "grad_norm": 0.4714779257774353, "learning_rate": 3.919897814362759e-05, "loss": 0.0009456511586904525, "step": 214290 }, { "epoch": 60.828839057621344, "grad_norm": 0.09269167482852936, "learning_rate": 3.919613965370423e-05, "loss": 0.003608642518520355, "step": 214300 }, { "epoch": 60.83167754754471, "grad_norm": 0.9019885659217834, "learning_rate": 3.9193301163780873e-05, "loss": 0.001398182474076748, "step": 214310 }, { "epoch": 60.83451603746807, "grad_norm": 0.09004561603069305, "learning_rate": 3.919046267385751e-05, "loss": 0.0006853125989437104, "step": 214320 }, { "epoch": 60.837354527391426, "grad_norm": 0.06747071444988251, "learning_rate": 3.918762418393415e-05, "loss": 0.0008158458396792412, "step": 214330 }, { "epoch": 60.84019301731479, "grad_norm": 0.010834372602403164, "learning_rate": 3.918478569401079e-05, "loss": 0.00540047362446785, "step": 214340 }, { "epoch": 60.84303150723815, "grad_norm": 0.060086917132139206, "learning_rate": 3.9181947204087425e-05, "loss": 0.00035485439002513885, "step": 214350 }, { "epoch": 60.84586999716151, "grad_norm": 1.0271942615509033, "learning_rate": 3.917910871416407e-05, "loss": 0.0004329778254032135, "step": 214360 }, { "epoch": 60.84870848708487, "grad_norm": 2.325047016143799, "learning_rate": 3.917627022424071e-05, "loss": 0.0005090530961751938, "step": 214370 }, { "epoch": 60.85154697700823, "grad_norm": 0.015145102515816689, "learning_rate": 3.917343173431734e-05, "loss": 0.0003334898501634598, "step": 214380 }, { "epoch": 60.85438546693159, "grad_norm": 0.013235276564955711, "learning_rate": 3.9170593244393984e-05, "loss": 0.0027549231424927713, "step": 214390 }, { "epoch": 60.85722395685495, "grad_norm": 0.05014194920659065, "learning_rate": 3.9167754754470626e-05, "loss": 0.00021939016878604888, "step": 214400 }, { "epoch": 60.860062446778315, "grad_norm": 0.06601856648921967, "learning_rate": 3.916491626454726e-05, "loss": 0.002479120343923569, "step": 214410 }, { "epoch": 60.86290093670168, "grad_norm": 3.7311959266662598, "learning_rate": 3.91620777746239e-05, "loss": 0.0009954076260328293, "step": 214420 }, { "epoch": 60.865739426625034, "grad_norm": 0.03158235177397728, "learning_rate": 3.9159239284700536e-05, "loss": 0.0002541018649935722, "step": 214430 }, { "epoch": 60.868577916548396, "grad_norm": 0.014159257523715496, "learning_rate": 3.9156400794777184e-05, "loss": 0.0007186410948634148, "step": 214440 }, { "epoch": 60.87141640647176, "grad_norm": 0.029498783871531487, "learning_rate": 3.915356230485382e-05, "loss": 0.00030751824378967286, "step": 214450 }, { "epoch": 60.874254896395115, "grad_norm": 0.07901179790496826, "learning_rate": 3.9150723814930453e-05, "loss": 0.00029143039137125015, "step": 214460 }, { "epoch": 60.87709338631848, "grad_norm": 0.027470100671052933, "learning_rate": 3.91478853250071e-05, "loss": 0.0002888062968850136, "step": 214470 }, { "epoch": 60.87993187624184, "grad_norm": 0.2429257333278656, "learning_rate": 3.9145046835083736e-05, "loss": 0.00023186914622783661, "step": 214480 }, { "epoch": 60.8827703661652, "grad_norm": 0.01785132847726345, "learning_rate": 3.914220834516038e-05, "loss": 0.0005063196644186973, "step": 214490 }, { "epoch": 60.88560885608856, "grad_norm": 0.20747493207454681, "learning_rate": 3.913936985523702e-05, "loss": 0.0077833980321884155, "step": 214500 }, { "epoch": 60.88560885608856, "eval_accuracy": 0.9767279201373434, "eval_loss": 0.09059061110019684, "eval_runtime": 32.3287, "eval_samples_per_second": 486.471, "eval_steps_per_second": 7.609, "step": 214500 }, { "epoch": 60.88844734601192, "grad_norm": 0.0623568631708622, "learning_rate": 3.9136815214305985e-05, "loss": 0.003043646551668644, "step": 214510 }, { "epoch": 60.891285835935285, "grad_norm": 0.024578966200351715, "learning_rate": 3.9133976724382633e-05, "loss": 0.002449057437479496, "step": 214520 }, { "epoch": 60.89412432585864, "grad_norm": 0.07419086247682571, "learning_rate": 3.913113823445927e-05, "loss": 0.0013263147324323655, "step": 214530 }, { "epoch": 60.896962815782004, "grad_norm": 9.104339599609375, "learning_rate": 3.912829974453591e-05, "loss": 0.002042484097182751, "step": 214540 }, { "epoch": 60.89980130570537, "grad_norm": 0.39052385091781616, "learning_rate": 3.912546125461255e-05, "loss": 0.0003722306340932846, "step": 214550 }, { "epoch": 60.90263979562872, "grad_norm": 0.08576544374227524, "learning_rate": 3.9122622764689185e-05, "loss": 0.0004353789612650871, "step": 214560 }, { "epoch": 60.905478285552086, "grad_norm": 0.20577400922775269, "learning_rate": 3.911978427476583e-05, "loss": 0.00039111729711294174, "step": 214570 }, { "epoch": 60.90831677547545, "grad_norm": 0.07964835315942764, "learning_rate": 3.911694578484247e-05, "loss": 0.006367252767086029, "step": 214580 }, { "epoch": 60.91115526539881, "grad_norm": 0.025012154132127762, "learning_rate": 3.91141072949191e-05, "loss": 0.0006810370832681656, "step": 214590 }, { "epoch": 60.91399375532217, "grad_norm": 0.025986049324274063, "learning_rate": 3.9111268804995744e-05, "loss": 0.0009364180266857147, "step": 214600 }, { "epoch": 60.91683224524553, "grad_norm": 0.0074682217091321945, "learning_rate": 3.910843031507238e-05, "loss": 0.0003288388252258301, "step": 214610 }, { "epoch": 60.91967073516889, "grad_norm": 0.042985301464796066, "learning_rate": 3.910559182514903e-05, "loss": 0.00040201954543590547, "step": 214620 }, { "epoch": 60.92250922509225, "grad_norm": 1.8868718147277832, "learning_rate": 3.910275333522566e-05, "loss": 0.0006316378712654114, "step": 214630 }, { "epoch": 60.92534771501561, "grad_norm": 0.5271281003952026, "learning_rate": 3.9099914845302296e-05, "loss": 0.002761477418243885, "step": 214640 }, { "epoch": 60.928186204938974, "grad_norm": 1.2164965867996216, "learning_rate": 3.9097076355378944e-05, "loss": 0.0023493416607379912, "step": 214650 }, { "epoch": 60.93102469486233, "grad_norm": 0.014424446038901806, "learning_rate": 3.909423786545558e-05, "loss": 0.0005723649635910987, "step": 214660 }, { "epoch": 60.93386318478569, "grad_norm": 0.06557957082986832, "learning_rate": 3.909139937553222e-05, "loss": 0.0008646249771118164, "step": 214670 }, { "epoch": 60.936701674709056, "grad_norm": 1.0704470872879028, "learning_rate": 3.908856088560886e-05, "loss": 0.0006767518818378449, "step": 214680 }, { "epoch": 60.93954016463242, "grad_norm": 0.021511416882276535, "learning_rate": 3.9085722395685496e-05, "loss": 0.0003639969974756241, "step": 214690 }, { "epoch": 60.942378654555775, "grad_norm": 0.05119406431913376, "learning_rate": 3.908288390576214e-05, "loss": 0.00014451835304498672, "step": 214700 }, { "epoch": 60.94521714447914, "grad_norm": 0.010633128695189953, "learning_rate": 3.908004541583877e-05, "loss": 0.00034739989787340163, "step": 214710 }, { "epoch": 60.9480556344025, "grad_norm": 0.009206034243106842, "learning_rate": 3.9077206925915414e-05, "loss": 0.0005860179662704468, "step": 214720 }, { "epoch": 60.950894124325856, "grad_norm": 0.024251770228147507, "learning_rate": 3.9074368435992055e-05, "loss": 0.00026971139013767245, "step": 214730 }, { "epoch": 60.95373261424922, "grad_norm": 17.094083786010742, "learning_rate": 3.907152994606869e-05, "loss": 0.005224929749965667, "step": 214740 }, { "epoch": 60.95657110417258, "grad_norm": 0.02884203940629959, "learning_rate": 3.906869145614534e-05, "loss": 0.00043208450078964234, "step": 214750 }, { "epoch": 60.95940959409594, "grad_norm": 0.07443677634000778, "learning_rate": 3.906585296622197e-05, "loss": 0.0013178279623389244, "step": 214760 }, { "epoch": 60.9622480840193, "grad_norm": 0.26190799474716187, "learning_rate": 3.906301447629861e-05, "loss": 0.0005351247265934945, "step": 214770 }, { "epoch": 60.96508657394266, "grad_norm": 0.13104768097400665, "learning_rate": 3.9060175986375255e-05, "loss": 0.0004167964681982994, "step": 214780 }, { "epoch": 60.967925063866026, "grad_norm": 0.693864107131958, "learning_rate": 3.905733749645189e-05, "loss": 0.0002873716875910759, "step": 214790 }, { "epoch": 60.97076355378938, "grad_norm": 0.06054842472076416, "learning_rate": 3.905449900652853e-05, "loss": 0.0009095653891563416, "step": 214800 }, { "epoch": 60.973602043712745, "grad_norm": 0.020037194713950157, "learning_rate": 3.9051660516605166e-05, "loss": 0.00032813418656587603, "step": 214810 }, { "epoch": 60.97644053363611, "grad_norm": 0.13205863535404205, "learning_rate": 3.904882202668181e-05, "loss": 0.0006993519142270088, "step": 214820 }, { "epoch": 60.979279023559464, "grad_norm": 0.44881370663642883, "learning_rate": 3.904598353675845e-05, "loss": 0.0011865904554724694, "step": 214830 }, { "epoch": 60.98211751348283, "grad_norm": 0.5846525430679321, "learning_rate": 3.904314504683508e-05, "loss": 0.000391811691224575, "step": 214840 }, { "epoch": 60.98495600340619, "grad_norm": 0.01980476640164852, "learning_rate": 3.9040306556911724e-05, "loss": 0.00026873238384723664, "step": 214850 }, { "epoch": 60.987794493329545, "grad_norm": 0.03612753003835678, "learning_rate": 3.9037468066988366e-05, "loss": 0.0008358336985111236, "step": 214860 }, { "epoch": 60.99063298325291, "grad_norm": 0.5984060168266296, "learning_rate": 3.9034629577065e-05, "loss": 0.00033920686691999433, "step": 214870 }, { "epoch": 60.99347147317627, "grad_norm": 0.05385841429233551, "learning_rate": 3.903179108714164e-05, "loss": 0.0002008087933063507, "step": 214880 }, { "epoch": 60.996309963099634, "grad_norm": 0.09474235773086548, "learning_rate": 3.902895259721828e-05, "loss": 0.0002620801329612732, "step": 214890 }, { "epoch": 60.99914845302299, "grad_norm": 3.9117348194122314, "learning_rate": 3.902611410729492e-05, "loss": 0.000969577208161354, "step": 214900 }, { "epoch": 61.00198694294635, "grad_norm": 0.06659076362848282, "learning_rate": 3.902327561737156e-05, "loss": 0.0004684463143348694, "step": 214910 }, { "epoch": 61.004825432869715, "grad_norm": 0.03915082663297653, "learning_rate": 3.90204371274482e-05, "loss": 0.0001376800239086151, "step": 214920 }, { "epoch": 61.00766392279307, "grad_norm": 0.038332995027303696, "learning_rate": 3.9017598637524835e-05, "loss": 0.0003470022231340408, "step": 214930 }, { "epoch": 61.010502412716434, "grad_norm": 0.014502450823783875, "learning_rate": 3.9014760147601476e-05, "loss": 0.00037841424345970155, "step": 214940 }, { "epoch": 61.0133409026398, "grad_norm": 12.313241958618164, "learning_rate": 3.901192165767812e-05, "loss": 0.002149241603910923, "step": 214950 }, { "epoch": 61.01617939256316, "grad_norm": 0.01935967244207859, "learning_rate": 3.900908316775476e-05, "loss": 0.0009054835885763168, "step": 214960 }, { "epoch": 61.019017882486516, "grad_norm": 0.035346534103155136, "learning_rate": 3.9006244677831394e-05, "loss": 0.00027482658624649047, "step": 214970 }, { "epoch": 61.02185637240988, "grad_norm": 0.009410804137587547, "learning_rate": 3.9003406187908035e-05, "loss": 0.004024219885468483, "step": 214980 }, { "epoch": 61.02469486233324, "grad_norm": 0.9445405602455139, "learning_rate": 3.9000567697984676e-05, "loss": 0.00029649678617715836, "step": 214990 }, { "epoch": 61.0275333522566, "grad_norm": 1.7651691436767578, "learning_rate": 3.899772920806131e-05, "loss": 0.001547003537416458, "step": 215000 }, { "epoch": 61.0275333522566, "eval_accuracy": 0.9764099955490557, "eval_loss": 0.08333548903465271, "eval_runtime": 33.3129, "eval_samples_per_second": 472.1, "eval_steps_per_second": 7.385, "step": 215000 }, { "epoch": 61.03037184217996, "grad_norm": 0.21056205034255981, "learning_rate": 3.899489071813795e-05, "loss": 0.0014349974691867829, "step": 215010 }, { "epoch": 61.03321033210332, "grad_norm": 0.37836697697639465, "learning_rate": 3.8992052228214594e-05, "loss": 0.0018550721928477288, "step": 215020 }, { "epoch": 61.03604882202668, "grad_norm": 0.08101577311754227, "learning_rate": 3.898921373829123e-05, "loss": 0.0011468274518847466, "step": 215030 }, { "epoch": 61.03888731195004, "grad_norm": 0.02415461279451847, "learning_rate": 3.898637524836787e-05, "loss": 0.00023990217596292496, "step": 215040 }, { "epoch": 61.041725801873405, "grad_norm": 1.2065339088439941, "learning_rate": 3.898353675844451e-05, "loss": 0.0002829842269420624, "step": 215050 }, { "epoch": 61.04456429179677, "grad_norm": 0.0822652280330658, "learning_rate": 3.8980698268521146e-05, "loss": 0.001963541842997074, "step": 215060 }, { "epoch": 61.04740278172012, "grad_norm": 0.04683750495314598, "learning_rate": 3.897785977859779e-05, "loss": 0.000877559371292591, "step": 215070 }, { "epoch": 61.050241271643486, "grad_norm": 0.009820149280130863, "learning_rate": 3.897502128867443e-05, "loss": 0.00011243615299463272, "step": 215080 }, { "epoch": 61.05307976156685, "grad_norm": 0.009107191115617752, "learning_rate": 3.897218279875107e-05, "loss": 0.0004445452243089676, "step": 215090 }, { "epoch": 61.055918251490205, "grad_norm": 0.03630382940173149, "learning_rate": 3.8969344308827704e-05, "loss": 0.00037032756954431535, "step": 215100 }, { "epoch": 61.05875674141357, "grad_norm": 0.0142793795093894, "learning_rate": 3.8966505818904346e-05, "loss": 0.0006057983264327049, "step": 215110 }, { "epoch": 61.06159523133693, "grad_norm": 0.05844442918896675, "learning_rate": 3.896366732898099e-05, "loss": 0.001671992801129818, "step": 215120 }, { "epoch": 61.064433721260286, "grad_norm": 0.03213059902191162, "learning_rate": 3.896082883905762e-05, "loss": 0.00021991375833749772, "step": 215130 }, { "epoch": 61.06727221118365, "grad_norm": 0.015917714685201645, "learning_rate": 3.895799034913426e-05, "loss": 0.00019307900220155716, "step": 215140 }, { "epoch": 61.07011070110701, "grad_norm": 0.005449051968753338, "learning_rate": 3.8955151859210905e-05, "loss": 0.00017181970179080963, "step": 215150 }, { "epoch": 61.072949191030375, "grad_norm": 1.1079127788543701, "learning_rate": 3.895231336928754e-05, "loss": 0.00029346011579036715, "step": 215160 }, { "epoch": 61.07578768095373, "grad_norm": 0.8667793869972229, "learning_rate": 3.894947487936418e-05, "loss": 0.0069108143448829654, "step": 215170 }, { "epoch": 61.078626170877094, "grad_norm": 3.7374625205993652, "learning_rate": 3.894663638944082e-05, "loss": 0.00342666357755661, "step": 215180 }, { "epoch": 61.08146466080046, "grad_norm": 0.023040490224957466, "learning_rate": 3.8943797899517457e-05, "loss": 0.0025723539292812347, "step": 215190 }, { "epoch": 61.08430315072381, "grad_norm": 0.10249919444322586, "learning_rate": 3.89409594095941e-05, "loss": 0.0026480862870812416, "step": 215200 }, { "epoch": 61.087141640647175, "grad_norm": 0.04386807233095169, "learning_rate": 3.893812091967074e-05, "loss": 0.0007886188104748726, "step": 215210 }, { "epoch": 61.08998013057054, "grad_norm": 0.06683900207281113, "learning_rate": 3.893528242974738e-05, "loss": 0.0005765730515122413, "step": 215220 }, { "epoch": 61.092818620493894, "grad_norm": 0.13995268940925598, "learning_rate": 3.8932443939824015e-05, "loss": 0.002858196571469307, "step": 215230 }, { "epoch": 61.09565711041726, "grad_norm": 0.10146021842956543, "learning_rate": 3.892960544990065e-05, "loss": 0.0001858748495578766, "step": 215240 }, { "epoch": 61.09849560034062, "grad_norm": 0.13982917368412018, "learning_rate": 3.89267669599773e-05, "loss": 0.0004026645794510841, "step": 215250 }, { "epoch": 61.10133409026398, "grad_norm": 0.02427654340863228, "learning_rate": 3.892392847005393e-05, "loss": 0.00022544655948877334, "step": 215260 }, { "epoch": 61.10417258018734, "grad_norm": 0.14845460653305054, "learning_rate": 3.8921089980130574e-05, "loss": 0.0003768356516957283, "step": 215270 }, { "epoch": 61.1070110701107, "grad_norm": 0.021916588768363, "learning_rate": 3.8918251490207215e-05, "loss": 0.00034391209483146666, "step": 215280 }, { "epoch": 61.109849560034064, "grad_norm": 0.09462939202785492, "learning_rate": 3.891541300028385e-05, "loss": 0.0024301664903759955, "step": 215290 }, { "epoch": 61.11268804995742, "grad_norm": 0.00822365004569292, "learning_rate": 3.891257451036049e-05, "loss": 0.01724238395690918, "step": 215300 }, { "epoch": 61.11552653988078, "grad_norm": 0.013373183086514473, "learning_rate": 3.890973602043713e-05, "loss": 0.002230127342045307, "step": 215310 }, { "epoch": 61.118365029804146, "grad_norm": 2.709699869155884, "learning_rate": 3.890689753051377e-05, "loss": 0.0018983447924256324, "step": 215320 }, { "epoch": 61.1212035197275, "grad_norm": 0.09977324306964874, "learning_rate": 3.890405904059041e-05, "loss": 0.0002844927832484245, "step": 215330 }, { "epoch": 61.124042009650864, "grad_norm": 0.03055824525654316, "learning_rate": 3.890122055066704e-05, "loss": 0.0004997871816158295, "step": 215340 }, { "epoch": 61.12688049957423, "grad_norm": 11.654694557189941, "learning_rate": 3.8898382060743685e-05, "loss": 0.0028288539499044417, "step": 215350 }, { "epoch": 61.12971898949759, "grad_norm": 1.0620603561401367, "learning_rate": 3.8895543570820326e-05, "loss": 0.001242346316576004, "step": 215360 }, { "epoch": 61.132557479420946, "grad_norm": 0.04451851546764374, "learning_rate": 3.889270508089696e-05, "loss": 0.00031450409442186357, "step": 215370 }, { "epoch": 61.13539596934431, "grad_norm": 4.111286640167236, "learning_rate": 3.888986659097361e-05, "loss": 0.000819564238190651, "step": 215380 }, { "epoch": 61.13823445926767, "grad_norm": 0.7345172166824341, "learning_rate": 3.888702810105024e-05, "loss": 0.00025739409029483794, "step": 215390 }, { "epoch": 61.14107294919103, "grad_norm": 0.11299198865890503, "learning_rate": 3.888418961112688e-05, "loss": 0.0002423936501145363, "step": 215400 }, { "epoch": 61.14391143911439, "grad_norm": 0.025248145684599876, "learning_rate": 3.8881351121203526e-05, "loss": 0.0002275649458169937, "step": 215410 }, { "epoch": 61.14674992903775, "grad_norm": 0.11199043691158295, "learning_rate": 3.887851263128016e-05, "loss": 0.00026700273156166077, "step": 215420 }, { "epoch": 61.149588418961116, "grad_norm": 0.23489168286323547, "learning_rate": 3.88756741413568e-05, "loss": 0.0002180822193622589, "step": 215430 }, { "epoch": 61.15242690888447, "grad_norm": 0.030860157683491707, "learning_rate": 3.887283565143344e-05, "loss": 0.00043433494865894315, "step": 215440 }, { "epoch": 61.155265398807835, "grad_norm": 0.11463258415460587, "learning_rate": 3.886999716151008e-05, "loss": 0.0001647079363465309, "step": 215450 }, { "epoch": 61.1581038887312, "grad_norm": 0.03646363690495491, "learning_rate": 3.886715867158672e-05, "loss": 0.00045794956386089323, "step": 215460 }, { "epoch": 61.16094237865455, "grad_norm": 0.0644049420952797, "learning_rate": 3.8864320181663354e-05, "loss": 0.0005653230473399162, "step": 215470 }, { "epoch": 61.163780868577916, "grad_norm": 0.0036862005945295095, "learning_rate": 3.8861481691739995e-05, "loss": 0.0005882274359464646, "step": 215480 }, { "epoch": 61.16661935850128, "grad_norm": 0.2289619743824005, "learning_rate": 3.885864320181664e-05, "loss": 0.00014341063797473907, "step": 215490 }, { "epoch": 61.169457848424635, "grad_norm": 0.03673364594578743, "learning_rate": 3.885580471189327e-05, "loss": 0.00042535662651062013, "step": 215500 }, { "epoch": 61.169457848424635, "eval_accuracy": 0.9795256565142748, "eval_loss": 0.07414978742599487, "eval_runtime": 32.228, "eval_samples_per_second": 487.992, "eval_steps_per_second": 7.633, "step": 215500 }, { "epoch": 61.172296338348, "grad_norm": 0.03363264352083206, "learning_rate": 3.885296622196992e-05, "loss": 0.0003963438794016838, "step": 215510 }, { "epoch": 61.17513482827136, "grad_norm": 0.23795604705810547, "learning_rate": 3.8850127732046554e-05, "loss": 0.00019614100456237792, "step": 215520 }, { "epoch": 61.177973318194724, "grad_norm": 0.05833710730075836, "learning_rate": 3.884728924212319e-05, "loss": 0.00027827657759189607, "step": 215530 }, { "epoch": 61.18081180811808, "grad_norm": 0.07046153396368027, "learning_rate": 3.884445075219983e-05, "loss": 0.0001988256350159645, "step": 215540 }, { "epoch": 61.18365029804144, "grad_norm": 0.03421470522880554, "learning_rate": 3.884161226227647e-05, "loss": 0.0002629075199365616, "step": 215550 }, { "epoch": 61.186488787964805, "grad_norm": 0.02755540795624256, "learning_rate": 3.883877377235311e-05, "loss": 0.00019657928496599197, "step": 215560 }, { "epoch": 61.18932727788816, "grad_norm": 0.379353791475296, "learning_rate": 3.883593528242975e-05, "loss": 0.0002679511904716492, "step": 215570 }, { "epoch": 61.192165767811524, "grad_norm": 0.020547306165099144, "learning_rate": 3.883309679250639e-05, "loss": 0.0023767832666635514, "step": 215580 }, { "epoch": 61.19500425773489, "grad_norm": 0.02539360150694847, "learning_rate": 3.883025830258303e-05, "loss": 8.322466164827347e-05, "step": 215590 }, { "epoch": 61.19784274765824, "grad_norm": 0.01864849030971527, "learning_rate": 3.8827419812659665e-05, "loss": 0.00016844216734170913, "step": 215600 }, { "epoch": 61.200681237581605, "grad_norm": 0.045883096754550934, "learning_rate": 3.8824581322736306e-05, "loss": 0.0001425381749868393, "step": 215610 }, { "epoch": 61.20351972750497, "grad_norm": 0.004148701671510935, "learning_rate": 3.882174283281295e-05, "loss": 0.0004144530743360519, "step": 215620 }, { "epoch": 61.20635821742833, "grad_norm": 0.025514276698231697, "learning_rate": 3.881890434288958e-05, "loss": 0.0004511922597885132, "step": 215630 }, { "epoch": 61.20919670735169, "grad_norm": 0.05463980510830879, "learning_rate": 3.8816065852966223e-05, "loss": 0.0008060790598392487, "step": 215640 }, { "epoch": 61.21203519727505, "grad_norm": 0.03285587206482887, "learning_rate": 3.8813227363042865e-05, "loss": 0.002129584550857544, "step": 215650 }, { "epoch": 61.21487368719841, "grad_norm": 0.05309877544641495, "learning_rate": 3.88103888731195e-05, "loss": 0.0002880467101931572, "step": 215660 }, { "epoch": 61.21771217712177, "grad_norm": 0.051354002207517624, "learning_rate": 3.880755038319614e-05, "loss": 0.0003772500902414322, "step": 215670 }, { "epoch": 61.22055066704513, "grad_norm": 0.12273658812046051, "learning_rate": 3.880471189327278e-05, "loss": 0.000333344005048275, "step": 215680 }, { "epoch": 61.223389156968494, "grad_norm": 0.35780057311058044, "learning_rate": 3.8801873403349424e-05, "loss": 0.0004122009500861168, "step": 215690 }, { "epoch": 61.22622764689185, "grad_norm": 0.010805453173816204, "learning_rate": 3.879903491342606e-05, "loss": 0.0022517744451761245, "step": 215700 }, { "epoch": 61.22906613681521, "grad_norm": 0.06760265678167343, "learning_rate": 3.87961964235027e-05, "loss": 0.00019772257655858994, "step": 215710 }, { "epoch": 61.231904626738576, "grad_norm": 0.004587364383041859, "learning_rate": 3.879335793357934e-05, "loss": 0.00012964140623807907, "step": 215720 }, { "epoch": 61.23474311666194, "grad_norm": 0.07136555016040802, "learning_rate": 3.8790519443655976e-05, "loss": 0.00017781164497137069, "step": 215730 }, { "epoch": 61.237581606585294, "grad_norm": 0.01658431813120842, "learning_rate": 3.878768095373262e-05, "loss": 0.0004226746037602425, "step": 215740 }, { "epoch": 61.24042009650866, "grad_norm": 0.009985741227865219, "learning_rate": 3.878484246380926e-05, "loss": 0.00011813361197710037, "step": 215750 }, { "epoch": 61.24325858643202, "grad_norm": 0.05464964359998703, "learning_rate": 3.878200397388589e-05, "loss": 0.00017763879150152208, "step": 215760 }, { "epoch": 61.246097076355376, "grad_norm": 0.01882336288690567, "learning_rate": 3.8779165483962534e-05, "loss": 0.00017423294484615327, "step": 215770 }, { "epoch": 61.24893556627874, "grad_norm": 0.032023943960666656, "learning_rate": 3.8776326994039176e-05, "loss": 0.00017803702503442763, "step": 215780 }, { "epoch": 61.2517740562021, "grad_norm": 0.008610665798187256, "learning_rate": 3.877348850411581e-05, "loss": 0.00021099820733070374, "step": 215790 }, { "epoch": 61.254612546125465, "grad_norm": 0.004117421340197325, "learning_rate": 3.877065001419245e-05, "loss": 0.00021890066564083098, "step": 215800 }, { "epoch": 61.25745103604882, "grad_norm": 0.030683910474181175, "learning_rate": 3.876781152426909e-05, "loss": 0.00015872325748205184, "step": 215810 }, { "epoch": 61.26028952597218, "grad_norm": 0.08489096164703369, "learning_rate": 3.876497303434573e-05, "loss": 0.00024553630501031873, "step": 215820 }, { "epoch": 61.263128015895546, "grad_norm": 0.03258201479911804, "learning_rate": 3.876213454442237e-05, "loss": 0.00021523721516132356, "step": 215830 }, { "epoch": 61.2659665058189, "grad_norm": 0.029003724455833435, "learning_rate": 3.8759296054499004e-05, "loss": 0.000756535679101944, "step": 215840 }, { "epoch": 61.268804995742265, "grad_norm": 0.008857635781168938, "learning_rate": 3.875645756457565e-05, "loss": 0.0004236387088894844, "step": 215850 }, { "epoch": 61.27164348566563, "grad_norm": 0.0161388348788023, "learning_rate": 3.8753619074652286e-05, "loss": 0.0008206432685256004, "step": 215860 }, { "epoch": 61.274481975588984, "grad_norm": 1.2961947917938232, "learning_rate": 3.875078058472892e-05, "loss": 0.003052062168717384, "step": 215870 }, { "epoch": 61.27732046551235, "grad_norm": 0.07350016385316849, "learning_rate": 3.874794209480557e-05, "loss": 0.00021300595253705978, "step": 215880 }, { "epoch": 61.28015895543571, "grad_norm": 0.0889420211315155, "learning_rate": 3.8745103604882204e-05, "loss": 0.000515422597527504, "step": 215890 }, { "epoch": 61.28299744535907, "grad_norm": 0.725948691368103, "learning_rate": 3.8742265114958845e-05, "loss": 0.0008290417492389679, "step": 215900 }, { "epoch": 61.28583593528243, "grad_norm": 0.13537763059139252, "learning_rate": 3.8739426625035486e-05, "loss": 0.00027867071330547335, "step": 215910 }, { "epoch": 61.28867442520579, "grad_norm": 0.01632503606379032, "learning_rate": 3.873658813511212e-05, "loss": 0.0034831054508686066, "step": 215920 }, { "epoch": 61.291512915129154, "grad_norm": 0.08921882510185242, "learning_rate": 3.873374964518876e-05, "loss": 0.01066528782248497, "step": 215930 }, { "epoch": 61.29435140505251, "grad_norm": 1.153261661529541, "learning_rate": 3.87309111552654e-05, "loss": 0.019118335843086243, "step": 215940 }, { "epoch": 61.29718989497587, "grad_norm": 0.031276777386665344, "learning_rate": 3.872807266534204e-05, "loss": 0.00027886852622032164, "step": 215950 }, { "epoch": 61.300028384899235, "grad_norm": 0.33014097809791565, "learning_rate": 3.872523417541868e-05, "loss": 0.0037278912961483, "step": 215960 }, { "epoch": 61.30286687482259, "grad_norm": 0.02982609160244465, "learning_rate": 3.8722395685495314e-05, "loss": 0.0031723715364933014, "step": 215970 }, { "epoch": 61.305705364745954, "grad_norm": 0.04600028693675995, "learning_rate": 3.871955719557196e-05, "loss": 0.0001853032037615776, "step": 215980 }, { "epoch": 61.30854385466932, "grad_norm": 0.0322023406624794, "learning_rate": 3.87167187056486e-05, "loss": 0.0008390305563807487, "step": 215990 }, { "epoch": 61.31138234459268, "grad_norm": 0.02932199090719223, "learning_rate": 3.871388021572523e-05, "loss": 0.0013613628223538398, "step": 216000 }, { "epoch": 61.31138234459268, "eval_accuracy": 0.9746296178546449, "eval_loss": 0.10100961476564407, "eval_runtime": 32.8879, "eval_samples_per_second": 478.201, "eval_steps_per_second": 7.48, "step": 216000 }, { "epoch": 61.314220834516036, "grad_norm": 0.10337033867835999, "learning_rate": 3.871104172580188e-05, "loss": 0.0007008964195847512, "step": 216010 }, { "epoch": 61.3170593244394, "grad_norm": 0.5335471034049988, "learning_rate": 3.8708203235878514e-05, "loss": 0.0003115927800536156, "step": 216020 }, { "epoch": 61.31989781436276, "grad_norm": 14.409452438354492, "learning_rate": 3.8705364745955156e-05, "loss": 0.0036960549652576445, "step": 216030 }, { "epoch": 61.32273630428612, "grad_norm": 3.6708271503448486, "learning_rate": 3.870252625603179e-05, "loss": 0.00253945030272007, "step": 216040 }, { "epoch": 61.32557479420948, "grad_norm": 0.005125561263412237, "learning_rate": 3.869968776610843e-05, "loss": 0.00023162476718425752, "step": 216050 }, { "epoch": 61.32841328413284, "grad_norm": 0.025265710428357124, "learning_rate": 3.869684927618507e-05, "loss": 0.003088388778269291, "step": 216060 }, { "epoch": 61.3312517740562, "grad_norm": 0.19009411334991455, "learning_rate": 3.869401078626171e-05, "loss": 0.00037227123975753785, "step": 216070 }, { "epoch": 61.33409026397956, "grad_norm": 4.930599212646484, "learning_rate": 3.869117229633835e-05, "loss": 0.012727208435535431, "step": 216080 }, { "epoch": 61.336928753902924, "grad_norm": 0.7179930210113525, "learning_rate": 3.868833380641499e-05, "loss": 0.01046122908592224, "step": 216090 }, { "epoch": 61.33976724382629, "grad_norm": 0.1588418036699295, "learning_rate": 3.8685495316491625e-05, "loss": 0.009204002469778061, "step": 216100 }, { "epoch": 61.34260573374964, "grad_norm": 0.45971256494522095, "learning_rate": 3.8682656826568266e-05, "loss": 0.06700435876846314, "step": 216110 }, { "epoch": 61.345444223673006, "grad_norm": 1.2575825452804565, "learning_rate": 3.867981833664491e-05, "loss": 0.00048235543072223664, "step": 216120 }, { "epoch": 61.34828271359637, "grad_norm": 0.7565178871154785, "learning_rate": 3.867697984672154e-05, "loss": 0.0031509183347225187, "step": 216130 }, { "epoch": 61.351121203519725, "grad_norm": 0.7258362174034119, "learning_rate": 3.8674141356798184e-05, "loss": 0.0016086721792817117, "step": 216140 }, { "epoch": 61.35395969344309, "grad_norm": 0.565324604511261, "learning_rate": 3.8671302866874825e-05, "loss": 0.000313415564596653, "step": 216150 }, { "epoch": 61.35679818336645, "grad_norm": 3.1536357402801514, "learning_rate": 3.8668464376951467e-05, "loss": 0.0015474297106266022, "step": 216160 }, { "epoch": 61.35963667328981, "grad_norm": 0.2877085506916046, "learning_rate": 3.86656258870281e-05, "loss": 0.0007391052320599556, "step": 216170 }, { "epoch": 61.36247516321317, "grad_norm": 3.4423561096191406, "learning_rate": 3.866278739710474e-05, "loss": 0.0009197505190968513, "step": 216180 }, { "epoch": 61.36531365313653, "grad_norm": 0.3582790195941925, "learning_rate": 3.8659948907181384e-05, "loss": 0.004711570590734482, "step": 216190 }, { "epoch": 61.368152143059895, "grad_norm": 0.12755458056926727, "learning_rate": 3.865711041725802e-05, "loss": 0.00040833484381437303, "step": 216200 }, { "epoch": 61.37099063298325, "grad_norm": 0.02035434916615486, "learning_rate": 3.865427192733466e-05, "loss": 0.00023771263659000397, "step": 216210 }, { "epoch": 61.37382912290661, "grad_norm": 0.07076713442802429, "learning_rate": 3.86514334374113e-05, "loss": 0.0015757745131850243, "step": 216220 }, { "epoch": 61.376667612829976, "grad_norm": 0.06178713217377663, "learning_rate": 3.8648594947487936e-05, "loss": 0.0005832139402627945, "step": 216230 }, { "epoch": 61.37950610275333, "grad_norm": 9.393887519836426, "learning_rate": 3.864575645756458e-05, "loss": 0.0054584208875894545, "step": 216240 }, { "epoch": 61.382344592676695, "grad_norm": 0.3173576295375824, "learning_rate": 3.864291796764122e-05, "loss": 0.001106717437505722, "step": 216250 }, { "epoch": 61.38518308260006, "grad_norm": 0.036778200417757034, "learning_rate": 3.864007947771785e-05, "loss": 0.0002189323306083679, "step": 216260 }, { "epoch": 61.38802157252342, "grad_norm": 0.023009805008769035, "learning_rate": 3.8637240987794495e-05, "loss": 0.0003602391108870506, "step": 216270 }, { "epoch": 61.39086006244678, "grad_norm": 0.0994749590754509, "learning_rate": 3.8634402497871136e-05, "loss": 0.00029295478016138076, "step": 216280 }, { "epoch": 61.39369855237014, "grad_norm": 0.5378846526145935, "learning_rate": 3.863156400794777e-05, "loss": 0.000308367982506752, "step": 216290 }, { "epoch": 61.3965370422935, "grad_norm": 0.007245919201523066, "learning_rate": 3.862872551802441e-05, "loss": 0.0004074612632393837, "step": 216300 }, { "epoch": 61.39937553221686, "grad_norm": 0.3359479010105133, "learning_rate": 3.862588702810105e-05, "loss": 0.0004731917753815651, "step": 216310 }, { "epoch": 61.40221402214022, "grad_norm": 0.04678356647491455, "learning_rate": 3.8623048538177695e-05, "loss": 0.005140122026205063, "step": 216320 }, { "epoch": 61.405052512063584, "grad_norm": 12.768224716186523, "learning_rate": 3.862021004825433e-05, "loss": 0.0012680761516094207, "step": 216330 }, { "epoch": 61.40789100198694, "grad_norm": 0.032740410417318344, "learning_rate": 3.861737155833097e-05, "loss": 0.0010481109842658044, "step": 216340 }, { "epoch": 61.4107294919103, "grad_norm": 0.8256431818008423, "learning_rate": 3.861453306840761e-05, "loss": 0.0005402708426117897, "step": 216350 }, { "epoch": 61.413567981833665, "grad_norm": 0.030901433899998665, "learning_rate": 3.861169457848425e-05, "loss": 0.0001170651987195015, "step": 216360 }, { "epoch": 61.41640647175703, "grad_norm": 0.0486987829208374, "learning_rate": 3.860885608856089e-05, "loss": 0.0001894034445285797, "step": 216370 }, { "epoch": 61.419244961680384, "grad_norm": 0.03201828524470329, "learning_rate": 3.860601759863753e-05, "loss": 0.0003419985994696617, "step": 216380 }, { "epoch": 61.42208345160375, "grad_norm": 0.0667727068066597, "learning_rate": 3.8603179108714164e-05, "loss": 0.00010155383497476578, "step": 216390 }, { "epoch": 61.42492194152711, "grad_norm": 0.4444575905799866, "learning_rate": 3.8600340618790805e-05, "loss": 0.0012884242460131645, "step": 216400 }, { "epoch": 61.427760431450466, "grad_norm": 0.14145974814891815, "learning_rate": 3.859750212886745e-05, "loss": 0.0002982020378112793, "step": 216410 }, { "epoch": 61.43059892137383, "grad_norm": 0.5499688386917114, "learning_rate": 3.859466363894408e-05, "loss": 0.0002449605613946915, "step": 216420 }, { "epoch": 61.43343741129719, "grad_norm": 0.4462849199771881, "learning_rate": 3.859182514902072e-05, "loss": 0.00024079959839582444, "step": 216430 }, { "epoch": 61.43627590122055, "grad_norm": 0.04889913648366928, "learning_rate": 3.8588986659097364e-05, "loss": 0.0002414163202047348, "step": 216440 }, { "epoch": 61.43911439114391, "grad_norm": 0.08770814538002014, "learning_rate": 3.8586148169174005e-05, "loss": 0.001789306290447712, "step": 216450 }, { "epoch": 61.44195288106727, "grad_norm": 0.06726711243391037, "learning_rate": 3.858330967925064e-05, "loss": 0.0001368945464491844, "step": 216460 }, { "epoch": 61.444791370990636, "grad_norm": 0.0825401023030281, "learning_rate": 3.8580471189327275e-05, "loss": 0.00024643111974000933, "step": 216470 }, { "epoch": 61.44762986091399, "grad_norm": 0.38674819469451904, "learning_rate": 3.857763269940392e-05, "loss": 0.0002783354371786118, "step": 216480 }, { "epoch": 61.450468350837355, "grad_norm": 0.003995418548583984, "learning_rate": 3.857479420948056e-05, "loss": 0.0004913210868835449, "step": 216490 }, { "epoch": 61.45330684076072, "grad_norm": 0.08872446417808533, "learning_rate": 3.85719557195572e-05, "loss": 0.0002280188724398613, "step": 216500 }, { "epoch": 61.45330684076072, "eval_accuracy": 0.9781903732434667, "eval_loss": 0.07973441481590271, "eval_runtime": 32.5395, "eval_samples_per_second": 483.32, "eval_steps_per_second": 7.56, "step": 216500 }, { "epoch": 61.45614533068407, "grad_norm": 0.018108388409018517, "learning_rate": 3.856911722963384e-05, "loss": 0.0005022900179028511, "step": 216510 }, { "epoch": 61.458983820607436, "grad_norm": 0.015644313767552376, "learning_rate": 3.8566278739710475e-05, "loss": 0.00031647440046072005, "step": 216520 }, { "epoch": 61.4618223105308, "grad_norm": 0.038114290684461594, "learning_rate": 3.8563440249787116e-05, "loss": 0.0005209870636463165, "step": 216530 }, { "epoch": 61.464660800454155, "grad_norm": 0.0938488319516182, "learning_rate": 3.856060175986376e-05, "loss": 0.0017789263278245927, "step": 216540 }, { "epoch": 61.46749929037752, "grad_norm": 0.05963509902358055, "learning_rate": 3.855776326994039e-05, "loss": 0.000708799809217453, "step": 216550 }, { "epoch": 61.47033778030088, "grad_norm": 0.16830037534236908, "learning_rate": 3.8554924780017033e-05, "loss": 0.0007388941943645477, "step": 216560 }, { "epoch": 61.47317627022424, "grad_norm": 1.579108715057373, "learning_rate": 3.855208629009367e-05, "loss": 0.0012810301035642625, "step": 216570 }, { "epoch": 61.4760147601476, "grad_norm": 0.032023195177316666, "learning_rate": 3.854924780017031e-05, "loss": 0.0009687226265668869, "step": 216580 }, { "epoch": 61.47885325007096, "grad_norm": 0.08872319757938385, "learning_rate": 3.854640931024695e-05, "loss": 0.0015518367290496826, "step": 216590 }, { "epoch": 61.481691739994325, "grad_norm": 1.124248743057251, "learning_rate": 3.8543570820323585e-05, "loss": 0.0030122024938464163, "step": 216600 }, { "epoch": 61.48453022991768, "grad_norm": 0.06267736107110977, "learning_rate": 3.8540732330400234e-05, "loss": 0.002822236716747284, "step": 216610 }, { "epoch": 61.487368719841044, "grad_norm": 0.029576007276773453, "learning_rate": 3.853789384047687e-05, "loss": 0.00046252775937318804, "step": 216620 }, { "epoch": 61.49020720976441, "grad_norm": 0.07619720697402954, "learning_rate": 3.85350553505535e-05, "loss": 0.0010661303997039795, "step": 216630 }, { "epoch": 61.49304569968777, "grad_norm": 0.13890773057937622, "learning_rate": 3.853221686063015e-05, "loss": 0.0005632080137729645, "step": 216640 }, { "epoch": 61.495884189611125, "grad_norm": 0.03242727369070053, "learning_rate": 3.8529378370706786e-05, "loss": 0.0021468615159392357, "step": 216650 }, { "epoch": 61.49872267953449, "grad_norm": 0.036812588572502136, "learning_rate": 3.852653988078343e-05, "loss": 0.0024068178609013557, "step": 216660 }, { "epoch": 61.50156116945785, "grad_norm": 2.6250011920928955, "learning_rate": 3.852370139086006e-05, "loss": 0.0007956758141517639, "step": 216670 }, { "epoch": 61.50439965938121, "grad_norm": 0.10710686445236206, "learning_rate": 3.85208629009367e-05, "loss": 0.0008166905492544175, "step": 216680 }, { "epoch": 61.50723814930457, "grad_norm": 0.028865627944469452, "learning_rate": 3.8518024411013344e-05, "loss": 0.00037157442420721054, "step": 216690 }, { "epoch": 61.51007663922793, "grad_norm": 0.06609141826629639, "learning_rate": 3.851518592108998e-05, "loss": 0.00530930832028389, "step": 216700 }, { "epoch": 61.51291512915129, "grad_norm": 0.011858562007546425, "learning_rate": 3.851234743116662e-05, "loss": 0.00030265618115663526, "step": 216710 }, { "epoch": 61.51575361907465, "grad_norm": 0.0744750127196312, "learning_rate": 3.850950894124326e-05, "loss": 0.0016878196969628335, "step": 216720 }, { "epoch": 61.518592108998014, "grad_norm": 2.3313846588134766, "learning_rate": 3.8506670451319896e-05, "loss": 0.0006332077085971832, "step": 216730 }, { "epoch": 61.52143059892138, "grad_norm": 0.09978590905666351, "learning_rate": 3.8503831961396544e-05, "loss": 0.0015172585844993591, "step": 216740 }, { "epoch": 61.52426908884473, "grad_norm": 0.3876282572746277, "learning_rate": 3.850099347147318e-05, "loss": 0.0030840219929814338, "step": 216750 }, { "epoch": 61.527107578768096, "grad_norm": 0.8153766989707947, "learning_rate": 3.8498154981549814e-05, "loss": 0.0008133532479405403, "step": 216760 }, { "epoch": 61.52994606869146, "grad_norm": 4.6972880363464355, "learning_rate": 3.8495316491626455e-05, "loss": 0.003050798922777176, "step": 216770 }, { "epoch": 61.532784558614814, "grad_norm": 0.04355913773179054, "learning_rate": 3.8492478001703096e-05, "loss": 0.003405255824327469, "step": 216780 }, { "epoch": 61.53562304853818, "grad_norm": 0.017452916130423546, "learning_rate": 3.848963951177974e-05, "loss": 0.0009114744141697883, "step": 216790 }, { "epoch": 61.53846153846154, "grad_norm": 0.2788509130477905, "learning_rate": 3.848680102185637e-05, "loss": 0.001282534934580326, "step": 216800 }, { "epoch": 61.541300028384896, "grad_norm": 0.04370063915848732, "learning_rate": 3.8484246380925345e-05, "loss": 0.014765578508377075, "step": 216810 }, { "epoch": 61.54413851830826, "grad_norm": 0.47571852803230286, "learning_rate": 3.8481407891001994e-05, "loss": 0.0013713359832763672, "step": 216820 }, { "epoch": 61.54697700823162, "grad_norm": 0.20616039633750916, "learning_rate": 3.847856940107863e-05, "loss": 0.0008811812847852706, "step": 216830 }, { "epoch": 61.549815498154985, "grad_norm": 0.1657957136631012, "learning_rate": 3.847573091115527e-05, "loss": 0.0005546921864151954, "step": 216840 }, { "epoch": 61.55265398807834, "grad_norm": 5.899930477142334, "learning_rate": 3.8472892421231904e-05, "loss": 0.0016560763120651244, "step": 216850 }, { "epoch": 61.5554924780017, "grad_norm": 0.1331617683172226, "learning_rate": 3.8470053931308546e-05, "loss": 0.0008631845936179161, "step": 216860 }, { "epoch": 61.558330967925066, "grad_norm": 0.025597110390663147, "learning_rate": 3.846721544138519e-05, "loss": 0.0007656343281269073, "step": 216870 }, { "epoch": 61.56116945784842, "grad_norm": 0.03603353351354599, "learning_rate": 3.846437695146182e-05, "loss": 0.0002874046564102173, "step": 216880 }, { "epoch": 61.564007947771785, "grad_norm": 0.8880316615104675, "learning_rate": 3.846153846153846e-05, "loss": 0.0023166486993432046, "step": 216890 }, { "epoch": 61.56684643769515, "grad_norm": 0.039738018065690994, "learning_rate": 3.8458699971615104e-05, "loss": 0.0010177794843912124, "step": 216900 }, { "epoch": 61.56968492761851, "grad_norm": 0.2883826494216919, "learning_rate": 3.845586148169174e-05, "loss": 0.0002113472670316696, "step": 216910 }, { "epoch": 61.572523417541866, "grad_norm": 0.06658594310283661, "learning_rate": 3.845302299176839e-05, "loss": 0.0016385991126298904, "step": 216920 }, { "epoch": 61.57536190746523, "grad_norm": 0.16823053359985352, "learning_rate": 3.845018450184502e-05, "loss": 0.0006068894639611244, "step": 216930 }, { "epoch": 61.57820039738859, "grad_norm": 2.0332601070404053, "learning_rate": 3.8447346011921656e-05, "loss": 0.0008418796584010124, "step": 216940 }, { "epoch": 61.58103888731195, "grad_norm": 0.09676778316497803, "learning_rate": 3.84445075219983e-05, "loss": 0.00016237571835517884, "step": 216950 }, { "epoch": 61.58387737723531, "grad_norm": 0.1492823213338852, "learning_rate": 3.844166903207494e-05, "loss": 0.0004834461957216263, "step": 216960 }, { "epoch": 61.586715867158674, "grad_norm": 0.044536590576171875, "learning_rate": 3.843883054215158e-05, "loss": 0.000254264660179615, "step": 216970 }, { "epoch": 61.58955435708203, "grad_norm": 0.03837936371564865, "learning_rate": 3.8435992052228215e-05, "loss": 0.0003126336261630058, "step": 216980 }, { "epoch": 61.59239284700539, "grad_norm": 0.013523315079510212, "learning_rate": 3.8433153562304856e-05, "loss": 0.00010235235095024109, "step": 216990 }, { "epoch": 61.595231336928755, "grad_norm": 0.031085077673196793, "learning_rate": 3.84303150723815e-05, "loss": 0.00018512457609176636, "step": 217000 }, { "epoch": 61.595231336928755, "eval_accuracy": 0.9781267883258091, "eval_loss": 0.07921256870031357, "eval_runtime": 32.7485, "eval_samples_per_second": 480.236, "eval_steps_per_second": 7.512, "step": 217000 }, { "epoch": 61.59806982685212, "grad_norm": 0.032644327729940414, "learning_rate": 3.842747658245813e-05, "loss": 0.0008241433650255203, "step": 217010 }, { "epoch": 61.600908316775474, "grad_norm": 0.0199650377035141, "learning_rate": 3.8424638092534774e-05, "loss": 0.00022299699485301973, "step": 217020 }, { "epoch": 61.60374680669884, "grad_norm": 1.0879168510437012, "learning_rate": 3.8421799602611415e-05, "loss": 0.00024086497724056243, "step": 217030 }, { "epoch": 61.6065852966222, "grad_norm": 0.016900500282645226, "learning_rate": 3.841896111268805e-05, "loss": 0.00015025362372398377, "step": 217040 }, { "epoch": 61.609423786545555, "grad_norm": 0.10442055761814117, "learning_rate": 3.841612262276469e-05, "loss": 0.001385180465877056, "step": 217050 }, { "epoch": 61.61226227646892, "grad_norm": 0.3163195848464966, "learning_rate": 3.841328413284133e-05, "loss": 0.0011114569380879403, "step": 217060 }, { "epoch": 61.61510076639228, "grad_norm": 0.07231635600328445, "learning_rate": 3.841044564291797e-05, "loss": 0.0003801813349127769, "step": 217070 }, { "epoch": 61.61793925631564, "grad_norm": 0.0867600291967392, "learning_rate": 3.840760715299461e-05, "loss": 0.00032310187816619873, "step": 217080 }, { "epoch": 61.620777746239, "grad_norm": 0.14584192633628845, "learning_rate": 3.840476866307125e-05, "loss": 0.0011425664648413657, "step": 217090 }, { "epoch": 61.62361623616236, "grad_norm": 0.4013562798500061, "learning_rate": 3.8401930173147884e-05, "loss": 0.00196463018655777, "step": 217100 }, { "epoch": 61.626454726085726, "grad_norm": 0.03987845033407211, "learning_rate": 3.8399091683224526e-05, "loss": 0.0003237469121813774, "step": 217110 }, { "epoch": 61.62929321600908, "grad_norm": 0.4696276783943176, "learning_rate": 3.839625319330117e-05, "loss": 0.00028607696294784545, "step": 217120 }, { "epoch": 61.632131705932444, "grad_norm": 0.007483228109776974, "learning_rate": 3.839341470337781e-05, "loss": 0.0009665843099355698, "step": 217130 }, { "epoch": 61.63497019585581, "grad_norm": 0.06793699413537979, "learning_rate": 3.839057621345444e-05, "loss": 0.0004978250712156296, "step": 217140 }, { "epoch": 61.63780868577916, "grad_norm": 0.03911016136407852, "learning_rate": 3.838773772353108e-05, "loss": 0.0018804160878062247, "step": 217150 }, { "epoch": 61.640647175702526, "grad_norm": 0.01066668052226305, "learning_rate": 3.8384899233607726e-05, "loss": 0.0006683653220534325, "step": 217160 }, { "epoch": 61.64348566562589, "grad_norm": 0.06693126261234283, "learning_rate": 3.838206074368436e-05, "loss": 0.0006136143580079079, "step": 217170 }, { "epoch": 61.646324155549244, "grad_norm": 0.015554950572550297, "learning_rate": 3.8379222253761e-05, "loss": 0.00018095728009939193, "step": 217180 }, { "epoch": 61.64916264547261, "grad_norm": 0.013605231419205666, "learning_rate": 3.837638376383764e-05, "loss": 0.0033058110624551774, "step": 217190 }, { "epoch": 61.65200113539597, "grad_norm": 0.03259960189461708, "learning_rate": 3.837354527391428e-05, "loss": 0.000248744897544384, "step": 217200 }, { "epoch": 61.65483962531933, "grad_norm": 0.4730927348136902, "learning_rate": 3.837070678399092e-05, "loss": 0.0006757820025086403, "step": 217210 }, { "epoch": 61.65767811524269, "grad_norm": 0.009730007499456406, "learning_rate": 3.836786829406756e-05, "loss": 0.0009155202656984329, "step": 217220 }, { "epoch": 61.66051660516605, "grad_norm": 2.490445375442505, "learning_rate": 3.8365029804144195e-05, "loss": 0.005714148283004761, "step": 217230 }, { "epoch": 61.663355095089415, "grad_norm": 0.02824172005057335, "learning_rate": 3.8362191314220836e-05, "loss": 0.0009842133149504662, "step": 217240 }, { "epoch": 61.66619358501277, "grad_norm": 0.13608810305595398, "learning_rate": 3.835935282429747e-05, "loss": 0.0010041950270533561, "step": 217250 }, { "epoch": 61.66903207493613, "grad_norm": 0.12815061211585999, "learning_rate": 3.835651433437412e-05, "loss": 0.0009767692536115646, "step": 217260 }, { "epoch": 61.671870564859496, "grad_norm": 0.17982962727546692, "learning_rate": 3.8353675844450754e-05, "loss": 0.002981898933649063, "step": 217270 }, { "epoch": 61.67470905478285, "grad_norm": 0.04425077885389328, "learning_rate": 3.835083735452739e-05, "loss": 0.0013688374310731888, "step": 217280 }, { "epoch": 61.677547544706215, "grad_norm": 0.2152532935142517, "learning_rate": 3.8347998864604037e-05, "loss": 0.0006952086463570595, "step": 217290 }, { "epoch": 61.68038603462958, "grad_norm": 0.2223113775253296, "learning_rate": 3.834516037468067e-05, "loss": 0.0007308265194296837, "step": 217300 }, { "epoch": 61.68322452455294, "grad_norm": 0.0898200273513794, "learning_rate": 3.834232188475731e-05, "loss": 0.007946072518825531, "step": 217310 }, { "epoch": 61.6860630144763, "grad_norm": 13.807066917419434, "learning_rate": 3.8339483394833954e-05, "loss": 0.0027885353192687035, "step": 217320 }, { "epoch": 61.68890150439966, "grad_norm": 0.048954565078020096, "learning_rate": 3.833664490491059e-05, "loss": 0.0003602685406804085, "step": 217330 }, { "epoch": 61.69173999432302, "grad_norm": 0.07136213779449463, "learning_rate": 3.833380641498723e-05, "loss": 0.00544116348028183, "step": 217340 }, { "epoch": 61.69457848424638, "grad_norm": 0.008069753646850586, "learning_rate": 3.8330967925063864e-05, "loss": 0.00019188523292541505, "step": 217350 }, { "epoch": 61.69741697416974, "grad_norm": 0.18848802149295807, "learning_rate": 3.8328129435140506e-05, "loss": 0.00023871399462223054, "step": 217360 }, { "epoch": 61.700255464093104, "grad_norm": 0.2632492184638977, "learning_rate": 3.832529094521715e-05, "loss": 0.0003208689391613007, "step": 217370 }, { "epoch": 61.70309395401647, "grad_norm": 0.017448343336582184, "learning_rate": 3.832245245529378e-05, "loss": 0.00023610778152942656, "step": 217380 }, { "epoch": 61.70593244393982, "grad_norm": 0.24063870310783386, "learning_rate": 3.831961396537043e-05, "loss": 0.00018811449408531188, "step": 217390 }, { "epoch": 61.708770933863185, "grad_norm": 0.09818367660045624, "learning_rate": 3.8316775475447065e-05, "loss": 0.00014753080904483795, "step": 217400 }, { "epoch": 61.71160942378655, "grad_norm": 0.08642785996198654, "learning_rate": 3.83139369855237e-05, "loss": 0.00015975721180438995, "step": 217410 }, { "epoch": 61.714447913709904, "grad_norm": 0.013072864152491093, "learning_rate": 3.831109849560035e-05, "loss": 0.001008422113955021, "step": 217420 }, { "epoch": 61.71728640363327, "grad_norm": 0.017754510045051575, "learning_rate": 3.830826000567698e-05, "loss": 0.00015929248183965683, "step": 217430 }, { "epoch": 61.72012489355663, "grad_norm": 0.15794241428375244, "learning_rate": 3.830542151575362e-05, "loss": 0.00020736884325742722, "step": 217440 }, { "epoch": 61.722963383479986, "grad_norm": 0.010171509347856045, "learning_rate": 3.830258302583026e-05, "loss": 0.00011540111154317856, "step": 217450 }, { "epoch": 61.72580187340335, "grad_norm": 0.05871286243200302, "learning_rate": 3.82997445359069e-05, "loss": 0.00014006700366735457, "step": 217460 }, { "epoch": 61.72864036332671, "grad_norm": 0.04364423081278801, "learning_rate": 3.829690604598354e-05, "loss": 0.00011705216020345688, "step": 217470 }, { "epoch": 61.731478853250074, "grad_norm": 0.08226942270994186, "learning_rate": 3.8294067556060175e-05, "loss": 0.00022198930382728578, "step": 217480 }, { "epoch": 61.73431734317343, "grad_norm": 0.030558815225958824, "learning_rate": 3.8291229066136817e-05, "loss": 0.0004575246945023537, "step": 217490 }, { "epoch": 61.73715583309679, "grad_norm": 0.022238383069634438, "learning_rate": 3.828839057621346e-05, "loss": 0.00035485122352838514, "step": 217500 }, { "epoch": 61.73715583309679, "eval_accuracy": 0.9781267883258091, "eval_loss": 0.07809249311685562, "eval_runtime": 32.3747, "eval_samples_per_second": 485.781, "eval_steps_per_second": 7.599, "step": 217500 }, { "epoch": 61.739994323020156, "grad_norm": 0.04953913763165474, "learning_rate": 3.828555208629009e-05, "loss": 0.00041917525231838226, "step": 217510 }, { "epoch": 61.74283281294351, "grad_norm": 0.010976020246744156, "learning_rate": 3.8282713596366734e-05, "loss": 0.0002754954621195793, "step": 217520 }, { "epoch": 61.745671302866874, "grad_norm": 0.1964559257030487, "learning_rate": 3.8279875106443375e-05, "loss": 0.0004724917933344841, "step": 217530 }, { "epoch": 61.74850979279024, "grad_norm": 0.9222380518913269, "learning_rate": 3.827703661652001e-05, "loss": 0.0007005743682384491, "step": 217540 }, { "epoch": 61.75134828271359, "grad_norm": 0.07457002252340317, "learning_rate": 3.827419812659665e-05, "loss": 0.0008480541408061981, "step": 217550 }, { "epoch": 61.754186772636956, "grad_norm": 0.23058953881263733, "learning_rate": 3.827135963667329e-05, "loss": 0.0002143649384379387, "step": 217560 }, { "epoch": 61.75702526256032, "grad_norm": 0.018108025193214417, "learning_rate": 3.826852114674993e-05, "loss": 0.0014776678755879403, "step": 217570 }, { "epoch": 61.75986375248368, "grad_norm": 0.41060641407966614, "learning_rate": 3.826568265682657e-05, "loss": 0.0020970242097973824, "step": 217580 }, { "epoch": 61.76270224240704, "grad_norm": 0.06185018643736839, "learning_rate": 3.826284416690321e-05, "loss": 0.0004948245361447335, "step": 217590 }, { "epoch": 61.7655407323304, "grad_norm": 0.019518911838531494, "learning_rate": 3.826000567697985e-05, "loss": 0.0006813149899244309, "step": 217600 }, { "epoch": 61.76837922225376, "grad_norm": 0.030522342771291733, "learning_rate": 3.8257167187056486e-05, "loss": 0.00018682610243558884, "step": 217610 }, { "epoch": 61.77121771217712, "grad_norm": 0.07573335617780685, "learning_rate": 3.825432869713313e-05, "loss": 0.0001832040026783943, "step": 217620 }, { "epoch": 61.77405620210048, "grad_norm": 0.33344289660453796, "learning_rate": 3.825149020720977e-05, "loss": 0.00022789575159549714, "step": 217630 }, { "epoch": 61.776894692023845, "grad_norm": 0.9697441458702087, "learning_rate": 3.82486517172864e-05, "loss": 0.0006881020963191986, "step": 217640 }, { "epoch": 61.7797331819472, "grad_norm": 0.03366716578602791, "learning_rate": 3.8245813227363045e-05, "loss": 0.00013221874833106994, "step": 217650 }, { "epoch": 61.78257167187056, "grad_norm": 0.1800769418478012, "learning_rate": 3.8242974737439686e-05, "loss": 0.0001564299687743187, "step": 217660 }, { "epoch": 61.785410161793926, "grad_norm": 0.04002588614821434, "learning_rate": 3.824013624751632e-05, "loss": 0.0001950930804014206, "step": 217670 }, { "epoch": 61.78824865171729, "grad_norm": 0.07872151583433151, "learning_rate": 3.823729775759296e-05, "loss": 0.00012639742344617845, "step": 217680 }, { "epoch": 61.791087141640645, "grad_norm": 0.01501628290861845, "learning_rate": 3.8234459267669603e-05, "loss": 0.00010744854807853699, "step": 217690 }, { "epoch": 61.79392563156401, "grad_norm": 0.5224476456642151, "learning_rate": 3.823162077774624e-05, "loss": 0.00015937108546495437, "step": 217700 }, { "epoch": 61.79676412148737, "grad_norm": 0.14197589457035065, "learning_rate": 3.822878228782288e-05, "loss": 0.0002589670941233635, "step": 217710 }, { "epoch": 61.79960261141073, "grad_norm": 0.07508742809295654, "learning_rate": 3.822594379789952e-05, "loss": 0.00014753807336091994, "step": 217720 }, { "epoch": 61.80244110133409, "grad_norm": 0.028729557991027832, "learning_rate": 3.822310530797616e-05, "loss": 0.00019843950867652892, "step": 217730 }, { "epoch": 61.80527959125745, "grad_norm": 0.1470598429441452, "learning_rate": 3.82202668180528e-05, "loss": 0.0002770736813545227, "step": 217740 }, { "epoch": 61.808118081180815, "grad_norm": 0.03908504173159599, "learning_rate": 3.821742832812943e-05, "loss": 0.0002984924241900444, "step": 217750 }, { "epoch": 61.81095657110417, "grad_norm": 0.17754800617694855, "learning_rate": 3.821458983820608e-05, "loss": 0.00022840015590190886, "step": 217760 }, { "epoch": 61.813795061027534, "grad_norm": 1.180654525756836, "learning_rate": 3.8211751348282714e-05, "loss": 0.0010582447052001954, "step": 217770 }, { "epoch": 61.8166335509509, "grad_norm": 0.2780829966068268, "learning_rate": 3.8208912858359355e-05, "loss": 0.001037704385817051, "step": 217780 }, { "epoch": 61.81947204087425, "grad_norm": 0.04650702700018883, "learning_rate": 3.8206074368436e-05, "loss": 0.0002873804420232773, "step": 217790 }, { "epoch": 61.822310530797616, "grad_norm": 0.1960422396659851, "learning_rate": 3.820323587851263e-05, "loss": 0.0005781611427664757, "step": 217800 }, { "epoch": 61.82514902072098, "grad_norm": 0.1644396334886551, "learning_rate": 3.820039738858927e-05, "loss": 0.0025014590471982958, "step": 217810 }, { "epoch": 61.827987510644334, "grad_norm": 0.02390863373875618, "learning_rate": 3.8197558898665914e-05, "loss": 0.0002692295238375664, "step": 217820 }, { "epoch": 61.8308260005677, "grad_norm": 0.02874595671892166, "learning_rate": 3.819472040874255e-05, "loss": 0.00030554290860891343, "step": 217830 }, { "epoch": 61.83366449049106, "grad_norm": 0.23946499824523926, "learning_rate": 3.819188191881919e-05, "loss": 0.0008040284737944603, "step": 217840 }, { "epoch": 61.83650298041442, "grad_norm": 0.1272350549697876, "learning_rate": 3.8189043428895825e-05, "loss": 0.00016713421791791916, "step": 217850 }, { "epoch": 61.83934147033778, "grad_norm": 0.04596645012497902, "learning_rate": 3.818620493897247e-05, "loss": 0.00022974777966737746, "step": 217860 }, { "epoch": 61.84217996026114, "grad_norm": 0.09199264645576477, "learning_rate": 3.818336644904911e-05, "loss": 0.00024253036826848984, "step": 217870 }, { "epoch": 61.845018450184504, "grad_norm": 0.8560591340065002, "learning_rate": 3.818052795912574e-05, "loss": 0.001969059370458126, "step": 217880 }, { "epoch": 61.84785694010786, "grad_norm": 5.865027904510498, "learning_rate": 3.817768946920239e-05, "loss": 0.002126147411763668, "step": 217890 }, { "epoch": 61.85069543003122, "grad_norm": 0.029618939384818077, "learning_rate": 3.8174850979279025e-05, "loss": 0.00042810067534446714, "step": 217900 }, { "epoch": 61.853533919954586, "grad_norm": 1.212518334388733, "learning_rate": 3.8172012489355666e-05, "loss": 0.0010757675394415856, "step": 217910 }, { "epoch": 61.85637240987794, "grad_norm": 0.11654415726661682, "learning_rate": 3.816917399943231e-05, "loss": 0.0011976499110460282, "step": 217920 }, { "epoch": 61.859210899801305, "grad_norm": 0.8900585770606995, "learning_rate": 3.816633550950894e-05, "loss": 0.0013878131285309792, "step": 217930 }, { "epoch": 61.86204938972467, "grad_norm": 0.06424091756343842, "learning_rate": 3.8163497019585584e-05, "loss": 0.0008834740146994591, "step": 217940 }, { "epoch": 61.86488787964803, "grad_norm": 1.0066320896148682, "learning_rate": 3.816065852966222e-05, "loss": 0.0010118091478943825, "step": 217950 }, { "epoch": 61.867726369571386, "grad_norm": 0.0073267812840640545, "learning_rate": 3.815782003973886e-05, "loss": 0.006073351949453354, "step": 217960 }, { "epoch": 61.87056485949475, "grad_norm": 0.1754620224237442, "learning_rate": 3.81549815498155e-05, "loss": 0.0007122492417693139, "step": 217970 }, { "epoch": 61.87340334941811, "grad_norm": 0.2582088112831116, "learning_rate": 3.8152143059892136e-05, "loss": 0.0003916662186384201, "step": 217980 }, { "epoch": 61.87624183934147, "grad_norm": 0.08018472790718079, "learning_rate": 3.814930456996878e-05, "loss": 0.00019261129200458526, "step": 217990 }, { "epoch": 61.87908032926483, "grad_norm": 0.0822259932756424, "learning_rate": 3.814646608004542e-05, "loss": 0.0001356717199087143, "step": 218000 }, { "epoch": 61.87908032926483, "eval_accuracy": 0.9776181089845488, "eval_loss": 0.0806901752948761, "eval_runtime": 32.9001, "eval_samples_per_second": 478.023, "eval_steps_per_second": 7.477, "step": 218000 }, { "epoch": 61.88191881918819, "grad_norm": 0.10943414270877838, "learning_rate": 3.814362759012205e-05, "loss": 0.00022743958979845046, "step": 218010 }, { "epoch": 61.88475730911155, "grad_norm": 0.34840843081474304, "learning_rate": 3.81407891001987e-05, "loss": 0.00040565002709627154, "step": 218020 }, { "epoch": 61.88759579903491, "grad_norm": 0.08743405342102051, "learning_rate": 3.8137950610275336e-05, "loss": 0.00041940119117498396, "step": 218030 }, { "epoch": 61.890434288958275, "grad_norm": 0.06301245838403702, "learning_rate": 3.813511212035197e-05, "loss": 0.0002447642385959625, "step": 218040 }, { "epoch": 61.89327277888164, "grad_norm": 0.06410734355449677, "learning_rate": 3.813227363042862e-05, "loss": 0.0016137035563588142, "step": 218050 }, { "epoch": 61.896111268804994, "grad_norm": 0.5701583027839661, "learning_rate": 3.812943514050525e-05, "loss": 0.0005459537729620934, "step": 218060 }, { "epoch": 61.89894975872836, "grad_norm": 0.589994490146637, "learning_rate": 3.8126596650581894e-05, "loss": 0.0018453694880008697, "step": 218070 }, { "epoch": 61.90178824865172, "grad_norm": 0.07172094285488129, "learning_rate": 3.812375816065853e-05, "loss": 0.0007000437006354332, "step": 218080 }, { "epoch": 61.904626738575075, "grad_norm": 0.010028081014752388, "learning_rate": 3.812091967073517e-05, "loss": 0.0053277663886547085, "step": 218090 }, { "epoch": 61.90746522849844, "grad_norm": 0.16390958428382874, "learning_rate": 3.811808118081181e-05, "loss": 0.0007159151136875153, "step": 218100 }, { "epoch": 61.9103037184218, "grad_norm": 0.6031491160392761, "learning_rate": 3.8115242690888446e-05, "loss": 0.0006224658340215683, "step": 218110 }, { "epoch": 61.913142208345164, "grad_norm": 0.3407686948776245, "learning_rate": 3.811240420096509e-05, "loss": 0.0004311058670282364, "step": 218120 }, { "epoch": 61.91598069826852, "grad_norm": 0.019475551322102547, "learning_rate": 3.810956571104173e-05, "loss": 0.000987803004682064, "step": 218130 }, { "epoch": 61.91881918819188, "grad_norm": 0.19623319804668427, "learning_rate": 3.8106727221118364e-05, "loss": 0.0005460405722260475, "step": 218140 }, { "epoch": 61.921657678115245, "grad_norm": 4.246057033538818, "learning_rate": 3.810388873119501e-05, "loss": 0.0005904318764805794, "step": 218150 }, { "epoch": 61.9244961680386, "grad_norm": 0.019376257434487343, "learning_rate": 3.8101050241271646e-05, "loss": 0.001971563883125782, "step": 218160 }, { "epoch": 61.927334657961964, "grad_norm": 10.392424583435059, "learning_rate": 3.809821175134828e-05, "loss": 0.005836671218276024, "step": 218170 }, { "epoch": 61.93017314788533, "grad_norm": 1.6899350881576538, "learning_rate": 3.809537326142492e-05, "loss": 0.02230727970600128, "step": 218180 }, { "epoch": 61.93301163780868, "grad_norm": 0.08037459850311279, "learning_rate": 3.8092534771501564e-05, "loss": 0.00044489521533250807, "step": 218190 }, { "epoch": 61.935850127732046, "grad_norm": 0.01890498772263527, "learning_rate": 3.8089696281578205e-05, "loss": 0.0017836006358265878, "step": 218200 }, { "epoch": 61.93868861765541, "grad_norm": 0.035886578261852264, "learning_rate": 3.808685779165484e-05, "loss": 0.002017683908343315, "step": 218210 }, { "epoch": 61.94152710757877, "grad_norm": 0.1778830587863922, "learning_rate": 3.808401930173148e-05, "loss": 0.0005489835515618324, "step": 218220 }, { "epoch": 61.94436559750213, "grad_norm": 0.14478468894958496, "learning_rate": 3.808118081180812e-05, "loss": 0.0002561919391155243, "step": 218230 }, { "epoch": 61.94720408742549, "grad_norm": 0.7613511085510254, "learning_rate": 3.807834232188476e-05, "loss": 0.0017649397253990174, "step": 218240 }, { "epoch": 61.95004257734885, "grad_norm": 0.1005609929561615, "learning_rate": 3.80755038319614e-05, "loss": 0.001559816487133503, "step": 218250 }, { "epoch": 61.95288106727221, "grad_norm": 0.13863050937652588, "learning_rate": 3.807266534203804e-05, "loss": 0.0014277094975113868, "step": 218260 }, { "epoch": 61.95571955719557, "grad_norm": 0.02590377815067768, "learning_rate": 3.8069826852114674e-05, "loss": 0.0016290482133626937, "step": 218270 }, { "epoch": 61.958558047118935, "grad_norm": 0.08019240200519562, "learning_rate": 3.8066988362191316e-05, "loss": 0.0006424658000469208, "step": 218280 }, { "epoch": 61.96139653704229, "grad_norm": 0.05989689379930496, "learning_rate": 3.806414987226796e-05, "loss": 0.0006014382466673851, "step": 218290 }, { "epoch": 61.96423502696565, "grad_norm": 0.0120118148624897, "learning_rate": 3.806131138234459e-05, "loss": 0.001042616367340088, "step": 218300 }, { "epoch": 61.967073516889016, "grad_norm": 0.010656965896487236, "learning_rate": 3.805847289242123e-05, "loss": 0.0007512902840971947, "step": 218310 }, { "epoch": 61.96991200681238, "grad_norm": 0.1776086390018463, "learning_rate": 3.8055634402497875e-05, "loss": 0.00037555508315563203, "step": 218320 }, { "epoch": 61.972750496735735, "grad_norm": 0.005589599721133709, "learning_rate": 3.8052795912574516e-05, "loss": 0.0005252476781606674, "step": 218330 }, { "epoch": 61.9755889866591, "grad_norm": 0.047544583678245544, "learning_rate": 3.804995742265115e-05, "loss": 0.0003793686628341675, "step": 218340 }, { "epoch": 61.97842747658246, "grad_norm": 0.40727704763412476, "learning_rate": 3.804711893272779e-05, "loss": 0.00035220328718423846, "step": 218350 }, { "epoch": 61.981265966505816, "grad_norm": 0.0571572482585907, "learning_rate": 3.804428044280443e-05, "loss": 0.0030634259805083276, "step": 218360 }, { "epoch": 61.98410445642918, "grad_norm": 1.0696675777435303, "learning_rate": 3.804144195288107e-05, "loss": 0.000828913226723671, "step": 218370 }, { "epoch": 61.98694294635254, "grad_norm": 0.5296390652656555, "learning_rate": 3.803860346295771e-05, "loss": 0.0011889945715665817, "step": 218380 }, { "epoch": 61.9897814362759, "grad_norm": 0.03299457207322121, "learning_rate": 3.803576497303435e-05, "loss": 0.005045325309038162, "step": 218390 }, { "epoch": 61.99261992619926, "grad_norm": 0.041387103497982025, "learning_rate": 3.8032926483110985e-05, "loss": 0.00047532860189676286, "step": 218400 }, { "epoch": 61.995458416122624, "grad_norm": 0.1026676744222641, "learning_rate": 3.8030087993187627e-05, "loss": 0.0024150582030415535, "step": 218410 }, { "epoch": 61.99829690604599, "grad_norm": 0.014434552751481533, "learning_rate": 3.802724950326427e-05, "loss": 0.0003462545573711395, "step": 218420 }, { "epoch": 62.00113539596934, "grad_norm": 0.10152295231819153, "learning_rate": 3.80244110133409e-05, "loss": 0.00018874008674174547, "step": 218430 }, { "epoch": 62.003973885892705, "grad_norm": 0.1566774547100067, "learning_rate": 3.8021572523417544e-05, "loss": 0.0007962495088577271, "step": 218440 }, { "epoch": 62.00681237581607, "grad_norm": 0.008062702603638172, "learning_rate": 3.8018734033494185e-05, "loss": 0.00019996333867311478, "step": 218450 }, { "epoch": 62.009650865739424, "grad_norm": 3.7242159843444824, "learning_rate": 3.801589554357082e-05, "loss": 0.0012314040213823318, "step": 218460 }, { "epoch": 62.01248935566279, "grad_norm": 0.040409885346889496, "learning_rate": 3.801305705364746e-05, "loss": 0.0002881327643990517, "step": 218470 }, { "epoch": 62.01532784558615, "grad_norm": 0.03602750599384308, "learning_rate": 3.8010218563724096e-05, "loss": 0.002608831599354744, "step": 218480 }, { "epoch": 62.018166335509505, "grad_norm": 0.008669503964483738, "learning_rate": 3.8007380073800744e-05, "loss": 0.0019926443696022035, "step": 218490 }, { "epoch": 62.02100482543287, "grad_norm": 0.006281090900301933, "learning_rate": 3.800454158387738e-05, "loss": 0.0001863257959485054, "step": 218500 }, { "epoch": 62.02100482543287, "eval_accuracy": 0.9781903732434667, "eval_loss": 0.07797908782958984, "eval_runtime": 33.1756, "eval_samples_per_second": 474.054, "eval_steps_per_second": 7.415, "step": 218500 }, { "epoch": 62.02384331535623, "grad_norm": 0.025058424100279808, "learning_rate": 3.800170309395401e-05, "loss": 0.0004523938521742821, "step": 218510 }, { "epoch": 62.026681805279594, "grad_norm": 0.031698934733867645, "learning_rate": 3.799886460403066e-05, "loss": 0.000352846086025238, "step": 218520 }, { "epoch": 62.02952029520295, "grad_norm": 3.3013434410095215, "learning_rate": 3.7996026114107296e-05, "loss": 0.007594221085309982, "step": 218530 }, { "epoch": 62.03235878512631, "grad_norm": 0.053129155188798904, "learning_rate": 3.799318762418394e-05, "loss": 0.0003856644034385681, "step": 218540 }, { "epoch": 62.035197275049676, "grad_norm": 0.11171478033065796, "learning_rate": 3.799034913426058e-05, "loss": 0.008635406196117402, "step": 218550 }, { "epoch": 62.03803576497303, "grad_norm": 0.4059250056743622, "learning_rate": 3.798751064433721e-05, "loss": 0.0002635778859257698, "step": 218560 }, { "epoch": 62.040874254896394, "grad_norm": 0.003278650576248765, "learning_rate": 3.7984672154413855e-05, "loss": 0.000611814297735691, "step": 218570 }, { "epoch": 62.04371274481976, "grad_norm": 0.06394561380147934, "learning_rate": 3.798183366449049e-05, "loss": 0.0011465108022093774, "step": 218580 }, { "epoch": 62.04655123474312, "grad_norm": 2.870055913925171, "learning_rate": 3.797899517456713e-05, "loss": 0.0030003517866134645, "step": 218590 }, { "epoch": 62.049389724666476, "grad_norm": 0.06478840857744217, "learning_rate": 3.797615668464377e-05, "loss": 0.0003382187336683273, "step": 218600 }, { "epoch": 62.05222821458984, "grad_norm": 5.38042688369751, "learning_rate": 3.797331819472041e-05, "loss": 0.00080911535769701, "step": 218610 }, { "epoch": 62.0550667045132, "grad_norm": 0.009469506330788136, "learning_rate": 3.7970479704797055e-05, "loss": 0.00022126678377389907, "step": 218620 }, { "epoch": 62.05790519443656, "grad_norm": 0.0882493108510971, "learning_rate": 3.796764121487369e-05, "loss": 0.00010358002036809921, "step": 218630 }, { "epoch": 62.06074368435992, "grad_norm": 0.21512244641780853, "learning_rate": 3.7964802724950324e-05, "loss": 0.00036390181630849836, "step": 218640 }, { "epoch": 62.06358217428328, "grad_norm": 0.09913040697574615, "learning_rate": 3.796196423502697e-05, "loss": 0.00013126786798238753, "step": 218650 }, { "epoch": 62.06642066420664, "grad_norm": 3.011153221130371, "learning_rate": 3.795912574510361e-05, "loss": 0.0008127864450216294, "step": 218660 }, { "epoch": 62.06925915413, "grad_norm": 0.07965370267629623, "learning_rate": 3.795628725518025e-05, "loss": 0.00017133615911006926, "step": 218670 }, { "epoch": 62.072097644053365, "grad_norm": 0.04524220526218414, "learning_rate": 3.795344876525688e-05, "loss": 0.00029147397726774215, "step": 218680 }, { "epoch": 62.07493613397673, "grad_norm": 0.01728985644876957, "learning_rate": 3.7950610275333524e-05, "loss": 0.0002709727734327316, "step": 218690 }, { "epoch": 62.07777462390008, "grad_norm": 0.7607879638671875, "learning_rate": 3.7947771785410165e-05, "loss": 0.0008877310901880264, "step": 218700 }, { "epoch": 62.080613113823446, "grad_norm": 0.022659361362457275, "learning_rate": 3.79449332954868e-05, "loss": 0.00017004497349262237, "step": 218710 }, { "epoch": 62.08345160374681, "grad_norm": 0.16287976503372192, "learning_rate": 3.794209480556344e-05, "loss": 0.0003151360899209976, "step": 218720 }, { "epoch": 62.086290093670165, "grad_norm": 0.1375291496515274, "learning_rate": 3.793925631564008e-05, "loss": 0.0023483801633119583, "step": 218730 }, { "epoch": 62.08912858359353, "grad_norm": 0.030452726408839226, "learning_rate": 3.793641782571672e-05, "loss": 0.0003312036395072937, "step": 218740 }, { "epoch": 62.09196707351689, "grad_norm": 0.017766226083040237, "learning_rate": 3.7933579335793366e-05, "loss": 0.0002023354172706604, "step": 218750 }, { "epoch": 62.09480556344025, "grad_norm": 2.9425907135009766, "learning_rate": 3.793074084587e-05, "loss": 0.0016248567029833795, "step": 218760 }, { "epoch": 62.09764405336361, "grad_norm": 0.007457878440618515, "learning_rate": 3.7927902355946635e-05, "loss": 0.0004856141284108162, "step": 218770 }, { "epoch": 62.10048254328697, "grad_norm": 0.9341108202934265, "learning_rate": 3.7925063866023276e-05, "loss": 0.0009186632931232452, "step": 218780 }, { "epoch": 62.103321033210335, "grad_norm": 0.020968137308955193, "learning_rate": 3.792222537609992e-05, "loss": 0.0001819279044866562, "step": 218790 }, { "epoch": 62.10615952313369, "grad_norm": 0.8765019178390503, "learning_rate": 3.791938688617656e-05, "loss": 0.0002856217324733734, "step": 218800 }, { "epoch": 62.108998013057054, "grad_norm": 0.1387549340724945, "learning_rate": 3.7916548396253193e-05, "loss": 0.0037482578307390215, "step": 218810 }, { "epoch": 62.11183650298042, "grad_norm": 0.058692507445812225, "learning_rate": 3.7913709906329835e-05, "loss": 0.0005689345300197602, "step": 218820 }, { "epoch": 62.11467499290377, "grad_norm": 0.054790280759334564, "learning_rate": 3.7910871416406476e-05, "loss": 0.0002366473898291588, "step": 218830 }, { "epoch": 62.117513482827135, "grad_norm": 0.004854176193475723, "learning_rate": 3.790803292648311e-05, "loss": 0.00016262289136648178, "step": 218840 }, { "epoch": 62.1203519727505, "grad_norm": 0.01813781075179577, "learning_rate": 3.790519443655975e-05, "loss": 0.00011467225849628448, "step": 218850 }, { "epoch": 62.123190462673854, "grad_norm": 0.13857761025428772, "learning_rate": 3.7902355946636394e-05, "loss": 0.0004955129697918892, "step": 218860 }, { "epoch": 62.12602895259722, "grad_norm": 0.031174857169389725, "learning_rate": 3.789951745671303e-05, "loss": 0.00036786533892154696, "step": 218870 }, { "epoch": 62.12886744252058, "grad_norm": 0.03192077949643135, "learning_rate": 3.789667896678967e-05, "loss": 0.0005792295560240746, "step": 218880 }, { "epoch": 62.13170593244394, "grad_norm": 0.05309580639004707, "learning_rate": 3.789384047686631e-05, "loss": 0.0003389021381735802, "step": 218890 }, { "epoch": 62.1345444223673, "grad_norm": 0.012357905507087708, "learning_rate": 3.7891001986942946e-05, "loss": 0.0020979007706046103, "step": 218900 }, { "epoch": 62.13738291229066, "grad_norm": 0.06666938215494156, "learning_rate": 3.788816349701959e-05, "loss": 0.00020651239901781082, "step": 218910 }, { "epoch": 62.140221402214024, "grad_norm": 0.01389849092811346, "learning_rate": 3.788532500709623e-05, "loss": 0.00039290226995944975, "step": 218920 }, { "epoch": 62.14305989213738, "grad_norm": 0.01174547802656889, "learning_rate": 3.788248651717286e-05, "loss": 0.0001665867865085602, "step": 218930 }, { "epoch": 62.14589838206074, "grad_norm": 0.022854190319776535, "learning_rate": 3.7879648027249504e-05, "loss": 0.00037089120596647265, "step": 218940 }, { "epoch": 62.148736871984106, "grad_norm": 0.21299807727336884, "learning_rate": 3.7876809537326146e-05, "loss": 0.00032324548810720445, "step": 218950 }, { "epoch": 62.15157536190747, "grad_norm": 0.04487968608736992, "learning_rate": 3.787397104740279e-05, "loss": 0.000251220166683197, "step": 218960 }, { "epoch": 62.154413851830824, "grad_norm": 0.5494627356529236, "learning_rate": 3.787113255747942e-05, "loss": 0.00022986680269241334, "step": 218970 }, { "epoch": 62.15725234175419, "grad_norm": 0.019761867821216583, "learning_rate": 3.7868294067556056e-05, "loss": 0.0005991479381918907, "step": 218980 }, { "epoch": 62.16009083167755, "grad_norm": 0.06674439460039139, "learning_rate": 3.7865455577632704e-05, "loss": 0.0010180534794926644, "step": 218990 }, { "epoch": 62.162929321600906, "grad_norm": 0.018801558762788773, "learning_rate": 3.786261708770934e-05, "loss": 0.00019344966858625412, "step": 219000 }, { "epoch": 62.162929321600906, "eval_accuracy": 0.9799707509378776, "eval_loss": 0.07611135393381119, "eval_runtime": 32.1576, "eval_samples_per_second": 489.061, "eval_steps_per_second": 7.65, "step": 219000 }, { "epoch": 62.16576781152427, "grad_norm": 0.4616508185863495, "learning_rate": 3.785977859778598e-05, "loss": 0.0004517126828432083, "step": 219010 }, { "epoch": 62.16860630144763, "grad_norm": 0.4357989728450775, "learning_rate": 3.785694010786262e-05, "loss": 0.0007189519703388215, "step": 219020 }, { "epoch": 62.17144479137099, "grad_norm": 0.13999664783477783, "learning_rate": 3.7854101617939256e-05, "loss": 0.0009274106472730636, "step": 219030 }, { "epoch": 62.17428328129435, "grad_norm": 0.015074108727276325, "learning_rate": 3.78512631280159e-05, "loss": 0.000232531875371933, "step": 219040 }, { "epoch": 62.17712177121771, "grad_norm": 0.06282586604356766, "learning_rate": 3.784842463809254e-05, "loss": 0.0023205233737826347, "step": 219050 }, { "epoch": 62.179960261141076, "grad_norm": 0.07035356014966965, "learning_rate": 3.7845586148169174e-05, "loss": 0.0026645392179489137, "step": 219060 }, { "epoch": 62.18279875106443, "grad_norm": 0.07992282509803772, "learning_rate": 3.7842747658245815e-05, "loss": 0.0002038177102804184, "step": 219070 }, { "epoch": 62.185637240987795, "grad_norm": 0.01094980537891388, "learning_rate": 3.783990916832245e-05, "loss": 0.0007394351065158844, "step": 219080 }, { "epoch": 62.18847573091116, "grad_norm": 0.01801287941634655, "learning_rate": 3.78370706783991e-05, "loss": 0.006691852211952209, "step": 219090 }, { "epoch": 62.19131422083451, "grad_norm": 0.9573699831962585, "learning_rate": 3.783423218847573e-05, "loss": 0.0031136304140090944, "step": 219100 }, { "epoch": 62.194152710757876, "grad_norm": 0.0036239661276340485, "learning_rate": 3.783139369855237e-05, "loss": 0.0018950289115309716, "step": 219110 }, { "epoch": 62.19699120068124, "grad_norm": 0.03517189249396324, "learning_rate": 3.7828555208629015e-05, "loss": 0.0001710960641503334, "step": 219120 }, { "epoch": 62.199829690604595, "grad_norm": 0.01670032925903797, "learning_rate": 3.782571671870565e-05, "loss": 0.0008104411885142326, "step": 219130 }, { "epoch": 62.20266818052796, "grad_norm": 0.07388734072446823, "learning_rate": 3.782287822878229e-05, "loss": 0.0008274240419268609, "step": 219140 }, { "epoch": 62.20550667045132, "grad_norm": 0.04889446496963501, "learning_rate": 3.782003973885893e-05, "loss": 0.0004162942990660667, "step": 219150 }, { "epoch": 62.208345160374684, "grad_norm": 12.564270973205566, "learning_rate": 3.781720124893557e-05, "loss": 0.00367276668548584, "step": 219160 }, { "epoch": 62.21118365029804, "grad_norm": 0.2573581337928772, "learning_rate": 3.781436275901221e-05, "loss": 0.015656030178070067, "step": 219170 }, { "epoch": 62.2140221402214, "grad_norm": 3.346475124359131, "learning_rate": 3.781152426908884e-05, "loss": 0.001143277995288372, "step": 219180 }, { "epoch": 62.216860630144765, "grad_norm": 1.0355018377304077, "learning_rate": 3.7808685779165484e-05, "loss": 0.009125328809022903, "step": 219190 }, { "epoch": 62.21969912006812, "grad_norm": 8.296284675598145, "learning_rate": 3.7805847289242126e-05, "loss": 0.010005916655063628, "step": 219200 }, { "epoch": 62.222537609991484, "grad_norm": 1.6540566682815552, "learning_rate": 3.780300879931876e-05, "loss": 0.011241798847913742, "step": 219210 }, { "epoch": 62.22537609991485, "grad_norm": 0.7496623396873474, "learning_rate": 3.780017030939541e-05, "loss": 0.0006888814270496369, "step": 219220 }, { "epoch": 62.2282145898382, "grad_norm": 0.1709733009338379, "learning_rate": 3.779733181947204e-05, "loss": 0.005229627341032028, "step": 219230 }, { "epoch": 62.231053079761566, "grad_norm": 0.04142606630921364, "learning_rate": 3.779449332954868e-05, "loss": 0.0001645239070057869, "step": 219240 }, { "epoch": 62.23389156968493, "grad_norm": 0.005263874307274818, "learning_rate": 3.7791654839625326e-05, "loss": 0.00043667983263731005, "step": 219250 }, { "epoch": 62.23673005960829, "grad_norm": 0.3858121335506439, "learning_rate": 3.778881634970196e-05, "loss": 0.002494099922478199, "step": 219260 }, { "epoch": 62.23956854953165, "grad_norm": 6.464031219482422, "learning_rate": 3.77859778597786e-05, "loss": 0.00207147803157568, "step": 219270 }, { "epoch": 62.24240703945501, "grad_norm": 0.13995666801929474, "learning_rate": 3.7783139369855236e-05, "loss": 0.0005194349214434624, "step": 219280 }, { "epoch": 62.24524552937837, "grad_norm": 0.017478762194514275, "learning_rate": 3.778030087993188e-05, "loss": 0.0008244156837463379, "step": 219290 }, { "epoch": 62.24808401930173, "grad_norm": 0.13240978121757507, "learning_rate": 3.777746239000852e-05, "loss": 0.0010995114222168922, "step": 219300 }, { "epoch": 62.25092250922509, "grad_norm": 0.0076134842820465565, "learning_rate": 3.7774623900085154e-05, "loss": 0.000849650613963604, "step": 219310 }, { "epoch": 62.253760999148454, "grad_norm": 0.5563639998435974, "learning_rate": 3.7771785410161795e-05, "loss": 0.0005636958405375481, "step": 219320 }, { "epoch": 62.25659948907182, "grad_norm": 0.0216164942830801, "learning_rate": 3.7768946920238437e-05, "loss": 0.00012986231595277787, "step": 219330 }, { "epoch": 62.25943797899517, "grad_norm": 0.006106148008257151, "learning_rate": 3.776610843031507e-05, "loss": 0.0004336489364504814, "step": 219340 }, { "epoch": 62.262276468918536, "grad_norm": 0.028716305270791054, "learning_rate": 3.776326994039171e-05, "loss": 0.0004390360787510872, "step": 219350 }, { "epoch": 62.2651149588419, "grad_norm": 0.07563501596450806, "learning_rate": 3.7760431450468354e-05, "loss": 0.006640709191560745, "step": 219360 }, { "epoch": 62.267953448765255, "grad_norm": 0.05550495162606239, "learning_rate": 3.775759296054499e-05, "loss": 0.0004650451242923737, "step": 219370 }, { "epoch": 62.27079193868862, "grad_norm": 0.09874530881643295, "learning_rate": 3.775475447062164e-05, "loss": 0.0005641063675284385, "step": 219380 }, { "epoch": 62.27363042861198, "grad_norm": 0.017176497727632523, "learning_rate": 3.775191598069827e-05, "loss": 0.00022655315697193146, "step": 219390 }, { "epoch": 62.276468918535336, "grad_norm": 0.4507908523082733, "learning_rate": 3.7749077490774906e-05, "loss": 0.000202757865190506, "step": 219400 }, { "epoch": 62.2793074084587, "grad_norm": 0.11348843574523926, "learning_rate": 3.774623900085155e-05, "loss": 0.0002904435619711876, "step": 219410 }, { "epoch": 62.28214589838206, "grad_norm": 0.016872573643922806, "learning_rate": 3.774340051092819e-05, "loss": 0.0004588117823004723, "step": 219420 }, { "epoch": 62.284984388305425, "grad_norm": 0.07719072699546814, "learning_rate": 3.774056202100483e-05, "loss": 0.00019363835453987122, "step": 219430 }, { "epoch": 62.28782287822878, "grad_norm": 0.5255810022354126, "learning_rate": 3.7737723531081465e-05, "loss": 0.0011194759979844093, "step": 219440 }, { "epoch": 62.29066136815214, "grad_norm": 0.04389384388923645, "learning_rate": 3.7734885041158106e-05, "loss": 0.0002085333690047264, "step": 219450 }, { "epoch": 62.293499858075506, "grad_norm": 0.01597927138209343, "learning_rate": 3.773204655123475e-05, "loss": 0.000486685149371624, "step": 219460 }, { "epoch": 62.29633834799886, "grad_norm": 0.02351010963320732, "learning_rate": 3.772920806131138e-05, "loss": 0.0002326617017388344, "step": 219470 }, { "epoch": 62.299176837922225, "grad_norm": 0.14318598806858063, "learning_rate": 3.772636957138802e-05, "loss": 0.0002879161387681961, "step": 219480 }, { "epoch": 62.30201532784559, "grad_norm": 0.14352332055568695, "learning_rate": 3.7723531081464665e-05, "loss": 0.00012005586177110672, "step": 219490 }, { "epoch": 62.304853817768944, "grad_norm": 0.2028595209121704, "learning_rate": 3.77206925915413e-05, "loss": 0.00018038004636764527, "step": 219500 }, { "epoch": 62.304853817768944, "eval_accuracy": 0.978953392255357, "eval_loss": 0.07779113203287125, "eval_runtime": 32.6883, "eval_samples_per_second": 481.12, "eval_steps_per_second": 7.526, "step": 219500 }, { "epoch": 62.30769230769231, "grad_norm": 0.02011045068502426, "learning_rate": 3.771785410161794e-05, "loss": 0.00012589525431394577, "step": 219510 }, { "epoch": 62.31053079761567, "grad_norm": 0.010861233808100224, "learning_rate": 3.771501561169458e-05, "loss": 0.0005329407751560212, "step": 219520 }, { "epoch": 62.31336928753903, "grad_norm": 0.052278075367212296, "learning_rate": 3.7712177121771217e-05, "loss": 0.005591506510972977, "step": 219530 }, { "epoch": 62.31620777746239, "grad_norm": 0.18878480792045593, "learning_rate": 3.770933863184786e-05, "loss": 0.0003347795456647873, "step": 219540 }, { "epoch": 62.31904626738575, "grad_norm": 0.026020199060440063, "learning_rate": 3.77065001419245e-05, "loss": 0.000538238137960434, "step": 219550 }, { "epoch": 62.321884757309114, "grad_norm": 0.0202209260314703, "learning_rate": 3.770366165200114e-05, "loss": 0.00016019903123378753, "step": 219560 }, { "epoch": 62.32472324723247, "grad_norm": 0.06138351187109947, "learning_rate": 3.7700823162077775e-05, "loss": 0.00035441704094409945, "step": 219570 }, { "epoch": 62.32756173715583, "grad_norm": 0.06058640033006668, "learning_rate": 3.769798467215442e-05, "loss": 0.00031191930174827575, "step": 219580 }, { "epoch": 62.330400227079195, "grad_norm": 0.20166485011577606, "learning_rate": 3.769514618223106e-05, "loss": 0.0019159005954861642, "step": 219590 }, { "epoch": 62.33323871700255, "grad_norm": 0.04015286639332771, "learning_rate": 3.769230769230769e-05, "loss": 0.0003723150119185448, "step": 219600 }, { "epoch": 62.336077206925914, "grad_norm": 23.481773376464844, "learning_rate": 3.7689469202384334e-05, "loss": 0.00763494074344635, "step": 219610 }, { "epoch": 62.33891569684928, "grad_norm": 0.01890701800584793, "learning_rate": 3.7686630712460975e-05, "loss": 0.00042074769735336306, "step": 219620 }, { "epoch": 62.34175418677264, "grad_norm": 0.08658425509929657, "learning_rate": 3.768379222253761e-05, "loss": 0.00028143078088760376, "step": 219630 }, { "epoch": 62.344592676695996, "grad_norm": 0.2765031158924103, "learning_rate": 3.768095373261425e-05, "loss": 0.00246813353151083, "step": 219640 }, { "epoch": 62.34743116661936, "grad_norm": 3.382819890975952, "learning_rate": 3.7678399091683225e-05, "loss": 0.016927482187747957, "step": 219650 }, { "epoch": 62.35026965654272, "grad_norm": 0.012410280294716358, "learning_rate": 3.7675560601759866e-05, "loss": 0.0012172715738415717, "step": 219660 }, { "epoch": 62.35310814646608, "grad_norm": 1.3713065385818481, "learning_rate": 3.767272211183651e-05, "loss": 0.0007400527596473694, "step": 219670 }, { "epoch": 62.35594663638944, "grad_norm": 22.14598274230957, "learning_rate": 3.766988362191314e-05, "loss": 0.013165083527565003, "step": 219680 }, { "epoch": 62.3587851263128, "grad_norm": 0.32524076104164124, "learning_rate": 3.766704513198978e-05, "loss": 0.00033500846475362776, "step": 219690 }, { "epoch": 62.36162361623616, "grad_norm": 0.006239022593945265, "learning_rate": 3.7664206642066425e-05, "loss": 0.007063467800617218, "step": 219700 }, { "epoch": 62.36446210615952, "grad_norm": 0.0684872567653656, "learning_rate": 3.766136815214306e-05, "loss": 0.0007958961650729179, "step": 219710 }, { "epoch": 62.367300596082885, "grad_norm": 0.03391709923744202, "learning_rate": 3.76585296622197e-05, "loss": 0.00037691351026296615, "step": 219720 }, { "epoch": 62.37013908600625, "grad_norm": 0.02030166983604431, "learning_rate": 3.765569117229634e-05, "loss": 0.00037884432822465897, "step": 219730 }, { "epoch": 62.3729775759296, "grad_norm": 0.09407872706651688, "learning_rate": 3.7652852682372977e-05, "loss": 0.0011985165998339652, "step": 219740 }, { "epoch": 62.375816065852966, "grad_norm": 0.3466847836971283, "learning_rate": 3.765001419244962e-05, "loss": 0.00045046042650938035, "step": 219750 }, { "epoch": 62.37865455577633, "grad_norm": 0.007372156251221895, "learning_rate": 3.764717570252626e-05, "loss": 0.00034062806516885755, "step": 219760 }, { "epoch": 62.381493045699685, "grad_norm": 0.015063219703733921, "learning_rate": 3.76443372126029e-05, "loss": 0.001486562006175518, "step": 219770 }, { "epoch": 62.38433153562305, "grad_norm": 0.015805484727025032, "learning_rate": 3.7641498722679535e-05, "loss": 0.00020647961646318435, "step": 219780 }, { "epoch": 62.38717002554641, "grad_norm": 0.11486828327178955, "learning_rate": 3.763866023275617e-05, "loss": 0.00048134420067071917, "step": 219790 }, { "epoch": 62.39000851546977, "grad_norm": 0.783648669719696, "learning_rate": 3.763582174283282e-05, "loss": 0.00043035224080085755, "step": 219800 }, { "epoch": 62.39284700539313, "grad_norm": 0.020256655290722847, "learning_rate": 3.763298325290945e-05, "loss": 0.0007004821673035622, "step": 219810 }, { "epoch": 62.39568549531649, "grad_norm": 0.16775614023208618, "learning_rate": 3.7630144762986094e-05, "loss": 0.0005523417145013809, "step": 219820 }, { "epoch": 62.398523985239855, "grad_norm": 0.04210258647799492, "learning_rate": 3.7627306273062735e-05, "loss": 0.000672232173383236, "step": 219830 }, { "epoch": 62.40136247516321, "grad_norm": 0.011634457856416702, "learning_rate": 3.762446778313937e-05, "loss": 0.00016869492828845977, "step": 219840 }, { "epoch": 62.404200965086574, "grad_norm": 0.0229943934828043, "learning_rate": 3.762162929321601e-05, "loss": 0.00037203337997198104, "step": 219850 }, { "epoch": 62.40703945500994, "grad_norm": 0.005593643523752689, "learning_rate": 3.761879080329265e-05, "loss": 0.0006641879677772522, "step": 219860 }, { "epoch": 62.40987794493329, "grad_norm": 0.03380071744322777, "learning_rate": 3.761595231336929e-05, "loss": 0.0013285016641020775, "step": 219870 }, { "epoch": 62.412716434856655, "grad_norm": 0.05653723329305649, "learning_rate": 3.761311382344593e-05, "loss": 0.00026184637099504473, "step": 219880 }, { "epoch": 62.41555492478002, "grad_norm": 0.12228763103485107, "learning_rate": 3.761027533352256e-05, "loss": 0.00034721381962299347, "step": 219890 }, { "epoch": 62.41839341470338, "grad_norm": 0.05563554912805557, "learning_rate": 3.760743684359921e-05, "loss": 0.0002539144828915596, "step": 219900 }, { "epoch": 62.42123190462674, "grad_norm": 0.007663853466510773, "learning_rate": 3.7604598353675846e-05, "loss": 0.0003175731748342514, "step": 219910 }, { "epoch": 62.4240703945501, "grad_norm": 0.06445333361625671, "learning_rate": 3.760175986375248e-05, "loss": 0.0005133796483278274, "step": 219920 }, { "epoch": 62.42690888447346, "grad_norm": 0.19025073945522308, "learning_rate": 3.759892137382913e-05, "loss": 0.00032522790133953093, "step": 219930 }, { "epoch": 62.42974737439682, "grad_norm": 0.011832769960165024, "learning_rate": 3.7596082883905763e-05, "loss": 0.0002409936860203743, "step": 219940 }, { "epoch": 62.43258586432018, "grad_norm": 0.39448243379592896, "learning_rate": 3.7593244393982405e-05, "loss": 0.0004256727173924446, "step": 219950 }, { "epoch": 62.435424354243544, "grad_norm": 0.04302350431680679, "learning_rate": 3.7590405904059046e-05, "loss": 0.00019156653434038162, "step": 219960 }, { "epoch": 62.4382628441669, "grad_norm": 0.02866055630147457, "learning_rate": 3.758756741413568e-05, "loss": 0.00047951415181159975, "step": 219970 }, { "epoch": 62.44110133409026, "grad_norm": 0.3037232458591461, "learning_rate": 3.758472892421232e-05, "loss": 0.0008851617574691772, "step": 219980 }, { "epoch": 62.443939824013626, "grad_norm": 0.2409208118915558, "learning_rate": 3.758189043428896e-05, "loss": 0.0007486550137400627, "step": 219990 }, { "epoch": 62.44677831393699, "grad_norm": 0.04556864872574806, "learning_rate": 3.75790519443656e-05, "loss": 0.00035252124071121215, "step": 220000 }, { "epoch": 62.44677831393699, "eval_accuracy": 0.9761556558784257, "eval_loss": 0.09264658391475677, "eval_runtime": 32.2571, "eval_samples_per_second": 487.551, "eval_steps_per_second": 7.626, "step": 220000 }, { "epoch": 62.449616803860344, "grad_norm": 0.5532296299934387, "learning_rate": 3.757621345444224e-05, "loss": 0.0020470358431339264, "step": 220010 }, { "epoch": 62.45245529378371, "grad_norm": 0.2950701117515564, "learning_rate": 3.7573374964518874e-05, "loss": 0.0008316058665513992, "step": 220020 }, { "epoch": 62.45529378370707, "grad_norm": 0.045538172125816345, "learning_rate": 3.757053647459552e-05, "loss": 0.0003891659900546074, "step": 220030 }, { "epoch": 62.458132273630426, "grad_norm": 0.03646159917116165, "learning_rate": 3.756769798467216e-05, "loss": 0.0021652096882462503, "step": 220040 }, { "epoch": 62.46097076355379, "grad_norm": 0.13905228674411774, "learning_rate": 3.756485949474879e-05, "loss": 0.00118691585958004, "step": 220050 }, { "epoch": 62.46380925347715, "grad_norm": 0.018202180042862892, "learning_rate": 3.756202100482544e-05, "loss": 0.0005172975361347198, "step": 220060 }, { "epoch": 62.46664774340051, "grad_norm": 0.12948396801948547, "learning_rate": 3.7559182514902074e-05, "loss": 0.00012460928410291673, "step": 220070 }, { "epoch": 62.46948623332387, "grad_norm": 0.02978682890534401, "learning_rate": 3.7556344024978716e-05, "loss": 0.0007194764912128448, "step": 220080 }, { "epoch": 62.47232472324723, "grad_norm": 0.1586718112230301, "learning_rate": 3.755350553505535e-05, "loss": 0.008529847115278244, "step": 220090 }, { "epoch": 62.475163213170596, "grad_norm": 0.028984202072024345, "learning_rate": 3.755066704513199e-05, "loss": 0.0005697222426533699, "step": 220100 }, { "epoch": 62.47800170309395, "grad_norm": 0.08513357490301132, "learning_rate": 3.754782855520863e-05, "loss": 0.0005765927955508233, "step": 220110 }, { "epoch": 62.480840193017315, "grad_norm": 0.057428762316703796, "learning_rate": 3.754499006528527e-05, "loss": 0.0028147926554083825, "step": 220120 }, { "epoch": 62.48367868294068, "grad_norm": 0.04622599110007286, "learning_rate": 3.754215157536191e-05, "loss": 0.0009506290778517723, "step": 220130 }, { "epoch": 62.48651717286403, "grad_norm": 8.83021068572998, "learning_rate": 3.753931308543855e-05, "loss": 0.002114705741405487, "step": 220140 }, { "epoch": 62.489355662787396, "grad_norm": 0.022999458014965057, "learning_rate": 3.7536474595515185e-05, "loss": 0.0015185102820396423, "step": 220150 }, { "epoch": 62.49219415271076, "grad_norm": 0.6429575085639954, "learning_rate": 3.7533636105591826e-05, "loss": 0.001180780865252018, "step": 220160 }, { "epoch": 62.49503264263412, "grad_norm": 0.05055927485227585, "learning_rate": 3.753079761566847e-05, "loss": 0.0002830306068062782, "step": 220170 }, { "epoch": 62.49787113255748, "grad_norm": 0.01604345254600048, "learning_rate": 3.75279591257451e-05, "loss": 0.00020285006612539292, "step": 220180 }, { "epoch": 62.50070962248084, "grad_norm": 1.6896758079528809, "learning_rate": 3.7525120635821744e-05, "loss": 0.0003705553710460663, "step": 220190 }, { "epoch": 62.503548112404204, "grad_norm": 0.00716337189078331, "learning_rate": 3.7522282145898385e-05, "loss": 0.00024059005081653596, "step": 220200 }, { "epoch": 62.50638660232756, "grad_norm": 0.06678484380245209, "learning_rate": 3.751944365597502e-05, "loss": 0.000671716034412384, "step": 220210 }, { "epoch": 62.50922509225092, "grad_norm": 0.09156182408332825, "learning_rate": 3.751660516605166e-05, "loss": 0.00020352285355329514, "step": 220220 }, { "epoch": 62.512063582174285, "grad_norm": 0.19403307139873505, "learning_rate": 3.75137666761283e-05, "loss": 0.01471591293811798, "step": 220230 }, { "epoch": 62.51490207209764, "grad_norm": 0.0022113036829978228, "learning_rate": 3.7510928186204944e-05, "loss": 0.00021448787301778794, "step": 220240 }, { "epoch": 62.517740562021004, "grad_norm": 0.04682810232043266, "learning_rate": 3.750808969628158e-05, "loss": 0.00014065522700548171, "step": 220250 }, { "epoch": 62.52057905194437, "grad_norm": 0.06576584279537201, "learning_rate": 3.750525120635822e-05, "loss": 0.0002667957916855812, "step": 220260 }, { "epoch": 62.52341754186773, "grad_norm": 0.02518932893872261, "learning_rate": 3.750241271643486e-05, "loss": 0.00015745870769023895, "step": 220270 }, { "epoch": 62.526256031791085, "grad_norm": 0.07633817195892334, "learning_rate": 3.7499574226511496e-05, "loss": 0.00020928401499986649, "step": 220280 }, { "epoch": 62.52909452171445, "grad_norm": 0.013097911141812801, "learning_rate": 3.749673573658814e-05, "loss": 0.00011100433766841888, "step": 220290 }, { "epoch": 62.53193301163781, "grad_norm": 0.02696506679058075, "learning_rate": 3.749389724666478e-05, "loss": 6.610751152038574e-05, "step": 220300 }, { "epoch": 62.53477150156117, "grad_norm": 0.04498637467622757, "learning_rate": 3.749105875674141e-05, "loss": 0.0006032867357134819, "step": 220310 }, { "epoch": 62.53760999148453, "grad_norm": 0.06372048705816269, "learning_rate": 3.7488220266818054e-05, "loss": 0.0005412934347987175, "step": 220320 }, { "epoch": 62.54044848140789, "grad_norm": 0.00394667312502861, "learning_rate": 3.7485381776894696e-05, "loss": 0.0013209940865635873, "step": 220330 }, { "epoch": 62.54328697133125, "grad_norm": 0.036725301295518875, "learning_rate": 3.748254328697133e-05, "loss": 0.0012402640655636787, "step": 220340 }, { "epoch": 62.54612546125461, "grad_norm": 0.15138983726501465, "learning_rate": 3.747970479704797e-05, "loss": 0.0002975022420287132, "step": 220350 }, { "epoch": 62.548963951177974, "grad_norm": 0.2635360360145569, "learning_rate": 3.747686630712461e-05, "loss": 0.0005023496225476265, "step": 220360 }, { "epoch": 62.55180244110134, "grad_norm": 0.047502949833869934, "learning_rate": 3.7474027817201254e-05, "loss": 0.0018935756757855414, "step": 220370 }, { "epoch": 62.55464093102469, "grad_norm": 0.7674239873886108, "learning_rate": 3.747118932727789e-05, "loss": 0.0016071878373622895, "step": 220380 }, { "epoch": 62.557479420948056, "grad_norm": 0.017517661675810814, "learning_rate": 3.7468350837354524e-05, "loss": 0.00019743293523788452, "step": 220390 }, { "epoch": 62.56031791087142, "grad_norm": 0.045989491045475006, "learning_rate": 3.746551234743117e-05, "loss": 0.0007046166807413101, "step": 220400 }, { "epoch": 62.563156400794774, "grad_norm": 0.018961738795042038, "learning_rate": 3.7462673857507806e-05, "loss": 0.0011204695329070091, "step": 220410 }, { "epoch": 62.56599489071814, "grad_norm": 0.23179659247398376, "learning_rate": 3.745983536758445e-05, "loss": 0.0003025852143764496, "step": 220420 }, { "epoch": 62.5688333806415, "grad_norm": 0.14665548503398895, "learning_rate": 3.745699687766109e-05, "loss": 0.00046889930963516234, "step": 220430 }, { "epoch": 62.571671870564856, "grad_norm": 0.7377319931983948, "learning_rate": 3.7454158387737724e-05, "loss": 0.0007603036239743233, "step": 220440 }, { "epoch": 62.57451036048822, "grad_norm": 0.19971708953380585, "learning_rate": 3.7451319897814365e-05, "loss": 0.0013747775927186012, "step": 220450 }, { "epoch": 62.57734885041158, "grad_norm": 0.16037562489509583, "learning_rate": 3.7448481407891006e-05, "loss": 0.0042459025979042055, "step": 220460 }, { "epoch": 62.580187340334945, "grad_norm": 5.44827938079834, "learning_rate": 3.744564291796764e-05, "loss": 0.001369336247444153, "step": 220470 }, { "epoch": 62.5830258302583, "grad_norm": 0.03660914674401283, "learning_rate": 3.744280442804428e-05, "loss": 0.0006473138928413391, "step": 220480 }, { "epoch": 62.58586432018166, "grad_norm": 0.16802188754081726, "learning_rate": 3.743996593812092e-05, "loss": 0.00017759446054697037, "step": 220490 }, { "epoch": 62.588702810105026, "grad_norm": 0.03436180576682091, "learning_rate": 3.7437127448197565e-05, "loss": 0.0004955517128109932, "step": 220500 }, { "epoch": 62.588702810105026, "eval_accuracy": 0.9804158453614803, "eval_loss": 0.07600770890712738, "eval_runtime": 32.8252, "eval_samples_per_second": 479.114, "eval_steps_per_second": 7.494, "step": 220500 }, { "epoch": 62.59154130002838, "grad_norm": 0.1225135400891304, "learning_rate": 3.74342889582742e-05, "loss": 0.0004224097356200218, "step": 220510 }, { "epoch": 62.594379789951745, "grad_norm": 0.04546502232551575, "learning_rate": 3.7431450468350834e-05, "loss": 0.001396300457417965, "step": 220520 }, { "epoch": 62.59721827987511, "grad_norm": 0.05635577440261841, "learning_rate": 3.742861197842748e-05, "loss": 0.001854581944644451, "step": 220530 }, { "epoch": 62.60005676979847, "grad_norm": 0.03737569600343704, "learning_rate": 3.742577348850412e-05, "loss": 0.00524551123380661, "step": 220540 }, { "epoch": 62.60289525972183, "grad_norm": 1.5329885482788086, "learning_rate": 3.742293499858076e-05, "loss": 0.000603405013680458, "step": 220550 }, { "epoch": 62.60573374964519, "grad_norm": 0.03986736759543419, "learning_rate": 3.74200965086574e-05, "loss": 0.0007313285022974014, "step": 220560 }, { "epoch": 62.60857223956855, "grad_norm": 0.3240727484226227, "learning_rate": 3.7417258018734035e-05, "loss": 0.0005974115803837777, "step": 220570 }, { "epoch": 62.61141072949191, "grad_norm": 0.03289702162146568, "learning_rate": 3.7414419528810676e-05, "loss": 0.0006734821945428848, "step": 220580 }, { "epoch": 62.61424921941527, "grad_norm": 0.09203065931797028, "learning_rate": 3.741158103888731e-05, "loss": 0.007814368605613709, "step": 220590 }, { "epoch": 62.617087709338634, "grad_norm": 0.07434184849262238, "learning_rate": 3.740874254896395e-05, "loss": 0.0015671016648411752, "step": 220600 }, { "epoch": 62.61992619926199, "grad_norm": 0.03134223818778992, "learning_rate": 3.740590405904059e-05, "loss": 0.0007429582998156548, "step": 220610 }, { "epoch": 62.62276468918535, "grad_norm": 0.1887994110584259, "learning_rate": 3.740306556911723e-05, "loss": 0.00022660382091999054, "step": 220620 }, { "epoch": 62.625603179108715, "grad_norm": 0.05799306556582451, "learning_rate": 3.740022707919387e-05, "loss": 0.0006130540743470192, "step": 220630 }, { "epoch": 62.62844166903208, "grad_norm": 0.01516047865152359, "learning_rate": 3.739738858927051e-05, "loss": 0.00103780347853899, "step": 220640 }, { "epoch": 62.631280158955434, "grad_norm": 0.9778589010238647, "learning_rate": 3.7394550099347145e-05, "loss": 0.0007133908569812774, "step": 220650 }, { "epoch": 62.6341186488788, "grad_norm": 0.9045343995094299, "learning_rate": 3.739171160942379e-05, "loss": 0.002260998263955116, "step": 220660 }, { "epoch": 62.63695713880216, "grad_norm": 0.48831111192703247, "learning_rate": 3.738887311950043e-05, "loss": 0.0003881271928548813, "step": 220670 }, { "epoch": 62.639795628725516, "grad_norm": 0.7465749382972717, "learning_rate": 3.738603462957706e-05, "loss": 0.00029463451355695727, "step": 220680 }, { "epoch": 62.64263411864888, "grad_norm": 0.06525035947561264, "learning_rate": 3.7383196139653704e-05, "loss": 0.00025153327733278273, "step": 220690 }, { "epoch": 62.64547260857224, "grad_norm": 0.2418093979358673, "learning_rate": 3.7380357649730345e-05, "loss": 0.000286782905459404, "step": 220700 }, { "epoch": 62.6483110984956, "grad_norm": 0.596157431602478, "learning_rate": 3.737751915980699e-05, "loss": 0.0022610699757933616, "step": 220710 }, { "epoch": 62.65114958841896, "grad_norm": 0.1989627331495285, "learning_rate": 3.737468066988362e-05, "loss": 0.0005165455862879753, "step": 220720 }, { "epoch": 62.65398807834232, "grad_norm": 0.21977877616882324, "learning_rate": 3.737184217996026e-05, "loss": 0.000334666483104229, "step": 220730 }, { "epoch": 62.656826568265686, "grad_norm": 0.016593199223279953, "learning_rate": 3.7369003690036904e-05, "loss": 0.0005463510751724243, "step": 220740 }, { "epoch": 62.65966505818904, "grad_norm": 2.721832513809204, "learning_rate": 3.736616520011354e-05, "loss": 0.0008658876642584801, "step": 220750 }, { "epoch": 62.662503548112404, "grad_norm": 0.2064489722251892, "learning_rate": 3.736332671019018e-05, "loss": 0.0004147423431277275, "step": 220760 }, { "epoch": 62.66534203803577, "grad_norm": 0.07577303797006607, "learning_rate": 3.736048822026682e-05, "loss": 0.0003905940800905228, "step": 220770 }, { "epoch": 62.66818052795912, "grad_norm": 3.1362242698669434, "learning_rate": 3.7357649730343456e-05, "loss": 0.001255347765982151, "step": 220780 }, { "epoch": 62.671019017882486, "grad_norm": 0.08183205127716064, "learning_rate": 3.73548112404201e-05, "loss": 0.0004605952650308609, "step": 220790 }, { "epoch": 62.67385750780585, "grad_norm": 0.9915308952331543, "learning_rate": 3.735197275049674e-05, "loss": 0.00033500026911497115, "step": 220800 }, { "epoch": 62.676695997729205, "grad_norm": 0.09687336534261703, "learning_rate": 3.734913426057337e-05, "loss": 0.00018446575850248337, "step": 220810 }, { "epoch": 62.67953448765257, "grad_norm": 0.010253356769680977, "learning_rate": 3.7346295770650015e-05, "loss": 0.00047416072338819505, "step": 220820 }, { "epoch": 62.68237297757593, "grad_norm": 0.047340117394924164, "learning_rate": 3.7343457280726656e-05, "loss": 0.0017058374360203743, "step": 220830 }, { "epoch": 62.68521146749929, "grad_norm": 0.5831284523010254, "learning_rate": 3.73406187908033e-05, "loss": 0.00045192502439022063, "step": 220840 }, { "epoch": 62.68804995742265, "grad_norm": 0.21998797357082367, "learning_rate": 3.733778030087993e-05, "loss": 0.0002714775502681732, "step": 220850 }, { "epoch": 62.69088844734601, "grad_norm": 0.048612214624881744, "learning_rate": 3.7334941810956573e-05, "loss": 0.0006042754277586937, "step": 220860 }, { "epoch": 62.693726937269375, "grad_norm": 0.03342023864388466, "learning_rate": 3.7332103321033215e-05, "loss": 0.0012373730540275573, "step": 220870 }, { "epoch": 62.69656542719273, "grad_norm": 0.02772674150764942, "learning_rate": 3.732926483110985e-05, "loss": 0.0004272120073437691, "step": 220880 }, { "epoch": 62.69940391711609, "grad_norm": 0.01515103131532669, "learning_rate": 3.732642634118649e-05, "loss": 0.00019667018204927444, "step": 220890 }, { "epoch": 62.702242407039456, "grad_norm": 0.24886518716812134, "learning_rate": 3.732358785126313e-05, "loss": 0.00013839714229106902, "step": 220900 }, { "epoch": 62.70508089696281, "grad_norm": 1.2524290084838867, "learning_rate": 3.732074936133977e-05, "loss": 0.0004319304600358009, "step": 220910 }, { "epoch": 62.707919386886175, "grad_norm": 0.008192543871700764, "learning_rate": 3.731791087141641e-05, "loss": 0.0009183449670672417, "step": 220920 }, { "epoch": 62.71075787680954, "grad_norm": 0.10828685760498047, "learning_rate": 3.731507238149305e-05, "loss": 0.004860294610261917, "step": 220930 }, { "epoch": 62.7135963667329, "grad_norm": 17.202125549316406, "learning_rate": 3.7312233891569684e-05, "loss": 0.005052722990512848, "step": 220940 }, { "epoch": 62.71643485665626, "grad_norm": 0.30226701498031616, "learning_rate": 3.7309395401646325e-05, "loss": 0.00030899811536073686, "step": 220950 }, { "epoch": 62.71927334657962, "grad_norm": 0.013504995964467525, "learning_rate": 3.730655691172297e-05, "loss": 0.00032200515270233156, "step": 220960 }, { "epoch": 62.72211183650298, "grad_norm": 0.01178754959255457, "learning_rate": 3.730371842179961e-05, "loss": 0.0010052049532532693, "step": 220970 }, { "epoch": 62.72495032642634, "grad_norm": 0.05268789455294609, "learning_rate": 3.730087993187624e-05, "loss": 0.0002615513280034065, "step": 220980 }, { "epoch": 62.7277888163497, "grad_norm": 0.014984403736889362, "learning_rate": 3.729804144195288e-05, "loss": 0.0003170343115925789, "step": 220990 }, { "epoch": 62.730627306273064, "grad_norm": 1.610073447227478, "learning_rate": 3.7295202952029526e-05, "loss": 0.0005050806328654289, "step": 221000 }, { "epoch": 62.730627306273064, "eval_accuracy": 0.9790805620906721, "eval_loss": 0.07736451923847198, "eval_runtime": 32.6568, "eval_samples_per_second": 481.584, "eval_steps_per_second": 7.533, "step": 221000 }, { "epoch": 62.73346579619643, "grad_norm": 0.029072754085063934, "learning_rate": 3.729236446210616e-05, "loss": 0.00037256386131048205, "step": 221010 }, { "epoch": 62.73630428611978, "grad_norm": 0.15091291069984436, "learning_rate": 3.72895259721828e-05, "loss": 0.0010726409032940864, "step": 221020 }, { "epoch": 62.739142776043145, "grad_norm": 0.01626225747168064, "learning_rate": 3.728668748225944e-05, "loss": 0.0005709551274776459, "step": 221030 }, { "epoch": 62.74198126596651, "grad_norm": 0.735421359539032, "learning_rate": 3.728384899233608e-05, "loss": 0.0005545029416680336, "step": 221040 }, { "epoch": 62.744819755889864, "grad_norm": 0.03571925684809685, "learning_rate": 3.728101050241272e-05, "loss": 0.0019419748336076737, "step": 221050 }, { "epoch": 62.74765824581323, "grad_norm": 0.029926322400569916, "learning_rate": 3.727817201248936e-05, "loss": 0.0021665140986442564, "step": 221060 }, { "epoch": 62.75049673573659, "grad_norm": 0.04182121157646179, "learning_rate": 3.7275333522565995e-05, "loss": 0.00050969198346138, "step": 221070 }, { "epoch": 62.753335225659946, "grad_norm": 0.07838650047779083, "learning_rate": 3.7272495032642636e-05, "loss": 0.005736139416694641, "step": 221080 }, { "epoch": 62.75617371558331, "grad_norm": 0.03903937339782715, "learning_rate": 3.726965654271928e-05, "loss": 0.0003226315602660179, "step": 221090 }, { "epoch": 62.75901220550667, "grad_norm": 0.12722247838974, "learning_rate": 3.726681805279591e-05, "loss": 0.00256072711199522, "step": 221100 }, { "epoch": 62.761850695430034, "grad_norm": 0.45306870341300964, "learning_rate": 3.7263979562872554e-05, "loss": 0.00023096241056919098, "step": 221110 }, { "epoch": 62.76468918535339, "grad_norm": 0.7984899878501892, "learning_rate": 3.726114107294919e-05, "loss": 0.00027173981070518496, "step": 221120 }, { "epoch": 62.76752767527675, "grad_norm": 0.05048202723264694, "learning_rate": 3.7258302583025836e-05, "loss": 0.00018072295933961867, "step": 221130 }, { "epoch": 62.770366165200116, "grad_norm": 0.02276439033448696, "learning_rate": 3.725546409310247e-05, "loss": 0.0002523284405469894, "step": 221140 }, { "epoch": 62.77320465512347, "grad_norm": 0.12624478340148926, "learning_rate": 3.7252625603179105e-05, "loss": 0.00025866702198982237, "step": 221150 }, { "epoch": 62.776043145046835, "grad_norm": 0.020636245608329773, "learning_rate": 3.7249787113255754e-05, "loss": 0.00036194305866956713, "step": 221160 }, { "epoch": 62.7788816349702, "grad_norm": 2.036579132080078, "learning_rate": 3.724694862333239e-05, "loss": 0.0008509108796715736, "step": 221170 }, { "epoch": 62.78172012489355, "grad_norm": 0.14460164308547974, "learning_rate": 3.724411013340903e-05, "loss": 0.00016202405095100403, "step": 221180 }, { "epoch": 62.784558614816916, "grad_norm": 0.10844504833221436, "learning_rate": 3.724127164348567e-05, "loss": 0.0032292120158672333, "step": 221190 }, { "epoch": 62.78739710474028, "grad_norm": 0.12857072055339813, "learning_rate": 3.7238433153562306e-05, "loss": 0.0003208884969353676, "step": 221200 }, { "epoch": 62.79023559466364, "grad_norm": 0.022208433598279953, "learning_rate": 3.723559466363895e-05, "loss": 9.976215660572052e-05, "step": 221210 }, { "epoch": 62.793074084587, "grad_norm": 0.03178482502698898, "learning_rate": 3.723275617371558e-05, "loss": 0.00018774624913930893, "step": 221220 }, { "epoch": 62.79591257451036, "grad_norm": 0.3308212459087372, "learning_rate": 3.722991768379222e-05, "loss": 0.0009816626086831092, "step": 221230 }, { "epoch": 62.79875106443372, "grad_norm": 0.012342538684606552, "learning_rate": 3.7227079193868864e-05, "loss": 0.0002688990905880928, "step": 221240 }, { "epoch": 62.80158955435708, "grad_norm": 0.048857398331165314, "learning_rate": 3.72242407039455e-05, "loss": 0.00040606539696455004, "step": 221250 }, { "epoch": 62.80442804428044, "grad_norm": 0.03173855319619179, "learning_rate": 3.722140221402215e-05, "loss": 0.0006343487650156022, "step": 221260 }, { "epoch": 62.807266534203805, "grad_norm": 0.14571121335029602, "learning_rate": 3.721856372409878e-05, "loss": 0.00032192151993513105, "step": 221270 }, { "epoch": 62.81010502412717, "grad_norm": 1.6813653707504272, "learning_rate": 3.7215725234175416e-05, "loss": 0.00040964093059301374, "step": 221280 }, { "epoch": 62.812943514050524, "grad_norm": 0.09138424694538116, "learning_rate": 3.7212886744252064e-05, "loss": 0.0002347376197576523, "step": 221290 }, { "epoch": 62.81578200397389, "grad_norm": 0.03747747838497162, "learning_rate": 3.72100482543287e-05, "loss": 0.00014528799802064895, "step": 221300 }, { "epoch": 62.81862049389725, "grad_norm": 0.017470844089984894, "learning_rate": 3.720720976440534e-05, "loss": 0.0003998352214694023, "step": 221310 }, { "epoch": 62.821458983820605, "grad_norm": 0.007343909703195095, "learning_rate": 3.7204371274481975e-05, "loss": 0.000683414377272129, "step": 221320 }, { "epoch": 62.82429747374397, "grad_norm": 0.04427691921591759, "learning_rate": 3.7201532784558616e-05, "loss": 0.00034426674246788023, "step": 221330 }, { "epoch": 62.82713596366733, "grad_norm": 0.0723504051566124, "learning_rate": 3.719869429463526e-05, "loss": 0.0002257300540804863, "step": 221340 }, { "epoch": 62.82997445359069, "grad_norm": 0.01600656844675541, "learning_rate": 3.719585580471189e-05, "loss": 0.0006023889407515525, "step": 221350 }, { "epoch": 62.83281294351405, "grad_norm": 0.023682599887251854, "learning_rate": 3.7193017314788534e-05, "loss": 0.0006419939920306206, "step": 221360 }, { "epoch": 62.83565143343741, "grad_norm": 0.012780014425516129, "learning_rate": 3.7190178824865175e-05, "loss": 0.0006440529599785805, "step": 221370 }, { "epoch": 62.838489923360775, "grad_norm": 0.002825644565746188, "learning_rate": 3.718734033494181e-05, "loss": 0.00020237155258655547, "step": 221380 }, { "epoch": 62.84132841328413, "grad_norm": 0.06286982446908951, "learning_rate": 3.718450184501846e-05, "loss": 0.00034492947161197663, "step": 221390 }, { "epoch": 62.844166903207494, "grad_norm": 0.0709298700094223, "learning_rate": 3.718166335509509e-05, "loss": 0.0003029840067028999, "step": 221400 }, { "epoch": 62.84700539313086, "grad_norm": 0.004451101645827293, "learning_rate": 3.717882486517173e-05, "loss": 7.170941680669784e-05, "step": 221410 }, { "epoch": 62.84984388305421, "grad_norm": 0.019794009625911713, "learning_rate": 3.717598637524837e-05, "loss": 0.00023233871906995772, "step": 221420 }, { "epoch": 62.852682372977576, "grad_norm": 0.2831297814846039, "learning_rate": 3.717314788532501e-05, "loss": 0.0018797386437654495, "step": 221430 }, { "epoch": 62.85552086290094, "grad_norm": 0.2952229976654053, "learning_rate": 3.717030939540165e-05, "loss": 0.0002957427874207497, "step": 221440 }, { "epoch": 62.858359352824294, "grad_norm": 0.07380744069814682, "learning_rate": 3.7167470905478286e-05, "loss": 0.0011314468458294869, "step": 221450 }, { "epoch": 62.86119784274766, "grad_norm": 0.20523954927921295, "learning_rate": 3.716463241555493e-05, "loss": 0.00029276981949806214, "step": 221460 }, { "epoch": 62.86403633267102, "grad_norm": 0.18051712214946747, "learning_rate": 3.716179392563157e-05, "loss": 0.0004997003823518753, "step": 221470 }, { "epoch": 62.86687482259438, "grad_norm": 0.20822861790657043, "learning_rate": 3.71589554357082e-05, "loss": 0.0005903499200940132, "step": 221480 }, { "epoch": 62.86971331251774, "grad_norm": 0.061056673526763916, "learning_rate": 3.7156116945784844e-05, "loss": 0.0006460011005401612, "step": 221490 }, { "epoch": 62.8725518024411, "grad_norm": 0.06393151730298996, "learning_rate": 3.7153278455861486e-05, "loss": 0.0003125712275505066, "step": 221500 }, { "epoch": 62.8725518024411, "eval_accuracy": 0.9773637693139188, "eval_loss": 0.0836043655872345, "eval_runtime": 33.1109, "eval_samples_per_second": 474.98, "eval_steps_per_second": 7.43, "step": 221500 }, { "epoch": 62.875390292364465, "grad_norm": 0.0933319702744484, "learning_rate": 3.715043996593812e-05, "loss": 0.00033258162438869475, "step": 221510 }, { "epoch": 62.87822878228782, "grad_norm": 0.5218175053596497, "learning_rate": 3.714760147601476e-05, "loss": 0.0005800547078251839, "step": 221520 }, { "epoch": 62.88106727221118, "grad_norm": 0.027496440336108208, "learning_rate": 3.71447629860914e-05, "loss": 0.0005764544010162353, "step": 221530 }, { "epoch": 62.883905762134546, "grad_norm": 0.16000771522521973, "learning_rate": 3.714192449616804e-05, "loss": 0.005874023586511612, "step": 221540 }, { "epoch": 62.8867442520579, "grad_norm": 0.1213483139872551, "learning_rate": 3.713908600624468e-05, "loss": 0.0005395777523517608, "step": 221550 }, { "epoch": 62.889582741981265, "grad_norm": 0.0938689187169075, "learning_rate": 3.713624751632132e-05, "loss": 0.003927844017744065, "step": 221560 }, { "epoch": 62.89242123190463, "grad_norm": 0.03203064948320389, "learning_rate": 3.7133409026397955e-05, "loss": 0.004167410731315613, "step": 221570 }, { "epoch": 62.89525972182799, "grad_norm": 0.10928358137607574, "learning_rate": 3.7130570536474597e-05, "loss": 0.0003794051706790924, "step": 221580 }, { "epoch": 62.898098211751346, "grad_norm": 0.34167349338531494, "learning_rate": 3.712773204655124e-05, "loss": 0.0011486953124403953, "step": 221590 }, { "epoch": 62.90093670167471, "grad_norm": 0.2215585559606552, "learning_rate": 3.712489355662788e-05, "loss": 0.0002844205126166344, "step": 221600 }, { "epoch": 62.90377519159807, "grad_norm": 0.037220295518636703, "learning_rate": 3.7122055066704514e-05, "loss": 0.00016481857746839524, "step": 221610 }, { "epoch": 62.90661368152143, "grad_norm": 0.18866892158985138, "learning_rate": 3.711921657678115e-05, "loss": 0.00014819875359535218, "step": 221620 }, { "epoch": 62.90945217144479, "grad_norm": 0.017261669039726257, "learning_rate": 3.71163780868578e-05, "loss": 0.00011001136153936386, "step": 221630 }, { "epoch": 62.912290661368154, "grad_norm": 0.0674494057893753, "learning_rate": 3.711353959693443e-05, "loss": 0.00021523162722587587, "step": 221640 }, { "epoch": 62.91512915129151, "grad_norm": 0.13221725821495056, "learning_rate": 3.711070110701107e-05, "loss": 0.0022959308698773384, "step": 221650 }, { "epoch": 62.91796764121487, "grad_norm": 10.170283317565918, "learning_rate": 3.7107862617087714e-05, "loss": 0.002906334586441517, "step": 221660 }, { "epoch": 62.920806131138235, "grad_norm": 0.9249834418296814, "learning_rate": 3.710502412716435e-05, "loss": 0.00035566221922636034, "step": 221670 }, { "epoch": 62.9236446210616, "grad_norm": 0.06344310939311981, "learning_rate": 3.710218563724099e-05, "loss": 0.000892285630106926, "step": 221680 }, { "epoch": 62.926483110984954, "grad_norm": 0.014313209801912308, "learning_rate": 3.709934714731763e-05, "loss": 0.012858664989471436, "step": 221690 }, { "epoch": 62.92932160090832, "grad_norm": 0.036715563386678696, "learning_rate": 3.7096508657394266e-05, "loss": 0.0012296322733163834, "step": 221700 }, { "epoch": 62.93216009083168, "grad_norm": 1.1415212154388428, "learning_rate": 3.709367016747091e-05, "loss": 0.0004183949902653694, "step": 221710 }, { "epoch": 62.934998580755035, "grad_norm": 0.7998591661453247, "learning_rate": 3.709083167754754e-05, "loss": 0.0004766814410686493, "step": 221720 }, { "epoch": 62.9378370706784, "grad_norm": 0.13821984827518463, "learning_rate": 3.708799318762419e-05, "loss": 0.0003136558458209038, "step": 221730 }, { "epoch": 62.94067556060176, "grad_norm": 0.008431218564510345, "learning_rate": 3.7085154697700825e-05, "loss": 0.003783559799194336, "step": 221740 }, { "epoch": 62.943514050525124, "grad_norm": 0.03993619233369827, "learning_rate": 3.708231620777746e-05, "loss": 0.00023132599890232086, "step": 221750 }, { "epoch": 62.94635254044848, "grad_norm": 0.08023405820131302, "learning_rate": 3.707947771785411e-05, "loss": 0.0002563139423727989, "step": 221760 }, { "epoch": 62.94919103037184, "grad_norm": 0.002759313676506281, "learning_rate": 3.707663922793074e-05, "loss": 0.00020506344735622407, "step": 221770 }, { "epoch": 62.952029520295206, "grad_norm": 0.659589946269989, "learning_rate": 3.707380073800738e-05, "loss": 0.00026085879653692245, "step": 221780 }, { "epoch": 62.95486801021856, "grad_norm": 0.016593962907791138, "learning_rate": 3.7070962248084025e-05, "loss": 0.0005251606926321983, "step": 221790 }, { "epoch": 62.957706500141924, "grad_norm": 0.029335135594010353, "learning_rate": 3.706812375816066e-05, "loss": 0.00024507790803909304, "step": 221800 }, { "epoch": 62.96054499006529, "grad_norm": 0.016252443194389343, "learning_rate": 3.70652852682373e-05, "loss": 0.00018752869218587875, "step": 221810 }, { "epoch": 62.96338347998864, "grad_norm": 0.051973696798086166, "learning_rate": 3.7062446778313935e-05, "loss": 0.00037641599774360655, "step": 221820 }, { "epoch": 62.966221969912006, "grad_norm": 0.013584896922111511, "learning_rate": 3.705960828839058e-05, "loss": 0.00022341348230838775, "step": 221830 }, { "epoch": 62.96906045983537, "grad_norm": 0.021034209057688713, "learning_rate": 3.705676979846722e-05, "loss": 0.002510124258697033, "step": 221840 }, { "epoch": 62.97189894975873, "grad_norm": 0.24433168768882751, "learning_rate": 3.705393130854385e-05, "loss": 0.0003959193825721741, "step": 221850 }, { "epoch": 62.97473743968209, "grad_norm": 0.05004934221506119, "learning_rate": 3.70510928186205e-05, "loss": 0.00017364602535963058, "step": 221860 }, { "epoch": 62.97757592960545, "grad_norm": 0.3371748626232147, "learning_rate": 3.7048254328697135e-05, "loss": 0.00035803411155939103, "step": 221870 }, { "epoch": 62.98041441952881, "grad_norm": 0.09849247336387634, "learning_rate": 3.704541583877377e-05, "loss": 0.00011715590953826904, "step": 221880 }, { "epoch": 62.98325290945217, "grad_norm": 0.09758304059505463, "learning_rate": 3.704257734885042e-05, "loss": 0.00020075794309377671, "step": 221890 }, { "epoch": 62.98609139937553, "grad_norm": 5.555059432983398, "learning_rate": 3.703973885892705e-05, "loss": 0.0013607826083898544, "step": 221900 }, { "epoch": 62.988929889298895, "grad_norm": 0.02382068522274494, "learning_rate": 3.7036900369003694e-05, "loss": 0.0015381064265966415, "step": 221910 }, { "epoch": 62.99176837922225, "grad_norm": 0.03526509925723076, "learning_rate": 3.703406187908033e-05, "loss": 0.0019354859367012977, "step": 221920 }, { "epoch": 62.99460686914561, "grad_norm": 0.017935989424586296, "learning_rate": 3.703122338915697e-05, "loss": 0.0007024912163615226, "step": 221930 }, { "epoch": 62.997445359068976, "grad_norm": 0.508703351020813, "learning_rate": 3.702838489923361e-05, "loss": 0.0002306222915649414, "step": 221940 }, { "epoch": 63.00028384899234, "grad_norm": 0.09749090671539307, "learning_rate": 3.7025546409310246e-05, "loss": 0.0003968656063079834, "step": 221950 }, { "epoch": 63.003122338915695, "grad_norm": 0.11861522495746613, "learning_rate": 3.702270791938689e-05, "loss": 0.0009935002774000168, "step": 221960 }, { "epoch": 63.00596082883906, "grad_norm": 0.13088586926460266, "learning_rate": 3.701986942946353e-05, "loss": 0.0005473753437399865, "step": 221970 }, { "epoch": 63.00879931876242, "grad_norm": 14.756282806396484, "learning_rate": 3.7017030939540163e-05, "loss": 0.0015325598418712617, "step": 221980 }, { "epoch": 63.01163780868578, "grad_norm": 1.800063967704773, "learning_rate": 3.7014192449616805e-05, "loss": 0.0018998807296156882, "step": 221990 }, { "epoch": 63.01447629860914, "grad_norm": 0.3618490695953369, "learning_rate": 3.7011353959693446e-05, "loss": 0.0001423874869942665, "step": 222000 }, { "epoch": 63.01447629860914, "eval_accuracy": 0.9743752781840147, "eval_loss": 0.09624533355236053, "eval_runtime": 33.1445, "eval_samples_per_second": 474.498, "eval_steps_per_second": 7.422, "step": 222000 }, { "epoch": 63.0173147885325, "grad_norm": 0.04066452383995056, "learning_rate": 3.700851546977008e-05, "loss": 0.0006798496469855309, "step": 222010 }, { "epoch": 63.02015327845586, "grad_norm": 0.05239201337099075, "learning_rate": 3.700567697984672e-05, "loss": 0.001550297997891903, "step": 222020 }, { "epoch": 63.02299176837922, "grad_norm": 0.03552217781543732, "learning_rate": 3.7002838489923364e-05, "loss": 0.002151862159371376, "step": 222030 }, { "epoch": 63.025830258302584, "grad_norm": 0.04686582833528519, "learning_rate": 3.7e-05, "loss": 0.00017016418278217315, "step": 222040 }, { "epoch": 63.02866874822595, "grad_norm": 0.20809105038642883, "learning_rate": 3.699716151007664e-05, "loss": 0.0003229530528187752, "step": 222050 }, { "epoch": 63.0315072381493, "grad_norm": 0.1375904530286789, "learning_rate": 3.699432302015328e-05, "loss": 0.003982384502887726, "step": 222060 }, { "epoch": 63.034345728072665, "grad_norm": 0.11510719358921051, "learning_rate": 3.699148453022992e-05, "loss": 0.00020192936062812805, "step": 222070 }, { "epoch": 63.03718421799603, "grad_norm": 0.022936230525374413, "learning_rate": 3.698864604030656e-05, "loss": 0.00023498386144638062, "step": 222080 }, { "epoch": 63.040022707919384, "grad_norm": 0.025212842971086502, "learning_rate": 3.69858075503832e-05, "loss": 0.00019252020865678787, "step": 222090 }, { "epoch": 63.04286119784275, "grad_norm": 0.0306193009018898, "learning_rate": 3.698296906045984e-05, "loss": 0.00015121083706617355, "step": 222100 }, { "epoch": 63.04569968776611, "grad_norm": 0.029223550111055374, "learning_rate": 3.6980130570536474e-05, "loss": 8.840765804052353e-05, "step": 222110 }, { "epoch": 63.04853817768947, "grad_norm": 0.0823156088590622, "learning_rate": 3.6977292080613116e-05, "loss": 0.0001844773069024086, "step": 222120 }, { "epoch": 63.05137666761283, "grad_norm": 0.010103429667651653, "learning_rate": 3.697445359068976e-05, "loss": 0.00011989008635282516, "step": 222130 }, { "epoch": 63.05421515753619, "grad_norm": 0.0016102748923003674, "learning_rate": 3.697161510076639e-05, "loss": 7.132887840270996e-05, "step": 222140 }, { "epoch": 63.057053647459554, "grad_norm": 0.006803066935390234, "learning_rate": 3.696877661084303e-05, "loss": 0.00019915085285902024, "step": 222150 }, { "epoch": 63.05989213738291, "grad_norm": 0.03678028658032417, "learning_rate": 3.6965938120919674e-05, "loss": 0.004399031400680542, "step": 222160 }, { "epoch": 63.06273062730627, "grad_norm": 0.27749061584472656, "learning_rate": 3.696309963099631e-05, "loss": 0.0027730124071240424, "step": 222170 }, { "epoch": 63.065569117229636, "grad_norm": 0.12102781981229782, "learning_rate": 3.696026114107295e-05, "loss": 0.00028396695852279664, "step": 222180 }, { "epoch": 63.06840760715299, "grad_norm": 0.22646041214466095, "learning_rate": 3.695742265114959e-05, "loss": 0.00043182596564292906, "step": 222190 }, { "epoch": 63.071246097076354, "grad_norm": 0.02128811739385128, "learning_rate": 3.695458416122623e-05, "loss": 0.0011990437284111976, "step": 222200 }, { "epoch": 63.07408458699972, "grad_norm": 2.4359209537506104, "learning_rate": 3.695174567130287e-05, "loss": 0.0004339456558227539, "step": 222210 }, { "epoch": 63.07692307692308, "grad_norm": 1.7531226873397827, "learning_rate": 3.69489071813795e-05, "loss": 0.0005901388823986054, "step": 222220 }, { "epoch": 63.079761566846436, "grad_norm": 0.0870654433965683, "learning_rate": 3.694606869145615e-05, "loss": 0.0003680797293782234, "step": 222230 }, { "epoch": 63.0826000567698, "grad_norm": 0.09533345699310303, "learning_rate": 3.6943230201532785e-05, "loss": 0.00015436746180057525, "step": 222240 }, { "epoch": 63.08543854669316, "grad_norm": 0.021106502041220665, "learning_rate": 3.6940391711609426e-05, "loss": 8.522514253854752e-05, "step": 222250 }, { "epoch": 63.08827703661652, "grad_norm": 0.03947500139474869, "learning_rate": 3.693755322168607e-05, "loss": 8.559450507164001e-05, "step": 222260 }, { "epoch": 63.09111552653988, "grad_norm": 0.01650817133486271, "learning_rate": 3.69347147317627e-05, "loss": 0.0001304119825363159, "step": 222270 }, { "epoch": 63.09395401646324, "grad_norm": 0.484923779964447, "learning_rate": 3.6931876241839344e-05, "loss": 0.0003352878615260124, "step": 222280 }, { "epoch": 63.0967925063866, "grad_norm": 0.4122466444969177, "learning_rate": 3.6929037751915985e-05, "loss": 0.00021409206092357636, "step": 222290 }, { "epoch": 63.09963099630996, "grad_norm": 0.11534779518842697, "learning_rate": 3.692619926199262e-05, "loss": 0.00029919333755970003, "step": 222300 }, { "epoch": 63.102469486233325, "grad_norm": 4.089741230010986, "learning_rate": 3.692336077206926e-05, "loss": 0.0005991507321596145, "step": 222310 }, { "epoch": 63.10530797615669, "grad_norm": 0.07785464078187943, "learning_rate": 3.69205222821459e-05, "loss": 0.0001516314223408699, "step": 222320 }, { "epoch": 63.10814646608004, "grad_norm": 0.014666670002043247, "learning_rate": 3.6917683792222544e-05, "loss": 0.0024389436468482016, "step": 222330 }, { "epoch": 63.110984956003406, "grad_norm": 0.007482943125069141, "learning_rate": 3.691484530229918e-05, "loss": 0.0005008239299058914, "step": 222340 }, { "epoch": 63.11382344592677, "grad_norm": 0.010378503240644932, "learning_rate": 3.691200681237581e-05, "loss": 0.00019440222531557082, "step": 222350 }, { "epoch": 63.116661935850125, "grad_norm": 0.15446656942367554, "learning_rate": 3.690916832245246e-05, "loss": 0.00037731193006038664, "step": 222360 }, { "epoch": 63.11950042577349, "grad_norm": 0.16951552033424377, "learning_rate": 3.6906329832529096e-05, "loss": 0.0002531193196773529, "step": 222370 }, { "epoch": 63.12233891569685, "grad_norm": 0.1492270678281784, "learning_rate": 3.690349134260574e-05, "loss": 0.00012338235974311828, "step": 222380 }, { "epoch": 63.12517740562021, "grad_norm": 0.014598102308809757, "learning_rate": 3.690065285268238e-05, "loss": 0.0002052679657936096, "step": 222390 }, { "epoch": 63.12801589554357, "grad_norm": 0.015792399644851685, "learning_rate": 3.689781436275901e-05, "loss": 0.0001588882878422737, "step": 222400 }, { "epoch": 63.13085438546693, "grad_norm": 0.12372888624668121, "learning_rate": 3.6894975872835654e-05, "loss": 0.00013833194971084594, "step": 222410 }, { "epoch": 63.133692875390295, "grad_norm": 0.017437366768717766, "learning_rate": 3.6892137382912296e-05, "loss": 0.00010833889245986938, "step": 222420 }, { "epoch": 63.13653136531365, "grad_norm": 0.03344358131289482, "learning_rate": 3.688929889298893e-05, "loss": 0.00022490303963422774, "step": 222430 }, { "epoch": 63.139369855237014, "grad_norm": 0.05987146869301796, "learning_rate": 3.688646040306557e-05, "loss": 0.00013126730918884278, "step": 222440 }, { "epoch": 63.14220834516038, "grad_norm": 0.01600753329694271, "learning_rate": 3.6883621913142206e-05, "loss": 0.00027775615453720095, "step": 222450 }, { "epoch": 63.14504683508373, "grad_norm": 0.044473182410001755, "learning_rate": 3.688078342321885e-05, "loss": 0.0005924517288804054, "step": 222460 }, { "epoch": 63.147885325007096, "grad_norm": 3.4597675800323486, "learning_rate": 3.687794493329549e-05, "loss": 0.0017128989100456237, "step": 222470 }, { "epoch": 63.15072381493046, "grad_norm": 0.040201932191848755, "learning_rate": 3.6875106443372124e-05, "loss": 0.001567579433321953, "step": 222480 }, { "epoch": 63.15356230485382, "grad_norm": 0.015649186447262764, "learning_rate": 3.687226795344877e-05, "loss": 0.0014405371621251106, "step": 222490 }, { "epoch": 63.15640079477718, "grad_norm": 0.046983592212200165, "learning_rate": 3.6869429463525406e-05, "loss": 0.0018902258947491647, "step": 222500 }, { "epoch": 63.15640079477718, "eval_accuracy": 0.9739301837604121, "eval_loss": 0.10085046291351318, "eval_runtime": 32.588, "eval_samples_per_second": 482.6, "eval_steps_per_second": 7.549, "step": 222500 }, { "epoch": 63.15923928470054, "grad_norm": 0.6243544220924377, "learning_rate": 3.686659097360204e-05, "loss": 0.0014897385612130166, "step": 222510 }, { "epoch": 63.1620777746239, "grad_norm": 0.5263832807540894, "learning_rate": 3.686375248367869e-05, "loss": 0.002801300399005413, "step": 222520 }, { "epoch": 63.16491626454726, "grad_norm": 0.07941048592329025, "learning_rate": 3.6860913993755324e-05, "loss": 0.0010701123625040053, "step": 222530 }, { "epoch": 63.16775475447062, "grad_norm": 0.014640825800597668, "learning_rate": 3.6858075503831965e-05, "loss": 0.0009040471166372299, "step": 222540 }, { "epoch": 63.170593244393984, "grad_norm": 0.34598857164382935, "learning_rate": 3.68552370139086e-05, "loss": 0.00030457302927970885, "step": 222550 }, { "epoch": 63.17343173431734, "grad_norm": 0.01571085676550865, "learning_rate": 3.685239852398524e-05, "loss": 0.0004549348726868629, "step": 222560 }, { "epoch": 63.1762702242407, "grad_norm": 0.1959463655948639, "learning_rate": 3.684956003406188e-05, "loss": 0.0003264063969254494, "step": 222570 }, { "epoch": 63.179108714164066, "grad_norm": 0.0394119992852211, "learning_rate": 3.684672154413852e-05, "loss": 0.00040846075862646105, "step": 222580 }, { "epoch": 63.18194720408743, "grad_norm": 0.21493591368198395, "learning_rate": 3.684388305421516e-05, "loss": 0.0007286196574568749, "step": 222590 }, { "epoch": 63.184785694010785, "grad_norm": 0.014978724531829357, "learning_rate": 3.68410445642918e-05, "loss": 0.0014208262786269189, "step": 222600 }, { "epoch": 63.18762418393415, "grad_norm": 0.07457029074430466, "learning_rate": 3.6838206074368435e-05, "loss": 0.00015747901052236556, "step": 222610 }, { "epoch": 63.19046267385751, "grad_norm": 0.08383733034133911, "learning_rate": 3.683536758444508e-05, "loss": 0.0006684379652142525, "step": 222620 }, { "epoch": 63.193301163780866, "grad_norm": 0.10786515474319458, "learning_rate": 3.683252909452172e-05, "loss": 0.000507449172437191, "step": 222630 }, { "epoch": 63.19613965370423, "grad_norm": 0.04150820150971413, "learning_rate": 3.682969060459835e-05, "loss": 0.005959928035736084, "step": 222640 }, { "epoch": 63.19897814362759, "grad_norm": 0.16716830432415009, "learning_rate": 3.682685211467499e-05, "loss": 0.0005399556830525398, "step": 222650 }, { "epoch": 63.20181663355095, "grad_norm": 0.06488361954689026, "learning_rate": 3.6824013624751635e-05, "loss": 0.00014287084341049194, "step": 222660 }, { "epoch": 63.20465512347431, "grad_norm": 0.03859921172261238, "learning_rate": 3.6821175134828276e-05, "loss": 0.0013493884354829789, "step": 222670 }, { "epoch": 63.20749361339767, "grad_norm": 0.08405287563800812, "learning_rate": 3.681833664490491e-05, "loss": 0.0005160909146070481, "step": 222680 }, { "epoch": 63.210332103321036, "grad_norm": 0.062039799988269806, "learning_rate": 3.681549815498155e-05, "loss": 0.00036007892340421676, "step": 222690 }, { "epoch": 63.21317059324439, "grad_norm": 0.16438180208206177, "learning_rate": 3.681265966505819e-05, "loss": 0.000617966242134571, "step": 222700 }, { "epoch": 63.216009083167755, "grad_norm": 0.5380982756614685, "learning_rate": 3.680982117513483e-05, "loss": 0.00027030911296606065, "step": 222710 }, { "epoch": 63.21884757309112, "grad_norm": 0.2200469970703125, "learning_rate": 3.680698268521147e-05, "loss": 0.0008650330826640129, "step": 222720 }, { "epoch": 63.221686063014474, "grad_norm": 0.019728846848011017, "learning_rate": 3.680414419528811e-05, "loss": 0.005314484611153603, "step": 222730 }, { "epoch": 63.22452455293784, "grad_norm": 2.4692025184631348, "learning_rate": 3.6801305705364745e-05, "loss": 0.0018581535667181015, "step": 222740 }, { "epoch": 63.2273630428612, "grad_norm": 0.039046015590429306, "learning_rate": 3.679846721544139e-05, "loss": 0.001369146816432476, "step": 222750 }, { "epoch": 63.230201532784555, "grad_norm": 0.043520327657461166, "learning_rate": 3.679562872551803e-05, "loss": 0.0004117781296372414, "step": 222760 }, { "epoch": 63.23304002270792, "grad_norm": 0.03483003005385399, "learning_rate": 3.679279023559466e-05, "loss": 0.002009091153740883, "step": 222770 }, { "epoch": 63.23587851263128, "grad_norm": 0.05678689107298851, "learning_rate": 3.6789951745671304e-05, "loss": 0.00043452270328998567, "step": 222780 }, { "epoch": 63.238717002554644, "grad_norm": 0.16596706211566925, "learning_rate": 3.6787113255747945e-05, "loss": 0.0009283162653446198, "step": 222790 }, { "epoch": 63.241555492478, "grad_norm": 0.039930060505867004, "learning_rate": 3.678427476582459e-05, "loss": 0.0003033565357327461, "step": 222800 }, { "epoch": 63.24439398240136, "grad_norm": 1.528653860092163, "learning_rate": 3.678143627590122e-05, "loss": 0.0011295134201645852, "step": 222810 }, { "epoch": 63.247232472324725, "grad_norm": 0.4009380340576172, "learning_rate": 3.677859778597786e-05, "loss": 0.0012015856802463532, "step": 222820 }, { "epoch": 63.25007096224808, "grad_norm": 0.29122796654701233, "learning_rate": 3.6775759296054504e-05, "loss": 0.0008800780400633812, "step": 222830 }, { "epoch": 63.252909452171444, "grad_norm": 0.29701393842697144, "learning_rate": 3.677292080613114e-05, "loss": 0.0006461620330810546, "step": 222840 }, { "epoch": 63.25574794209481, "grad_norm": 0.028278445824980736, "learning_rate": 3.677008231620778e-05, "loss": 0.0007592126727104187, "step": 222850 }, { "epoch": 63.25858643201816, "grad_norm": 0.044579699635505676, "learning_rate": 3.676724382628442e-05, "loss": 0.0014387547969818115, "step": 222860 }, { "epoch": 63.261424921941526, "grad_norm": 4.254112243652344, "learning_rate": 3.6764405336361056e-05, "loss": 0.0008183998987078666, "step": 222870 }, { "epoch": 63.26426341186489, "grad_norm": 0.35128235816955566, "learning_rate": 3.67615668464377e-05, "loss": 0.00154423788189888, "step": 222880 }, { "epoch": 63.26710190178825, "grad_norm": 0.01831040345132351, "learning_rate": 3.675872835651434e-05, "loss": 0.0011856276541948318, "step": 222890 }, { "epoch": 63.26994039171161, "grad_norm": 1.7920013666152954, "learning_rate": 3.675588986659097e-05, "loss": 0.00034234002232551574, "step": 222900 }, { "epoch": 63.27277888163497, "grad_norm": 0.03529277816414833, "learning_rate": 3.6753051376667615e-05, "loss": 0.000364505872130394, "step": 222910 }, { "epoch": 63.27561737155833, "grad_norm": 0.30966782569885254, "learning_rate": 3.6750212886744256e-05, "loss": 0.0014404036104679109, "step": 222920 }, { "epoch": 63.27845586148169, "grad_norm": 0.06424389779567719, "learning_rate": 3.674737439682089e-05, "loss": 0.00038585904985666275, "step": 222930 }, { "epoch": 63.28129435140505, "grad_norm": 0.03836692497134209, "learning_rate": 3.674453590689753e-05, "loss": 0.00038603637367486956, "step": 222940 }, { "epoch": 63.284132841328415, "grad_norm": 0.3858807682991028, "learning_rate": 3.674169741697417e-05, "loss": 0.000572367012500763, "step": 222950 }, { "epoch": 63.28697133125178, "grad_norm": 0.5881721377372742, "learning_rate": 3.6738858927050815e-05, "loss": 0.0007988136261701584, "step": 222960 }, { "epoch": 63.28980982117513, "grad_norm": 0.04903225228190422, "learning_rate": 3.673602043712745e-05, "loss": 0.0023206396028399466, "step": 222970 }, { "epoch": 63.292648311098496, "grad_norm": 0.46652910113334656, "learning_rate": 3.6733181947204084e-05, "loss": 0.0017977748066186905, "step": 222980 }, { "epoch": 63.29548680102186, "grad_norm": 0.021855583414435387, "learning_rate": 3.673034345728073e-05, "loss": 0.0006477676331996917, "step": 222990 }, { "epoch": 63.298325290945215, "grad_norm": 0.024461625143885612, "learning_rate": 3.672750496735737e-05, "loss": 0.00020331274718046188, "step": 223000 }, { "epoch": 63.298325290945215, "eval_accuracy": 0.9792077319259872, "eval_loss": 0.07978944480419159, "eval_runtime": 33.1666, "eval_samples_per_second": 474.181, "eval_steps_per_second": 7.417, "step": 223000 }, { "epoch": 63.30116378086858, "grad_norm": 0.05832643434405327, "learning_rate": 3.672466647743401e-05, "loss": 0.0014985736459493637, "step": 223010 }, { "epoch": 63.30400227079194, "grad_norm": 0.06378906965255737, "learning_rate": 3.672182798751065e-05, "loss": 0.0010667722672224045, "step": 223020 }, { "epoch": 63.306840760715296, "grad_norm": 1.0933951139450073, "learning_rate": 3.6718989497587284e-05, "loss": 0.0014186076819896697, "step": 223030 }, { "epoch": 63.30967925063866, "grad_norm": 0.9511200785636902, "learning_rate": 3.6716151007663926e-05, "loss": 0.001658061146736145, "step": 223040 }, { "epoch": 63.31251774056202, "grad_norm": 0.11717627197504044, "learning_rate": 3.671331251774056e-05, "loss": 0.0003696547821164131, "step": 223050 }, { "epoch": 63.315356230485385, "grad_norm": 0.3648156225681305, "learning_rate": 3.67104740278172e-05, "loss": 0.001051601395010948, "step": 223060 }, { "epoch": 63.31819472040874, "grad_norm": 0.05299392715096474, "learning_rate": 3.670763553789384e-05, "loss": 0.0008675988763570785, "step": 223070 }, { "epoch": 63.321033210332104, "grad_norm": 0.01848066784441471, "learning_rate": 3.670479704797048e-05, "loss": 0.0018120346590876578, "step": 223080 }, { "epoch": 63.32387170025547, "grad_norm": 0.26518502831459045, "learning_rate": 3.6701958558047126e-05, "loss": 0.0007869521155953408, "step": 223090 }, { "epoch": 63.32671019017882, "grad_norm": 0.011847668327391148, "learning_rate": 3.669912006812376e-05, "loss": 0.00030502546578645704, "step": 223100 }, { "epoch": 63.329548680102185, "grad_norm": 1.7225652933120728, "learning_rate": 3.6696281578200395e-05, "loss": 0.0005635194480419159, "step": 223110 }, { "epoch": 63.33238717002555, "grad_norm": 0.07520072907209396, "learning_rate": 3.669344308827704e-05, "loss": 0.0007388025522232055, "step": 223120 }, { "epoch": 63.335225659948904, "grad_norm": 0.018903842195868492, "learning_rate": 3.669060459835368e-05, "loss": 0.00024162307381629943, "step": 223130 }, { "epoch": 63.33806414987227, "grad_norm": 0.020696748048067093, "learning_rate": 3.668776610843032e-05, "loss": 0.0003941576927900314, "step": 223140 }, { "epoch": 63.34090263979563, "grad_norm": 0.11120280623435974, "learning_rate": 3.6684927618506954e-05, "loss": 0.000406407006084919, "step": 223150 }, { "epoch": 63.34374112971899, "grad_norm": 0.04610070586204529, "learning_rate": 3.6682089128583595e-05, "loss": 0.00027455557137727736, "step": 223160 }, { "epoch": 63.34657961964235, "grad_norm": 0.0141451982781291, "learning_rate": 3.6679250638660236e-05, "loss": 0.00012269150465726852, "step": 223170 }, { "epoch": 63.34941810956571, "grad_norm": 0.6683109402656555, "learning_rate": 3.667641214873687e-05, "loss": 0.0022162292152643204, "step": 223180 }, { "epoch": 63.352256599489074, "grad_norm": 7.353968620300293, "learning_rate": 3.667357365881351e-05, "loss": 0.003374944627285004, "step": 223190 }, { "epoch": 63.35509508941243, "grad_norm": 0.18984372913837433, "learning_rate": 3.6670735168890154e-05, "loss": 0.00028263144195079805, "step": 223200 }, { "epoch": 63.35793357933579, "grad_norm": 0.054205141961574554, "learning_rate": 3.666789667896679e-05, "loss": 0.0001990489661693573, "step": 223210 }, { "epoch": 63.360772069259156, "grad_norm": 0.13059395551681519, "learning_rate": 3.6665058189043436e-05, "loss": 0.0005018491297960282, "step": 223220 }, { "epoch": 63.36361055918251, "grad_norm": 0.015749845653772354, "learning_rate": 3.666221969912007e-05, "loss": 0.001197049394249916, "step": 223230 }, { "epoch": 63.366449049105874, "grad_norm": 0.12327847629785538, "learning_rate": 3.6659381209196706e-05, "loss": 0.0003326380625367165, "step": 223240 }, { "epoch": 63.36928753902924, "grad_norm": 1.1081219911575317, "learning_rate": 3.665654271927335e-05, "loss": 0.0010806074365973472, "step": 223250 }, { "epoch": 63.3721260289526, "grad_norm": 2.3641059398651123, "learning_rate": 3.665370422934999e-05, "loss": 0.0004905544221401215, "step": 223260 }, { "epoch": 63.374964518875956, "grad_norm": 0.15047495067119598, "learning_rate": 3.665086573942663e-05, "loss": 0.00026196129620075225, "step": 223270 }, { "epoch": 63.37780300879932, "grad_norm": 0.1565430611371994, "learning_rate": 3.6648027249503264e-05, "loss": 0.00047994628548622134, "step": 223280 }, { "epoch": 63.38064149872268, "grad_norm": 0.25629186630249023, "learning_rate": 3.6645188759579906e-05, "loss": 0.00038407668471336366, "step": 223290 }, { "epoch": 63.38347998864604, "grad_norm": 0.11215836554765701, "learning_rate": 3.664235026965655e-05, "loss": 0.0002856113016605377, "step": 223300 }, { "epoch": 63.3863184785694, "grad_norm": 0.07265391945838928, "learning_rate": 3.663951177973318e-05, "loss": 0.0005048206076025963, "step": 223310 }, { "epoch": 63.38915696849276, "grad_norm": 0.30489373207092285, "learning_rate": 3.663667328980982e-05, "loss": 0.0028773272410035134, "step": 223320 }, { "epoch": 63.391995458416126, "grad_norm": 0.0663236528635025, "learning_rate": 3.6633834799886464e-05, "loss": 0.0003373030573129654, "step": 223330 }, { "epoch": 63.39483394833948, "grad_norm": 0.5260716080665588, "learning_rate": 3.66309963099631e-05, "loss": 0.0003296591341495514, "step": 223340 }, { "epoch": 63.397672438262845, "grad_norm": 0.0467161126434803, "learning_rate": 3.662815782003974e-05, "loss": 0.00022893212735652924, "step": 223350 }, { "epoch": 63.40051092818621, "grad_norm": 0.017364367842674255, "learning_rate": 3.662531933011638e-05, "loss": 0.00010982230305671692, "step": 223360 }, { "epoch": 63.40334941810956, "grad_norm": 0.028566643595695496, "learning_rate": 3.6622480840193016e-05, "loss": 0.00024002380669116974, "step": 223370 }, { "epoch": 63.406187908032926, "grad_norm": 0.011106804944574833, "learning_rate": 3.661964235026966e-05, "loss": 0.00011408347636461258, "step": 223380 }, { "epoch": 63.40902639795629, "grad_norm": 0.03660682216286659, "learning_rate": 3.66168038603463e-05, "loss": 0.0005589857697486878, "step": 223390 }, { "epoch": 63.411864887879645, "grad_norm": 0.047657184302806854, "learning_rate": 3.6613965370422934e-05, "loss": 0.0005269348621368408, "step": 223400 }, { "epoch": 63.41470337780301, "grad_norm": 0.015334545634686947, "learning_rate": 3.6611126880499575e-05, "loss": 0.0003244917839765549, "step": 223410 }, { "epoch": 63.41754186772637, "grad_norm": 1.2991056442260742, "learning_rate": 3.6608288390576216e-05, "loss": 0.0007083222270011901, "step": 223420 }, { "epoch": 63.420380357649734, "grad_norm": 0.03295411542057991, "learning_rate": 3.660544990065286e-05, "loss": 0.000199834443628788, "step": 223430 }, { "epoch": 63.42321884757309, "grad_norm": 0.09537182748317719, "learning_rate": 3.660261141072949e-05, "loss": 0.00046051088720560073, "step": 223440 }, { "epoch": 63.42605733749645, "grad_norm": 0.14992915093898773, "learning_rate": 3.659977292080613e-05, "loss": 0.00046184565871953964, "step": 223450 }, { "epoch": 63.428895827419815, "grad_norm": 1.1201171875, "learning_rate": 3.6596934430882775e-05, "loss": 0.0005515260621905327, "step": 223460 }, { "epoch": 63.43173431734317, "grad_norm": 0.03545794636011124, "learning_rate": 3.659409594095941e-05, "loss": 0.00019689593464136123, "step": 223470 }, { "epoch": 63.434572807266534, "grad_norm": 0.07283949851989746, "learning_rate": 3.659125745103605e-05, "loss": 0.00022553279995918274, "step": 223480 }, { "epoch": 63.4374112971899, "grad_norm": 0.017604945227503777, "learning_rate": 3.658841896111269e-05, "loss": 0.00045907646417617796, "step": 223490 }, { "epoch": 63.44024978711325, "grad_norm": 0.42313745617866516, "learning_rate": 3.658558047118933e-05, "loss": 0.0010253774002194405, "step": 223500 }, { "epoch": 63.44024978711325, "eval_accuracy": 0.9783811279964393, "eval_loss": 0.08207947760820389, "eval_runtime": 32.6471, "eval_samples_per_second": 481.728, "eval_steps_per_second": 7.535, "step": 223500 }, { "epoch": 63.443088277036615, "grad_norm": 8.376821517944336, "learning_rate": 3.658274198126597e-05, "loss": 0.001646113581955433, "step": 223510 }, { "epoch": 63.44592676695998, "grad_norm": 0.04202789440751076, "learning_rate": 3.657990349134261e-05, "loss": 0.00015992391854524612, "step": 223520 }, { "epoch": 63.44876525688334, "grad_norm": 0.05442344769835472, "learning_rate": 3.6577065001419244e-05, "loss": 0.00017020665109157562, "step": 223530 }, { "epoch": 63.4516037468067, "grad_norm": 0.029028786346316338, "learning_rate": 3.6574226511495886e-05, "loss": 0.00011553820222616195, "step": 223540 }, { "epoch": 63.45444223673006, "grad_norm": 0.0681825801730156, "learning_rate": 3.657138802157252e-05, "loss": 0.0001388832926750183, "step": 223550 }, { "epoch": 63.45728072665342, "grad_norm": 0.028805602341890335, "learning_rate": 3.656854953164917e-05, "loss": 0.00012537594884634017, "step": 223560 }, { "epoch": 63.46011921657678, "grad_norm": 0.34578147530555725, "learning_rate": 3.65657110417258e-05, "loss": 0.00016492772847414016, "step": 223570 }, { "epoch": 63.46295770650014, "grad_norm": 0.037857603281736374, "learning_rate": 3.656287255180244e-05, "loss": 0.00011042803525924682, "step": 223580 }, { "epoch": 63.465796196423504, "grad_norm": 0.20112371444702148, "learning_rate": 3.6560034061879086e-05, "loss": 0.00026157274842262266, "step": 223590 }, { "epoch": 63.46863468634686, "grad_norm": 0.009926735423505306, "learning_rate": 3.655719557195572e-05, "loss": 0.00022343844175338746, "step": 223600 }, { "epoch": 63.47147317627022, "grad_norm": 1.3026700019836426, "learning_rate": 3.655435708203236e-05, "loss": 0.0002471527084708214, "step": 223610 }, { "epoch": 63.474311666193586, "grad_norm": 0.047060754150152206, "learning_rate": 3.6551518592109e-05, "loss": 0.0002538273110985756, "step": 223620 }, { "epoch": 63.47715015611695, "grad_norm": 0.052999403327703476, "learning_rate": 3.654868010218564e-05, "loss": 0.00015318337827920913, "step": 223630 }, { "epoch": 63.479988646040304, "grad_norm": 0.009487537667155266, "learning_rate": 3.654584161226228e-05, "loss": 0.00029899366199970245, "step": 223640 }, { "epoch": 63.48282713596367, "grad_norm": 0.19196276366710663, "learning_rate": 3.654300312233892e-05, "loss": 0.00016205664724111557, "step": 223650 }, { "epoch": 63.48566562588703, "grad_norm": 0.15439818799495697, "learning_rate": 3.6540164632415555e-05, "loss": 0.0002551918849349022, "step": 223660 }, { "epoch": 63.488504115810386, "grad_norm": 0.008861695416271687, "learning_rate": 3.6537326142492197e-05, "loss": 0.00017722360789775848, "step": 223670 }, { "epoch": 63.49134260573375, "grad_norm": 0.021694336086511612, "learning_rate": 3.653448765256883e-05, "loss": 0.00015702378004789352, "step": 223680 }, { "epoch": 63.49418109565711, "grad_norm": 0.027342243120074272, "learning_rate": 3.653164916264548e-05, "loss": 0.0002125358209013939, "step": 223690 }, { "epoch": 63.497019585580475, "grad_norm": 0.045691367238759995, "learning_rate": 3.6528810672722114e-05, "loss": 0.00037649646401405334, "step": 223700 }, { "epoch": 63.49985807550383, "grad_norm": 0.011465326882898808, "learning_rate": 3.652597218279875e-05, "loss": 0.0002548329532146454, "step": 223710 }, { "epoch": 63.50269656542719, "grad_norm": 0.2503974437713623, "learning_rate": 3.65231336928754e-05, "loss": 0.00043977871537208556, "step": 223720 }, { "epoch": 63.505535055350556, "grad_norm": 0.17101015150547028, "learning_rate": 3.652029520295203e-05, "loss": 0.0004071470350027084, "step": 223730 }, { "epoch": 63.50837354527391, "grad_norm": 0.22685126960277557, "learning_rate": 3.651745671302867e-05, "loss": 0.0002570757642388344, "step": 223740 }, { "epoch": 63.511212035197275, "grad_norm": 0.027577245607972145, "learning_rate": 3.6514618223105314e-05, "loss": 0.0005594300106167793, "step": 223750 }, { "epoch": 63.51405052512064, "grad_norm": 0.132013201713562, "learning_rate": 3.651206358217428e-05, "loss": 0.0026603545993566513, "step": 223760 }, { "epoch": 63.51688901504399, "grad_norm": 0.004958732053637505, "learning_rate": 3.650922509225093e-05, "loss": 0.0014467721804976464, "step": 223770 }, { "epoch": 63.519727504967356, "grad_norm": 3.806523084640503, "learning_rate": 3.650638660232756e-05, "loss": 0.0006628228351473808, "step": 223780 }, { "epoch": 63.52256599489072, "grad_norm": 0.02572963386774063, "learning_rate": 3.65035481124042e-05, "loss": 0.00012870635837316513, "step": 223790 }, { "epoch": 63.52540448481408, "grad_norm": 0.035188227891922, "learning_rate": 3.6500709622480846e-05, "loss": 9.522289037704468e-05, "step": 223800 }, { "epoch": 63.52824297473744, "grad_norm": 0.029137175530195236, "learning_rate": 3.649787113255748e-05, "loss": 0.000906759686768055, "step": 223810 }, { "epoch": 63.5310814646608, "grad_norm": 0.058693256229162216, "learning_rate": 3.649503264263412e-05, "loss": 0.0019776618108153345, "step": 223820 }, { "epoch": 63.533919954584164, "grad_norm": 4.727992057800293, "learning_rate": 3.6492194152710757e-05, "loss": 0.0017699170857667923, "step": 223830 }, { "epoch": 63.53675844450752, "grad_norm": 0.03395581617951393, "learning_rate": 3.64893556627874e-05, "loss": 0.0015294859185814857, "step": 223840 }, { "epoch": 63.53959693443088, "grad_norm": 0.004711443092674017, "learning_rate": 3.648651717286404e-05, "loss": 0.0014114465564489364, "step": 223850 }, { "epoch": 63.542435424354245, "grad_norm": 0.00806459505110979, "learning_rate": 3.6483678682940674e-05, "loss": 0.0003004983067512512, "step": 223860 }, { "epoch": 63.5452739142776, "grad_norm": 0.016837652772665024, "learning_rate": 3.6480840193017315e-05, "loss": 0.000626312755048275, "step": 223870 }, { "epoch": 63.548112404200964, "grad_norm": 0.07849552482366562, "learning_rate": 3.647800170309396e-05, "loss": 0.00037410594522953035, "step": 223880 }, { "epoch": 63.55095089412433, "grad_norm": 0.704382061958313, "learning_rate": 3.647516321317059e-05, "loss": 0.0023267984390258787, "step": 223890 }, { "epoch": 63.55378938404769, "grad_norm": 0.042670633643865585, "learning_rate": 3.647232472324724e-05, "loss": 0.0008226621896028519, "step": 223900 }, { "epoch": 63.556627873971046, "grad_norm": 0.01936558447778225, "learning_rate": 3.6469486233323874e-05, "loss": 0.000643080472946167, "step": 223910 }, { "epoch": 63.55946636389441, "grad_norm": 0.19851155579090118, "learning_rate": 3.646664774340051e-05, "loss": 0.00026398710906505585, "step": 223920 }, { "epoch": 63.56230485381777, "grad_norm": 0.06842292845249176, "learning_rate": 3.646380925347715e-05, "loss": 0.0014824427664279938, "step": 223930 }, { "epoch": 63.56514334374113, "grad_norm": 0.1126151904463768, "learning_rate": 3.646097076355379e-05, "loss": 0.00032440591603517533, "step": 223940 }, { "epoch": 63.56798183366449, "grad_norm": 0.015271972864866257, "learning_rate": 3.645813227363043e-05, "loss": 0.00023623406887054443, "step": 223950 }, { "epoch": 63.57082032358785, "grad_norm": 0.11235082149505615, "learning_rate": 3.645529378370707e-05, "loss": 0.0001907750964164734, "step": 223960 }, { "epoch": 63.57365881351121, "grad_norm": 0.8468502163887024, "learning_rate": 3.645245529378371e-05, "loss": 0.00027998797595500945, "step": 223970 }, { "epoch": 63.57649730343457, "grad_norm": 0.10770338773727417, "learning_rate": 3.644961680386035e-05, "loss": 0.00010765604674816132, "step": 223980 }, { "epoch": 63.579335793357934, "grad_norm": 0.20835553109645844, "learning_rate": 3.6446778313936985e-05, "loss": 0.0008030563592910766, "step": 223990 }, { "epoch": 63.5821742832813, "grad_norm": 1.3233823776245117, "learning_rate": 3.6443939824013626e-05, "loss": 0.0008538063615560531, "step": 224000 }, { "epoch": 63.5821742832813, "eval_accuracy": 0.9780632034081516, "eval_loss": 0.07864438742399216, "eval_runtime": 32.8511, "eval_samples_per_second": 478.736, "eval_steps_per_second": 7.488, "step": 224000 }, { "epoch": 63.58501277320465, "grad_norm": 1.145976185798645, "learning_rate": 3.644110133409027e-05, "loss": 0.0005381714552640914, "step": 224010 }, { "epoch": 63.587851263128016, "grad_norm": 0.08711237460374832, "learning_rate": 3.64382628441669e-05, "loss": 0.004125714302062988, "step": 224020 }, { "epoch": 63.59068975305138, "grad_norm": 2.6091971397399902, "learning_rate": 3.643542435424355e-05, "loss": 0.0013687342405319213, "step": 224030 }, { "epoch": 63.593528242974735, "grad_norm": 4.404272556304932, "learning_rate": 3.6432585864320185e-05, "loss": 0.0008155850693583489, "step": 224040 }, { "epoch": 63.5963667328981, "grad_norm": 0.04506436735391617, "learning_rate": 3.642974737439682e-05, "loss": 0.0017896765843033791, "step": 224050 }, { "epoch": 63.59920522282146, "grad_norm": 0.6834333539009094, "learning_rate": 3.642690888447346e-05, "loss": 0.0020935479551553726, "step": 224060 }, { "epoch": 63.602043712744816, "grad_norm": 0.037196241319179535, "learning_rate": 3.64240703945501e-05, "loss": 0.005231668800115585, "step": 224070 }, { "epoch": 63.60488220266818, "grad_norm": 0.5990810394287109, "learning_rate": 3.6421231904626743e-05, "loss": 0.0007414866238832473, "step": 224080 }, { "epoch": 63.60772069259154, "grad_norm": 0.01359111163765192, "learning_rate": 3.641839341470338e-05, "loss": 0.0005565857514739037, "step": 224090 }, { "epoch": 63.610559182514905, "grad_norm": 0.05971262976527214, "learning_rate": 3.641555492478002e-05, "loss": 0.0012302357703447343, "step": 224100 }, { "epoch": 63.61339767243826, "grad_norm": 0.8995712995529175, "learning_rate": 3.641271643485666e-05, "loss": 0.0019820192828774452, "step": 224110 }, { "epoch": 63.61623616236162, "grad_norm": 0.11152498424053192, "learning_rate": 3.6409877944933295e-05, "loss": 0.0012632576748728753, "step": 224120 }, { "epoch": 63.619074652284986, "grad_norm": 0.06649849563837051, "learning_rate": 3.640703945500994e-05, "loss": 0.007293985784053802, "step": 224130 }, { "epoch": 63.62191314220834, "grad_norm": 0.0432928241789341, "learning_rate": 3.640420096508658e-05, "loss": 0.0018134333193302154, "step": 224140 }, { "epoch": 63.624751632131705, "grad_norm": 0.5464154481887817, "learning_rate": 3.640136247516321e-05, "loss": 0.0006778333336114883, "step": 224150 }, { "epoch": 63.62759012205507, "grad_norm": 0.0472242571413517, "learning_rate": 3.6398523985239854e-05, "loss": 0.0007098762318491936, "step": 224160 }, { "epoch": 63.63042861197843, "grad_norm": 0.6130415201187134, "learning_rate": 3.6395685495316495e-05, "loss": 0.0002561876550316811, "step": 224170 }, { "epoch": 63.63326710190179, "grad_norm": 0.10148940235376358, "learning_rate": 3.639284700539313e-05, "loss": 0.0006665991619229317, "step": 224180 }, { "epoch": 63.63610559182515, "grad_norm": 0.4504221975803375, "learning_rate": 3.639000851546977e-05, "loss": 0.00023106206208467484, "step": 224190 }, { "epoch": 63.63894408174851, "grad_norm": 0.15223711729049683, "learning_rate": 3.638717002554641e-05, "loss": 0.0002560446038842201, "step": 224200 }, { "epoch": 63.64178257167187, "grad_norm": 0.09493300318717957, "learning_rate": 3.638433153562305e-05, "loss": 0.0017477802932262421, "step": 224210 }, { "epoch": 63.64462106159523, "grad_norm": 0.02578337863087654, "learning_rate": 3.638149304569969e-05, "loss": 0.0004847826436161995, "step": 224220 }, { "epoch": 63.647459551518594, "grad_norm": 0.017281465232372284, "learning_rate": 3.637865455577633e-05, "loss": 0.00016190093010663985, "step": 224230 }, { "epoch": 63.65029804144195, "grad_norm": 0.020872266963124275, "learning_rate": 3.637581606585297e-05, "loss": 0.0004323353990912437, "step": 224240 }, { "epoch": 63.65313653136531, "grad_norm": 0.013306882232427597, "learning_rate": 3.6372977575929606e-05, "loss": 0.0009837733581662179, "step": 224250 }, { "epoch": 63.655975021288675, "grad_norm": 0.021725419908761978, "learning_rate": 3.637013908600624e-05, "loss": 0.00024094954133033753, "step": 224260 }, { "epoch": 63.65881351121204, "grad_norm": 0.14972157776355743, "learning_rate": 3.636730059608289e-05, "loss": 0.004216253012418747, "step": 224270 }, { "epoch": 63.661652001135394, "grad_norm": 0.017985695973038673, "learning_rate": 3.6364462106159524e-05, "loss": 0.00021517854183912278, "step": 224280 }, { "epoch": 63.66449049105876, "grad_norm": 0.04004627838730812, "learning_rate": 3.6361623616236165e-05, "loss": 0.00022351332008838654, "step": 224290 }, { "epoch": 63.66732898098212, "grad_norm": 0.004322052001953125, "learning_rate": 3.6358785126312806e-05, "loss": 0.0006223093718290329, "step": 224300 }, { "epoch": 63.670167470905476, "grad_norm": 0.07481026649475098, "learning_rate": 3.635594663638944e-05, "loss": 0.000769777037203312, "step": 224310 }, { "epoch": 63.67300596082884, "grad_norm": 0.20656420290470123, "learning_rate": 3.635310814646608e-05, "loss": 0.0011296188458800316, "step": 224320 }, { "epoch": 63.6758444507522, "grad_norm": 0.08076050132513046, "learning_rate": 3.6350269656542724e-05, "loss": 0.0003452707082033157, "step": 224330 }, { "epoch": 63.67868294067556, "grad_norm": 0.019852444529533386, "learning_rate": 3.634743116661936e-05, "loss": 0.0001354474574327469, "step": 224340 }, { "epoch": 63.68152143059892, "grad_norm": 0.009588783606886864, "learning_rate": 3.6344592676696e-05, "loss": 0.0001936035230755806, "step": 224350 }, { "epoch": 63.68435992052228, "grad_norm": 0.11474621295928955, "learning_rate": 3.6341754186772634e-05, "loss": 0.0015029072761535644, "step": 224360 }, { "epoch": 63.687198410445646, "grad_norm": 0.3391484320163727, "learning_rate": 3.633891569684928e-05, "loss": 0.00025910399854183196, "step": 224370 }, { "epoch": 63.690036900369, "grad_norm": 5.7421345710754395, "learning_rate": 3.633607720692592e-05, "loss": 0.0015565535053610803, "step": 224380 }, { "epoch": 63.692875390292365, "grad_norm": 0.12042143195867538, "learning_rate": 3.633323871700255e-05, "loss": 0.00026180408895015717, "step": 224390 }, { "epoch": 63.69571388021573, "grad_norm": 0.004368194844573736, "learning_rate": 3.63304002270792e-05, "loss": 0.00029761362820863725, "step": 224400 }, { "epoch": 63.69855237013908, "grad_norm": 0.04389287531375885, "learning_rate": 3.6327561737155834e-05, "loss": 0.00028510894626379015, "step": 224410 }, { "epoch": 63.701390860062446, "grad_norm": 0.005231706891208887, "learning_rate": 3.6324723247232476e-05, "loss": 0.00035474412143230436, "step": 224420 }, { "epoch": 63.70422934998581, "grad_norm": 0.08212676644325256, "learning_rate": 3.632188475730912e-05, "loss": 0.00018621403723955153, "step": 224430 }, { "epoch": 63.70706783990917, "grad_norm": 0.8004197478294373, "learning_rate": 3.631904626738575e-05, "loss": 0.0004247302189469337, "step": 224440 }, { "epoch": 63.70990632983253, "grad_norm": 0.034846000373363495, "learning_rate": 3.631620777746239e-05, "loss": 0.0002795059233903885, "step": 224450 }, { "epoch": 63.71274481975589, "grad_norm": 0.015181692317128181, "learning_rate": 3.631336928753903e-05, "loss": 0.004319984465837479, "step": 224460 }, { "epoch": 63.71558330967925, "grad_norm": 0.014265087433159351, "learning_rate": 3.631053079761567e-05, "loss": 0.00015086904168128967, "step": 224470 }, { "epoch": 63.71842179960261, "grad_norm": 0.05621592327952385, "learning_rate": 3.630769230769231e-05, "loss": 0.00047922786325216293, "step": 224480 }, { "epoch": 63.72126028952597, "grad_norm": 0.0355403907597065, "learning_rate": 3.6304853817768945e-05, "loss": 0.0001550225540995598, "step": 224490 }, { "epoch": 63.724098779449335, "grad_norm": 0.055215127766132355, "learning_rate": 3.630201532784559e-05, "loss": 0.00046750903129577637, "step": 224500 }, { "epoch": 63.724098779449335, "eval_accuracy": 0.9804158453614803, "eval_loss": 0.07156595587730408, "eval_runtime": 32.7439, "eval_samples_per_second": 480.303, "eval_steps_per_second": 7.513, "step": 224500 }, { "epoch": 63.72693726937269, "grad_norm": 0.014392265118658543, "learning_rate": 3.629917683792223e-05, "loss": 0.0008051542565226555, "step": 224510 }, { "epoch": 63.729775759296054, "grad_norm": 1.1169310808181763, "learning_rate": 3.629633834799886e-05, "loss": 0.0028339531272649765, "step": 224520 }, { "epoch": 63.73261424921942, "grad_norm": 0.13225238025188446, "learning_rate": 3.629349985807551e-05, "loss": 0.0003013983368873596, "step": 224530 }, { "epoch": 63.73545273914278, "grad_norm": 0.8949704766273499, "learning_rate": 3.6290661368152145e-05, "loss": 0.0010794878005981445, "step": 224540 }, { "epoch": 63.738291229066135, "grad_norm": 1.656553030014038, "learning_rate": 3.6287822878228786e-05, "loss": 0.006018198654055595, "step": 224550 }, { "epoch": 63.7411297189895, "grad_norm": 0.035306207835674286, "learning_rate": 3.628498438830542e-05, "loss": 0.0009844541549682618, "step": 224560 }, { "epoch": 63.74396820891286, "grad_norm": 0.4865168631076813, "learning_rate": 3.628214589838206e-05, "loss": 0.0009433574974536896, "step": 224570 }, { "epoch": 63.74680669883622, "grad_norm": 0.32857009768486023, "learning_rate": 3.6279307408458704e-05, "loss": 0.005086997896432877, "step": 224580 }, { "epoch": 63.74964518875958, "grad_norm": 0.2155779004096985, "learning_rate": 3.627646891853534e-05, "loss": 0.0009758591651916504, "step": 224590 }, { "epoch": 63.75248367868294, "grad_norm": 0.010447981767356396, "learning_rate": 3.627363042861198e-05, "loss": 0.0008790446445345879, "step": 224600 }, { "epoch": 63.7553221686063, "grad_norm": 1.984439492225647, "learning_rate": 3.627079193868862e-05, "loss": 0.0005144774913787842, "step": 224610 }, { "epoch": 63.75816065852966, "grad_norm": 0.31451982259750366, "learning_rate": 3.6267953448765256e-05, "loss": 0.0003846365958452225, "step": 224620 }, { "epoch": 63.760999148453024, "grad_norm": 0.027535755187273026, "learning_rate": 3.62651149588419e-05, "loss": 0.0004064567387104034, "step": 224630 }, { "epoch": 63.76383763837639, "grad_norm": 1.6327887773513794, "learning_rate": 3.626227646891854e-05, "loss": 0.0005753325298428535, "step": 224640 }, { "epoch": 63.76667612829974, "grad_norm": 0.22552283108234406, "learning_rate": 3.625943797899517e-05, "loss": 0.00018854141235351561, "step": 224650 }, { "epoch": 63.769514618223106, "grad_norm": 0.2663899064064026, "learning_rate": 3.6256599489071814e-05, "loss": 0.0005783155560493469, "step": 224660 }, { "epoch": 63.77235310814647, "grad_norm": 0.8291921019554138, "learning_rate": 3.6253760999148456e-05, "loss": 0.002137721888720989, "step": 224670 }, { "epoch": 63.775191598069824, "grad_norm": 1.6700938940048218, "learning_rate": 3.625092250922509e-05, "loss": 0.0011700239032506944, "step": 224680 }, { "epoch": 63.77803008799319, "grad_norm": 0.041978947818279266, "learning_rate": 3.624808401930173e-05, "loss": 0.0007059192284941673, "step": 224690 }, { "epoch": 63.78086857791655, "grad_norm": 0.033448975533246994, "learning_rate": 3.624524552937837e-05, "loss": 0.002449842169880867, "step": 224700 }, { "epoch": 63.783707067839906, "grad_norm": 1.2871676683425903, "learning_rate": 3.6242407039455015e-05, "loss": 0.002666942775249481, "step": 224710 }, { "epoch": 63.78654555776327, "grad_norm": 3.1064724922180176, "learning_rate": 3.623956854953165e-05, "loss": 0.0027611643075942994, "step": 224720 }, { "epoch": 63.78938404768663, "grad_norm": 3.698308229446411, "learning_rate": 3.623673005960829e-05, "loss": 0.0021921943873167036, "step": 224730 }, { "epoch": 63.792222537609995, "grad_norm": 0.19769923388957977, "learning_rate": 3.623389156968493e-05, "loss": 0.000431833416223526, "step": 224740 }, { "epoch": 63.79506102753335, "grad_norm": 1.7552073001861572, "learning_rate": 3.6231053079761566e-05, "loss": 0.0004585022106766701, "step": 224750 }, { "epoch": 63.79789951745671, "grad_norm": 0.00428078044205904, "learning_rate": 3.622821458983821e-05, "loss": 0.0007421964779496193, "step": 224760 }, { "epoch": 63.800738007380076, "grad_norm": 0.6080329418182373, "learning_rate": 3.622537609991485e-05, "loss": 0.0004448911175131798, "step": 224770 }, { "epoch": 63.80357649730343, "grad_norm": 0.027296708896756172, "learning_rate": 3.6222537609991484e-05, "loss": 0.0003587124869227409, "step": 224780 }, { "epoch": 63.806414987226795, "grad_norm": 0.006950725801289082, "learning_rate": 3.6219699120068125e-05, "loss": 0.001052931509912014, "step": 224790 }, { "epoch": 63.80925347715016, "grad_norm": 0.15644274652004242, "learning_rate": 3.6216860630144767e-05, "loss": 0.0010136395692825317, "step": 224800 }, { "epoch": 63.81209196707351, "grad_norm": 0.008039982058107853, "learning_rate": 3.62140221402214e-05, "loss": 0.0001883283257484436, "step": 224810 }, { "epoch": 63.814930456996876, "grad_norm": 2.18320894241333, "learning_rate": 3.621118365029804e-05, "loss": 0.0010849986225366592, "step": 224820 }, { "epoch": 63.81776894692024, "grad_norm": 0.026601672172546387, "learning_rate": 3.6208345160374684e-05, "loss": 0.000851152092218399, "step": 224830 }, { "epoch": 63.8206074368436, "grad_norm": 0.11838595569133759, "learning_rate": 3.6205506670451325e-05, "loss": 0.004031016677618027, "step": 224840 }, { "epoch": 63.82344592676696, "grad_norm": 0.2057223916053772, "learning_rate": 3.620266818052796e-05, "loss": 0.003005993738770485, "step": 224850 }, { "epoch": 63.82628441669032, "grad_norm": 0.04075217247009277, "learning_rate": 3.6199829690604594e-05, "loss": 0.00020407196134328843, "step": 224860 }, { "epoch": 63.829122906613684, "grad_norm": 0.07504704594612122, "learning_rate": 3.619699120068124e-05, "loss": 0.00040705259889364245, "step": 224870 }, { "epoch": 63.83196139653704, "grad_norm": 0.07844492793083191, "learning_rate": 3.619415271075788e-05, "loss": 7.885117083787918e-05, "step": 224880 }, { "epoch": 63.8347998864604, "grad_norm": 0.023320579901337624, "learning_rate": 3.619131422083452e-05, "loss": 0.00020247306674718856, "step": 224890 }, { "epoch": 63.837638376383765, "grad_norm": 0.1836031824350357, "learning_rate": 3.618847573091116e-05, "loss": 0.0005699736997485161, "step": 224900 }, { "epoch": 63.84047686630713, "grad_norm": 0.01980968564748764, "learning_rate": 3.6185637240987795e-05, "loss": 0.000213627889752388, "step": 224910 }, { "epoch": 63.843315356230484, "grad_norm": 0.013688120990991592, "learning_rate": 3.6182798751064436e-05, "loss": 0.0028121789917349817, "step": 224920 }, { "epoch": 63.84615384615385, "grad_norm": 0.34825628995895386, "learning_rate": 3.617996026114108e-05, "loss": 0.00046895071864128113, "step": 224930 }, { "epoch": 63.84899233607721, "grad_norm": 0.02100256271660328, "learning_rate": 3.617712177121771e-05, "loss": 0.00024995915591716764, "step": 224940 }, { "epoch": 63.851830826000565, "grad_norm": 0.16673873364925385, "learning_rate": 3.617428328129435e-05, "loss": 0.0004692522808909416, "step": 224950 }, { "epoch": 63.85466931592393, "grad_norm": 0.03671998530626297, "learning_rate": 3.617144479137099e-05, "loss": 0.0002580223605036736, "step": 224960 }, { "epoch": 63.85750780584729, "grad_norm": 0.04799570515751839, "learning_rate": 3.6168606301447636e-05, "loss": 0.00015658382326364518, "step": 224970 }, { "epoch": 63.86034629577065, "grad_norm": 0.09831889718770981, "learning_rate": 3.616576781152427e-05, "loss": 0.0004115656018257141, "step": 224980 }, { "epoch": 63.86318478569401, "grad_norm": 0.0906229242682457, "learning_rate": 3.6162929321600905e-05, "loss": 0.00021391455084085464, "step": 224990 }, { "epoch": 63.86602327561737, "grad_norm": 0.02601264789700508, "learning_rate": 3.6160090831677553e-05, "loss": 0.00031835511326789857, "step": 225000 }, { "epoch": 63.86602327561737, "eval_accuracy": 0.9790805620906721, "eval_loss": 0.07967314124107361, "eval_runtime": 32.7919, "eval_samples_per_second": 479.601, "eval_steps_per_second": 7.502, "step": 225000 }, { "epoch": 63.868861765540736, "grad_norm": 0.03787478804588318, "learning_rate": 3.615725234175419e-05, "loss": 0.00886744111776352, "step": 225010 }, { "epoch": 63.87170025546409, "grad_norm": 0.08542962372303009, "learning_rate": 3.615441385183083e-05, "loss": 0.00013766512274742125, "step": 225020 }, { "epoch": 63.874538745387454, "grad_norm": 0.05806276574730873, "learning_rate": 3.615157536190747e-05, "loss": 0.00015752632170915604, "step": 225030 }, { "epoch": 63.87737723531082, "grad_norm": 0.3203323781490326, "learning_rate": 3.6148736871984105e-05, "loss": 0.0033796004951000213, "step": 225040 }, { "epoch": 63.88021572523417, "grad_norm": 0.06530045717954636, "learning_rate": 3.614589838206075e-05, "loss": 9.647663682699203e-05, "step": 225050 }, { "epoch": 63.883054215157536, "grad_norm": 0.06559520959854126, "learning_rate": 3.614305989213738e-05, "loss": 0.0011992694810032845, "step": 225060 }, { "epoch": 63.8858927050809, "grad_norm": 0.17584893107414246, "learning_rate": 3.614022140221402e-05, "loss": 0.001924452744424343, "step": 225070 }, { "epoch": 63.888731195004254, "grad_norm": 1.372194528579712, "learning_rate": 3.6137382912290664e-05, "loss": 0.0056559242308139804, "step": 225080 }, { "epoch": 63.89156968492762, "grad_norm": 0.0700661763548851, "learning_rate": 3.61345444223673e-05, "loss": 0.0005012307316064835, "step": 225090 }, { "epoch": 63.89440817485098, "grad_norm": 2.214586019515991, "learning_rate": 3.613170593244394e-05, "loss": 0.0030909327790141104, "step": 225100 }, { "epoch": 63.89724666477434, "grad_norm": 1.3360726833343506, "learning_rate": 3.612886744252058e-05, "loss": 0.0004929795861244202, "step": 225110 }, { "epoch": 63.9000851546977, "grad_norm": 0.07528794556856155, "learning_rate": 3.6126028952597216e-05, "loss": 0.000261850468814373, "step": 225120 }, { "epoch": 63.90292364462106, "grad_norm": 0.046959083527326584, "learning_rate": 3.6123190462673864e-05, "loss": 0.0008098874241113663, "step": 225130 }, { "epoch": 63.905762134544425, "grad_norm": 0.04502107575535774, "learning_rate": 3.61203519727505e-05, "loss": 0.00029917098581790926, "step": 225140 }, { "epoch": 63.90860062446778, "grad_norm": 0.4759259819984436, "learning_rate": 3.611751348282713e-05, "loss": 0.0004285890609025955, "step": 225150 }, { "epoch": 63.91143911439114, "grad_norm": 0.027916869148612022, "learning_rate": 3.6114674992903775e-05, "loss": 0.0004437034949660301, "step": 225160 }, { "epoch": 63.914277604314506, "grad_norm": 0.08362825959920883, "learning_rate": 3.6111836502980416e-05, "loss": 0.003944588080048561, "step": 225170 }, { "epoch": 63.91711609423786, "grad_norm": 3.528416872024536, "learning_rate": 3.610899801305706e-05, "loss": 0.0008362419903278351, "step": 225180 }, { "epoch": 63.919954584161225, "grad_norm": 0.4539260268211365, "learning_rate": 3.610615952313369e-05, "loss": 0.0002358408644795418, "step": 225190 }, { "epoch": 63.92279307408459, "grad_norm": 0.06131996214389801, "learning_rate": 3.6103321033210333e-05, "loss": 0.0004313550889492035, "step": 225200 }, { "epoch": 63.92563156400795, "grad_norm": 0.04696110263466835, "learning_rate": 3.6100482543286975e-05, "loss": 0.00038882624357938766, "step": 225210 }, { "epoch": 63.928470053931306, "grad_norm": 0.035636257380247116, "learning_rate": 3.609764405336361e-05, "loss": 0.0003195410594344139, "step": 225220 }, { "epoch": 63.93130854385467, "grad_norm": 0.023019082844257355, "learning_rate": 3.609480556344025e-05, "loss": 0.0005151335150003433, "step": 225230 }, { "epoch": 63.93414703377803, "grad_norm": 0.14070335030555725, "learning_rate": 3.609196707351689e-05, "loss": 0.00017023365944623947, "step": 225240 }, { "epoch": 63.93698552370139, "grad_norm": 0.02505980059504509, "learning_rate": 3.608912858359353e-05, "loss": 0.0029459377750754357, "step": 225250 }, { "epoch": 63.93982401362475, "grad_norm": 0.030038023367524147, "learning_rate": 3.608629009367017e-05, "loss": 0.0002865279093384743, "step": 225260 }, { "epoch": 63.942662503548114, "grad_norm": 0.5393161773681641, "learning_rate": 3.608345160374681e-05, "loss": 0.00026196874678134917, "step": 225270 }, { "epoch": 63.94550099347148, "grad_norm": 0.050458863377571106, "learning_rate": 3.6080613113823444e-05, "loss": 9.706318378448487e-05, "step": 225280 }, { "epoch": 63.94833948339483, "grad_norm": 0.05576249584555626, "learning_rate": 3.6077774623900086e-05, "loss": 0.0019566601142287253, "step": 225290 }, { "epoch": 63.951177973318195, "grad_norm": 1.0488667488098145, "learning_rate": 3.607493613397673e-05, "loss": 0.0012837210670113564, "step": 225300 }, { "epoch": 63.95401646324156, "grad_norm": 0.23463396728038788, "learning_rate": 3.607209764405337e-05, "loss": 0.0006284246221184731, "step": 225310 }, { "epoch": 63.956854953164914, "grad_norm": 0.3566974103450775, "learning_rate": 3.606925915413e-05, "loss": 0.008516659587621688, "step": 225320 }, { "epoch": 63.95969344308828, "grad_norm": 0.24091586470603943, "learning_rate": 3.6066420664206644e-05, "loss": 0.0012085732072591782, "step": 225330 }, { "epoch": 63.96253193301164, "grad_norm": 0.030770020559430122, "learning_rate": 3.6063582174283286e-05, "loss": 0.0002991087734699249, "step": 225340 }, { "epoch": 63.965370422934996, "grad_norm": 0.011261243373155594, "learning_rate": 3.606074368435992e-05, "loss": 0.0008709967136383056, "step": 225350 }, { "epoch": 63.96820891285836, "grad_norm": 0.060628827661275864, "learning_rate": 3.605790519443656e-05, "loss": 0.005290616303682327, "step": 225360 }, { "epoch": 63.97104740278172, "grad_norm": 0.08735974878072739, "learning_rate": 3.60550667045132e-05, "loss": 0.0012508576735854148, "step": 225370 }, { "epoch": 63.973885892705084, "grad_norm": 0.036764901131391525, "learning_rate": 3.605222821458984e-05, "loss": 0.0002717345952987671, "step": 225380 }, { "epoch": 63.97672438262844, "grad_norm": 0.816257655620575, "learning_rate": 3.604938972466648e-05, "loss": 0.00155891552567482, "step": 225390 }, { "epoch": 63.9795628725518, "grad_norm": 0.024156149476766586, "learning_rate": 3.604655123474312e-05, "loss": 0.0014712136238813401, "step": 225400 }, { "epoch": 63.982401362475166, "grad_norm": 0.02049519307911396, "learning_rate": 3.6043712744819755e-05, "loss": 0.00024126041680574417, "step": 225410 }, { "epoch": 63.98523985239852, "grad_norm": 0.06827552616596222, "learning_rate": 3.6040874254896396e-05, "loss": 0.0003369135782122612, "step": 225420 }, { "epoch": 63.988078342321884, "grad_norm": 0.10673180222511292, "learning_rate": 3.603803576497304e-05, "loss": 0.0005344051867723465, "step": 225430 }, { "epoch": 63.99091683224525, "grad_norm": 0.029519351199269295, "learning_rate": 3.603519727504968e-05, "loss": 0.0019487669691443444, "step": 225440 }, { "epoch": 63.9937553221686, "grad_norm": 2.0460259914398193, "learning_rate": 3.6032358785126314e-05, "loss": 0.0005363475531339646, "step": 225450 }, { "epoch": 63.996593812091966, "grad_norm": 0.045117057859897614, "learning_rate": 3.6029520295202955e-05, "loss": 0.00029621664434671403, "step": 225460 }, { "epoch": 63.99943230201533, "grad_norm": 0.13858415186405182, "learning_rate": 3.6026681805279596e-05, "loss": 0.00020586680620908736, "step": 225470 }, { "epoch": 64.00227079193868, "grad_norm": 0.008380487561225891, "learning_rate": 3.602384331535623e-05, "loss": 0.0022238465026021004, "step": 225480 }, { "epoch": 64.00510928186205, "grad_norm": 0.020235488191246986, "learning_rate": 3.602100482543287e-05, "loss": 9.889211505651475e-05, "step": 225490 }, { "epoch": 64.00794777178541, "grad_norm": 0.019735604524612427, "learning_rate": 3.6018166335509514e-05, "loss": 0.0008364584296941757, "step": 225500 }, { "epoch": 64.00794777178541, "eval_accuracy": 0.9795892414319324, "eval_loss": 0.07281606644392014, "eval_runtime": 33.0857, "eval_samples_per_second": 475.342, "eval_steps_per_second": 7.435, "step": 225500 }, { "epoch": 64.01078626170877, "grad_norm": 0.05334628000855446, "learning_rate": 3.601532784558615e-05, "loss": 0.0005608545616269112, "step": 225510 }, { "epoch": 64.01362475163214, "grad_norm": 0.0789429247379303, "learning_rate": 3.601248935566279e-05, "loss": 0.0009571185335516929, "step": 225520 }, { "epoch": 64.0164632415555, "grad_norm": 0.023302171379327774, "learning_rate": 3.600965086573943e-05, "loss": 0.00013711117208003998, "step": 225530 }, { "epoch": 64.01930173147885, "grad_norm": 0.0636732429265976, "learning_rate": 3.6006812375816066e-05, "loss": 0.0017106911167502404, "step": 225540 }, { "epoch": 64.02214022140221, "grad_norm": 1.7744396924972534, "learning_rate": 3.600397388589271e-05, "loss": 0.0002216283231973648, "step": 225550 }, { "epoch": 64.02497871132557, "grad_norm": 2.597858190536499, "learning_rate": 3.600113539596935e-05, "loss": 0.002311082184314728, "step": 225560 }, { "epoch": 64.02781720124894, "grad_norm": 0.047048892825841904, "learning_rate": 3.599829690604598e-05, "loss": 0.0001324361190199852, "step": 225570 }, { "epoch": 64.0306556911723, "grad_norm": 0.15715128183364868, "learning_rate": 3.5995458416122624e-05, "loss": 0.0006887489929795265, "step": 225580 }, { "epoch": 64.03349418109566, "grad_norm": 0.09049355983734131, "learning_rate": 3.599261992619926e-05, "loss": 0.0004651915282011032, "step": 225590 }, { "epoch": 64.03633267101901, "grad_norm": 0.04880692437291145, "learning_rate": 3.598978143627591e-05, "loss": 0.0005904717370867729, "step": 225600 }, { "epoch": 64.03917116094237, "grad_norm": 0.6023997068405151, "learning_rate": 3.598694294635254e-05, "loss": 0.00021831318736076355, "step": 225610 }, { "epoch": 64.04200965086574, "grad_norm": 0.006442782934755087, "learning_rate": 3.5984104456429176e-05, "loss": 0.0004392102360725403, "step": 225620 }, { "epoch": 64.0448481407891, "grad_norm": 9.170734405517578, "learning_rate": 3.5981265966505824e-05, "loss": 0.004915034025907516, "step": 225630 }, { "epoch": 64.04768663071246, "grad_norm": 0.032801248133182526, "learning_rate": 3.597842747658246e-05, "loss": 0.0008158409968018532, "step": 225640 }, { "epoch": 64.05052512063583, "grad_norm": 0.009990845806896687, "learning_rate": 3.59755889866591e-05, "loss": 0.0013832228258252143, "step": 225650 }, { "epoch": 64.05336361055919, "grad_norm": 0.2922166585922241, "learning_rate": 3.597275049673574e-05, "loss": 0.0003526870161294937, "step": 225660 }, { "epoch": 64.05620210048254, "grad_norm": 0.02581685408949852, "learning_rate": 3.5969912006812376e-05, "loss": 0.00012815054506063462, "step": 225670 }, { "epoch": 64.0590405904059, "grad_norm": 0.04092688113451004, "learning_rate": 3.596707351688902e-05, "loss": 0.00014609433710575104, "step": 225680 }, { "epoch": 64.06187908032926, "grad_norm": 0.12763315439224243, "learning_rate": 3.596423502696565e-05, "loss": 0.00024637095630168914, "step": 225690 }, { "epoch": 64.06471757025263, "grad_norm": 0.05214500054717064, "learning_rate": 3.5961396537042294e-05, "loss": 0.00020516272634267806, "step": 225700 }, { "epoch": 64.06755606017599, "grad_norm": 0.06695237010717392, "learning_rate": 3.5958558047118935e-05, "loss": 0.0003362055867910385, "step": 225710 }, { "epoch": 64.07039455009935, "grad_norm": 0.053965870290994644, "learning_rate": 3.595571955719557e-05, "loss": 0.00010519232600927352, "step": 225720 }, { "epoch": 64.07323304002271, "grad_norm": 1.744786262512207, "learning_rate": 3.595288106727222e-05, "loss": 0.0005765903741121292, "step": 225730 }, { "epoch": 64.07607152994606, "grad_norm": 7.338283061981201, "learning_rate": 3.595004257734885e-05, "loss": 0.001788925565779209, "step": 225740 }, { "epoch": 64.07891001986943, "grad_norm": 1.5170080661773682, "learning_rate": 3.594720408742549e-05, "loss": 0.003729585185647011, "step": 225750 }, { "epoch": 64.08174850979279, "grad_norm": 0.2071271687746048, "learning_rate": 3.5944365597502135e-05, "loss": 0.0015554523095488548, "step": 225760 }, { "epoch": 64.08458699971615, "grad_norm": 0.034490037709474564, "learning_rate": 3.594152710757877e-05, "loss": 0.0002894224599003792, "step": 225770 }, { "epoch": 64.08742548963951, "grad_norm": 0.019468436017632484, "learning_rate": 3.593868861765541e-05, "loss": 0.00031349435448646545, "step": 225780 }, { "epoch": 64.09026397956288, "grad_norm": 0.010690311901271343, "learning_rate": 3.5935850127732046e-05, "loss": 0.0012890243902802466, "step": 225790 }, { "epoch": 64.09310246948624, "grad_norm": 0.29279813170433044, "learning_rate": 3.593301163780869e-05, "loss": 0.0016189467161893845, "step": 225800 }, { "epoch": 64.09594095940959, "grad_norm": 0.2605352997779846, "learning_rate": 3.593017314788533e-05, "loss": 0.0010280540212988854, "step": 225810 }, { "epoch": 64.09877944933295, "grad_norm": 0.008754221722483635, "learning_rate": 3.592733465796196e-05, "loss": 0.00015467945486307143, "step": 225820 }, { "epoch": 64.10161793925631, "grad_norm": 0.02535228244960308, "learning_rate": 3.5924496168038605e-05, "loss": 0.0003149053081870079, "step": 225830 }, { "epoch": 64.10445642917968, "grad_norm": 0.007408665493130684, "learning_rate": 3.5921657678115246e-05, "loss": 0.0002118196338415146, "step": 225840 }, { "epoch": 64.10729491910304, "grad_norm": 0.5629767179489136, "learning_rate": 3.591881918819188e-05, "loss": 0.0003276322036981583, "step": 225850 }, { "epoch": 64.1101334090264, "grad_norm": 0.009962372481822968, "learning_rate": 3.591598069826853e-05, "loss": 0.00028475429862737653, "step": 225860 }, { "epoch": 64.11297189894975, "grad_norm": 0.01532883569598198, "learning_rate": 3.591314220834516e-05, "loss": 9.420327842235565e-05, "step": 225870 }, { "epoch": 64.11581038887311, "grad_norm": 0.009321934543550014, "learning_rate": 3.59103037184218e-05, "loss": 0.00047872476279735564, "step": 225880 }, { "epoch": 64.11864887879648, "grad_norm": 0.030270352959632874, "learning_rate": 3.590746522849844e-05, "loss": 0.00024196412414312363, "step": 225890 }, { "epoch": 64.12148736871984, "grad_norm": 0.4954870343208313, "learning_rate": 3.590462673857508e-05, "loss": 0.0002753160893917084, "step": 225900 }, { "epoch": 64.1243258586432, "grad_norm": 0.10643914341926575, "learning_rate": 3.590178824865172e-05, "loss": 0.0008167998865246772, "step": 225910 }, { "epoch": 64.12716434856657, "grad_norm": 0.006969409063458443, "learning_rate": 3.5898949758728357e-05, "loss": 0.0008104793727397918, "step": 225920 }, { "epoch": 64.13000283848993, "grad_norm": 0.3603917062282562, "learning_rate": 3.5896111268805e-05, "loss": 0.001976025477051735, "step": 225930 }, { "epoch": 64.13284132841328, "grad_norm": 0.010034373961389065, "learning_rate": 3.589327277888164e-05, "loss": 0.000402035191655159, "step": 225940 }, { "epoch": 64.13567981833664, "grad_norm": 4.584012985229492, "learning_rate": 3.5890434288958274e-05, "loss": 0.0011951865628361702, "step": 225950 }, { "epoch": 64.13851830826, "grad_norm": 0.24615073204040527, "learning_rate": 3.5887879648027254e-05, "loss": 0.0021301694214344025, "step": 225960 }, { "epoch": 64.14135679818337, "grad_norm": 0.2506524920463562, "learning_rate": 3.588504115810389e-05, "loss": 0.005956707894802094, "step": 225970 }, { "epoch": 64.14419528810673, "grad_norm": 0.29895687103271484, "learning_rate": 3.588220266818053e-05, "loss": 0.0008015289902687072, "step": 225980 }, { "epoch": 64.14703377803009, "grad_norm": 0.006833396852016449, "learning_rate": 3.587936417825717e-05, "loss": 0.0016420749947428704, "step": 225990 }, { "epoch": 64.14987226795346, "grad_norm": 0.06541809439659119, "learning_rate": 3.5876525688333806e-05, "loss": 0.00042325574904680254, "step": 226000 }, { "epoch": 64.14987226795346, "eval_accuracy": 0.9776816939022064, "eval_loss": 0.08531413227319717, "eval_runtime": 32.9806, "eval_samples_per_second": 476.856, "eval_steps_per_second": 7.459, "step": 226000 }, { "epoch": 64.1527107578768, "grad_norm": 6.881387233734131, "learning_rate": 3.587368719841045e-05, "loss": 0.0012235604226589203, "step": 226010 }, { "epoch": 64.15554924780017, "grad_norm": 0.04838798940181732, "learning_rate": 3.587084870848709e-05, "loss": 0.00017306357622146607, "step": 226020 }, { "epoch": 64.15838773772353, "grad_norm": 0.09536255896091461, "learning_rate": 3.586801021856372e-05, "loss": 0.0002011701464653015, "step": 226030 }, { "epoch": 64.16122622764689, "grad_norm": 0.10439273715019226, "learning_rate": 3.5865171728640365e-05, "loss": 0.00012302305549383163, "step": 226040 }, { "epoch": 64.16406471757026, "grad_norm": 0.1740400493144989, "learning_rate": 3.5862333238717006e-05, "loss": 0.00012539587914943694, "step": 226050 }, { "epoch": 64.16690320749362, "grad_norm": 0.12171534448862076, "learning_rate": 3.585949474879364e-05, "loss": 0.0003893587738275528, "step": 226060 }, { "epoch": 64.16974169741698, "grad_norm": 0.016652582213282585, "learning_rate": 3.585665625887028e-05, "loss": 0.00011251289397478104, "step": 226070 }, { "epoch": 64.17258018734033, "grad_norm": 0.020085537806153297, "learning_rate": 3.585381776894692e-05, "loss": 0.00038974378257989885, "step": 226080 }, { "epoch": 64.17541867726369, "grad_norm": 0.01949652098119259, "learning_rate": 3.585097927902356e-05, "loss": 0.00022530127316713333, "step": 226090 }, { "epoch": 64.17825716718706, "grad_norm": 0.0192548930644989, "learning_rate": 3.58481407891002e-05, "loss": 0.00018048398196697236, "step": 226100 }, { "epoch": 64.18109565711042, "grad_norm": 0.00596956629306078, "learning_rate": 3.584530229917684e-05, "loss": 6.864462047815323e-05, "step": 226110 }, { "epoch": 64.18393414703378, "grad_norm": 0.009871427901089191, "learning_rate": 3.584246380925348e-05, "loss": 0.00010660532861948014, "step": 226120 }, { "epoch": 64.18677263695714, "grad_norm": 1.5881246328353882, "learning_rate": 3.5839625319330117e-05, "loss": 0.0009110776707530021, "step": 226130 }, { "epoch": 64.1896111268805, "grad_norm": 0.058314915746450424, "learning_rate": 3.583678682940676e-05, "loss": 0.0002647867426276207, "step": 226140 }, { "epoch": 64.19244961680386, "grad_norm": 0.026866160333156586, "learning_rate": 3.58339483394834e-05, "loss": 0.000495917908847332, "step": 226150 }, { "epoch": 64.19528810672722, "grad_norm": 0.04071198031306267, "learning_rate": 3.5831109849560034e-05, "loss": 7.717311382293701e-05, "step": 226160 }, { "epoch": 64.19812659665058, "grad_norm": 0.032703083008527756, "learning_rate": 3.5828271359636675e-05, "loss": 7.667187601327896e-05, "step": 226170 }, { "epoch": 64.20096508657394, "grad_norm": 0.24686218798160553, "learning_rate": 3.582543286971332e-05, "loss": 0.00014807265251874923, "step": 226180 }, { "epoch": 64.20380357649731, "grad_norm": 0.00932091660797596, "learning_rate": 3.582259437978995e-05, "loss": 0.0001563597470521927, "step": 226190 }, { "epoch": 64.20664206642067, "grad_norm": 0.04663356393575668, "learning_rate": 3.581975588986659e-05, "loss": 9.128805249929428e-05, "step": 226200 }, { "epoch": 64.20948055634402, "grad_norm": 0.009335332550108433, "learning_rate": 3.5816917399943234e-05, "loss": 0.00013524889945983886, "step": 226210 }, { "epoch": 64.21231904626738, "grad_norm": 0.002837138483300805, "learning_rate": 3.581407891001987e-05, "loss": 6.556175649166107e-05, "step": 226220 }, { "epoch": 64.21515753619074, "grad_norm": 0.01361202634871006, "learning_rate": 3.581124042009651e-05, "loss": 0.00012867078185081483, "step": 226230 }, { "epoch": 64.21799602611411, "grad_norm": 0.07660821825265884, "learning_rate": 3.580840193017315e-05, "loss": 0.00012727770954370498, "step": 226240 }, { "epoch": 64.22083451603747, "grad_norm": 0.011597616598010063, "learning_rate": 3.580556344024979e-05, "loss": 0.00045406799763441085, "step": 226250 }, { "epoch": 64.22367300596083, "grad_norm": 0.013767405413091183, "learning_rate": 3.580272495032643e-05, "loss": 0.0012319032102823257, "step": 226260 }, { "epoch": 64.2265114958842, "grad_norm": 0.05508313328027725, "learning_rate": 3.579988646040306e-05, "loss": 0.00045019239187240603, "step": 226270 }, { "epoch": 64.22934998580754, "grad_norm": 0.10069742053747177, "learning_rate": 3.579704797047971e-05, "loss": 0.0015166960656642914, "step": 226280 }, { "epoch": 64.23218847573091, "grad_norm": 0.07478328049182892, "learning_rate": 3.5794209480556345e-05, "loss": 0.00028510801494121554, "step": 226290 }, { "epoch": 64.23502696565427, "grad_norm": 0.1531326025724411, "learning_rate": 3.5791370990632986e-05, "loss": 0.0009127387776970864, "step": 226300 }, { "epoch": 64.23786545557763, "grad_norm": 0.5119642615318298, "learning_rate": 3.578853250070963e-05, "loss": 0.0002471761777997017, "step": 226310 }, { "epoch": 64.240703945501, "grad_norm": 0.010212230496108532, "learning_rate": 3.578569401078626e-05, "loss": 0.00016364865005016326, "step": 226320 }, { "epoch": 64.24354243542436, "grad_norm": 0.08899784833192825, "learning_rate": 3.5782855520862903e-05, "loss": 0.00043965857475996016, "step": 226330 }, { "epoch": 64.24638092534771, "grad_norm": 0.5204827189445496, "learning_rate": 3.5780017030939545e-05, "loss": 0.0003086550161242485, "step": 226340 }, { "epoch": 64.24921941527107, "grad_norm": 0.13571280241012573, "learning_rate": 3.577717854101618e-05, "loss": 0.0001589776948094368, "step": 226350 }, { "epoch": 64.25205790519443, "grad_norm": 0.01725863665342331, "learning_rate": 3.577434005109282e-05, "loss": 0.0015774711966514588, "step": 226360 }, { "epoch": 64.2548963951178, "grad_norm": 0.04782959073781967, "learning_rate": 3.5771501561169455e-05, "loss": 0.0001449989154934883, "step": 226370 }, { "epoch": 64.25773488504116, "grad_norm": 1.7088624238967896, "learning_rate": 3.5768663071246104e-05, "loss": 0.0008180089294910431, "step": 226380 }, { "epoch": 64.26057337496452, "grad_norm": 0.18049293756484985, "learning_rate": 3.576582458132274e-05, "loss": 0.00014406889677047728, "step": 226390 }, { "epoch": 64.26341186488789, "grad_norm": 0.04832873493432999, "learning_rate": 3.576298609139937e-05, "loss": 0.0017211440950632094, "step": 226400 }, { "epoch": 64.26625035481123, "grad_norm": 0.5989809036254883, "learning_rate": 3.576014760147602e-05, "loss": 0.0010254276916384698, "step": 226410 }, { "epoch": 64.2690888447346, "grad_norm": 0.13365912437438965, "learning_rate": 3.5757309111552655e-05, "loss": 0.0003421025350689888, "step": 226420 }, { "epoch": 64.27192733465796, "grad_norm": 0.7348617315292358, "learning_rate": 3.57544706216293e-05, "loss": 0.0008985867723822593, "step": 226430 }, { "epoch": 64.27476582458132, "grad_norm": 0.21525119245052338, "learning_rate": 3.575163213170594e-05, "loss": 0.00020962301641702652, "step": 226440 }, { "epoch": 64.27760431450469, "grad_norm": 0.08904733508825302, "learning_rate": 3.574879364178257e-05, "loss": 0.00015540234744548798, "step": 226450 }, { "epoch": 64.28044280442805, "grad_norm": 0.014033038169145584, "learning_rate": 3.5745955151859214e-05, "loss": 0.0003388112410902977, "step": 226460 }, { "epoch": 64.28328129435141, "grad_norm": 0.4618314206600189, "learning_rate": 3.574311666193585e-05, "loss": 0.0004490155726671219, "step": 226470 }, { "epoch": 64.28611978427476, "grad_norm": 0.03341633081436157, "learning_rate": 3.574027817201249e-05, "loss": 0.00018044523894786835, "step": 226480 }, { "epoch": 64.28895827419812, "grad_norm": 0.027076125144958496, "learning_rate": 3.573743968208913e-05, "loss": 0.0011175639927387237, "step": 226490 }, { "epoch": 64.29179676412149, "grad_norm": 0.020943593233823776, "learning_rate": 3.5734601192165766e-05, "loss": 0.0007911078631877899, "step": 226500 }, { "epoch": 64.29179676412149, "eval_accuracy": 0.9722133909836587, "eval_loss": 0.11135087162256241, "eval_runtime": 33.167, "eval_samples_per_second": 474.176, "eval_steps_per_second": 7.417, "step": 226500 }, { "epoch": 64.29463525404485, "grad_norm": 0.0023058445658534765, "learning_rate": 3.573176270224241e-05, "loss": 0.0013081364333629609, "step": 226510 }, { "epoch": 64.29747374396821, "grad_norm": 0.028749050572514534, "learning_rate": 3.572892421231905e-05, "loss": 0.0003888057544827461, "step": 226520 }, { "epoch": 64.30031223389157, "grad_norm": 0.03666175156831741, "learning_rate": 3.5726085722395684e-05, "loss": 0.0007046950981020927, "step": 226530 }, { "epoch": 64.30315072381494, "grad_norm": 0.028619147837162018, "learning_rate": 3.572324723247233e-05, "loss": 0.0003707041963934898, "step": 226540 }, { "epoch": 64.30598921373829, "grad_norm": 3.833080291748047, "learning_rate": 3.5720408742548966e-05, "loss": 0.0005193129181861877, "step": 226550 }, { "epoch": 64.30882770366165, "grad_norm": 0.021174633875489235, "learning_rate": 3.57175702526256e-05, "loss": 0.0009632449597120285, "step": 226560 }, { "epoch": 64.31166619358501, "grad_norm": 0.04642188549041748, "learning_rate": 3.571473176270224e-05, "loss": 0.00011688489466905594, "step": 226570 }, { "epoch": 64.31450468350837, "grad_norm": 0.11116960644721985, "learning_rate": 3.5711893272778884e-05, "loss": 0.0009990982711315156, "step": 226580 }, { "epoch": 64.31734317343174, "grad_norm": 0.07844068855047226, "learning_rate": 3.5709054782855525e-05, "loss": 0.0007017653435468674, "step": 226590 }, { "epoch": 64.3201816633551, "grad_norm": 0.07922183722257614, "learning_rate": 3.570621629293216e-05, "loss": 0.0009196262806653976, "step": 226600 }, { "epoch": 64.32302015327845, "grad_norm": 0.0707586407661438, "learning_rate": 3.57033778030088e-05, "loss": 0.0005971027538180351, "step": 226610 }, { "epoch": 64.32585864320181, "grad_norm": 0.056553035974502563, "learning_rate": 3.570053931308544e-05, "loss": 0.0007618982344865799, "step": 226620 }, { "epoch": 64.32869713312517, "grad_norm": 0.02442164346575737, "learning_rate": 3.569770082316208e-05, "loss": 0.0022794131189584734, "step": 226630 }, { "epoch": 64.33153562304854, "grad_norm": 0.23808389902114868, "learning_rate": 3.569486233323872e-05, "loss": 0.007311782985925675, "step": 226640 }, { "epoch": 64.3343741129719, "grad_norm": 0.45268985629081726, "learning_rate": 3.569202384331536e-05, "loss": 0.0060161460191011425, "step": 226650 }, { "epoch": 64.33721260289526, "grad_norm": 0.041465748101472855, "learning_rate": 3.5689185353391994e-05, "loss": 0.00014994069933891297, "step": 226660 }, { "epoch": 64.34005109281863, "grad_norm": 12.896570205688477, "learning_rate": 3.5686346863468636e-05, "loss": 0.0032412678003311155, "step": 226670 }, { "epoch": 64.34288958274198, "grad_norm": 0.9873967170715332, "learning_rate": 3.568350837354528e-05, "loss": 0.00032231807708740237, "step": 226680 }, { "epoch": 64.34572807266534, "grad_norm": 0.05810638144612312, "learning_rate": 3.568066988362191e-05, "loss": 0.00010569561272859574, "step": 226690 }, { "epoch": 64.3485665625887, "grad_norm": 0.1932067573070526, "learning_rate": 3.567783139369855e-05, "loss": 0.00012325253337621688, "step": 226700 }, { "epoch": 64.35140505251206, "grad_norm": 0.27922871708869934, "learning_rate": 3.5674992903775194e-05, "loss": 0.0003151927143335342, "step": 226710 }, { "epoch": 64.35424354243543, "grad_norm": 0.03479532152414322, "learning_rate": 3.5672154413851836e-05, "loss": 0.0005983352661132812, "step": 226720 }, { "epoch": 64.35708203235879, "grad_norm": 0.36491966247558594, "learning_rate": 3.566931592392847e-05, "loss": 0.0003227923065423965, "step": 226730 }, { "epoch": 64.35992052228215, "grad_norm": 0.19436660408973694, "learning_rate": 3.566647743400511e-05, "loss": 0.0003984479233622551, "step": 226740 }, { "epoch": 64.3627590122055, "grad_norm": 1.1209758520126343, "learning_rate": 3.566363894408175e-05, "loss": 0.0008572598919272423, "step": 226750 }, { "epoch": 64.36559750212886, "grad_norm": 0.05188921093940735, "learning_rate": 3.566080045415839e-05, "loss": 0.00045839790254831314, "step": 226760 }, { "epoch": 64.36843599205223, "grad_norm": 0.3226931095123291, "learning_rate": 3.565796196423503e-05, "loss": 0.00035271886736154556, "step": 226770 }, { "epoch": 64.37127448197559, "grad_norm": 0.05054834857583046, "learning_rate": 3.565512347431167e-05, "loss": 0.0006363483145833016, "step": 226780 }, { "epoch": 64.37411297189895, "grad_norm": 0.037618428468704224, "learning_rate": 3.5652284984388305e-05, "loss": 0.0002492522820830345, "step": 226790 }, { "epoch": 64.37695146182232, "grad_norm": 0.12460628896951675, "learning_rate": 3.5649446494464946e-05, "loss": 0.0001988973468542099, "step": 226800 }, { "epoch": 64.37978995174566, "grad_norm": 0.0705265998840332, "learning_rate": 3.564660800454159e-05, "loss": 0.0033376879990100862, "step": 226810 }, { "epoch": 64.38262844166903, "grad_norm": 0.012924819253385067, "learning_rate": 3.564376951461822e-05, "loss": 0.0004001487046480179, "step": 226820 }, { "epoch": 64.38546693159239, "grad_norm": 0.03740304335951805, "learning_rate": 3.5640931024694864e-05, "loss": 0.00034913234412670135, "step": 226830 }, { "epoch": 64.38830542151575, "grad_norm": 0.12138964235782623, "learning_rate": 3.5638092534771505e-05, "loss": 0.00036101825535297396, "step": 226840 }, { "epoch": 64.39114391143912, "grad_norm": 0.016875706613063812, "learning_rate": 3.563525404484814e-05, "loss": 0.00023956168442964553, "step": 226850 }, { "epoch": 64.39398240136248, "grad_norm": 0.017406277358531952, "learning_rate": 3.563241555492478e-05, "loss": 0.0029905539005994795, "step": 226860 }, { "epoch": 64.39682089128584, "grad_norm": 0.05565466359257698, "learning_rate": 3.5629577065001416e-05, "loss": 0.0001872614026069641, "step": 226870 }, { "epoch": 64.39965938120919, "grad_norm": 0.07277591526508331, "learning_rate": 3.5626738575078064e-05, "loss": 0.00014880988746881484, "step": 226880 }, { "epoch": 64.40249787113255, "grad_norm": 0.04102430120110512, "learning_rate": 3.56239000851547e-05, "loss": 0.0012762289494276046, "step": 226890 }, { "epoch": 64.40533636105592, "grad_norm": 0.04281811788678169, "learning_rate": 3.562106159523134e-05, "loss": 0.0009597392752766609, "step": 226900 }, { "epoch": 64.40817485097928, "grad_norm": 0.010992499068379402, "learning_rate": 3.561822310530798e-05, "loss": 0.00015740711241960526, "step": 226910 }, { "epoch": 64.41101334090264, "grad_norm": 0.11131087690591812, "learning_rate": 3.5615384615384616e-05, "loss": 0.0005279656499624253, "step": 226920 }, { "epoch": 64.413851830826, "grad_norm": 9.382285118103027, "learning_rate": 3.561254612546126e-05, "loss": 0.002839912474155426, "step": 226930 }, { "epoch": 64.41669032074937, "grad_norm": 0.9375768899917603, "learning_rate": 3.56097076355379e-05, "loss": 0.0002333007752895355, "step": 226940 }, { "epoch": 64.41952881067272, "grad_norm": 0.14448152482509613, "learning_rate": 3.560686914561453e-05, "loss": 0.00012990422546863556, "step": 226950 }, { "epoch": 64.42236730059608, "grad_norm": 0.021059956401586533, "learning_rate": 3.5604030655691175e-05, "loss": 0.0009376503527164459, "step": 226960 }, { "epoch": 64.42520579051944, "grad_norm": 0.041144099086523056, "learning_rate": 3.560119216576781e-05, "loss": 0.0002996537834405899, "step": 226970 }, { "epoch": 64.4280442804428, "grad_norm": 2.10440993309021, "learning_rate": 3.559835367584445e-05, "loss": 0.0013378538191318513, "step": 226980 }, { "epoch": 64.43088277036617, "grad_norm": 0.03850625827908516, "learning_rate": 3.559551518592109e-05, "loss": 0.0006754685193300248, "step": 226990 }, { "epoch": 64.43372126028953, "grad_norm": 0.010102166794240475, "learning_rate": 3.5592676695997726e-05, "loss": 0.0004506360739469528, "step": 227000 }, { "epoch": 64.43372126028953, "eval_accuracy": 0.9779360335728365, "eval_loss": 0.0847136378288269, "eval_runtime": 32.4832, "eval_samples_per_second": 484.159, "eval_steps_per_second": 7.573, "step": 227000 }, { "epoch": 64.4365597502129, "grad_norm": 0.12759935855865479, "learning_rate": 3.5589838206074375e-05, "loss": 0.0005242889747023582, "step": 227010 }, { "epoch": 64.43939824013624, "grad_norm": 0.0180843286216259, "learning_rate": 3.558699971615101e-05, "loss": 0.0009631512686610222, "step": 227020 }, { "epoch": 64.4422367300596, "grad_norm": 0.08088906109333038, "learning_rate": 3.5584161226227644e-05, "loss": 0.001322108879685402, "step": 227030 }, { "epoch": 64.44507521998297, "grad_norm": 0.04792669788002968, "learning_rate": 3.558132273630429e-05, "loss": 0.0066739052534103395, "step": 227040 }, { "epoch": 64.44791370990633, "grad_norm": 0.11347947269678116, "learning_rate": 3.5578484246380927e-05, "loss": 0.002053220756351948, "step": 227050 }, { "epoch": 64.4507521998297, "grad_norm": 0.04892696067690849, "learning_rate": 3.557564575645757e-05, "loss": 0.0009788483381271361, "step": 227060 }, { "epoch": 64.45359068975306, "grad_norm": 0.01286416593939066, "learning_rate": 3.557280726653421e-05, "loss": 0.0008971275761723519, "step": 227070 }, { "epoch": 64.4564291796764, "grad_norm": 0.03952132165431976, "learning_rate": 3.5569968776610844e-05, "loss": 0.0003102939575910568, "step": 227080 }, { "epoch": 64.45926766959977, "grad_norm": 0.013046781532466412, "learning_rate": 3.5567130286687485e-05, "loss": 0.0016698693856596948, "step": 227090 }, { "epoch": 64.46210615952313, "grad_norm": 0.007003657054156065, "learning_rate": 3.556429179676412e-05, "loss": 0.0011827267706394195, "step": 227100 }, { "epoch": 64.4649446494465, "grad_norm": 0.04076068475842476, "learning_rate": 3.556145330684076e-05, "loss": 0.004574423283338546, "step": 227110 }, { "epoch": 64.46778313936986, "grad_norm": 0.03721284121274948, "learning_rate": 3.55586148169174e-05, "loss": 0.00030366312712430953, "step": 227120 }, { "epoch": 64.47062162929322, "grad_norm": 0.344247967004776, "learning_rate": 3.555577632699404e-05, "loss": 0.00024676043540239334, "step": 227130 }, { "epoch": 64.47346011921658, "grad_norm": 0.11584793031215668, "learning_rate": 3.5552937837070685e-05, "loss": 0.00033060908317565917, "step": 227140 }, { "epoch": 64.47629860913993, "grad_norm": 0.2506316006183624, "learning_rate": 3.555009934714732e-05, "loss": 0.000381021574139595, "step": 227150 }, { "epoch": 64.4791370990633, "grad_norm": 0.009774028323590755, "learning_rate": 3.5547260857223955e-05, "loss": 0.00024173445999622344, "step": 227160 }, { "epoch": 64.48197558898666, "grad_norm": 0.027842149138450623, "learning_rate": 3.55444223673006e-05, "loss": 0.000182328000664711, "step": 227170 }, { "epoch": 64.48481407891002, "grad_norm": 0.14081759750843048, "learning_rate": 3.554158387737724e-05, "loss": 0.00017489120364189148, "step": 227180 }, { "epoch": 64.48765256883338, "grad_norm": 0.03934396430850029, "learning_rate": 3.553874538745388e-05, "loss": 0.0002364378422498703, "step": 227190 }, { "epoch": 64.49049105875675, "grad_norm": 0.011172533966600895, "learning_rate": 3.553590689753051e-05, "loss": 0.0003931697458028793, "step": 227200 }, { "epoch": 64.49332954868011, "grad_norm": 0.004419004078954458, "learning_rate": 3.5533068407607155e-05, "loss": 0.0009636769071221351, "step": 227210 }, { "epoch": 64.49616803860346, "grad_norm": 2.2440853118896484, "learning_rate": 3.5530229917683796e-05, "loss": 0.0003806058317422867, "step": 227220 }, { "epoch": 64.49900652852682, "grad_norm": 0.023585433140397072, "learning_rate": 3.552739142776043e-05, "loss": 0.0008241184055805207, "step": 227230 }, { "epoch": 64.50184501845018, "grad_norm": 0.058894336223602295, "learning_rate": 3.552455293783707e-05, "loss": 0.0014057930558919907, "step": 227240 }, { "epoch": 64.50468350837355, "grad_norm": 0.3639346659183502, "learning_rate": 3.5521714447913713e-05, "loss": 0.0007924098521471023, "step": 227250 }, { "epoch": 64.50752199829691, "grad_norm": 0.3888349235057831, "learning_rate": 3.551887595799035e-05, "loss": 0.0003626767545938492, "step": 227260 }, { "epoch": 64.51036048822027, "grad_norm": 0.13349321484565735, "learning_rate": 3.551603746806699e-05, "loss": 0.0005032308399677277, "step": 227270 }, { "epoch": 64.51319897814363, "grad_norm": 0.009500262327492237, "learning_rate": 3.551319897814363e-05, "loss": 0.00012325141578912736, "step": 227280 }, { "epoch": 64.51603746806698, "grad_norm": 0.015890706330537796, "learning_rate": 3.5510360488220265e-05, "loss": 0.00012912489473819732, "step": 227290 }, { "epoch": 64.51887595799035, "grad_norm": 0.023742029443383217, "learning_rate": 3.550752199829691e-05, "loss": 0.00023947730660438537, "step": 227300 }, { "epoch": 64.52171444791371, "grad_norm": 9.8718900680542, "learning_rate": 3.550468350837355e-05, "loss": 0.0035428866744041444, "step": 227310 }, { "epoch": 64.52455293783707, "grad_norm": 0.41373974084854126, "learning_rate": 3.550184501845018e-05, "loss": 0.0009513290598988533, "step": 227320 }, { "epoch": 64.52739142776043, "grad_norm": 0.01422911137342453, "learning_rate": 3.5499006528526824e-05, "loss": 0.00010710060596466064, "step": 227330 }, { "epoch": 64.5302299176838, "grad_norm": 0.010934261605143547, "learning_rate": 3.5496168038603465e-05, "loss": 0.0017043884843587875, "step": 227340 }, { "epoch": 64.53306840760715, "grad_norm": 0.1288539618253708, "learning_rate": 3.549332954868011e-05, "loss": 0.0002602929249405861, "step": 227350 }, { "epoch": 64.53590689753051, "grad_norm": 0.06147071719169617, "learning_rate": 3.549049105875674e-05, "loss": 0.00019090678542852402, "step": 227360 }, { "epoch": 64.53874538745387, "grad_norm": 0.023164156824350357, "learning_rate": 3.548765256883338e-05, "loss": 7.439423352479934e-05, "step": 227370 }, { "epoch": 64.54158387737724, "grad_norm": 0.01951715350151062, "learning_rate": 3.5484814078910024e-05, "loss": 0.00026624277234077454, "step": 227380 }, { "epoch": 64.5444223673006, "grad_norm": 0.043041449040174484, "learning_rate": 3.548197558898666e-05, "loss": 0.00014764182269573213, "step": 227390 }, { "epoch": 64.54726085722396, "grad_norm": 0.6787921190261841, "learning_rate": 3.54791370990633e-05, "loss": 0.0003999501466751099, "step": 227400 }, { "epoch": 64.55009934714732, "grad_norm": 0.14617577195167542, "learning_rate": 3.547629860913994e-05, "loss": 0.00032883621752262117, "step": 227410 }, { "epoch": 64.55293783707067, "grad_norm": 0.017888391390442848, "learning_rate": 3.5473460119216576e-05, "loss": 0.0004807775840163231, "step": 227420 }, { "epoch": 64.55577632699404, "grad_norm": 4.158189296722412, "learning_rate": 3.547062162929322e-05, "loss": 0.0006672345101833343, "step": 227430 }, { "epoch": 64.5586148169174, "grad_norm": 0.010387646965682507, "learning_rate": 3.546778313936986e-05, "loss": 0.00028142724186182023, "step": 227440 }, { "epoch": 64.56145330684076, "grad_norm": 0.024116594344377518, "learning_rate": 3.5464944649446493e-05, "loss": 0.0030637938529253007, "step": 227450 }, { "epoch": 64.56429179676412, "grad_norm": 0.03489859402179718, "learning_rate": 3.5462106159523135e-05, "loss": 0.0025845248252153396, "step": 227460 }, { "epoch": 64.56713028668749, "grad_norm": 0.013325735926628113, "learning_rate": 3.5459267669599776e-05, "loss": 0.000940852239727974, "step": 227470 }, { "epoch": 64.56996877661085, "grad_norm": 0.18206597864627838, "learning_rate": 3.545642917967642e-05, "loss": 0.001006605662405491, "step": 227480 }, { "epoch": 64.5728072665342, "grad_norm": 1.6445246934890747, "learning_rate": 3.545359068975305e-05, "loss": 0.0027904415503144262, "step": 227490 }, { "epoch": 64.57564575645756, "grad_norm": 0.056534599512815475, "learning_rate": 3.545075219982969e-05, "loss": 0.00031729955226182937, "step": 227500 }, { "epoch": 64.57564575645756, "eval_accuracy": 0.9781903732434667, "eval_loss": 0.0824812799692154, "eval_runtime": 33.2725, "eval_samples_per_second": 472.673, "eval_steps_per_second": 7.394, "step": 227500 }, { "epoch": 64.57848424638092, "grad_norm": 6.02877140045166, "learning_rate": 3.5447913709906335e-05, "loss": 0.001592763513326645, "step": 227510 }, { "epoch": 64.58132273630429, "grad_norm": 0.036787260323762894, "learning_rate": 3.544507521998297e-05, "loss": 0.0017896093428134917, "step": 227520 }, { "epoch": 64.58416122622765, "grad_norm": 0.10064104199409485, "learning_rate": 3.544223673005961e-05, "loss": 0.0010635115206241608, "step": 227530 }, { "epoch": 64.58699971615101, "grad_norm": 0.014023300260305405, "learning_rate": 3.543939824013625e-05, "loss": 0.004633621126413345, "step": 227540 }, { "epoch": 64.58983820607436, "grad_norm": 0.06034943833947182, "learning_rate": 3.543655975021289e-05, "loss": 0.005195567011833191, "step": 227550 }, { "epoch": 64.59267669599772, "grad_norm": 0.23157483339309692, "learning_rate": 3.543372126028953e-05, "loss": 0.00028497520834207534, "step": 227560 }, { "epoch": 64.59551518592109, "grad_norm": 0.0328245609998703, "learning_rate": 3.543088277036617e-05, "loss": 0.000813632644712925, "step": 227570 }, { "epoch": 64.59835367584445, "grad_norm": 0.018730374053120613, "learning_rate": 3.5428044280442804e-05, "loss": 0.0003656305372714996, "step": 227580 }, { "epoch": 64.60119216576781, "grad_norm": 2.44191312789917, "learning_rate": 3.5425205790519446e-05, "loss": 0.000599067471921444, "step": 227590 }, { "epoch": 64.60403065569118, "grad_norm": 0.024014819413423538, "learning_rate": 3.542236730059608e-05, "loss": 0.006177405267953873, "step": 227600 }, { "epoch": 64.60686914561454, "grad_norm": 0.02569492533802986, "learning_rate": 3.541952881067273e-05, "loss": 0.0004070069640874863, "step": 227610 }, { "epoch": 64.60970763553789, "grad_norm": 0.11033676564693451, "learning_rate": 3.541669032074936e-05, "loss": 0.006826505064964294, "step": 227620 }, { "epoch": 64.61254612546125, "grad_norm": 0.23225270211696625, "learning_rate": 3.5413851830826e-05, "loss": 0.0003704478964209557, "step": 227630 }, { "epoch": 64.61538461538461, "grad_norm": 0.042143069207668304, "learning_rate": 3.5411013340902646e-05, "loss": 0.0008974594995379448, "step": 227640 }, { "epoch": 64.61822310530798, "grad_norm": 0.24286215007305145, "learning_rate": 3.540817485097928e-05, "loss": 0.0005177156999707222, "step": 227650 }, { "epoch": 64.62106159523134, "grad_norm": 0.14003713428974152, "learning_rate": 3.540533636105592e-05, "loss": 0.0005303861573338509, "step": 227660 }, { "epoch": 64.6239000851547, "grad_norm": 0.23829707503318787, "learning_rate": 3.540249787113256e-05, "loss": 0.000249442458152771, "step": 227670 }, { "epoch": 64.62673857507806, "grad_norm": 0.016767079010605812, "learning_rate": 3.53996593812092e-05, "loss": 0.000661054439842701, "step": 227680 }, { "epoch": 64.62957706500141, "grad_norm": 0.07220937311649323, "learning_rate": 3.539682089128584e-05, "loss": 0.01052992269396782, "step": 227690 }, { "epoch": 64.63241555492478, "grad_norm": 18.694955825805664, "learning_rate": 3.5393982401362474e-05, "loss": 0.00579703152179718, "step": 227700 }, { "epoch": 64.63525404484814, "grad_norm": 0.02216956950724125, "learning_rate": 3.5391143911439115e-05, "loss": 0.0010020527988672256, "step": 227710 }, { "epoch": 64.6380925347715, "grad_norm": 0.29641762375831604, "learning_rate": 3.5388305421515756e-05, "loss": 0.0022799234837293627, "step": 227720 }, { "epoch": 64.64093102469486, "grad_norm": 0.04273602366447449, "learning_rate": 3.538546693159239e-05, "loss": 0.00022718701511621476, "step": 227730 }, { "epoch": 64.64376951461823, "grad_norm": 0.09753295034170151, "learning_rate": 3.538262844166903e-05, "loss": 0.00011789780110120774, "step": 227740 }, { "epoch": 64.64660800454159, "grad_norm": 0.3434138298034668, "learning_rate": 3.5379789951745674e-05, "loss": 0.00020439419895410538, "step": 227750 }, { "epoch": 64.64944649446494, "grad_norm": 0.3323395252227783, "learning_rate": 3.537695146182231e-05, "loss": 0.00035300571471452713, "step": 227760 }, { "epoch": 64.6522849843883, "grad_norm": 0.04733119532465935, "learning_rate": 3.5374112971898956e-05, "loss": 0.0005828278139233589, "step": 227770 }, { "epoch": 64.65512347431167, "grad_norm": 0.21523348987102509, "learning_rate": 3.537127448197559e-05, "loss": 0.0005199907347559929, "step": 227780 }, { "epoch": 64.65796196423503, "grad_norm": 0.021186986938118935, "learning_rate": 3.5368435992052226e-05, "loss": 0.0001672951504588127, "step": 227790 }, { "epoch": 64.66080045415839, "grad_norm": 0.008240382187068462, "learning_rate": 3.536559750212887e-05, "loss": 0.0029375005513429643, "step": 227800 }, { "epoch": 64.66363894408175, "grad_norm": 0.01379274483770132, "learning_rate": 3.536275901220551e-05, "loss": 7.382091134786606e-05, "step": 227810 }, { "epoch": 64.6664774340051, "grad_norm": 0.12929123640060425, "learning_rate": 3.535992052228215e-05, "loss": 0.00016041845083236694, "step": 227820 }, { "epoch": 64.66931592392847, "grad_norm": 0.03246399760246277, "learning_rate": 3.5357082032358784e-05, "loss": 0.00022974200546741486, "step": 227830 }, { "epoch": 64.67215441385183, "grad_norm": 0.009126763790845871, "learning_rate": 3.5354243542435426e-05, "loss": 0.00011737309396266937, "step": 227840 }, { "epoch": 64.67499290377519, "grad_norm": 0.018399422988295555, "learning_rate": 3.535140505251207e-05, "loss": 0.00013364888727664947, "step": 227850 }, { "epoch": 64.67783139369855, "grad_norm": 0.10503066331148148, "learning_rate": 3.53485665625887e-05, "loss": 0.0005435882136225701, "step": 227860 }, { "epoch": 64.68066988362192, "grad_norm": 0.08922678232192993, "learning_rate": 3.534572807266534e-05, "loss": 0.009031365811824798, "step": 227870 }, { "epoch": 64.68350837354528, "grad_norm": 0.005905136466026306, "learning_rate": 3.5342889582741984e-05, "loss": 0.0002526151016354561, "step": 227880 }, { "epoch": 64.68634686346863, "grad_norm": 0.022331571206450462, "learning_rate": 3.534005109281862e-05, "loss": 0.0002189716324210167, "step": 227890 }, { "epoch": 64.68918535339199, "grad_norm": 0.09813796728849411, "learning_rate": 3.533721260289526e-05, "loss": 0.0001441851258277893, "step": 227900 }, { "epoch": 64.69202384331535, "grad_norm": 0.008964224718511105, "learning_rate": 3.53343741129719e-05, "loss": 0.0002774640917778015, "step": 227910 }, { "epoch": 64.69486233323872, "grad_norm": 0.05893057957291603, "learning_rate": 3.5331535623048536e-05, "loss": 0.00014841370284557343, "step": 227920 }, { "epoch": 64.69770082316208, "grad_norm": 0.10634230822324753, "learning_rate": 3.532869713312518e-05, "loss": 0.00013045892119407655, "step": 227930 }, { "epoch": 64.70053931308544, "grad_norm": 0.11731266975402832, "learning_rate": 3.532585864320182e-05, "loss": 0.00023052543401718139, "step": 227940 }, { "epoch": 64.7033778030088, "grad_norm": 0.1258549839258194, "learning_rate": 3.532302015327846e-05, "loss": 0.00013660378754138948, "step": 227950 }, { "epoch": 64.70621629293215, "grad_norm": 0.027348896488547325, "learning_rate": 3.5320181663355095e-05, "loss": 0.00021154098212718963, "step": 227960 }, { "epoch": 64.70905478285552, "grad_norm": 0.017850887030363083, "learning_rate": 3.5317343173431737e-05, "loss": 0.0002308053895831108, "step": 227970 }, { "epoch": 64.71189327277888, "grad_norm": 0.06431466341018677, "learning_rate": 3.531450468350838e-05, "loss": 0.00019279178231954575, "step": 227980 }, { "epoch": 64.71473176270224, "grad_norm": 0.03229772299528122, "learning_rate": 3.531166619358501e-05, "loss": 0.00014930013567209243, "step": 227990 }, { "epoch": 64.7175702526256, "grad_norm": 0.10112423449754715, "learning_rate": 3.5308827703661654e-05, "loss": 0.0001798415556550026, "step": 228000 }, { "epoch": 64.7175702526256, "eval_accuracy": 0.9802250906085077, "eval_loss": 0.07291392236948013, "eval_runtime": 32.9945, "eval_samples_per_second": 476.655, "eval_steps_per_second": 7.456, "step": 228000 }, { "epoch": 64.72040874254897, "grad_norm": 0.08900847285985947, "learning_rate": 3.5305989213738295e-05, "loss": 8.714385330677032e-05, "step": 228010 }, { "epoch": 64.72324723247232, "grad_norm": 0.04261748865246773, "learning_rate": 3.530315072381493e-05, "loss": 0.00013610608875751494, "step": 228020 }, { "epoch": 64.72608572239568, "grad_norm": 1.2438918352127075, "learning_rate": 3.530031223389157e-05, "loss": 0.00025221072137355806, "step": 228030 }, { "epoch": 64.72892421231904, "grad_norm": 0.020528972148895264, "learning_rate": 3.529747374396821e-05, "loss": 0.00025026053190231324, "step": 228040 }, { "epoch": 64.7317627022424, "grad_norm": 0.024411732330918312, "learning_rate": 3.529463525404485e-05, "loss": 0.00010906551033258438, "step": 228050 }, { "epoch": 64.73460119216577, "grad_norm": 0.00716741569340229, "learning_rate": 3.529179676412149e-05, "loss": 0.0001208556815981865, "step": 228060 }, { "epoch": 64.73743968208913, "grad_norm": 1.1603209972381592, "learning_rate": 3.528895827419813e-05, "loss": 0.0005732202902436256, "step": 228070 }, { "epoch": 64.7402781720125, "grad_norm": 0.014557943679392338, "learning_rate": 3.528611978427477e-05, "loss": 0.00025891028344631194, "step": 228080 }, { "epoch": 64.74311666193584, "grad_norm": 1.268306016921997, "learning_rate": 3.5283281294351406e-05, "loss": 0.000368807278573513, "step": 228090 }, { "epoch": 64.7459551518592, "grad_norm": 0.29416602849960327, "learning_rate": 3.528044280442804e-05, "loss": 0.0003156732767820358, "step": 228100 }, { "epoch": 64.74879364178257, "grad_norm": 0.9271993041038513, "learning_rate": 3.527760431450469e-05, "loss": 0.004461620002985001, "step": 228110 }, { "epoch": 64.75163213170593, "grad_norm": 0.03562605753540993, "learning_rate": 3.527476582458132e-05, "loss": 0.00035455580800771715, "step": 228120 }, { "epoch": 64.7544706216293, "grad_norm": 0.03544740378856659, "learning_rate": 3.5271927334657965e-05, "loss": 0.0014394694939255715, "step": 228130 }, { "epoch": 64.75730911155266, "grad_norm": 0.7851232886314392, "learning_rate": 3.5269088844734606e-05, "loss": 0.00033451486378908155, "step": 228140 }, { "epoch": 64.76014760147602, "grad_norm": 0.17049427330493927, "learning_rate": 3.526625035481124e-05, "loss": 0.00018730293959379196, "step": 228150 }, { "epoch": 64.76298609139937, "grad_norm": 0.011601299978792667, "learning_rate": 3.526341186488788e-05, "loss": 0.000165439210832119, "step": 228160 }, { "epoch": 64.76582458132273, "grad_norm": 0.02351550944149494, "learning_rate": 3.526057337496452e-05, "loss": 0.00011493787169456482, "step": 228170 }, { "epoch": 64.7686630712461, "grad_norm": 0.1096300259232521, "learning_rate": 3.525773488504116e-05, "loss": 9.328555315732956e-05, "step": 228180 }, { "epoch": 64.77150156116946, "grad_norm": 0.02514713630080223, "learning_rate": 3.52548963951178e-05, "loss": 0.0001400751993060112, "step": 228190 }, { "epoch": 64.77434005109282, "grad_norm": 0.011365020647644997, "learning_rate": 3.5252057905194434e-05, "loss": 0.00016196947544813156, "step": 228200 }, { "epoch": 64.77717854101618, "grad_norm": 0.09121741354465485, "learning_rate": 3.5249219415271075e-05, "loss": 0.00020008999854326248, "step": 228210 }, { "epoch": 64.78001703093955, "grad_norm": 0.024293268099427223, "learning_rate": 3.524638092534772e-05, "loss": 9.87498089671135e-05, "step": 228220 }, { "epoch": 64.7828555208629, "grad_norm": 0.32961133122444153, "learning_rate": 3.524354243542435e-05, "loss": 0.00041998010128736497, "step": 228230 }, { "epoch": 64.78569401078626, "grad_norm": 0.03152919188141823, "learning_rate": 3.5240703945501e-05, "loss": 0.00010145101696252823, "step": 228240 }, { "epoch": 64.78853250070962, "grad_norm": 0.006745504215359688, "learning_rate": 3.5237865455577634e-05, "loss": 0.00013104677200317382, "step": 228250 }, { "epoch": 64.79137099063298, "grad_norm": 0.01531310472637415, "learning_rate": 3.523502696565427e-05, "loss": 0.0005022125318646431, "step": 228260 }, { "epoch": 64.79420948055635, "grad_norm": 0.246595099568367, "learning_rate": 3.523218847573092e-05, "loss": 0.00013446249067783356, "step": 228270 }, { "epoch": 64.79704797047971, "grad_norm": 0.9065418839454651, "learning_rate": 3.522934998580755e-05, "loss": 0.004447096213698387, "step": 228280 }, { "epoch": 64.79988646040306, "grad_norm": 0.04589758813381195, "learning_rate": 3.522651149588419e-05, "loss": 0.0002563228830695152, "step": 228290 }, { "epoch": 64.80272495032642, "grad_norm": 0.019310442730784416, "learning_rate": 3.522367300596083e-05, "loss": 0.00034010689705610275, "step": 228300 }, { "epoch": 64.80556344024978, "grad_norm": 0.14963369071483612, "learning_rate": 3.522083451603747e-05, "loss": 0.00018470417708158494, "step": 228310 }, { "epoch": 64.80840193017315, "grad_norm": 2.7597739696502686, "learning_rate": 3.521799602611411e-05, "loss": 0.0008137373253703118, "step": 228320 }, { "epoch": 64.81124042009651, "grad_norm": 0.20758791267871857, "learning_rate": 3.5215157536190745e-05, "loss": 0.0013518983498215676, "step": 228330 }, { "epoch": 64.81407891001987, "grad_norm": 0.05411308631300926, "learning_rate": 3.5212319046267386e-05, "loss": 0.0012829853221774101, "step": 228340 }, { "epoch": 64.81691739994324, "grad_norm": 0.029415912926197052, "learning_rate": 3.520948055634403e-05, "loss": 0.00015726890414953233, "step": 228350 }, { "epoch": 64.81975588986658, "grad_norm": 0.061615124344825745, "learning_rate": 3.5206925915413e-05, "loss": 0.005569319427013397, "step": 228360 }, { "epoch": 64.82259437978995, "grad_norm": 0.07015086710453033, "learning_rate": 3.520408742548964e-05, "loss": 0.00023839082568883897, "step": 228370 }, { "epoch": 64.82543286971331, "grad_norm": 0.3058590590953827, "learning_rate": 3.5201248935566277e-05, "loss": 0.002316715009510517, "step": 228380 }, { "epoch": 64.82827135963667, "grad_norm": 0.02330838143825531, "learning_rate": 3.519841044564292e-05, "loss": 0.0004554379731416702, "step": 228390 }, { "epoch": 64.83110984956004, "grad_norm": 0.02738683484494686, "learning_rate": 3.519557195571956e-05, "loss": 0.00012701880186796188, "step": 228400 }, { "epoch": 64.8339483394834, "grad_norm": 0.12166569381952286, "learning_rate": 3.5192733465796194e-05, "loss": 0.00044478066265583036, "step": 228410 }, { "epoch": 64.83678682940676, "grad_norm": 0.04078010097146034, "learning_rate": 3.518989497587284e-05, "loss": 0.00025309715420007706, "step": 228420 }, { "epoch": 64.83962531933011, "grad_norm": 0.0959625095129013, "learning_rate": 3.518705648594948e-05, "loss": 0.00013638250529766083, "step": 228430 }, { "epoch": 64.84246380925347, "grad_norm": 0.0832049548625946, "learning_rate": 3.518421799602611e-05, "loss": 0.00046504531055688856, "step": 228440 }, { "epoch": 64.84530229917684, "grad_norm": 0.008345777168869972, "learning_rate": 3.518137950610276e-05, "loss": 0.0006667375564575195, "step": 228450 }, { "epoch": 64.8481407891002, "grad_norm": 0.20776358246803284, "learning_rate": 3.5178541016179394e-05, "loss": 0.00013624019920825957, "step": 228460 }, { "epoch": 64.85097927902356, "grad_norm": 0.16698192059993744, "learning_rate": 3.5175702526256035e-05, "loss": 0.0001493966206908226, "step": 228470 }, { "epoch": 64.85381776894693, "grad_norm": 0.05590372532606125, "learning_rate": 3.517286403633267e-05, "loss": 0.0002258399501442909, "step": 228480 }, { "epoch": 64.85665625887029, "grad_norm": 0.08549810200929642, "learning_rate": 3.517002554640931e-05, "loss": 0.00024744980037212373, "step": 228490 }, { "epoch": 64.85949474879364, "grad_norm": 0.08570590615272522, "learning_rate": 3.516718705648595e-05, "loss": 0.00013787485659122467, "step": 228500 }, { "epoch": 64.85949474879364, "eval_accuracy": 0.9793349017613022, "eval_loss": 0.08059171587228775, "eval_runtime": 32.6254, "eval_samples_per_second": 482.048, "eval_steps_per_second": 7.54, "step": 228500 }, { "epoch": 64.862333238717, "grad_norm": 0.006238734349608421, "learning_rate": 3.516434856656259e-05, "loss": 0.00011661574244499206, "step": 228510 }, { "epoch": 64.86517172864036, "grad_norm": 0.04444466903805733, "learning_rate": 3.516151007663923e-05, "loss": 9.542964398860932e-05, "step": 228520 }, { "epoch": 64.86801021856373, "grad_norm": 0.004281532485038042, "learning_rate": 3.515867158671587e-05, "loss": 8.04506242275238e-05, "step": 228530 }, { "epoch": 64.87084870848709, "grad_norm": 0.0077082389034330845, "learning_rate": 3.5155833096792505e-05, "loss": 0.00014811176806688308, "step": 228540 }, { "epoch": 64.87368719841045, "grad_norm": 0.015262167900800705, "learning_rate": 3.515299460686915e-05, "loss": 0.0005406780168414116, "step": 228550 }, { "epoch": 64.8765256883338, "grad_norm": 0.011923297308385372, "learning_rate": 3.515015611694579e-05, "loss": 0.0001754138618707657, "step": 228560 }, { "epoch": 64.87936417825716, "grad_norm": 0.15329918265342712, "learning_rate": 3.514731762702242e-05, "loss": 0.0013900475576519966, "step": 228570 }, { "epoch": 64.88220266818053, "grad_norm": 7.012753486633301, "learning_rate": 3.5144479137099063e-05, "loss": 0.0025579500943422318, "step": 228580 }, { "epoch": 64.88504115810389, "grad_norm": 1.0779067277908325, "learning_rate": 3.5141640647175705e-05, "loss": 0.0003856310620903969, "step": 228590 }, { "epoch": 64.88787964802725, "grad_norm": 0.014730863273143768, "learning_rate": 3.5138802157252346e-05, "loss": 0.003830450028181076, "step": 228600 }, { "epoch": 64.89071813795061, "grad_norm": 0.7305921912193298, "learning_rate": 3.513596366732898e-05, "loss": 0.0022347010672092438, "step": 228610 }, { "epoch": 64.89355662787398, "grad_norm": 0.2584199905395508, "learning_rate": 3.513312517740562e-05, "loss": 0.004457659646868705, "step": 228620 }, { "epoch": 64.89639511779733, "grad_norm": 0.33071789145469666, "learning_rate": 3.5130286687482264e-05, "loss": 0.011424675583839417, "step": 228630 }, { "epoch": 64.89923360772069, "grad_norm": 0.03937506675720215, "learning_rate": 3.51274481975589e-05, "loss": 0.0004631970077753067, "step": 228640 }, { "epoch": 64.90207209764405, "grad_norm": 0.05989633500576019, "learning_rate": 3.512460970763554e-05, "loss": 0.0002565082162618637, "step": 228650 }, { "epoch": 64.90491058756741, "grad_norm": 0.35470426082611084, "learning_rate": 3.512177121771218e-05, "loss": 0.0005886577069759369, "step": 228660 }, { "epoch": 64.90774907749078, "grad_norm": 0.23171883821487427, "learning_rate": 3.5118932727788815e-05, "loss": 0.0005947344005107879, "step": 228670 }, { "epoch": 64.91058756741414, "grad_norm": 0.05296129360795021, "learning_rate": 3.511609423786546e-05, "loss": 0.00033188238739967346, "step": 228680 }, { "epoch": 64.9134260573375, "grad_norm": 0.07429060339927673, "learning_rate": 3.51132557479421e-05, "loss": 0.00011734943836927414, "step": 228690 }, { "epoch": 64.91626454726085, "grad_norm": 0.016929257661104202, "learning_rate": 3.511041725801873e-05, "loss": 0.0003141514956951141, "step": 228700 }, { "epoch": 64.91910303718421, "grad_norm": 0.0515906885266304, "learning_rate": 3.5107578768095374e-05, "loss": 0.00045821554958820344, "step": 228710 }, { "epoch": 64.92194152710758, "grad_norm": 0.018287762999534607, "learning_rate": 3.5104740278172016e-05, "loss": 0.0002114308997988701, "step": 228720 }, { "epoch": 64.92478001703094, "grad_norm": 0.10078737884759903, "learning_rate": 3.510190178824865e-05, "loss": 0.00017918720841407775, "step": 228730 }, { "epoch": 64.9276185069543, "grad_norm": 0.005425699055194855, "learning_rate": 3.509906329832529e-05, "loss": 0.000498165562748909, "step": 228740 }, { "epoch": 64.93045699687767, "grad_norm": 0.1361352503299713, "learning_rate": 3.509622480840193e-05, "loss": 0.000264924019575119, "step": 228750 }, { "epoch": 64.93329548680101, "grad_norm": 0.01587316393852234, "learning_rate": 3.5093386318478574e-05, "loss": 0.0007388696074485778, "step": 228760 }, { "epoch": 64.93613397672438, "grad_norm": 0.042246703058481216, "learning_rate": 3.509054782855521e-05, "loss": 0.00021332018077373504, "step": 228770 }, { "epoch": 64.93897246664774, "grad_norm": 0.02428055927157402, "learning_rate": 3.508770933863185e-05, "loss": 0.00021994765847921371, "step": 228780 }, { "epoch": 64.9418109565711, "grad_norm": 0.042926132678985596, "learning_rate": 3.508487084870849e-05, "loss": 0.00024486444890499116, "step": 228790 }, { "epoch": 64.94464944649447, "grad_norm": 0.036877937614917755, "learning_rate": 3.5082032358785126e-05, "loss": 0.001571900025010109, "step": 228800 }, { "epoch": 64.94748793641783, "grad_norm": 1.2337578535079956, "learning_rate": 3.507919386886177e-05, "loss": 0.0006897265091538429, "step": 228810 }, { "epoch": 64.95032642634119, "grad_norm": 0.039720479398965836, "learning_rate": 3.507635537893841e-05, "loss": 0.00016620457172393798, "step": 228820 }, { "epoch": 64.95316491626454, "grad_norm": 0.012899552471935749, "learning_rate": 3.5073516889015044e-05, "loss": 0.00014179348945617676, "step": 228830 }, { "epoch": 64.9560034061879, "grad_norm": 0.9321927428245544, "learning_rate": 3.5070678399091685e-05, "loss": 0.00030822567641735076, "step": 228840 }, { "epoch": 64.95884189611127, "grad_norm": 0.021039534360170364, "learning_rate": 3.5067839909168326e-05, "loss": 0.0003265434876084328, "step": 228850 }, { "epoch": 64.96168038603463, "grad_norm": 0.0178206954151392, "learning_rate": 3.506500141924496e-05, "loss": 0.00022183302789926528, "step": 228860 }, { "epoch": 64.96451887595799, "grad_norm": 0.10837355256080627, "learning_rate": 3.50621629293216e-05, "loss": 0.0014633018523454665, "step": 228870 }, { "epoch": 64.96735736588136, "grad_norm": 0.008110346272587776, "learning_rate": 3.5059324439398244e-05, "loss": 0.00011632349342107772, "step": 228880 }, { "epoch": 64.97019585580472, "grad_norm": 0.058470018208026886, "learning_rate": 3.5056485949474885e-05, "loss": 0.0013840999454259872, "step": 228890 }, { "epoch": 64.97303434572807, "grad_norm": 0.010778256691992283, "learning_rate": 3.505364745955152e-05, "loss": 0.0017486196011304855, "step": 228900 }, { "epoch": 64.97587283565143, "grad_norm": 0.047925375401973724, "learning_rate": 3.5050808969628154e-05, "loss": 0.00029247943311929705, "step": 228910 }, { "epoch": 64.97871132557479, "grad_norm": 0.16158564388751984, "learning_rate": 3.50479704797048e-05, "loss": 0.0024313317611813546, "step": 228920 }, { "epoch": 64.98154981549816, "grad_norm": 20.1882266998291, "learning_rate": 3.504513198978144e-05, "loss": 0.007141861319541931, "step": 228930 }, { "epoch": 64.98438830542152, "grad_norm": 0.002830962184816599, "learning_rate": 3.504229349985808e-05, "loss": 0.0009547272697091102, "step": 228940 }, { "epoch": 64.98722679534488, "grad_norm": 0.01265433244407177, "learning_rate": 3.503945500993472e-05, "loss": 0.002023368515074253, "step": 228950 }, { "epoch": 64.99006528526824, "grad_norm": 0.0354875773191452, "learning_rate": 3.5036616520011354e-05, "loss": 0.0022039802744984625, "step": 228960 }, { "epoch": 64.99290377519159, "grad_norm": 0.29998937249183655, "learning_rate": 3.5033778030087996e-05, "loss": 0.0007000364363193512, "step": 228970 }, { "epoch": 64.99574226511496, "grad_norm": 0.41380774974823, "learning_rate": 3.503093954016464e-05, "loss": 0.0009984109550714494, "step": 228980 }, { "epoch": 64.99858075503832, "grad_norm": 0.0715368390083313, "learning_rate": 3.502810105024127e-05, "loss": 0.000525078922510147, "step": 228990 }, { "epoch": 65.00141924496168, "grad_norm": 0.5037841796875, "learning_rate": 3.502526256031791e-05, "loss": 0.0006352453492581844, "step": 229000 }, { "epoch": 65.00141924496168, "eval_accuracy": 0.9757741463724805, "eval_loss": 0.09193598479032516, "eval_runtime": 33.2982, "eval_samples_per_second": 472.308, "eval_steps_per_second": 7.388, "step": 229000 }, { "epoch": 65.00425773488504, "grad_norm": 0.05479614809155464, "learning_rate": 3.502242407039455e-05, "loss": 0.003186298906803131, "step": 229010 }, { "epoch": 65.00709622480841, "grad_norm": 0.08068352192640305, "learning_rate": 3.5019585580471196e-05, "loss": 0.011520585417747498, "step": 229020 }, { "epoch": 65.00993471473176, "grad_norm": 0.031193336471915245, "learning_rate": 3.501674709054783e-05, "loss": 0.000502321869134903, "step": 229030 }, { "epoch": 65.01277320465512, "grad_norm": 0.060957327485084534, "learning_rate": 3.5013908600624465e-05, "loss": 0.0013095280155539513, "step": 229040 }, { "epoch": 65.01561169457848, "grad_norm": 0.036372117698192596, "learning_rate": 3.501107011070111e-05, "loss": 0.0007886707782745362, "step": 229050 }, { "epoch": 65.01845018450184, "grad_norm": 0.08591330796480179, "learning_rate": 3.500823162077775e-05, "loss": 0.00048009101301431655, "step": 229060 }, { "epoch": 65.02128867442521, "grad_norm": 0.06429411470890045, "learning_rate": 3.500539313085439e-05, "loss": 0.0003225667402148247, "step": 229070 }, { "epoch": 65.02412716434857, "grad_norm": 2.2827236652374268, "learning_rate": 3.500255464093103e-05, "loss": 0.0005907127633690834, "step": 229080 }, { "epoch": 65.02696565427193, "grad_norm": 0.8169274926185608, "learning_rate": 3.4999716151007665e-05, "loss": 0.0007720300927758217, "step": 229090 }, { "epoch": 65.02980414419528, "grad_norm": 0.0725683942437172, "learning_rate": 3.4996877661084307e-05, "loss": 0.0002673149108886719, "step": 229100 }, { "epoch": 65.03264263411864, "grad_norm": 0.004973485134541988, "learning_rate": 3.499403917116094e-05, "loss": 0.00018357466906309127, "step": 229110 }, { "epoch": 65.03548112404201, "grad_norm": 0.03577915579080582, "learning_rate": 3.499120068123758e-05, "loss": 0.0020963814109563827, "step": 229120 }, { "epoch": 65.03831961396537, "grad_norm": 0.022209584712982178, "learning_rate": 3.4988362191314224e-05, "loss": 0.0024253327399492266, "step": 229130 }, { "epoch": 65.04115810388873, "grad_norm": 0.008739454671740532, "learning_rate": 3.498552370139086e-05, "loss": 0.0008276848122477532, "step": 229140 }, { "epoch": 65.0439965938121, "grad_norm": 0.6820820569992065, "learning_rate": 3.49826852114675e-05, "loss": 0.00024139154702425004, "step": 229150 }, { "epoch": 65.04683508373546, "grad_norm": 0.14321482181549072, "learning_rate": 3.497984672154414e-05, "loss": 0.0008460866287350654, "step": 229160 }, { "epoch": 65.04967357365881, "grad_norm": 0.06330700218677521, "learning_rate": 3.4977008231620776e-05, "loss": 0.0006181022152304649, "step": 229170 }, { "epoch": 65.05251206358217, "grad_norm": 0.030846847221255302, "learning_rate": 3.4974169741697424e-05, "loss": 0.00036870408803224564, "step": 229180 }, { "epoch": 65.05535055350553, "grad_norm": 0.33206796646118164, "learning_rate": 3.497133125177406e-05, "loss": 0.0007896116003394127, "step": 229190 }, { "epoch": 65.0581890434289, "grad_norm": 0.6131073236465454, "learning_rate": 3.496849276185069e-05, "loss": 0.0016240157186985017, "step": 229200 }, { "epoch": 65.06102753335226, "grad_norm": 0.13126453757286072, "learning_rate": 3.4965654271927335e-05, "loss": 0.0004170423373579979, "step": 229210 }, { "epoch": 65.06386602327562, "grad_norm": 0.02850036695599556, "learning_rate": 3.4962815782003976e-05, "loss": 0.0008174004033207893, "step": 229220 }, { "epoch": 65.06670451319899, "grad_norm": 0.015924982726573944, "learning_rate": 3.495997729208062e-05, "loss": 0.00026204828172922135, "step": 229230 }, { "epoch": 65.06954300312233, "grad_norm": 2.3965506553649902, "learning_rate": 3.495713880215725e-05, "loss": 0.001095571368932724, "step": 229240 }, { "epoch": 65.0723814930457, "grad_norm": 2.221008777618408, "learning_rate": 3.495430031223389e-05, "loss": 0.004897543415427208, "step": 229250 }, { "epoch": 65.07521998296906, "grad_norm": 0.4559348523616791, "learning_rate": 3.4951461822310535e-05, "loss": 0.0004163816571235657, "step": 229260 }, { "epoch": 65.07805847289242, "grad_norm": 3.349370002746582, "learning_rate": 3.494862333238717e-05, "loss": 0.0029130171984434127, "step": 229270 }, { "epoch": 65.08089696281579, "grad_norm": 0.0927416980266571, "learning_rate": 3.494578484246381e-05, "loss": 0.0003390710800886154, "step": 229280 }, { "epoch": 65.08373545273915, "grad_norm": 0.5207904577255249, "learning_rate": 3.494294635254045e-05, "loss": 0.00040192436426877975, "step": 229290 }, { "epoch": 65.0865739426625, "grad_norm": 0.061975330114364624, "learning_rate": 3.4940107862617087e-05, "loss": 0.0001819005236029625, "step": 229300 }, { "epoch": 65.08941243258586, "grad_norm": 0.31487739086151123, "learning_rate": 3.493726937269373e-05, "loss": 0.00038095135241746905, "step": 229310 }, { "epoch": 65.09225092250922, "grad_norm": 0.4930785596370697, "learning_rate": 3.493443088277037e-05, "loss": 0.0014207342639565468, "step": 229320 }, { "epoch": 65.09508941243259, "grad_norm": 0.07682589441537857, "learning_rate": 3.4931592392847004e-05, "loss": 0.0026118090376257897, "step": 229330 }, { "epoch": 65.09792790235595, "grad_norm": 0.013613464310765266, "learning_rate": 3.4928753902923645e-05, "loss": 0.0002553405240178108, "step": 229340 }, { "epoch": 65.10076639227931, "grad_norm": 13.439159393310547, "learning_rate": 3.492591541300029e-05, "loss": 0.002045172452926636, "step": 229350 }, { "epoch": 65.10360488220267, "grad_norm": 0.15311965346336365, "learning_rate": 3.492307692307693e-05, "loss": 0.0002773746848106384, "step": 229360 }, { "epoch": 65.10644337212602, "grad_norm": 0.10589984059333801, "learning_rate": 3.492023843315356e-05, "loss": 0.00018305163830518724, "step": 229370 }, { "epoch": 65.10928186204939, "grad_norm": 0.0267547108232975, "learning_rate": 3.4917399943230204e-05, "loss": 0.00010274406522512436, "step": 229380 }, { "epoch": 65.11212035197275, "grad_norm": 0.3524516224861145, "learning_rate": 3.4914561453306845e-05, "loss": 0.0001599179580807686, "step": 229390 }, { "epoch": 65.11495884189611, "grad_norm": 0.19062456488609314, "learning_rate": 3.491172296338348e-05, "loss": 0.00011992137879133225, "step": 229400 }, { "epoch": 65.11779733181947, "grad_norm": 0.00820415560156107, "learning_rate": 3.490888447346012e-05, "loss": 0.0004836037755012512, "step": 229410 }, { "epoch": 65.12063582174284, "grad_norm": 0.17747282981872559, "learning_rate": 3.490604598353676e-05, "loss": 0.004467607289552688, "step": 229420 }, { "epoch": 65.1234743116662, "grad_norm": 0.009906544350087643, "learning_rate": 3.49032074936134e-05, "loss": 0.0007542431354522705, "step": 229430 }, { "epoch": 65.12631280158955, "grad_norm": 0.3473424017429352, "learning_rate": 3.490036900369004e-05, "loss": 0.00026749838143587114, "step": 229440 }, { "epoch": 65.12915129151291, "grad_norm": 0.05080820992588997, "learning_rate": 3.489753051376668e-05, "loss": 0.00022383183240890503, "step": 229450 }, { "epoch": 65.13198978143627, "grad_norm": 0.01427767239511013, "learning_rate": 3.4894692023843315e-05, "loss": 0.00016125701367855071, "step": 229460 }, { "epoch": 65.13482827135964, "grad_norm": 0.0320495180785656, "learning_rate": 3.4891853533919956e-05, "loss": 0.00010007526725530624, "step": 229470 }, { "epoch": 65.137666761283, "grad_norm": 0.123896025121212, "learning_rate": 3.48890150439966e-05, "loss": 0.000272434763610363, "step": 229480 }, { "epoch": 65.14050525120636, "grad_norm": 0.14948038756847382, "learning_rate": 3.488617655407324e-05, "loss": 9.644664824008942e-05, "step": 229490 }, { "epoch": 65.14334374112971, "grad_norm": 0.03119046241044998, "learning_rate": 3.4883338064149873e-05, "loss": 0.0020104911178350448, "step": 229500 }, { "epoch": 65.14334374112971, "eval_accuracy": 0.9791441470083296, "eval_loss": 0.07697154581546783, "eval_runtime": 33.0737, "eval_samples_per_second": 475.513, "eval_steps_per_second": 7.438, "step": 229500 }, { "epoch": 65.14618223105307, "grad_norm": 0.050489313900470734, "learning_rate": 3.488049957422651e-05, "loss": 0.0013831224292516708, "step": 229510 }, { "epoch": 65.14902072097644, "grad_norm": 0.02803208865225315, "learning_rate": 3.4877661084303156e-05, "loss": 0.0006270445883274078, "step": 229520 }, { "epoch": 65.1518592108998, "grad_norm": 0.24929587543010712, "learning_rate": 3.487482259437979e-05, "loss": 0.0005738938227295876, "step": 229530 }, { "epoch": 65.15469770082316, "grad_norm": 0.013013454154133797, "learning_rate": 3.487198410445643e-05, "loss": 0.0004579050466418266, "step": 229540 }, { "epoch": 65.15753619074653, "grad_norm": 0.020105259492993355, "learning_rate": 3.4869145614533074e-05, "loss": 0.0005306549370288849, "step": 229550 }, { "epoch": 65.16037468066989, "grad_norm": 0.030942514538764954, "learning_rate": 3.486630712460971e-05, "loss": 0.0012645466253161431, "step": 229560 }, { "epoch": 65.16321317059324, "grad_norm": 0.0073573836125433445, "learning_rate": 3.486346863468635e-05, "loss": 0.00012910030782222747, "step": 229570 }, { "epoch": 65.1660516605166, "grad_norm": 0.008817413821816444, "learning_rate": 3.486063014476299e-05, "loss": 0.00011296533048152924, "step": 229580 }, { "epoch": 65.16889015043996, "grad_norm": 0.039188235998153687, "learning_rate": 3.4857791654839625e-05, "loss": 0.00022115688771009445, "step": 229590 }, { "epoch": 65.17172864036333, "grad_norm": 0.05255655571818352, "learning_rate": 3.485495316491627e-05, "loss": 0.0002570157870650291, "step": 229600 }, { "epoch": 65.17456713028669, "grad_norm": 0.022985966876149178, "learning_rate": 3.48521146749929e-05, "loss": 0.00017924681305885314, "step": 229610 }, { "epoch": 65.17740562021005, "grad_norm": 0.01840195246040821, "learning_rate": 3.484927618506954e-05, "loss": 5.7394057512283327e-05, "step": 229620 }, { "epoch": 65.18024411013342, "grad_norm": 0.3032020032405853, "learning_rate": 3.4846437695146184e-05, "loss": 0.0001550896093249321, "step": 229630 }, { "epoch": 65.18308260005676, "grad_norm": 0.009323233738541603, "learning_rate": 3.484359920522282e-05, "loss": 0.00015639737248420715, "step": 229640 }, { "epoch": 65.18592108998013, "grad_norm": 0.008873976767063141, "learning_rate": 3.484076071529947e-05, "loss": 0.000606117770075798, "step": 229650 }, { "epoch": 65.18875957990349, "grad_norm": 0.9524775743484497, "learning_rate": 3.48379222253761e-05, "loss": 0.0002569831907749176, "step": 229660 }, { "epoch": 65.19159806982685, "grad_norm": 0.010795419104397297, "learning_rate": 3.4835083735452736e-05, "loss": 0.00017020776867866517, "step": 229670 }, { "epoch": 65.19443655975022, "grad_norm": 0.058275166898965836, "learning_rate": 3.4832245245529384e-05, "loss": 0.0008393898606300354, "step": 229680 }, { "epoch": 65.19727504967358, "grad_norm": 0.037776391953229904, "learning_rate": 3.482940675560602e-05, "loss": 0.00029291976243257524, "step": 229690 }, { "epoch": 65.20011353959694, "grad_norm": 0.03828248754143715, "learning_rate": 3.482656826568266e-05, "loss": 0.00015361644327640533, "step": 229700 }, { "epoch": 65.20295202952029, "grad_norm": 0.03192436695098877, "learning_rate": 3.4823729775759295e-05, "loss": 0.0006170524284243584, "step": 229710 }, { "epoch": 65.20579051944365, "grad_norm": 0.0719391331076622, "learning_rate": 3.4820891285835936e-05, "loss": 0.0002520453184843063, "step": 229720 }, { "epoch": 65.20862900936702, "grad_norm": 0.03251367807388306, "learning_rate": 3.481805279591258e-05, "loss": 0.0004302188754081726, "step": 229730 }, { "epoch": 65.21146749929038, "grad_norm": 0.012099698185920715, "learning_rate": 3.481521430598921e-05, "loss": 0.0032926045358181, "step": 229740 }, { "epoch": 65.21430598921374, "grad_norm": 0.10387854278087616, "learning_rate": 3.4812375816065854e-05, "loss": 0.0005930440500378608, "step": 229750 }, { "epoch": 65.2171444791371, "grad_norm": 0.02268698439002037, "learning_rate": 3.4809537326142495e-05, "loss": 0.000557217001914978, "step": 229760 }, { "epoch": 65.21998296906045, "grad_norm": 0.02164512500166893, "learning_rate": 3.480669883621913e-05, "loss": 0.004361123964190483, "step": 229770 }, { "epoch": 65.22282145898382, "grad_norm": 0.08999887108802795, "learning_rate": 3.480386034629578e-05, "loss": 0.0004393072798848152, "step": 229780 }, { "epoch": 65.22565994890718, "grad_norm": 0.006770040839910507, "learning_rate": 3.480102185637241e-05, "loss": 0.006859703361988068, "step": 229790 }, { "epoch": 65.22849843883054, "grad_norm": 0.055072225630283356, "learning_rate": 3.479818336644905e-05, "loss": 0.0005920510739088058, "step": 229800 }, { "epoch": 65.2313369287539, "grad_norm": 0.14166371524333954, "learning_rate": 3.479534487652569e-05, "loss": 0.00046294480562210084, "step": 229810 }, { "epoch": 65.23417541867727, "grad_norm": 0.025354202836751938, "learning_rate": 3.479250638660233e-05, "loss": 0.0002845579758286476, "step": 229820 }, { "epoch": 65.23701390860063, "grad_norm": 0.08689596503973007, "learning_rate": 3.478966789667897e-05, "loss": 9.42939892411232e-05, "step": 229830 }, { "epoch": 65.23985239852398, "grad_norm": 0.011884348466992378, "learning_rate": 3.4786829406755606e-05, "loss": 0.0002040725201368332, "step": 229840 }, { "epoch": 65.24269088844734, "grad_norm": 0.23543092608451843, "learning_rate": 3.478399091683225e-05, "loss": 0.00026011522859334944, "step": 229850 }, { "epoch": 65.2455293783707, "grad_norm": 0.14591899514198303, "learning_rate": 3.478115242690889e-05, "loss": 0.0006381263956427574, "step": 229860 }, { "epoch": 65.24836786829407, "grad_norm": 0.056799184530973434, "learning_rate": 3.477831393698552e-05, "loss": 0.0015196120366454124, "step": 229870 }, { "epoch": 65.25120635821743, "grad_norm": 0.06244302913546562, "learning_rate": 3.4775475447062164e-05, "loss": 0.00046346522867679596, "step": 229880 }, { "epoch": 65.2540448481408, "grad_norm": 0.19157803058624268, "learning_rate": 3.4772636957138806e-05, "loss": 0.0052953936159610745, "step": 229890 }, { "epoch": 65.25688333806416, "grad_norm": 0.01785968244075775, "learning_rate": 3.476979846721544e-05, "loss": 0.0005230056121945381, "step": 229900 }, { "epoch": 65.2597218279875, "grad_norm": 3.848658323287964, "learning_rate": 3.476695997729208e-05, "loss": 0.001254352554678917, "step": 229910 }, { "epoch": 65.26256031791087, "grad_norm": 0.010446425527334213, "learning_rate": 3.476412148736872e-05, "loss": 0.000417804904282093, "step": 229920 }, { "epoch": 65.26539880783423, "grad_norm": 0.0497196689248085, "learning_rate": 3.476128299744536e-05, "loss": 0.00048804152756929395, "step": 229930 }, { "epoch": 65.2682372977576, "grad_norm": 0.04484446719288826, "learning_rate": 3.4758444507522e-05, "loss": 0.0005066849291324616, "step": 229940 }, { "epoch": 65.27107578768096, "grad_norm": 0.1865955889225006, "learning_rate": 3.475560601759864e-05, "loss": 0.0009935196489095689, "step": 229950 }, { "epoch": 65.27391427760432, "grad_norm": 0.680886447429657, "learning_rate": 3.475276752767528e-05, "loss": 0.0010285081341862678, "step": 229960 }, { "epoch": 65.27675276752768, "grad_norm": 0.2630820572376251, "learning_rate": 3.4749929037751916e-05, "loss": 0.00044944640249013903, "step": 229970 }, { "epoch": 65.27959125745103, "grad_norm": 0.06253282725811005, "learning_rate": 3.474709054782856e-05, "loss": 0.0018233034759759903, "step": 229980 }, { "epoch": 65.2824297473744, "grad_norm": 0.052729811519384384, "learning_rate": 3.47442520579052e-05, "loss": 0.0015331929549574851, "step": 229990 }, { "epoch": 65.28526823729776, "grad_norm": 0.10477558523416519, "learning_rate": 3.4741413567981834e-05, "loss": 0.00046506300568580625, "step": 230000 }, { "epoch": 65.28526823729776, "eval_accuracy": 0.9705601831245628, "eval_loss": 0.1166166365146637, "eval_runtime": 32.9518, "eval_samples_per_second": 477.272, "eval_steps_per_second": 7.465, "step": 230000 }, { "epoch": 65.28810672722112, "grad_norm": 0.08292065560817719, "learning_rate": 3.4738575078058475e-05, "loss": 0.0033504992723464968, "step": 230010 }, { "epoch": 65.29094521714448, "grad_norm": 0.9038577079772949, "learning_rate": 3.4735736588135116e-05, "loss": 0.0005789464339613914, "step": 230020 }, { "epoch": 65.29378370706785, "grad_norm": 0.020823480561375618, "learning_rate": 3.473289809821175e-05, "loss": 0.0006854185834527015, "step": 230030 }, { "epoch": 65.2966221969912, "grad_norm": 1.0181567668914795, "learning_rate": 3.473005960828839e-05, "loss": 0.0008813267573714257, "step": 230040 }, { "epoch": 65.29946068691456, "grad_norm": 0.03446287289261818, "learning_rate": 3.4727221118365034e-05, "loss": 0.007146850228309631, "step": 230050 }, { "epoch": 65.30229917683792, "grad_norm": 0.44271478056907654, "learning_rate": 3.472438262844167e-05, "loss": 0.0002869222313165665, "step": 230060 }, { "epoch": 65.30513766676128, "grad_norm": 0.3417847156524658, "learning_rate": 3.472154413851831e-05, "loss": 0.00022647716104984283, "step": 230070 }, { "epoch": 65.30797615668465, "grad_norm": 3.1347646713256836, "learning_rate": 3.471870564859495e-05, "loss": 0.0006802832707762718, "step": 230080 }, { "epoch": 65.31081464660801, "grad_norm": 0.07173875719308853, "learning_rate": 3.4715867158671586e-05, "loss": 0.00018881857395172119, "step": 230090 }, { "epoch": 65.31365313653137, "grad_norm": 0.011403699405491352, "learning_rate": 3.471302866874823e-05, "loss": 0.00010371096432209015, "step": 230100 }, { "epoch": 65.31649162645472, "grad_norm": 0.015037884935736656, "learning_rate": 3.471019017882487e-05, "loss": 0.00010374002158641815, "step": 230110 }, { "epoch": 65.31933011637808, "grad_norm": 0.017012938857078552, "learning_rate": 3.470735168890151e-05, "loss": 0.0005794903263449669, "step": 230120 }, { "epoch": 65.32216860630145, "grad_norm": 0.03473366051912308, "learning_rate": 3.4704513198978144e-05, "loss": 0.00012717004865407943, "step": 230130 }, { "epoch": 65.32500709622481, "grad_norm": 0.008127683773636818, "learning_rate": 3.470167470905478e-05, "loss": 0.0003181645646691322, "step": 230140 }, { "epoch": 65.32784558614817, "grad_norm": 0.10164839774370193, "learning_rate": 3.469883621913143e-05, "loss": 0.00010783802717924118, "step": 230150 }, { "epoch": 65.33068407607153, "grad_norm": 0.5886454582214355, "learning_rate": 3.469599772920806e-05, "loss": 0.00025347527116537094, "step": 230160 }, { "epoch": 65.3335225659949, "grad_norm": 0.008659115992486477, "learning_rate": 3.46931592392847e-05, "loss": 0.0013678845018148423, "step": 230170 }, { "epoch": 65.33636105591825, "grad_norm": 2.6044373512268066, "learning_rate": 3.4690320749361345e-05, "loss": 0.0006966769695281982, "step": 230180 }, { "epoch": 65.33919954584161, "grad_norm": 0.4325608015060425, "learning_rate": 3.468748225943798e-05, "loss": 0.0008468374609947205, "step": 230190 }, { "epoch": 65.34203803576497, "grad_norm": 0.08342748135328293, "learning_rate": 3.468464376951462e-05, "loss": 0.0001406598836183548, "step": 230200 }, { "epoch": 65.34487652568833, "grad_norm": 0.007242447696626186, "learning_rate": 3.468180527959126e-05, "loss": 0.00013828482478857041, "step": 230210 }, { "epoch": 65.3477150156117, "grad_norm": 0.011243588291108608, "learning_rate": 3.4678966789667897e-05, "loss": 0.0003251615911722183, "step": 230220 }, { "epoch": 65.35055350553506, "grad_norm": 0.6143301129341125, "learning_rate": 3.467612829974454e-05, "loss": 0.0009912194684147835, "step": 230230 }, { "epoch": 65.35339199545841, "grad_norm": 1.3268275260925293, "learning_rate": 3.467328980982117e-05, "loss": 0.00035418737679719924, "step": 230240 }, { "epoch": 65.35623048538177, "grad_norm": 0.1074412539601326, "learning_rate": 3.467045131989782e-05, "loss": 0.0001795763149857521, "step": 230250 }, { "epoch": 65.35906897530514, "grad_norm": 0.029931599274277687, "learning_rate": 3.4667612829974455e-05, "loss": 0.00033677797764539716, "step": 230260 }, { "epoch": 65.3619074652285, "grad_norm": 0.01415445189923048, "learning_rate": 3.466477434005109e-05, "loss": 0.00013789571821689605, "step": 230270 }, { "epoch": 65.36474595515186, "grad_norm": 0.10055750608444214, "learning_rate": 3.466193585012774e-05, "loss": 0.00010680314153432846, "step": 230280 }, { "epoch": 65.36758444507522, "grad_norm": 0.040038395673036575, "learning_rate": 3.465909736020437e-05, "loss": 0.00020609032362699508, "step": 230290 }, { "epoch": 65.37042293499859, "grad_norm": 0.27710017561912537, "learning_rate": 3.4656258870281014e-05, "loss": 0.004402939975261688, "step": 230300 }, { "epoch": 65.37326142492194, "grad_norm": 0.026688730344176292, "learning_rate": 3.4653420380357655e-05, "loss": 0.00030074585229158404, "step": 230310 }, { "epoch": 65.3760999148453, "grad_norm": 0.725716769695282, "learning_rate": 3.465058189043429e-05, "loss": 0.0006171843037009239, "step": 230320 }, { "epoch": 65.37893840476866, "grad_norm": 0.029741698876023293, "learning_rate": 3.464774340051093e-05, "loss": 0.0003771902993321419, "step": 230330 }, { "epoch": 65.38177689469202, "grad_norm": 0.02311135083436966, "learning_rate": 3.4644904910587566e-05, "loss": 0.00014627501368522645, "step": 230340 }, { "epoch": 65.38461538461539, "grad_norm": 0.08507902175188065, "learning_rate": 3.464206642066421e-05, "loss": 0.0002539176493883133, "step": 230350 }, { "epoch": 65.38745387453875, "grad_norm": 0.08854518085718155, "learning_rate": 3.463922793074085e-05, "loss": 0.00014759786427021027, "step": 230360 }, { "epoch": 65.39029236446211, "grad_norm": 0.01596117950975895, "learning_rate": 3.463638944081748e-05, "loss": 0.00015472415834665297, "step": 230370 }, { "epoch": 65.39313085438546, "grad_norm": 0.016562962904572487, "learning_rate": 3.463355095089413e-05, "loss": 0.00011501666158437729, "step": 230380 }, { "epoch": 65.39596934430882, "grad_norm": 0.019831513985991478, "learning_rate": 3.4630712460970766e-05, "loss": 0.00014644060283899307, "step": 230390 }, { "epoch": 65.39880783423219, "grad_norm": 0.13157080113887787, "learning_rate": 3.46278739710474e-05, "loss": 0.00022806357592344284, "step": 230400 }, { "epoch": 65.40164632415555, "grad_norm": 0.005039254203438759, "learning_rate": 3.462503548112405e-05, "loss": 9.468793869018554e-05, "step": 230410 }, { "epoch": 65.40448481407891, "grad_norm": 0.017745940014719963, "learning_rate": 3.462219699120068e-05, "loss": 0.00018891878426074982, "step": 230420 }, { "epoch": 65.40732330400228, "grad_norm": 0.013486118987202644, "learning_rate": 3.4619358501277325e-05, "loss": 0.00016265679150819778, "step": 230430 }, { "epoch": 65.41016179392564, "grad_norm": 0.018304064869880676, "learning_rate": 3.461652001135396e-05, "loss": 0.0004008414223790169, "step": 230440 }, { "epoch": 65.41300028384899, "grad_norm": 0.00928535033017397, "learning_rate": 3.46136815214306e-05, "loss": 0.000920371524989605, "step": 230450 }, { "epoch": 65.41583877377235, "grad_norm": 0.07542373985052109, "learning_rate": 3.461084303150724e-05, "loss": 0.0001936875283718109, "step": 230460 }, { "epoch": 65.41867726369571, "grad_norm": 0.03510186821222305, "learning_rate": 3.460800454158388e-05, "loss": 0.0013897791504859924, "step": 230470 }, { "epoch": 65.42151575361908, "grad_norm": 0.016283566132187843, "learning_rate": 3.460516605166052e-05, "loss": 0.00034122951328754427, "step": 230480 }, { "epoch": 65.42435424354244, "grad_norm": 0.03949473425745964, "learning_rate": 3.460232756173716e-05, "loss": 0.0009489970281720162, "step": 230490 }, { "epoch": 65.4271927334658, "grad_norm": 2.7300937175750732, "learning_rate": 3.4599489071813794e-05, "loss": 0.0010180028155446052, "step": 230500 }, { "epoch": 65.4271927334658, "eval_accuracy": 0.9783175430787817, "eval_loss": 0.07831234484910965, "eval_runtime": 32.8777, "eval_samples_per_second": 478.348, "eval_steps_per_second": 7.482, "step": 230500 }, { "epoch": 65.43003122338915, "grad_norm": 0.2614760100841522, "learning_rate": 3.4596650581890435e-05, "loss": 0.0005953298881649971, "step": 230510 }, { "epoch": 65.43286971331251, "grad_norm": 0.3019252419471741, "learning_rate": 3.459381209196708e-05, "loss": 0.0004292204976081848, "step": 230520 }, { "epoch": 65.43570820323588, "grad_norm": 0.06626644730567932, "learning_rate": 3.459097360204371e-05, "loss": 0.0002352869138121605, "step": 230530 }, { "epoch": 65.43854669315924, "grad_norm": 0.039274945855140686, "learning_rate": 3.458813511212035e-05, "loss": 0.0018612436950206756, "step": 230540 }, { "epoch": 65.4413851830826, "grad_norm": 0.01713317632675171, "learning_rate": 3.4585296622196994e-05, "loss": 0.0005646152421832085, "step": 230550 }, { "epoch": 65.44422367300596, "grad_norm": 0.11152097582817078, "learning_rate": 3.458245813227363e-05, "loss": 0.0008381742984056473, "step": 230560 }, { "epoch": 65.44706216292933, "grad_norm": 0.002642684616148472, "learning_rate": 3.457961964235027e-05, "loss": 0.00011054836213588714, "step": 230570 }, { "epoch": 65.44990065285268, "grad_norm": 3.1664795875549316, "learning_rate": 3.457678115242691e-05, "loss": 0.0009320586919784546, "step": 230580 }, { "epoch": 65.45273914277604, "grad_norm": 0.008346828632056713, "learning_rate": 3.457394266250355e-05, "loss": 0.0003162892535328865, "step": 230590 }, { "epoch": 65.4555776326994, "grad_norm": 0.08433346450328827, "learning_rate": 3.4571388021572526e-05, "loss": 0.0037971440702676772, "step": 230600 }, { "epoch": 65.45841612262276, "grad_norm": 0.24099792540073395, "learning_rate": 3.456854953164916e-05, "loss": 0.010692233592271805, "step": 230610 }, { "epoch": 65.46125461254613, "grad_norm": 0.06739385426044464, "learning_rate": 3.45657110417258e-05, "loss": 0.0004597621038556099, "step": 230620 }, { "epoch": 65.46409310246949, "grad_norm": 0.01220053993165493, "learning_rate": 3.456287255180244e-05, "loss": 0.00041505414992570877, "step": 230630 }, { "epoch": 65.46693159239285, "grad_norm": 0.019190708175301552, "learning_rate": 3.4560034061879085e-05, "loss": 0.00036629755049943924, "step": 230640 }, { "epoch": 65.4697700823162, "grad_norm": 0.03015018254518509, "learning_rate": 3.455719557195572e-05, "loss": 8.632075041532516e-05, "step": 230650 }, { "epoch": 65.47260857223957, "grad_norm": 4.916889190673828, "learning_rate": 3.455435708203236e-05, "loss": 0.0012139411643147468, "step": 230660 }, { "epoch": 65.47544706216293, "grad_norm": 0.028063824400305748, "learning_rate": 3.4551518592109e-05, "loss": 0.0003934372216463089, "step": 230670 }, { "epoch": 65.47828555208629, "grad_norm": 0.050168950110673904, "learning_rate": 3.454868010218564e-05, "loss": 0.0004121895879507065, "step": 230680 }, { "epoch": 65.48112404200965, "grad_norm": 0.0836583599448204, "learning_rate": 3.454584161226228e-05, "loss": 0.0004968998953700065, "step": 230690 }, { "epoch": 65.48396253193302, "grad_norm": 0.02380485087633133, "learning_rate": 3.454300312233892e-05, "loss": 0.0004846619442105293, "step": 230700 }, { "epoch": 65.48680102185637, "grad_norm": 0.10654915124177933, "learning_rate": 3.4540164632415554e-05, "loss": 0.00037540644407272337, "step": 230710 }, { "epoch": 65.48963951177973, "grad_norm": 0.06196329742670059, "learning_rate": 3.4537326142492195e-05, "loss": 0.00023442618548870088, "step": 230720 }, { "epoch": 65.49247800170309, "grad_norm": 0.013339802622795105, "learning_rate": 3.453448765256884e-05, "loss": 0.00013826563954353333, "step": 230730 }, { "epoch": 65.49531649162645, "grad_norm": 0.3302919566631317, "learning_rate": 3.453164916264547e-05, "loss": 0.00036805160343647005, "step": 230740 }, { "epoch": 65.49815498154982, "grad_norm": 0.01947767846286297, "learning_rate": 3.452881067272211e-05, "loss": 0.0003106825053691864, "step": 230750 }, { "epoch": 65.50099347147318, "grad_norm": 0.06973744183778763, "learning_rate": 3.4525972182798754e-05, "loss": 0.00019247308373451232, "step": 230760 }, { "epoch": 65.50383196139654, "grad_norm": 0.04456651210784912, "learning_rate": 3.4523133692875396e-05, "loss": 0.0019077777862548829, "step": 230770 }, { "epoch": 65.50667045131989, "grad_norm": 0.1005941778421402, "learning_rate": 3.452029520295203e-05, "loss": 0.0002575790509581566, "step": 230780 }, { "epoch": 65.50950894124325, "grad_norm": 0.0442071259021759, "learning_rate": 3.451745671302867e-05, "loss": 0.0004042450338602066, "step": 230790 }, { "epoch": 65.51234743116662, "grad_norm": 0.2052815556526184, "learning_rate": 3.451461822310531e-05, "loss": 0.0002114584669470787, "step": 230800 }, { "epoch": 65.51518592108998, "grad_norm": 0.09678258746862411, "learning_rate": 3.451177973318195e-05, "loss": 0.00029650796204805374, "step": 230810 }, { "epoch": 65.51802441101334, "grad_norm": 0.026263011619448662, "learning_rate": 3.450894124325859e-05, "loss": 0.0002931110560894012, "step": 230820 }, { "epoch": 65.5208629009367, "grad_norm": 0.4610153138637543, "learning_rate": 3.450610275333523e-05, "loss": 0.0003575865179300308, "step": 230830 }, { "epoch": 65.52370139086007, "grad_norm": 0.007482717279344797, "learning_rate": 3.4503264263411865e-05, "loss": 0.0002703210338950157, "step": 230840 }, { "epoch": 65.52653988078342, "grad_norm": 0.021622737869620323, "learning_rate": 3.4500425773488506e-05, "loss": 0.00036898069083690643, "step": 230850 }, { "epoch": 65.52937837070678, "grad_norm": 0.068491131067276, "learning_rate": 3.449758728356515e-05, "loss": 0.00039840303361415863, "step": 230860 }, { "epoch": 65.53221686063014, "grad_norm": 0.02338431030511856, "learning_rate": 3.449474879364178e-05, "loss": 0.0004004988819360733, "step": 230870 }, { "epoch": 65.5350553505535, "grad_norm": 0.1574171930551529, "learning_rate": 3.4491910303718424e-05, "loss": 0.0004371890798211098, "step": 230880 }, { "epoch": 65.53789384047687, "grad_norm": 0.01621890813112259, "learning_rate": 3.4489071813795065e-05, "loss": 0.00015305392444133758, "step": 230890 }, { "epoch": 65.54073233040023, "grad_norm": 0.049762826412916183, "learning_rate": 3.44862333238717e-05, "loss": 0.00012197401374578476, "step": 230900 }, { "epoch": 65.5435708203236, "grad_norm": 0.05430343747138977, "learning_rate": 3.448339483394834e-05, "loss": 0.00033241156488657, "step": 230910 }, { "epoch": 65.54640931024694, "grad_norm": 0.20650848746299744, "learning_rate": 3.4480556344024975e-05, "loss": 0.0002188732847571373, "step": 230920 }, { "epoch": 65.5492478001703, "grad_norm": 0.021488307043910027, "learning_rate": 3.4477717854101624e-05, "loss": 0.00014530587941408157, "step": 230930 }, { "epoch": 65.55208629009367, "grad_norm": 0.15565164387226105, "learning_rate": 3.447487936417826e-05, "loss": 0.0006895087659358978, "step": 230940 }, { "epoch": 65.55492478001703, "grad_norm": 0.08786024153232574, "learning_rate": 3.447204087425489e-05, "loss": 0.009687669575214386, "step": 230950 }, { "epoch": 65.5577632699404, "grad_norm": 0.07408939301967621, "learning_rate": 3.446920238433154e-05, "loss": 0.0005891254171729088, "step": 230960 }, { "epoch": 65.56060175986376, "grad_norm": 0.0551992692053318, "learning_rate": 3.4466363894408176e-05, "loss": 0.00021363552659749984, "step": 230970 }, { "epoch": 65.5634402497871, "grad_norm": 0.015658782795071602, "learning_rate": 3.446352540448482e-05, "loss": 0.00010338053107261658, "step": 230980 }, { "epoch": 65.56627873971047, "grad_norm": 0.02230912074446678, "learning_rate": 3.446068691456146e-05, "loss": 0.00015950407832860948, "step": 230990 }, { "epoch": 65.56911722963383, "grad_norm": 0.3710186183452606, "learning_rate": 3.445784842463809e-05, "loss": 0.00012608412653207778, "step": 231000 }, { "epoch": 65.56911722963383, "eval_accuracy": 0.9790805620906721, "eval_loss": 0.07118082791566849, "eval_runtime": 33.0474, "eval_samples_per_second": 475.893, "eval_steps_per_second": 7.444, "step": 231000 }, { "epoch": 65.5719557195572, "grad_norm": 0.025240499526262283, "learning_rate": 3.4455009934714734e-05, "loss": 0.00010130684822797775, "step": 231010 }, { "epoch": 65.57479420948056, "grad_norm": 0.0051840199157595634, "learning_rate": 3.445217144479137e-05, "loss": 9.222421795129776e-05, "step": 231020 }, { "epoch": 65.57763269940392, "grad_norm": 0.0024436688981950283, "learning_rate": 3.444933295486801e-05, "loss": 8.761975914239883e-05, "step": 231030 }, { "epoch": 65.58047118932728, "grad_norm": 0.006105129607021809, "learning_rate": 3.444649446494465e-05, "loss": 6.300210952758789e-05, "step": 231040 }, { "epoch": 65.58330967925063, "grad_norm": 0.10514181852340698, "learning_rate": 3.4443655975021286e-05, "loss": 0.0001981062814593315, "step": 231050 }, { "epoch": 65.586148169174, "grad_norm": 0.06704547256231308, "learning_rate": 3.4440817485097934e-05, "loss": 0.0014233734458684922, "step": 231060 }, { "epoch": 65.58898665909736, "grad_norm": 0.017359917983412743, "learning_rate": 3.443797899517457e-05, "loss": 0.0008830849081277847, "step": 231070 }, { "epoch": 65.59182514902072, "grad_norm": 0.7944636940956116, "learning_rate": 3.4435140505251204e-05, "loss": 0.0001443542540073395, "step": 231080 }, { "epoch": 65.59466363894408, "grad_norm": 0.911065936088562, "learning_rate": 3.443230201532785e-05, "loss": 0.0002723760902881622, "step": 231090 }, { "epoch": 65.59750212886745, "grad_norm": 0.0750395655632019, "learning_rate": 3.4429463525404486e-05, "loss": 0.00011400282382965088, "step": 231100 }, { "epoch": 65.60034061879081, "grad_norm": 0.09097669273614883, "learning_rate": 3.442662503548113e-05, "loss": 0.00015837717801332475, "step": 231110 }, { "epoch": 65.60317910871416, "grad_norm": 0.018021922558546066, "learning_rate": 3.442378654555776e-05, "loss": 7.276460528373718e-05, "step": 231120 }, { "epoch": 65.60601759863752, "grad_norm": 0.009925609454512596, "learning_rate": 3.4420948055634404e-05, "loss": 0.00012372098863124847, "step": 231130 }, { "epoch": 65.60885608856088, "grad_norm": 0.016456788405776024, "learning_rate": 3.4418109565711045e-05, "loss": 0.0001631377264857292, "step": 231140 }, { "epoch": 65.61169457848425, "grad_norm": 0.3011413514614105, "learning_rate": 3.441527107578768e-05, "loss": 0.0002016771584749222, "step": 231150 }, { "epoch": 65.61453306840761, "grad_norm": 0.018949178978800774, "learning_rate": 3.441243258586432e-05, "loss": 0.0007985735312104226, "step": 231160 }, { "epoch": 65.61737155833097, "grad_norm": 0.04617413133382797, "learning_rate": 3.440959409594096e-05, "loss": 0.0008039426058530808, "step": 231170 }, { "epoch": 65.62021004825434, "grad_norm": 0.3014024794101715, "learning_rate": 3.44067556060176e-05, "loss": 0.0009149014949798584, "step": 231180 }, { "epoch": 65.62304853817768, "grad_norm": 0.018783049657940865, "learning_rate": 3.4403917116094245e-05, "loss": 0.0010236378759145737, "step": 231190 }, { "epoch": 65.62588702810105, "grad_norm": 0.0816238522529602, "learning_rate": 3.440107862617088e-05, "loss": 0.0007648428902029992, "step": 231200 }, { "epoch": 65.62872551802441, "grad_norm": 0.023723937571048737, "learning_rate": 3.4398240136247514e-05, "loss": 0.0009662536904215813, "step": 231210 }, { "epoch": 65.63156400794777, "grad_norm": 0.02421645075082779, "learning_rate": 3.4395401646324156e-05, "loss": 0.0009558778256177902, "step": 231220 }, { "epoch": 65.63440249787114, "grad_norm": 0.030539389699697495, "learning_rate": 3.43925631564008e-05, "loss": 0.0012198476120829581, "step": 231230 }, { "epoch": 65.6372409877945, "grad_norm": 0.3311913311481476, "learning_rate": 3.438972466647744e-05, "loss": 0.008245759457349778, "step": 231240 }, { "epoch": 65.64007947771785, "grad_norm": 0.09007830172777176, "learning_rate": 3.438688617655407e-05, "loss": 0.00038088858127593994, "step": 231250 }, { "epoch": 65.64291796764121, "grad_norm": 0.07241680473089218, "learning_rate": 3.4384047686630714e-05, "loss": 0.00047122817486524584, "step": 231260 }, { "epoch": 65.64575645756457, "grad_norm": 0.43515029549598694, "learning_rate": 3.4381209196707356e-05, "loss": 0.0005272591486573219, "step": 231270 }, { "epoch": 65.64859494748794, "grad_norm": 0.042012769728899, "learning_rate": 3.437837070678399e-05, "loss": 0.000399027019739151, "step": 231280 }, { "epoch": 65.6514334374113, "grad_norm": 0.02603253535926342, "learning_rate": 3.437553221686063e-05, "loss": 0.000435580313205719, "step": 231290 }, { "epoch": 65.65427192733466, "grad_norm": 0.01575154811143875, "learning_rate": 3.437269372693727e-05, "loss": 0.000903334654867649, "step": 231300 }, { "epoch": 65.65711041725802, "grad_norm": 0.050132691860198975, "learning_rate": 3.436985523701391e-05, "loss": 0.0007720757275819778, "step": 231310 }, { "epoch": 65.65994890718137, "grad_norm": 0.03551299124956131, "learning_rate": 3.436701674709055e-05, "loss": 0.00015041660517454148, "step": 231320 }, { "epoch": 65.66278739710474, "grad_norm": 0.02867250144481659, "learning_rate": 3.436417825716719e-05, "loss": 0.0003714941442012787, "step": 231330 }, { "epoch": 65.6656258870281, "grad_norm": 0.01396878995001316, "learning_rate": 3.4361339767243825e-05, "loss": 8.286423981189728e-05, "step": 231340 }, { "epoch": 65.66846437695146, "grad_norm": 0.003421800909563899, "learning_rate": 3.4358501277320467e-05, "loss": 0.0004221295937895775, "step": 231350 }, { "epoch": 65.67130286687483, "grad_norm": 0.006106202024966478, "learning_rate": 3.435566278739711e-05, "loss": 0.0002126520499587059, "step": 231360 }, { "epoch": 65.67414135679819, "grad_norm": 0.09426310658454895, "learning_rate": 3.435282429747374e-05, "loss": 0.00020603574812412263, "step": 231370 }, { "epoch": 65.67697984672155, "grad_norm": 0.011370202526450157, "learning_rate": 3.4349985807550384e-05, "loss": 0.0002227712422609329, "step": 231380 }, { "epoch": 65.6798183366449, "grad_norm": 0.013650527223944664, "learning_rate": 3.4347147317627025e-05, "loss": 0.0003604702651500702, "step": 231390 }, { "epoch": 65.68265682656826, "grad_norm": 0.017411530017852783, "learning_rate": 3.4344308827703667e-05, "loss": 0.00010625701397657395, "step": 231400 }, { "epoch": 65.68549531649163, "grad_norm": 0.09672416746616364, "learning_rate": 3.43414703377803e-05, "loss": 0.00021499376744031905, "step": 231410 }, { "epoch": 65.68833380641499, "grad_norm": 0.06511159986257553, "learning_rate": 3.4338631847856936e-05, "loss": 0.00035912711173295974, "step": 231420 }, { "epoch": 65.69117229633835, "grad_norm": 0.017238302156329155, "learning_rate": 3.4335793357933584e-05, "loss": 0.0014703292399644852, "step": 231430 }, { "epoch": 65.69401078626171, "grad_norm": 0.016446134075522423, "learning_rate": 3.433295486801022e-05, "loss": 0.0022742325440049172, "step": 231440 }, { "epoch": 65.69684927618506, "grad_norm": 0.3073159456253052, "learning_rate": 3.433011637808686e-05, "loss": 0.0021853266283869744, "step": 231450 }, { "epoch": 65.69968776610843, "grad_norm": 1.3966937065124512, "learning_rate": 3.43272778881635e-05, "loss": 0.004742862284183502, "step": 231460 }, { "epoch": 65.70252625603179, "grad_norm": 0.07108964025974274, "learning_rate": 3.4324439398240136e-05, "loss": 0.00022201966494321822, "step": 231470 }, { "epoch": 65.70536474595515, "grad_norm": 0.07788769900798798, "learning_rate": 3.432160090831678e-05, "loss": 0.00041264742612838744, "step": 231480 }, { "epoch": 65.70820323587851, "grad_norm": 0.8292551636695862, "learning_rate": 3.431876241839342e-05, "loss": 0.0015250638127326966, "step": 231490 }, { "epoch": 65.71104172580188, "grad_norm": 0.7885483503341675, "learning_rate": 3.431592392847005e-05, "loss": 0.0006885522976517678, "step": 231500 }, { "epoch": 65.71104172580188, "eval_accuracy": 0.9771094296432886, "eval_loss": 0.08630332350730896, "eval_runtime": 32.822, "eval_samples_per_second": 479.16, "eval_steps_per_second": 7.495, "step": 231500 }, { "epoch": 65.71388021572524, "grad_norm": 0.0921783596277237, "learning_rate": 3.4313085438546695e-05, "loss": 0.001633262261748314, "step": 231510 }, { "epoch": 65.71671870564859, "grad_norm": 0.05505790933966637, "learning_rate": 3.431024694862333e-05, "loss": 0.0015654198825359345, "step": 231520 }, { "epoch": 65.71955719557195, "grad_norm": 0.1669234186410904, "learning_rate": 3.430740845869998e-05, "loss": 0.0003402795642614365, "step": 231530 }, { "epoch": 65.72239568549531, "grad_norm": 1.4129343032836914, "learning_rate": 3.430456996877661e-05, "loss": 0.004977821186184883, "step": 231540 }, { "epoch": 65.72523417541868, "grad_norm": 0.030669525265693665, "learning_rate": 3.4301731478853247e-05, "loss": 0.00026328042149543763, "step": 231550 }, { "epoch": 65.72807266534204, "grad_norm": 0.2314763367176056, "learning_rate": 3.4298892988929895e-05, "loss": 0.0009986249729990958, "step": 231560 }, { "epoch": 65.7309111552654, "grad_norm": 0.0065121129155159, "learning_rate": 3.429605449900653e-05, "loss": 0.002551092579960823, "step": 231570 }, { "epoch": 65.73374964518877, "grad_norm": 0.06986025720834732, "learning_rate": 3.429321600908317e-05, "loss": 0.0008673910051584244, "step": 231580 }, { "epoch": 65.73658813511211, "grad_norm": 0.1898462474346161, "learning_rate": 3.429037751915981e-05, "loss": 0.0010048115625977516, "step": 231590 }, { "epoch": 65.73942662503548, "grad_norm": 0.11327793449163437, "learning_rate": 3.428753902923645e-05, "loss": 0.0013318700715899468, "step": 231600 }, { "epoch": 65.74226511495884, "grad_norm": 0.020107487216591835, "learning_rate": 3.428470053931309e-05, "loss": 0.004937111586332321, "step": 231610 }, { "epoch": 65.7451036048822, "grad_norm": 0.03388233110308647, "learning_rate": 3.428186204938972e-05, "loss": 0.00048382841050624846, "step": 231620 }, { "epoch": 65.74794209480557, "grad_norm": 0.06529341638088226, "learning_rate": 3.4279023559466364e-05, "loss": 0.001849549449980259, "step": 231630 }, { "epoch": 65.75078058472893, "grad_norm": 1.0802533626556396, "learning_rate": 3.4276185069543005e-05, "loss": 0.00024856925010681153, "step": 231640 }, { "epoch": 65.75361907465229, "grad_norm": 0.013305902481079102, "learning_rate": 3.427334657961964e-05, "loss": 0.0003539029508829117, "step": 231650 }, { "epoch": 65.75645756457564, "grad_norm": 0.006168501451611519, "learning_rate": 3.427050808969629e-05, "loss": 0.0008251743391156196, "step": 231660 }, { "epoch": 65.759296054499, "grad_norm": 0.03794547915458679, "learning_rate": 3.426766959977292e-05, "loss": 0.0021901126950979234, "step": 231670 }, { "epoch": 65.76213454442237, "grad_norm": 0.0072954557836055756, "learning_rate": 3.426483110984956e-05, "loss": 0.00040808729827404024, "step": 231680 }, { "epoch": 65.76497303434573, "grad_norm": 0.025351418182253838, "learning_rate": 3.4261992619926205e-05, "loss": 0.00020852982997894288, "step": 231690 }, { "epoch": 65.76781152426909, "grad_norm": 0.011151624843478203, "learning_rate": 3.425915413000284e-05, "loss": 0.0012509185820817948, "step": 231700 }, { "epoch": 65.77065001419246, "grad_norm": 0.9641257524490356, "learning_rate": 3.425631564007948e-05, "loss": 0.00034696385264396665, "step": 231710 }, { "epoch": 65.7734885041158, "grad_norm": 0.23323750495910645, "learning_rate": 3.425347715015612e-05, "loss": 0.0003921885043382645, "step": 231720 }, { "epoch": 65.77632699403917, "grad_norm": 2.4443697929382324, "learning_rate": 3.425063866023276e-05, "loss": 0.001122518628835678, "step": 231730 }, { "epoch": 65.77916548396253, "grad_norm": 0.18842996656894684, "learning_rate": 3.42478001703094e-05, "loss": 0.0005273265764117241, "step": 231740 }, { "epoch": 65.78200397388589, "grad_norm": 0.19303396344184875, "learning_rate": 3.4244961680386033e-05, "loss": 0.00039974935352802274, "step": 231750 }, { "epoch": 65.78484246380926, "grad_norm": 0.02381720580160618, "learning_rate": 3.4242123190462675e-05, "loss": 0.0001455085352063179, "step": 231760 }, { "epoch": 65.78768095373262, "grad_norm": 0.08430250734090805, "learning_rate": 3.4239284700539316e-05, "loss": 0.0004067510366439819, "step": 231770 }, { "epoch": 65.79051944365598, "grad_norm": 0.09176656603813171, "learning_rate": 3.423644621061595e-05, "loss": 0.0002033540979027748, "step": 231780 }, { "epoch": 65.79335793357933, "grad_norm": 0.23469425737857819, "learning_rate": 3.423360772069259e-05, "loss": 0.0003663640469312668, "step": 231790 }, { "epoch": 65.79619642350269, "grad_norm": 0.027860073372721672, "learning_rate": 3.4230769230769234e-05, "loss": 9.603500366210938e-05, "step": 231800 }, { "epoch": 65.79903491342606, "grad_norm": 0.1567106693983078, "learning_rate": 3.422793074084587e-05, "loss": 0.0001372080296278, "step": 231810 }, { "epoch": 65.80187340334942, "grad_norm": 0.022114720195531845, "learning_rate": 3.4225092250922516e-05, "loss": 9.746979922056198e-05, "step": 231820 }, { "epoch": 65.80471189327278, "grad_norm": 0.013876757584512234, "learning_rate": 3.422225376099915e-05, "loss": 0.0002264091745018959, "step": 231830 }, { "epoch": 65.80755038319614, "grad_norm": 0.006722757127135992, "learning_rate": 3.4219415271075785e-05, "loss": 0.00011606048792600632, "step": 231840 }, { "epoch": 65.8103888731195, "grad_norm": 0.029834387823939323, "learning_rate": 3.421657678115243e-05, "loss": 0.000258730910718441, "step": 231850 }, { "epoch": 65.81322736304286, "grad_norm": 0.1048969104886055, "learning_rate": 3.421373829122907e-05, "loss": 0.0014695528894662856, "step": 231860 }, { "epoch": 65.81606585296622, "grad_norm": 0.5295614004135132, "learning_rate": 3.421089980130571e-05, "loss": 0.0002588074654340744, "step": 231870 }, { "epoch": 65.81890434288958, "grad_norm": 0.08855278044939041, "learning_rate": 3.4208061311382344e-05, "loss": 0.0016183022409677506, "step": 231880 }, { "epoch": 65.82174283281294, "grad_norm": 0.17444369196891785, "learning_rate": 3.4205222821458986e-05, "loss": 0.0003184320405125618, "step": 231890 }, { "epoch": 65.82458132273631, "grad_norm": 0.06564556807279587, "learning_rate": 3.420238433153563e-05, "loss": 0.00018995590507984162, "step": 231900 }, { "epoch": 65.82741981265967, "grad_norm": 0.1842729151248932, "learning_rate": 3.419954584161226e-05, "loss": 0.001046636886894703, "step": 231910 }, { "epoch": 65.83025830258302, "grad_norm": 0.08029181510210037, "learning_rate": 3.41967073516889e-05, "loss": 0.00022217053920030593, "step": 231920 }, { "epoch": 65.83309679250638, "grad_norm": 0.12270428985357285, "learning_rate": 3.4193868861765544e-05, "loss": 0.00029267873615026476, "step": 231930 }, { "epoch": 65.83593528242974, "grad_norm": 0.02139168418943882, "learning_rate": 3.419103037184218e-05, "loss": 0.0002420227974653244, "step": 231940 }, { "epoch": 65.83877377235311, "grad_norm": 0.03319339081645012, "learning_rate": 3.418819188191882e-05, "loss": 0.00037816055119037627, "step": 231950 }, { "epoch": 65.84161226227647, "grad_norm": 0.1663677990436554, "learning_rate": 3.418535339199546e-05, "loss": 0.0010635420680046082, "step": 231960 }, { "epoch": 65.84445075219983, "grad_norm": 0.019096067175269127, "learning_rate": 3.4182514902072096e-05, "loss": 0.0001076769083738327, "step": 231970 }, { "epoch": 65.8472892421232, "grad_norm": 0.043338995426893234, "learning_rate": 3.417967641214874e-05, "loss": 0.0003419457003474236, "step": 231980 }, { "epoch": 65.85012773204654, "grad_norm": 0.061249688267707825, "learning_rate": 3.417683792222538e-05, "loss": 0.00017466023564338684, "step": 231990 }, { "epoch": 65.85296622196991, "grad_norm": 0.17403727769851685, "learning_rate": 3.417399943230202e-05, "loss": 0.0010902166366577148, "step": 232000 }, { "epoch": 65.85296622196991, "eval_accuracy": 0.9783811279964393, "eval_loss": 0.07650379836559296, "eval_runtime": 32.6485, "eval_samples_per_second": 481.706, "eval_steps_per_second": 7.535, "step": 232000 }, { "epoch": 65.85580471189327, "grad_norm": 0.033439431339502335, "learning_rate": 3.4171160942378655e-05, "loss": 0.0003294188529253006, "step": 232010 }, { "epoch": 65.85864320181663, "grad_norm": 5.664687633514404, "learning_rate": 3.4168322452455296e-05, "loss": 0.0005848128348588943, "step": 232020 }, { "epoch": 65.86148169174, "grad_norm": 0.0294631440192461, "learning_rate": 3.416548396253194e-05, "loss": 8.283164352178574e-05, "step": 232030 }, { "epoch": 65.86432018166336, "grad_norm": 0.022696491330862045, "learning_rate": 3.416264547260857e-05, "loss": 0.00017242655158042908, "step": 232040 }, { "epoch": 65.86715867158672, "grad_norm": 0.05554160475730896, "learning_rate": 3.4159806982685214e-05, "loss": 0.0005330335348844528, "step": 232050 }, { "epoch": 65.86999716151007, "grad_norm": 0.1565648913383484, "learning_rate": 3.4156968492761855e-05, "loss": 0.00017522424459457398, "step": 232060 }, { "epoch": 65.87283565143343, "grad_norm": 0.009212814271450043, "learning_rate": 3.415413000283849e-05, "loss": 9.92894172668457e-05, "step": 232070 }, { "epoch": 65.8756741413568, "grad_norm": 0.04812614992260933, "learning_rate": 3.415129151291513e-05, "loss": 0.00022208280861377717, "step": 232080 }, { "epoch": 65.87851263128016, "grad_norm": 0.02265743725001812, "learning_rate": 3.414845302299177e-05, "loss": 5.548335611820221e-05, "step": 232090 }, { "epoch": 65.88135112120352, "grad_norm": 0.03143341466784477, "learning_rate": 3.414561453306841e-05, "loss": 0.00014950018376111985, "step": 232100 }, { "epoch": 65.88418961112689, "grad_norm": 0.09819622337818146, "learning_rate": 3.414277604314505e-05, "loss": 0.0004644446074962616, "step": 232110 }, { "epoch": 65.88702810105025, "grad_norm": 0.01867305114865303, "learning_rate": 3.413993755322169e-05, "loss": 0.0003481244668364525, "step": 232120 }, { "epoch": 65.8898665909736, "grad_norm": 0.001553376903757453, "learning_rate": 3.413709906329833e-05, "loss": 0.0031631909310817718, "step": 232130 }, { "epoch": 65.89270508089696, "grad_norm": 1.1753427982330322, "learning_rate": 3.4134260573374966e-05, "loss": 0.0007697656750679016, "step": 232140 }, { "epoch": 65.89554357082032, "grad_norm": 0.40565139055252075, "learning_rate": 3.41314220834516e-05, "loss": 0.0068141654133796695, "step": 232150 }, { "epoch": 65.89838206074369, "grad_norm": 0.07705086469650269, "learning_rate": 3.412858359352825e-05, "loss": 0.00012869518250226974, "step": 232160 }, { "epoch": 65.90122055066705, "grad_norm": 0.03706358000636101, "learning_rate": 3.412574510360488e-05, "loss": 0.00018549133092164992, "step": 232170 }, { "epoch": 65.90405904059041, "grad_norm": 0.016309289261698723, "learning_rate": 3.4122906613681524e-05, "loss": 0.0007084537297487259, "step": 232180 }, { "epoch": 65.90689753051376, "grad_norm": 0.015377521514892578, "learning_rate": 3.4120068123758166e-05, "loss": 0.00025738198310136794, "step": 232190 }, { "epoch": 65.90973602043712, "grad_norm": 0.016931679099798203, "learning_rate": 3.41172296338348e-05, "loss": 0.00020135659724473953, "step": 232200 }, { "epoch": 65.91257451036049, "grad_norm": 0.18335749208927155, "learning_rate": 3.411439114391144e-05, "loss": 0.00016441401094198228, "step": 232210 }, { "epoch": 65.91541300028385, "grad_norm": 0.03733351081609726, "learning_rate": 3.411155265398808e-05, "loss": 0.00143485888838768, "step": 232220 }, { "epoch": 65.91825149020721, "grad_norm": 0.0708928182721138, "learning_rate": 3.410871416406472e-05, "loss": 0.0003153594210743904, "step": 232230 }, { "epoch": 65.92108998013057, "grad_norm": 0.05072992667555809, "learning_rate": 3.410587567414136e-05, "loss": 0.00040464773774147035, "step": 232240 }, { "epoch": 65.92392847005394, "grad_norm": 0.015472141094505787, "learning_rate": 3.4103037184217994e-05, "loss": 0.0003967206925153732, "step": 232250 }, { "epoch": 65.92676695997729, "grad_norm": 0.10730788856744766, "learning_rate": 3.4100198694294635e-05, "loss": 0.0004399750381708145, "step": 232260 }, { "epoch": 65.92960544990065, "grad_norm": 8.229979515075684, "learning_rate": 3.4097360204371276e-05, "loss": 0.002939872071146965, "step": 232270 }, { "epoch": 65.93244393982401, "grad_norm": 0.016142047941684723, "learning_rate": 3.409452171444791e-05, "loss": 0.0002686440944671631, "step": 232280 }, { "epoch": 65.93528242974737, "grad_norm": 16.668062210083008, "learning_rate": 3.409168322452456e-05, "loss": 0.003414344787597656, "step": 232290 }, { "epoch": 65.93812091967074, "grad_norm": 0.28718504309654236, "learning_rate": 3.4088844734601194e-05, "loss": 0.0007456950843334198, "step": 232300 }, { "epoch": 65.9409594095941, "grad_norm": 0.027415936812758446, "learning_rate": 3.408600624467783e-05, "loss": 0.0003516515716910362, "step": 232310 }, { "epoch": 65.94379789951746, "grad_norm": 0.009290101006627083, "learning_rate": 3.4083167754754477e-05, "loss": 0.004120992869138718, "step": 232320 }, { "epoch": 65.94663638944081, "grad_norm": 0.03956007584929466, "learning_rate": 3.408032926483111e-05, "loss": 0.0002362443134188652, "step": 232330 }, { "epoch": 65.94947487936417, "grad_norm": 0.024351069703698158, "learning_rate": 3.407749077490775e-05, "loss": 0.00025674719363451005, "step": 232340 }, { "epoch": 65.95231336928754, "grad_norm": 0.6920332908630371, "learning_rate": 3.407465228498439e-05, "loss": 0.0003547629341483116, "step": 232350 }, { "epoch": 65.9551518592109, "grad_norm": 0.10338646918535233, "learning_rate": 3.407181379506103e-05, "loss": 0.0008632360026240349, "step": 232360 }, { "epoch": 65.95799034913426, "grad_norm": 4.255619049072266, "learning_rate": 3.406897530513767e-05, "loss": 0.0006251191720366478, "step": 232370 }, { "epoch": 65.96082883905763, "grad_norm": 0.6003546118736267, "learning_rate": 3.4066136815214304e-05, "loss": 0.00033625159412622454, "step": 232380 }, { "epoch": 65.96366732898099, "grad_norm": 1.0008975267410278, "learning_rate": 3.4063298325290946e-05, "loss": 0.00020723268389701843, "step": 232390 }, { "epoch": 65.96650581890434, "grad_norm": 0.22079218924045563, "learning_rate": 3.406045983536759e-05, "loss": 0.00025177020579576493, "step": 232400 }, { "epoch": 65.9693443088277, "grad_norm": 0.07953465729951859, "learning_rate": 3.405762134544422e-05, "loss": 0.00020938832312822342, "step": 232410 }, { "epoch": 65.97218279875106, "grad_norm": 0.008531776256859303, "learning_rate": 3.405478285552087e-05, "loss": 0.00025482177734375, "step": 232420 }, { "epoch": 65.97502128867443, "grad_norm": 0.02798614650964737, "learning_rate": 3.4051944365597505e-05, "loss": 0.00021680444478988647, "step": 232430 }, { "epoch": 65.97785977859779, "grad_norm": 0.03798951953649521, "learning_rate": 3.404910587567414e-05, "loss": 0.0005261555314064025, "step": 232440 }, { "epoch": 65.98069826852115, "grad_norm": 0.0229087732732296, "learning_rate": 3.404626738575078e-05, "loss": 0.0001209799200296402, "step": 232450 }, { "epoch": 65.9835367584445, "grad_norm": 0.0735493153333664, "learning_rate": 3.404342889582742e-05, "loss": 0.0005402615293860436, "step": 232460 }, { "epoch": 65.98637524836786, "grad_norm": 0.039110127836465836, "learning_rate": 3.404059040590406e-05, "loss": 0.00024355165660381318, "step": 232470 }, { "epoch": 65.98921373829123, "grad_norm": 0.014102994464337826, "learning_rate": 3.40377519159807e-05, "loss": 0.0003142576664686203, "step": 232480 }, { "epoch": 65.99205222821459, "grad_norm": 0.19636334478855133, "learning_rate": 3.403491342605734e-05, "loss": 0.000492800772190094, "step": 232490 }, { "epoch": 65.99489071813795, "grad_norm": 0.028438128530979156, "learning_rate": 3.403207493613398e-05, "loss": 0.00031008850783109665, "step": 232500 }, { "epoch": 65.99489071813795, "eval_accuracy": 0.9788898073376995, "eval_loss": 0.07891220599412918, "eval_runtime": 32.7657, "eval_samples_per_second": 479.983, "eval_steps_per_second": 7.508, "step": 232500 }, { "epoch": 65.99772920806132, "grad_norm": 0.027484824880957603, "learning_rate": 3.4029236446210615e-05, "loss": 0.00034668296575546265, "step": 232510 }, { "epoch": 66.00056769798468, "grad_norm": 0.0113536911085248, "learning_rate": 3.402639795628726e-05, "loss": 0.0010433056391775607, "step": 232520 }, { "epoch": 66.00340618790803, "grad_norm": 0.07524393498897552, "learning_rate": 3.40235594663639e-05, "loss": 0.00024408455938100814, "step": 232530 }, { "epoch": 66.00624467783139, "grad_norm": 0.0058694458566606045, "learning_rate": 3.402072097644053e-05, "loss": 9.697992354631423e-05, "step": 232540 }, { "epoch": 66.00908316775475, "grad_norm": 0.034445468336343765, "learning_rate": 3.4017882486517174e-05, "loss": 0.00012011956423521041, "step": 232550 }, { "epoch": 66.01192165767812, "grad_norm": 0.022343549877405167, "learning_rate": 3.4015043996593815e-05, "loss": 0.00018851533532142638, "step": 232560 }, { "epoch": 66.01476014760148, "grad_norm": 0.1105780079960823, "learning_rate": 3.401220550667045e-05, "loss": 0.00010188203305006027, "step": 232570 }, { "epoch": 66.01759863752484, "grad_norm": 0.009078293107450008, "learning_rate": 3.400936701674709e-05, "loss": 7.758978754281997e-05, "step": 232580 }, { "epoch": 66.0204371274482, "grad_norm": 0.03215876594185829, "learning_rate": 3.400652852682373e-05, "loss": 0.0001323288306593895, "step": 232590 }, { "epoch": 66.02327561737155, "grad_norm": 0.015220828354358673, "learning_rate": 3.4003690036900374e-05, "loss": 9.748991578817368e-05, "step": 232600 }, { "epoch": 66.02611410729492, "grad_norm": 0.003970368765294552, "learning_rate": 3.400085154697701e-05, "loss": 9.426083415746689e-05, "step": 232610 }, { "epoch": 66.02895259721828, "grad_norm": 0.005593647249042988, "learning_rate": 3.399801305705365e-05, "loss": 7.393192499876023e-05, "step": 232620 }, { "epoch": 66.03179108714164, "grad_norm": 0.47783565521240234, "learning_rate": 3.399517456713029e-05, "loss": 0.00019857939332723618, "step": 232630 }, { "epoch": 66.034629577065, "grad_norm": 0.05755291134119034, "learning_rate": 3.3992336077206926e-05, "loss": 8.904524147510529e-05, "step": 232640 }, { "epoch": 66.03746806698837, "grad_norm": 0.05923078954219818, "learning_rate": 3.398949758728357e-05, "loss": 6.714519113302231e-05, "step": 232650 }, { "epoch": 66.04030655691172, "grad_norm": 0.011720946989953518, "learning_rate": 3.398665909736021e-05, "loss": 0.00010413862764835358, "step": 232660 }, { "epoch": 66.04314504683508, "grad_norm": 0.07505165040493011, "learning_rate": 3.398382060743684e-05, "loss": 0.0001877306029200554, "step": 232670 }, { "epoch": 66.04598353675844, "grad_norm": 0.024829482659697533, "learning_rate": 3.3980982117513485e-05, "loss": 5.5584311485290525e-05, "step": 232680 }, { "epoch": 66.0488220266818, "grad_norm": 0.0053470553830266, "learning_rate": 3.3978143627590126e-05, "loss": 8.112583309412002e-05, "step": 232690 }, { "epoch": 66.05166051660517, "grad_norm": 0.12765564024448395, "learning_rate": 3.397530513766676e-05, "loss": 0.0001225791871547699, "step": 232700 }, { "epoch": 66.05449900652853, "grad_norm": 0.019378110766410828, "learning_rate": 3.39724666477434e-05, "loss": 0.00013296455144882203, "step": 232710 }, { "epoch": 66.0573374964519, "grad_norm": 0.007320824544876814, "learning_rate": 3.3969628157820043e-05, "loss": 0.00010502766817808152, "step": 232720 }, { "epoch": 66.06017598637524, "grad_norm": 0.05537530779838562, "learning_rate": 3.396678966789668e-05, "loss": 9.499993175268173e-05, "step": 232730 }, { "epoch": 66.0630144762986, "grad_norm": 0.3489086627960205, "learning_rate": 3.396395117797332e-05, "loss": 0.00015031024813652038, "step": 232740 }, { "epoch": 66.06585296622197, "grad_norm": 0.3592424690723419, "learning_rate": 3.3961112688049954e-05, "loss": 0.00036058723926544187, "step": 232750 }, { "epoch": 66.06869145614533, "grad_norm": 0.4094430208206177, "learning_rate": 3.39582741981266e-05, "loss": 0.00020624678581953048, "step": 232760 }, { "epoch": 66.0715299460687, "grad_norm": 0.11119238287210464, "learning_rate": 3.395543570820324e-05, "loss": 0.0001426978036761284, "step": 232770 }, { "epoch": 66.07436843599206, "grad_norm": 0.006968047935515642, "learning_rate": 3.395259721827987e-05, "loss": 9.83385369181633e-05, "step": 232780 }, { "epoch": 66.07720692591542, "grad_norm": 0.016121739521622658, "learning_rate": 3.394975872835652e-05, "loss": 0.0010469211265444756, "step": 232790 }, { "epoch": 66.08004541583877, "grad_norm": 0.018648609519004822, "learning_rate": 3.3946920238433154e-05, "loss": 0.00022006873041391373, "step": 232800 }, { "epoch": 66.08288390576213, "grad_norm": 0.00786043331027031, "learning_rate": 3.3944081748509796e-05, "loss": 7.970035076141358e-05, "step": 232810 }, { "epoch": 66.0857223956855, "grad_norm": 0.10460779815912247, "learning_rate": 3.394124325858644e-05, "loss": 0.0002209451049566269, "step": 232820 }, { "epoch": 66.08856088560886, "grad_norm": 0.002288075629621744, "learning_rate": 3.393840476866307e-05, "loss": 0.0008338961750268936, "step": 232830 }, { "epoch": 66.09139937553222, "grad_norm": 0.004492133855819702, "learning_rate": 3.393556627873971e-05, "loss": 0.0007001109421253205, "step": 232840 }, { "epoch": 66.09423786545558, "grad_norm": 0.12490373104810715, "learning_rate": 3.393272778881635e-05, "loss": 0.0007220461964607238, "step": 232850 }, { "epoch": 66.09707635537895, "grad_norm": 0.007942186668515205, "learning_rate": 3.392988929889299e-05, "loss": 0.0007060157135128975, "step": 232860 }, { "epoch": 66.0999148453023, "grad_norm": 0.010541570372879505, "learning_rate": 3.392705080896963e-05, "loss": 0.0002139287069439888, "step": 232870 }, { "epoch": 66.10275333522566, "grad_norm": 0.011709324084222317, "learning_rate": 3.3924212319046265e-05, "loss": 0.0002936314791440964, "step": 232880 }, { "epoch": 66.10559182514902, "grad_norm": 0.10702188313007355, "learning_rate": 3.392137382912291e-05, "loss": 0.00042288657277822495, "step": 232890 }, { "epoch": 66.10843031507238, "grad_norm": 0.007727478630840778, "learning_rate": 3.391853533919955e-05, "loss": 9.479895234107972e-05, "step": 232900 }, { "epoch": 66.11126880499575, "grad_norm": 0.007817096076905727, "learning_rate": 3.391569684927618e-05, "loss": 0.0002653332427144051, "step": 232910 }, { "epoch": 66.11410729491911, "grad_norm": 0.14381739497184753, "learning_rate": 3.391285835935283e-05, "loss": 0.00012149270623922348, "step": 232920 }, { "epoch": 66.11694578484246, "grad_norm": 0.01283132191747427, "learning_rate": 3.3910019869429465e-05, "loss": 8.952505886554718e-05, "step": 232930 }, { "epoch": 66.11978427476582, "grad_norm": 0.02057928778231144, "learning_rate": 3.3907181379506106e-05, "loss": 0.00014406237751245498, "step": 232940 }, { "epoch": 66.12262276468918, "grad_norm": 0.0033099802676588297, "learning_rate": 3.390434288958274e-05, "loss": 0.0001329910010099411, "step": 232950 }, { "epoch": 66.12546125461255, "grad_norm": 0.012909701094031334, "learning_rate": 3.390150439965938e-05, "loss": 0.00015131011605262756, "step": 232960 }, { "epoch": 66.12829974453591, "grad_norm": 0.031819023191928864, "learning_rate": 3.3898665909736024e-05, "loss": 6.848890334367752e-05, "step": 232970 }, { "epoch": 66.13113823445927, "grad_norm": 0.026120631024241447, "learning_rate": 3.389582741981266e-05, "loss": 0.00014063343405723572, "step": 232980 }, { "epoch": 66.13397672438263, "grad_norm": 0.07679503411054611, "learning_rate": 3.38929889298893e-05, "loss": 0.0005698882043361664, "step": 232990 }, { "epoch": 66.13681521430598, "grad_norm": 0.033672019839286804, "learning_rate": 3.389015043996594e-05, "loss": 0.00016231313347816467, "step": 233000 }, { "epoch": 66.13681521430598, "eval_accuracy": 0.9806701850321103, "eval_loss": 0.06895015388727188, "eval_runtime": 33.4699, "eval_samples_per_second": 469.885, "eval_steps_per_second": 7.35, "step": 233000 }, { "epoch": 66.13965370422935, "grad_norm": 0.006140344776213169, "learning_rate": 3.3887311950042576e-05, "loss": 0.00010113269090652465, "step": 233010 }, { "epoch": 66.14249219415271, "grad_norm": 0.01756472885608673, "learning_rate": 3.3884473460119224e-05, "loss": 0.00012804381549358368, "step": 233020 }, { "epoch": 66.14533068407607, "grad_norm": 0.04008111730217934, "learning_rate": 3.388163497019586e-05, "loss": 0.00011715982109308242, "step": 233030 }, { "epoch": 66.14816917399943, "grad_norm": 0.015966100618243217, "learning_rate": 3.387879648027249e-05, "loss": 0.0001583324745297432, "step": 233040 }, { "epoch": 66.1510076639228, "grad_norm": 0.017154641449451447, "learning_rate": 3.387595799034914e-05, "loss": 8.74202698469162e-05, "step": 233050 }, { "epoch": 66.15384615384616, "grad_norm": 0.004196998663246632, "learning_rate": 3.3873119500425776e-05, "loss": 5.8916211128234866e-05, "step": 233060 }, { "epoch": 66.15668464376951, "grad_norm": 0.0245505440980196, "learning_rate": 3.387028101050242e-05, "loss": 9.220130741596221e-05, "step": 233070 }, { "epoch": 66.15952313369287, "grad_norm": 0.02695852518081665, "learning_rate": 3.386744252057905e-05, "loss": 9.327903389930726e-05, "step": 233080 }, { "epoch": 66.16236162361623, "grad_norm": 0.025310788303613663, "learning_rate": 3.386460403065569e-05, "loss": 7.252376526594162e-05, "step": 233090 }, { "epoch": 66.1652001135396, "grad_norm": 0.004187425132840872, "learning_rate": 3.3861765540732334e-05, "loss": 6.744340062141419e-05, "step": 233100 }, { "epoch": 66.16803860346296, "grad_norm": 0.023863060399889946, "learning_rate": 3.385892705080897e-05, "loss": 6.474070250988007e-05, "step": 233110 }, { "epoch": 66.17087709338632, "grad_norm": 0.024018751457333565, "learning_rate": 3.385608856088561e-05, "loss": 5.5402703583240506e-05, "step": 233120 }, { "epoch": 66.17371558330967, "grad_norm": 0.003072138875722885, "learning_rate": 3.385325007096225e-05, "loss": 0.0002827305346727371, "step": 233130 }, { "epoch": 66.17655407323304, "grad_norm": 0.05802115797996521, "learning_rate": 3.3850411581038886e-05, "loss": 6.876196712255478e-05, "step": 233140 }, { "epoch": 66.1793925631564, "grad_norm": 0.05674227699637413, "learning_rate": 3.384757309111553e-05, "loss": 0.00018185805529356003, "step": 233150 }, { "epoch": 66.18223105307976, "grad_norm": 0.05866632238030434, "learning_rate": 3.384473460119217e-05, "loss": 0.00018876884132623672, "step": 233160 }, { "epoch": 66.18506954300312, "grad_norm": 0.01557983923703432, "learning_rate": 3.3841896111268804e-05, "loss": 0.00028312448412179945, "step": 233170 }, { "epoch": 66.18790803292649, "grad_norm": 0.04470216482877731, "learning_rate": 3.3839057621345445e-05, "loss": 0.00042217858135700225, "step": 233180 }, { "epoch": 66.19074652284985, "grad_norm": 0.013816723600029945, "learning_rate": 3.3836219131422086e-05, "loss": 0.0003851616755127907, "step": 233190 }, { "epoch": 66.1935850127732, "grad_norm": 0.047734037041664124, "learning_rate": 3.383338064149872e-05, "loss": 0.0038664262741804124, "step": 233200 }, { "epoch": 66.19642350269656, "grad_norm": 0.010320871137082577, "learning_rate": 3.38308260005677e-05, "loss": 0.010418751835823059, "step": 233210 }, { "epoch": 66.19926199261992, "grad_norm": 0.17280521988868713, "learning_rate": 3.3827987510644336e-05, "loss": 0.001406494528055191, "step": 233220 }, { "epoch": 66.20210048254329, "grad_norm": 0.04633255675435066, "learning_rate": 3.382514902072098e-05, "loss": 0.0002667145803570747, "step": 233230 }, { "epoch": 66.20493897246665, "grad_norm": 0.04391682893037796, "learning_rate": 3.382231053079762e-05, "loss": 0.0001771513372659683, "step": 233240 }, { "epoch": 66.20777746239001, "grad_norm": 0.014845605008304119, "learning_rate": 3.381947204087425e-05, "loss": 0.00022705458104610443, "step": 233250 }, { "epoch": 66.21061595231338, "grad_norm": 4.894590377807617, "learning_rate": 3.3816633550950894e-05, "loss": 0.0009203938767313957, "step": 233260 }, { "epoch": 66.21345444223672, "grad_norm": 0.4172787070274353, "learning_rate": 3.3813795061027536e-05, "loss": 0.0009730629622936249, "step": 233270 }, { "epoch": 66.21629293216009, "grad_norm": 0.8809516429901123, "learning_rate": 3.381095657110418e-05, "loss": 0.00041850320994853976, "step": 233280 }, { "epoch": 66.21913142208345, "grad_norm": 0.467478483915329, "learning_rate": 3.380811808118081e-05, "loss": 0.00015327110886573792, "step": 233290 }, { "epoch": 66.22196991200681, "grad_norm": 0.016467327252030373, "learning_rate": 3.380527959125745e-05, "loss": 0.00015284866094589233, "step": 233300 }, { "epoch": 66.22480840193018, "grad_norm": 15.403519630432129, "learning_rate": 3.3802441101334094e-05, "loss": 0.001954525150358677, "step": 233310 }, { "epoch": 66.22764689185354, "grad_norm": 0.03341085836291313, "learning_rate": 3.379960261141073e-05, "loss": 0.0007009433582425117, "step": 233320 }, { "epoch": 66.2304853817769, "grad_norm": 0.1050800085067749, "learning_rate": 3.379676412148737e-05, "loss": 0.004274044930934906, "step": 233330 }, { "epoch": 66.23332387170025, "grad_norm": 0.09115778654813766, "learning_rate": 3.379392563156401e-05, "loss": 0.001788605935871601, "step": 233340 }, { "epoch": 66.23616236162361, "grad_norm": 0.02427731454372406, "learning_rate": 3.3791087141640646e-05, "loss": 0.0003413060680031776, "step": 233350 }, { "epoch": 66.23900085154698, "grad_norm": 2.3370800018310547, "learning_rate": 3.378824865171729e-05, "loss": 0.0005744678899645805, "step": 233360 }, { "epoch": 66.24183934147034, "grad_norm": 0.23938773572444916, "learning_rate": 3.378541016179393e-05, "loss": 0.00043056365102529527, "step": 233370 }, { "epoch": 66.2446778313937, "grad_norm": 0.014057081192731857, "learning_rate": 3.3782571671870564e-05, "loss": 0.0003341382369399071, "step": 233380 }, { "epoch": 66.24751632131706, "grad_norm": 0.04909496009349823, "learning_rate": 3.3779733181947205e-05, "loss": 0.00040461476892232894, "step": 233390 }, { "epoch": 66.25035481124041, "grad_norm": 0.12517689168453217, "learning_rate": 3.3776894692023846e-05, "loss": 0.00032141637057065965, "step": 233400 }, { "epoch": 66.25319330116378, "grad_norm": 0.407720148563385, "learning_rate": 3.377405620210049e-05, "loss": 0.0004510160535573959, "step": 233410 }, { "epoch": 66.25603179108714, "grad_norm": 0.022809023037552834, "learning_rate": 3.377121771217712e-05, "loss": 0.00026702694594860077, "step": 233420 }, { "epoch": 66.2588702810105, "grad_norm": 0.08170762658119202, "learning_rate": 3.3768379222253764e-05, "loss": 0.001897326484322548, "step": 233430 }, { "epoch": 66.26170877093386, "grad_norm": 0.09008754789829254, "learning_rate": 3.3765540732330405e-05, "loss": 0.0006391020491719246, "step": 233440 }, { "epoch": 66.26454726085723, "grad_norm": 0.02216252125799656, "learning_rate": 3.376270224240704e-05, "loss": 0.0011498067528009416, "step": 233450 }, { "epoch": 66.26738575078059, "grad_norm": 0.05876651406288147, "learning_rate": 3.375986375248368e-05, "loss": 0.0004479650408029556, "step": 233460 }, { "epoch": 66.27022424070394, "grad_norm": 0.02546064555644989, "learning_rate": 3.375702526256032e-05, "loss": 0.0015215225517749786, "step": 233470 }, { "epoch": 66.2730627306273, "grad_norm": 0.19079254567623138, "learning_rate": 3.375418677263696e-05, "loss": 0.0022386979311704636, "step": 233480 }, { "epoch": 66.27590122055067, "grad_norm": 0.045802898705005646, "learning_rate": 3.37513482827136e-05, "loss": 0.00035517290234565735, "step": 233490 }, { "epoch": 66.27873971047403, "grad_norm": 0.7737349271774292, "learning_rate": 3.374850979279024e-05, "loss": 0.0003695761784911156, "step": 233500 }, { "epoch": 66.27873971047403, "eval_accuracy": 0.9774273542315762, "eval_loss": 0.0814509391784668, "eval_runtime": 32.3127, "eval_samples_per_second": 486.713, "eval_steps_per_second": 7.613, "step": 233500 }, { "epoch": 66.28157820039739, "grad_norm": 0.1334647238254547, "learning_rate": 3.3745671302866874e-05, "loss": 0.0022493500262498857, "step": 233510 }, { "epoch": 66.28441669032075, "grad_norm": 0.044024188071489334, "learning_rate": 3.3742832812943516e-05, "loss": 0.0004253266379237175, "step": 233520 }, { "epoch": 66.28725518024412, "grad_norm": 0.022740356624126434, "learning_rate": 3.373999432302016e-05, "loss": 0.002336791902780533, "step": 233530 }, { "epoch": 66.29009367016747, "grad_norm": 0.8399578332901001, "learning_rate": 3.373715583309679e-05, "loss": 0.01088150218129158, "step": 233540 }, { "epoch": 66.29293216009083, "grad_norm": 0.016112834215164185, "learning_rate": 3.373431734317343e-05, "loss": 0.005236045643687248, "step": 233550 }, { "epoch": 66.29577065001419, "grad_norm": 0.005988808814436197, "learning_rate": 3.373147885325007e-05, "loss": 0.0002348063513636589, "step": 233560 }, { "epoch": 66.29860913993755, "grad_norm": 0.02491454780101776, "learning_rate": 3.3728640363326716e-05, "loss": 0.00030242446810007094, "step": 233570 }, { "epoch": 66.30144762986092, "grad_norm": 0.47121721506118774, "learning_rate": 3.372580187340335e-05, "loss": 0.0002539573237299919, "step": 233580 }, { "epoch": 66.30428611978428, "grad_norm": 0.019390897825360298, "learning_rate": 3.3722963383479985e-05, "loss": 0.00037574581801891327, "step": 233590 }, { "epoch": 66.30712460970764, "grad_norm": 0.07653500884771347, "learning_rate": 3.372012489355663e-05, "loss": 0.0004108380526304245, "step": 233600 }, { "epoch": 66.30996309963099, "grad_norm": 0.022595003247261047, "learning_rate": 3.371728640363327e-05, "loss": 0.0017533550038933755, "step": 233610 }, { "epoch": 66.31280158955435, "grad_norm": 0.12804417312145233, "learning_rate": 3.371444791370991e-05, "loss": 0.00099282618612051, "step": 233620 }, { "epoch": 66.31564007947772, "grad_norm": 0.011718346737325191, "learning_rate": 3.371160942378655e-05, "loss": 0.00020436365157365798, "step": 233630 }, { "epoch": 66.31847856940108, "grad_norm": 0.053090233355760574, "learning_rate": 3.3708770933863185e-05, "loss": 0.0037699155509471893, "step": 233640 }, { "epoch": 66.32131705932444, "grad_norm": 0.025927796959877014, "learning_rate": 3.3705932443939827e-05, "loss": 0.0005738068372011185, "step": 233650 }, { "epoch": 66.3241555492478, "grad_norm": 0.09879015386104584, "learning_rate": 3.370309395401646e-05, "loss": 0.0015709768980741502, "step": 233660 }, { "epoch": 66.32699403917115, "grad_norm": 0.03706477954983711, "learning_rate": 3.37002554640931e-05, "loss": 0.00750417560338974, "step": 233670 }, { "epoch": 66.32983252909452, "grad_norm": 0.05168786272406578, "learning_rate": 3.3697416974169744e-05, "loss": 0.0002309763804078102, "step": 233680 }, { "epoch": 66.33267101901788, "grad_norm": 0.34112548828125, "learning_rate": 3.369457848424638e-05, "loss": 0.0004037173464894295, "step": 233690 }, { "epoch": 66.33550950894124, "grad_norm": 0.075618214905262, "learning_rate": 3.369173999432303e-05, "loss": 0.0028164977207779883, "step": 233700 }, { "epoch": 66.3383479988646, "grad_norm": 0.06753815710544586, "learning_rate": 3.368890150439966e-05, "loss": 0.00039541702717542647, "step": 233710 }, { "epoch": 66.34118648878797, "grad_norm": 0.01673709787428379, "learning_rate": 3.3686063014476296e-05, "loss": 0.0016311874613165855, "step": 233720 }, { "epoch": 66.34402497871133, "grad_norm": 0.06891032308340073, "learning_rate": 3.3683224524552944e-05, "loss": 0.002042749896645546, "step": 233730 }, { "epoch": 66.34686346863468, "grad_norm": 0.17845384776592255, "learning_rate": 3.368038603462958e-05, "loss": 0.00040593873709440233, "step": 233740 }, { "epoch": 66.34970195855804, "grad_norm": 0.0527908019721508, "learning_rate": 3.367754754470622e-05, "loss": 0.0005944699048995971, "step": 233750 }, { "epoch": 66.3525404484814, "grad_norm": 3.797447443008423, "learning_rate": 3.3674709054782855e-05, "loss": 0.0007732525467872619, "step": 233760 }, { "epoch": 66.35537893840477, "grad_norm": 0.7438935041427612, "learning_rate": 3.3671870564859496e-05, "loss": 0.0006641054525971413, "step": 233770 }, { "epoch": 66.35821742832813, "grad_norm": 0.036317892372608185, "learning_rate": 3.366903207493614e-05, "loss": 0.00031139124184846877, "step": 233780 }, { "epoch": 66.3610559182515, "grad_norm": 0.3196953535079956, "learning_rate": 3.366619358501277e-05, "loss": 0.00027534011751413346, "step": 233790 }, { "epoch": 66.36389440817486, "grad_norm": 0.16758906841278076, "learning_rate": 3.366335509508941e-05, "loss": 0.0004460209980607033, "step": 233800 }, { "epoch": 66.3667328980982, "grad_norm": 0.04394743591547012, "learning_rate": 3.3660516605166055e-05, "loss": 0.00039349719882011416, "step": 233810 }, { "epoch": 66.36957138802157, "grad_norm": 0.026773609220981598, "learning_rate": 3.365767811524269e-05, "loss": 0.0003765624016523361, "step": 233820 }, { "epoch": 66.37240987794493, "grad_norm": 0.011990039609372616, "learning_rate": 3.365483962531934e-05, "loss": 0.0022205490618944167, "step": 233830 }, { "epoch": 66.3752483678683, "grad_norm": 0.2686128616333008, "learning_rate": 3.365200113539597e-05, "loss": 0.0003711296245455742, "step": 233840 }, { "epoch": 66.37808685779166, "grad_norm": 0.8872512578964233, "learning_rate": 3.364916264547261e-05, "loss": 0.00031977221369743346, "step": 233850 }, { "epoch": 66.38092534771502, "grad_norm": 0.10426130145788193, "learning_rate": 3.364632415554925e-05, "loss": 0.0003214588388800621, "step": 233860 }, { "epoch": 66.38376383763837, "grad_norm": 0.2205161303281784, "learning_rate": 3.364348566562589e-05, "loss": 0.0002000289037823677, "step": 233870 }, { "epoch": 66.38660232756173, "grad_norm": 2.6638786792755127, "learning_rate": 3.364064717570253e-05, "loss": 0.000481300987303257, "step": 233880 }, { "epoch": 66.3894408174851, "grad_norm": 0.016233140602707863, "learning_rate": 3.3637808685779165e-05, "loss": 0.0003298191353678703, "step": 233890 }, { "epoch": 66.39227930740846, "grad_norm": 0.036975957453250885, "learning_rate": 3.363497019585581e-05, "loss": 0.0002529740333557129, "step": 233900 }, { "epoch": 66.39511779733182, "grad_norm": 3.6877834796905518, "learning_rate": 3.363213170593245e-05, "loss": 0.0009254509583115578, "step": 233910 }, { "epoch": 66.39795628725518, "grad_norm": 0.050573185086250305, "learning_rate": 3.362929321600908e-05, "loss": 0.00644277036190033, "step": 233920 }, { "epoch": 66.40079477717855, "grad_norm": 0.04055232182145119, "learning_rate": 3.3626454726085724e-05, "loss": 0.0003745067864656448, "step": 233930 }, { "epoch": 66.4036332671019, "grad_norm": 0.03012969344854355, "learning_rate": 3.3623616236162365e-05, "loss": 0.012364494800567626, "step": 233940 }, { "epoch": 66.40647175702526, "grad_norm": 0.007608199492096901, "learning_rate": 3.3620777746239e-05, "loss": 0.0003255065530538559, "step": 233950 }, { "epoch": 66.40931024694862, "grad_norm": 0.14091965556144714, "learning_rate": 3.361793925631564e-05, "loss": 0.00043797027319669724, "step": 233960 }, { "epoch": 66.41214873687198, "grad_norm": 0.013468250632286072, "learning_rate": 3.361510076639228e-05, "loss": 0.0003254568204283714, "step": 233970 }, { "epoch": 66.41498722679535, "grad_norm": 0.019465981051325798, "learning_rate": 3.361226227646892e-05, "loss": 0.00018247999250888824, "step": 233980 }, { "epoch": 66.41782571671871, "grad_norm": 0.0021905768662691116, "learning_rate": 3.360942378654556e-05, "loss": 0.0005058735609054565, "step": 233990 }, { "epoch": 66.42066420664207, "grad_norm": 0.07378256320953369, "learning_rate": 3.36065852966222e-05, "loss": 0.00017308015376329421, "step": 234000 }, { "epoch": 66.42066420664207, "eval_accuracy": 0.977745278819864, "eval_loss": 0.08340617269277573, "eval_runtime": 32.698, "eval_samples_per_second": 480.977, "eval_steps_per_second": 7.523, "step": 234000 }, { "epoch": 66.42350269656542, "grad_norm": 0.06589341908693314, "learning_rate": 3.3603746806698835e-05, "loss": 0.0004718272015452385, "step": 234010 }, { "epoch": 66.42634118648878, "grad_norm": 0.2931106388568878, "learning_rate": 3.3600908316775476e-05, "loss": 0.000377688929438591, "step": 234020 }, { "epoch": 66.42917967641215, "grad_norm": 3.7266225814819336, "learning_rate": 3.359806982685212e-05, "loss": 0.0007166512310504914, "step": 234030 }, { "epoch": 66.43201816633551, "grad_norm": 0.04121459648013115, "learning_rate": 3.359523133692876e-05, "loss": 0.00024150721728801727, "step": 234040 }, { "epoch": 66.43485665625887, "grad_norm": 0.048184558749198914, "learning_rate": 3.3592392847005394e-05, "loss": 0.0021021107211709024, "step": 234050 }, { "epoch": 66.43769514618224, "grad_norm": 0.007347151171416044, "learning_rate": 3.358955435708203e-05, "loss": 0.0003110002726316452, "step": 234060 }, { "epoch": 66.4405336361056, "grad_norm": 0.058627285063266754, "learning_rate": 3.3586715867158676e-05, "loss": 0.00026902761310338975, "step": 234070 }, { "epoch": 66.44337212602895, "grad_norm": 0.01747162826359272, "learning_rate": 3.358387737723531e-05, "loss": 0.0001813279464840889, "step": 234080 }, { "epoch": 66.44621061595231, "grad_norm": 0.10107839107513428, "learning_rate": 3.358103888731195e-05, "loss": 0.0001451432704925537, "step": 234090 }, { "epoch": 66.44904910587567, "grad_norm": 0.0721186175942421, "learning_rate": 3.3578200397388594e-05, "loss": 0.00015164129436016083, "step": 234100 }, { "epoch": 66.45188759579904, "grad_norm": 0.029579203575849533, "learning_rate": 3.357536190746523e-05, "loss": 0.00018994417041540146, "step": 234110 }, { "epoch": 66.4547260857224, "grad_norm": 0.04557596147060394, "learning_rate": 3.357252341754187e-05, "loss": 0.00017257016152143477, "step": 234120 }, { "epoch": 66.45756457564576, "grad_norm": 0.11672721803188324, "learning_rate": 3.356968492761851e-05, "loss": 0.0001310892403125763, "step": 234130 }, { "epoch": 66.46040306556911, "grad_norm": 10.214113235473633, "learning_rate": 3.3566846437695146e-05, "loss": 0.0016144296154379846, "step": 234140 }, { "epoch": 66.46324155549247, "grad_norm": 0.012520212680101395, "learning_rate": 3.356400794777179e-05, "loss": 0.0002703780308365822, "step": 234150 }, { "epoch": 66.46608004541584, "grad_norm": 0.015992283821105957, "learning_rate": 3.356116945784842e-05, "loss": 0.0002806922420859337, "step": 234160 }, { "epoch": 66.4689185353392, "grad_norm": 0.10218379646539688, "learning_rate": 3.355833096792507e-05, "loss": 0.0001648576930165291, "step": 234170 }, { "epoch": 66.47175702526256, "grad_norm": 0.026773007586598396, "learning_rate": 3.3555492478001704e-05, "loss": 0.00011934079229831695, "step": 234180 }, { "epoch": 66.47459551518592, "grad_norm": 0.0887552872300148, "learning_rate": 3.355265398807834e-05, "loss": 0.000319310836493969, "step": 234190 }, { "epoch": 66.47743400510929, "grad_norm": 0.06088581681251526, "learning_rate": 3.354981549815499e-05, "loss": 0.00015680640935897828, "step": 234200 }, { "epoch": 66.48027249503264, "grad_norm": 0.027781978249549866, "learning_rate": 3.354697700823162e-05, "loss": 0.0002354322001338005, "step": 234210 }, { "epoch": 66.483110984956, "grad_norm": 0.26573261618614197, "learning_rate": 3.354413851830826e-05, "loss": 0.0003344796597957611, "step": 234220 }, { "epoch": 66.48594947487936, "grad_norm": 0.12593974173069, "learning_rate": 3.3541300028384904e-05, "loss": 0.0006211671978235244, "step": 234230 }, { "epoch": 66.48878796480273, "grad_norm": 0.044329237192869186, "learning_rate": 3.353846153846154e-05, "loss": 0.0008790517225861549, "step": 234240 }, { "epoch": 66.49162645472609, "grad_norm": 0.053087569773197174, "learning_rate": 3.353562304853818e-05, "loss": 0.0009939033538103105, "step": 234250 }, { "epoch": 66.49446494464945, "grad_norm": 0.014603345654904842, "learning_rate": 3.3532784558614815e-05, "loss": 0.00015420559793710709, "step": 234260 }, { "epoch": 66.49730343457281, "grad_norm": 0.027813229709863663, "learning_rate": 3.3529946068691456e-05, "loss": 0.00025293733924627306, "step": 234270 }, { "epoch": 66.50014192449616, "grad_norm": 0.5406585335731506, "learning_rate": 3.35271075787681e-05, "loss": 0.0003246856853365898, "step": 234280 }, { "epoch": 66.50298041441953, "grad_norm": 0.021148554980754852, "learning_rate": 3.352426908884473e-05, "loss": 0.00020490512251853944, "step": 234290 }, { "epoch": 66.50581890434289, "grad_norm": 0.6980146765708923, "learning_rate": 3.352143059892138e-05, "loss": 0.0002507088705897331, "step": 234300 }, { "epoch": 66.50865739426625, "grad_norm": 0.1494617909193039, "learning_rate": 3.3518592108998015e-05, "loss": 0.0009024079889059066, "step": 234310 }, { "epoch": 66.51149588418961, "grad_norm": 0.018520154058933258, "learning_rate": 3.351575361907465e-05, "loss": 9.401645511388778e-05, "step": 234320 }, { "epoch": 66.51433437411298, "grad_norm": 0.023435363546013832, "learning_rate": 3.35129151291513e-05, "loss": 0.00019619259983301164, "step": 234330 }, { "epoch": 66.51717286403633, "grad_norm": 0.007228726055473089, "learning_rate": 3.351007663922793e-05, "loss": 0.0002901187166571617, "step": 234340 }, { "epoch": 66.52001135395969, "grad_norm": 0.061569694429636, "learning_rate": 3.3507238149304574e-05, "loss": 0.0001936899498105049, "step": 234350 }, { "epoch": 66.52284984388305, "grad_norm": 0.01215165015310049, "learning_rate": 3.350439965938121e-05, "loss": 0.00015190783888101577, "step": 234360 }, { "epoch": 66.52568833380641, "grad_norm": 0.02288607880473137, "learning_rate": 3.350156116945785e-05, "loss": 0.000410778820514679, "step": 234370 }, { "epoch": 66.52852682372978, "grad_norm": 0.224228173494339, "learning_rate": 3.349872267953449e-05, "loss": 0.0008495932444930077, "step": 234380 }, { "epoch": 66.53136531365314, "grad_norm": 0.03365559130907059, "learning_rate": 3.3495884189611126e-05, "loss": 0.00028967615216970446, "step": 234390 }, { "epoch": 66.5342038035765, "grad_norm": 0.021349044516682625, "learning_rate": 3.349304569968777e-05, "loss": 8.82122665643692e-05, "step": 234400 }, { "epoch": 66.53704229349985, "grad_norm": 0.010580348782241344, "learning_rate": 3.349020720976441e-05, "loss": 0.00011894870549440384, "step": 234410 }, { "epoch": 66.53988078342321, "grad_norm": 0.007859664037823677, "learning_rate": 3.348736871984104e-05, "loss": 0.00027098767459392547, "step": 234420 }, { "epoch": 66.54271927334658, "grad_norm": 0.17356199026107788, "learning_rate": 3.3484530229917684e-05, "loss": 0.00022247806191444397, "step": 234430 }, { "epoch": 66.54555776326994, "grad_norm": 0.48101910948753357, "learning_rate": 3.3481691739994326e-05, "loss": 0.0006773065775632858, "step": 234440 }, { "epoch": 66.5483962531933, "grad_norm": 0.01915062591433525, "learning_rate": 3.347885325007096e-05, "loss": 0.0001407390460371971, "step": 234450 }, { "epoch": 66.55123474311667, "grad_norm": 0.10281495004892349, "learning_rate": 3.34760147601476e-05, "loss": 0.0003403542563319206, "step": 234460 }, { "epoch": 66.55407323304003, "grad_norm": 0.09245515614748001, "learning_rate": 3.347317627022424e-05, "loss": 0.0005809513852000237, "step": 234470 }, { "epoch": 66.55691172296338, "grad_norm": 0.016669712960720062, "learning_rate": 3.347033778030088e-05, "loss": 0.00020486824214458466, "step": 234480 }, { "epoch": 66.55975021288674, "grad_norm": 0.016073841601610184, "learning_rate": 3.346749929037752e-05, "loss": 0.0020621690899133684, "step": 234490 }, { "epoch": 66.5625887028101, "grad_norm": 0.07154697924852371, "learning_rate": 3.346466080045416e-05, "loss": 0.00030030272901058195, "step": 234500 }, { "epoch": 66.5625887028101, "eval_accuracy": 0.975837731290138, "eval_loss": 0.09168843924999237, "eval_runtime": 32.922, "eval_samples_per_second": 477.705, "eval_steps_per_second": 7.472, "step": 234500 }, { "epoch": 66.56542719273347, "grad_norm": 0.07031968235969543, "learning_rate": 3.34618223105308e-05, "loss": 0.002065764181315899, "step": 234510 }, { "epoch": 66.56826568265683, "grad_norm": 0.22080214321613312, "learning_rate": 3.3458983820607436e-05, "loss": 0.0002716893330216408, "step": 234520 }, { "epoch": 66.57110417258019, "grad_norm": 0.6440873146057129, "learning_rate": 3.345614533068408e-05, "loss": 0.00027624107897281645, "step": 234530 }, { "epoch": 66.57394266250355, "grad_norm": 0.08325404673814774, "learning_rate": 3.345330684076072e-05, "loss": 0.0019241798669099807, "step": 234540 }, { "epoch": 66.5767811524269, "grad_norm": 0.010023496113717556, "learning_rate": 3.3450468350837354e-05, "loss": 0.0005561424419283867, "step": 234550 }, { "epoch": 66.57961964235027, "grad_norm": 0.8107993006706238, "learning_rate": 3.3447629860913995e-05, "loss": 0.0032136693596839907, "step": 234560 }, { "epoch": 66.58245813227363, "grad_norm": 0.03810854256153107, "learning_rate": 3.3444791370990637e-05, "loss": 0.0003692707046866417, "step": 234570 }, { "epoch": 66.58529662219699, "grad_norm": 0.13931377232074738, "learning_rate": 3.344195288106727e-05, "loss": 0.00019149426370859146, "step": 234580 }, { "epoch": 66.58813511212036, "grad_norm": 0.0054755667224526405, "learning_rate": 3.343911439114391e-05, "loss": 0.00030373167246580126, "step": 234590 }, { "epoch": 66.59097360204372, "grad_norm": 0.010160554200410843, "learning_rate": 3.3436275901220554e-05, "loss": 0.00018461793661117554, "step": 234600 }, { "epoch": 66.59381209196707, "grad_norm": 0.02645380049943924, "learning_rate": 3.343343741129719e-05, "loss": 0.0006392018869519234, "step": 234610 }, { "epoch": 66.59665058189043, "grad_norm": 0.21067896485328674, "learning_rate": 3.343059892137383e-05, "loss": 0.00034037008881568907, "step": 234620 }, { "epoch": 66.59948907181379, "grad_norm": 0.009810393676161766, "learning_rate": 3.342776043145047e-05, "loss": 0.00025226250290870665, "step": 234630 }, { "epoch": 66.60232756173716, "grad_norm": 0.017139559611678123, "learning_rate": 3.342492194152711e-05, "loss": 0.0002998366951942444, "step": 234640 }, { "epoch": 66.60516605166052, "grad_norm": 0.04511978477239609, "learning_rate": 3.342208345160375e-05, "loss": 0.00017644967883825302, "step": 234650 }, { "epoch": 66.60800454158388, "grad_norm": 0.04664217680692673, "learning_rate": 3.341924496168038e-05, "loss": 0.000860685296356678, "step": 234660 }, { "epoch": 66.61084303150724, "grad_norm": 1.1817643642425537, "learning_rate": 3.341640647175703e-05, "loss": 0.0003343012183904648, "step": 234670 }, { "epoch": 66.61368152143059, "grad_norm": 0.017945392057299614, "learning_rate": 3.3413567981833665e-05, "loss": 0.003979169577360153, "step": 234680 }, { "epoch": 66.61652001135396, "grad_norm": 0.014816789887845516, "learning_rate": 3.3410729491910306e-05, "loss": 0.00016241706907749176, "step": 234690 }, { "epoch": 66.61935850127732, "grad_norm": 0.007268806919455528, "learning_rate": 3.340789100198695e-05, "loss": 0.0011585166677832604, "step": 234700 }, { "epoch": 66.62219699120068, "grad_norm": 0.46014514565467834, "learning_rate": 3.340505251206358e-05, "loss": 0.00252775214612484, "step": 234710 }, { "epoch": 66.62503548112404, "grad_norm": 0.11951228231191635, "learning_rate": 3.340221402214022e-05, "loss": 0.0001375945284962654, "step": 234720 }, { "epoch": 66.6278739710474, "grad_norm": 0.07979801297187805, "learning_rate": 3.3399375532216865e-05, "loss": 0.0038548178970813753, "step": 234730 }, { "epoch": 66.63071246097077, "grad_norm": 0.5874942541122437, "learning_rate": 3.33965370422935e-05, "loss": 0.0004411023110151291, "step": 234740 }, { "epoch": 66.63355095089412, "grad_norm": 0.02940540947020054, "learning_rate": 3.339369855237014e-05, "loss": 0.0011964548379182816, "step": 234750 }, { "epoch": 66.63638944081748, "grad_norm": 0.2681537866592407, "learning_rate": 3.339086006244678e-05, "loss": 0.002540280483663082, "step": 234760 }, { "epoch": 66.63922793074084, "grad_norm": 6.239147186279297, "learning_rate": 3.3388021572523423e-05, "loss": 0.001138824038207531, "step": 234770 }, { "epoch": 66.64206642066421, "grad_norm": 0.10596702247858047, "learning_rate": 3.338518308260006e-05, "loss": 0.00023179370909929276, "step": 234780 }, { "epoch": 66.64490491058757, "grad_norm": 0.7297640442848206, "learning_rate": 3.338234459267669e-05, "loss": 0.00035857707262039186, "step": 234790 }, { "epoch": 66.64774340051093, "grad_norm": 0.03160128369927406, "learning_rate": 3.337950610275334e-05, "loss": 0.0005787720903754235, "step": 234800 }, { "epoch": 66.6505818904343, "grad_norm": 0.1653982400894165, "learning_rate": 3.3376667612829975e-05, "loss": 0.00025734826922416685, "step": 234810 }, { "epoch": 66.65342038035764, "grad_norm": 0.0072740837931632996, "learning_rate": 3.337382912290662e-05, "loss": 0.0003133151680231094, "step": 234820 }, { "epoch": 66.65625887028101, "grad_norm": 0.127198725938797, "learning_rate": 3.337099063298326e-05, "loss": 0.001746891252696514, "step": 234830 }, { "epoch": 66.65909736020437, "grad_norm": 0.01219212356954813, "learning_rate": 3.336815214305989e-05, "loss": 0.0003551337867975235, "step": 234840 }, { "epoch": 66.66193585012773, "grad_norm": 0.022250259295105934, "learning_rate": 3.3365313653136534e-05, "loss": 0.0051961980760097505, "step": 234850 }, { "epoch": 66.6647743400511, "grad_norm": 0.0717395693063736, "learning_rate": 3.3362475163213175e-05, "loss": 0.0003151543438434601, "step": 234860 }, { "epoch": 66.66761282997446, "grad_norm": 0.3620184659957886, "learning_rate": 3.335963667328981e-05, "loss": 0.00012973099946975709, "step": 234870 }, { "epoch": 66.67045131989781, "grad_norm": 0.8965954184532166, "learning_rate": 3.335679818336645e-05, "loss": 0.0005037592723965645, "step": 234880 }, { "epoch": 66.67328980982117, "grad_norm": 0.0550510510802269, "learning_rate": 3.3353959693443086e-05, "loss": 0.00017678085714578629, "step": 234890 }, { "epoch": 66.67612829974453, "grad_norm": 0.49060869216918945, "learning_rate": 3.335112120351973e-05, "loss": 0.0018667595461010933, "step": 234900 }, { "epoch": 66.6789667896679, "grad_norm": 0.09097166359424591, "learning_rate": 3.334828271359637e-05, "loss": 0.0006500331684947014, "step": 234910 }, { "epoch": 66.68180527959126, "grad_norm": 0.22833950817584991, "learning_rate": 3.3345444223673e-05, "loss": 0.00037789177149534227, "step": 234920 }, { "epoch": 66.68464376951462, "grad_norm": 0.0640188530087471, "learning_rate": 3.334260573374965e-05, "loss": 0.00014430582523345948, "step": 234930 }, { "epoch": 66.68748225943799, "grad_norm": 0.09780203551054001, "learning_rate": 3.3339767243826286e-05, "loss": 0.0003250345587730408, "step": 234940 }, { "epoch": 66.69032074936133, "grad_norm": 0.09841728210449219, "learning_rate": 3.333692875390292e-05, "loss": 0.0013363007456064224, "step": 234950 }, { "epoch": 66.6931592392847, "grad_norm": 0.05241420120000839, "learning_rate": 3.333409026397957e-05, "loss": 0.0004927139729261398, "step": 234960 }, { "epoch": 66.69599772920806, "grad_norm": 0.21192508935928345, "learning_rate": 3.3331251774056203e-05, "loss": 0.0004928139969706535, "step": 234970 }, { "epoch": 66.69883621913142, "grad_norm": 0.016908567398786545, "learning_rate": 3.3328413284132845e-05, "loss": 0.0012470027431845665, "step": 234980 }, { "epoch": 66.70167470905479, "grad_norm": 0.014833590015769005, "learning_rate": 3.332557479420948e-05, "loss": 0.0005156420171260834, "step": 234990 }, { "epoch": 66.70451319897815, "grad_norm": 0.05100671947002411, "learning_rate": 3.332273630428612e-05, "loss": 0.00035948809236288073, "step": 235000 }, { "epoch": 66.70451319897815, "eval_accuracy": 0.9776816939022064, "eval_loss": 0.08278652280569077, "eval_runtime": 32.9879, "eval_samples_per_second": 476.751, "eval_steps_per_second": 7.457, "step": 235000 }, { "epoch": 66.70735168890151, "grad_norm": 0.046206194907426834, "learning_rate": 3.331989781436276e-05, "loss": 0.00023380685597658158, "step": 235010 }, { "epoch": 66.71019017882486, "grad_norm": 0.7454606890678406, "learning_rate": 3.33170593244394e-05, "loss": 0.00045396313071250917, "step": 235020 }, { "epoch": 66.71302866874822, "grad_norm": 1.1556059122085571, "learning_rate": 3.331422083451604e-05, "loss": 0.0007166549563407898, "step": 235030 }, { "epoch": 66.71586715867159, "grad_norm": 0.3082740306854248, "learning_rate": 3.331138234459268e-05, "loss": 0.0019123798236250877, "step": 235040 }, { "epoch": 66.71870564859495, "grad_norm": 0.26297125220298767, "learning_rate": 3.3308543854669314e-05, "loss": 0.0004970375448465347, "step": 235050 }, { "epoch": 66.72154413851831, "grad_norm": 0.06687350571155548, "learning_rate": 3.330570536474596e-05, "loss": 0.0052892804145812985, "step": 235060 }, { "epoch": 66.72438262844167, "grad_norm": 4.687132835388184, "learning_rate": 3.33028668748226e-05, "loss": 0.002394319698214531, "step": 235070 }, { "epoch": 66.72722111836502, "grad_norm": 0.0725567638874054, "learning_rate": 3.330002838489923e-05, "loss": 0.00046697091311216355, "step": 235080 }, { "epoch": 66.73005960828839, "grad_norm": 0.0570666529238224, "learning_rate": 3.329718989497587e-05, "loss": 0.009010647982358932, "step": 235090 }, { "epoch": 66.73289809821175, "grad_norm": 0.051440946757793427, "learning_rate": 3.3294351405052514e-05, "loss": 0.00022988952696323395, "step": 235100 }, { "epoch": 66.73573658813511, "grad_norm": 0.08631904423236847, "learning_rate": 3.3291512915129156e-05, "loss": 0.0002056039869785309, "step": 235110 }, { "epoch": 66.73857507805847, "grad_norm": 0.00829800870269537, "learning_rate": 3.328867442520579e-05, "loss": 0.0003940058872103691, "step": 235120 }, { "epoch": 66.74141356798184, "grad_norm": 0.037061478942632675, "learning_rate": 3.328583593528243e-05, "loss": 0.0004971334710717201, "step": 235130 }, { "epoch": 66.7442520579052, "grad_norm": 0.022929465398192406, "learning_rate": 3.328299744535907e-05, "loss": 0.0016278211027383805, "step": 235140 }, { "epoch": 66.74709054782855, "grad_norm": 0.1881955862045288, "learning_rate": 3.328015895543571e-05, "loss": 0.00035176407545804976, "step": 235150 }, { "epoch": 66.74992903775191, "grad_norm": 0.035488251596689224, "learning_rate": 3.327732046551235e-05, "loss": 0.00022542625665664673, "step": 235160 }, { "epoch": 66.75276752767527, "grad_norm": 0.03501230850815773, "learning_rate": 3.327448197558899e-05, "loss": 0.0013707667589187623, "step": 235170 }, { "epoch": 66.75560601759864, "grad_norm": 0.014143884181976318, "learning_rate": 3.3271643485665625e-05, "loss": 0.0011176902800798417, "step": 235180 }, { "epoch": 66.758444507522, "grad_norm": 0.028458207845687866, "learning_rate": 3.3268804995742266e-05, "loss": 0.00018136892467737197, "step": 235190 }, { "epoch": 66.76128299744536, "grad_norm": 0.04401281103491783, "learning_rate": 3.326596650581891e-05, "loss": 0.0010701535269618035, "step": 235200 }, { "epoch": 66.76412148736873, "grad_norm": 0.1862560659646988, "learning_rate": 3.326312801589554e-05, "loss": 0.00041711609810590744, "step": 235210 }, { "epoch": 66.76695997729207, "grad_norm": 0.028274785727262497, "learning_rate": 3.3260289525972184e-05, "loss": 0.0008732037618756294, "step": 235220 }, { "epoch": 66.76979846721544, "grad_norm": 0.08757782727479935, "learning_rate": 3.3257451036048825e-05, "loss": 0.0005283936858177185, "step": 235230 }, { "epoch": 66.7726369571388, "grad_norm": 0.13802844285964966, "learning_rate": 3.3254612546125466e-05, "loss": 0.0023586975410580637, "step": 235240 }, { "epoch": 66.77547544706216, "grad_norm": 0.08174683898687363, "learning_rate": 3.32517740562021e-05, "loss": 0.0002231638878583908, "step": 235250 }, { "epoch": 66.77831393698553, "grad_norm": 0.00873940996825695, "learning_rate": 3.324893556627874e-05, "loss": 0.0005353214219212532, "step": 235260 }, { "epoch": 66.78115242690889, "grad_norm": 0.027028804644942284, "learning_rate": 3.3246097076355384e-05, "loss": 0.0006082732230424881, "step": 235270 }, { "epoch": 66.78399091683225, "grad_norm": 0.044380154460668564, "learning_rate": 3.324325858643202e-05, "loss": 0.00013571232557296753, "step": 235280 }, { "epoch": 66.7868294067556, "grad_norm": 0.682490348815918, "learning_rate": 3.324042009650866e-05, "loss": 0.0002600835636258125, "step": 235290 }, { "epoch": 66.78966789667896, "grad_norm": 0.0991189032793045, "learning_rate": 3.32375816065853e-05, "loss": 0.00036035943776369095, "step": 235300 }, { "epoch": 66.79250638660233, "grad_norm": 0.14955998957157135, "learning_rate": 3.3234743116661936e-05, "loss": 0.00011868607252836227, "step": 235310 }, { "epoch": 66.79534487652569, "grad_norm": 0.0032521849498152733, "learning_rate": 3.323190462673858e-05, "loss": 7.670726627111435e-05, "step": 235320 }, { "epoch": 66.79818336644905, "grad_norm": 0.07133053243160248, "learning_rate": 3.322906613681522e-05, "loss": 0.00019389744848012923, "step": 235330 }, { "epoch": 66.80102185637242, "grad_norm": 0.022408898919820786, "learning_rate": 3.322622764689185e-05, "loss": 0.00011443421244621277, "step": 235340 }, { "epoch": 66.80386034629576, "grad_norm": 0.014638759195804596, "learning_rate": 3.3223389156968494e-05, "loss": 0.00016908291727304458, "step": 235350 }, { "epoch": 66.80669883621913, "grad_norm": 0.014730146154761314, "learning_rate": 3.3220550667045136e-05, "loss": 0.00011240243911743164, "step": 235360 }, { "epoch": 66.80953732614249, "grad_norm": 1.3466508388519287, "learning_rate": 3.321771217712177e-05, "loss": 0.0005625693127512931, "step": 235370 }, { "epoch": 66.81237581606585, "grad_norm": 0.08747067302465439, "learning_rate": 3.321487368719841e-05, "loss": 0.00019540563225746156, "step": 235380 }, { "epoch": 66.81521430598922, "grad_norm": 0.08474695682525635, "learning_rate": 3.3212035197275046e-05, "loss": 0.00019948557019233704, "step": 235390 }, { "epoch": 66.81805279591258, "grad_norm": 0.08218106627464294, "learning_rate": 3.3209196707351694e-05, "loss": 0.00010387785732746124, "step": 235400 }, { "epoch": 66.82089128583594, "grad_norm": 0.06416016072034836, "learning_rate": 3.320635821742833e-05, "loss": 0.0005580108612775802, "step": 235410 }, { "epoch": 66.82372977575929, "grad_norm": 0.16667059063911438, "learning_rate": 3.32038035764973e-05, "loss": 0.004357136785984039, "step": 235420 }, { "epoch": 66.82656826568265, "grad_norm": 0.018854497000575066, "learning_rate": 3.3200965086573944e-05, "loss": 0.0002620039507746696, "step": 235430 }, { "epoch": 66.82940675560602, "grad_norm": 0.01709855906665325, "learning_rate": 3.3198126596650585e-05, "loss": 9.83433797955513e-05, "step": 235440 }, { "epoch": 66.83224524552938, "grad_norm": 0.2866460084915161, "learning_rate": 3.3195288106727226e-05, "loss": 0.00020274538546800614, "step": 235450 }, { "epoch": 66.83508373545274, "grad_norm": 0.020833667367696762, "learning_rate": 3.319244961680386e-05, "loss": 0.0002082526683807373, "step": 235460 }, { "epoch": 66.8379222253761, "grad_norm": 0.013686666265130043, "learning_rate": 3.3189611126880496e-05, "loss": 8.748061954975129e-05, "step": 235470 }, { "epoch": 66.84076071529947, "grad_norm": 0.007855470292270184, "learning_rate": 3.3186772636957144e-05, "loss": 0.0003111237660050392, "step": 235480 }, { "epoch": 66.84359920522282, "grad_norm": 0.0247490294277668, "learning_rate": 3.318393414703378e-05, "loss": 9.73207876086235e-05, "step": 235490 }, { "epoch": 66.84643769514618, "grad_norm": 0.056430425494909286, "learning_rate": 3.318109565711042e-05, "loss": 0.0012490108609199523, "step": 235500 }, { "epoch": 66.84643769514618, "eval_accuracy": 0.9812424492910282, "eval_loss": 0.07150499522686005, "eval_runtime": 32.4294, "eval_samples_per_second": 484.961, "eval_steps_per_second": 7.586, "step": 235500 }, { "epoch": 66.84927618506954, "grad_norm": 0.13038544356822968, "learning_rate": 3.317825716718706e-05, "loss": 0.00013990867882966994, "step": 235510 }, { "epoch": 66.8521146749929, "grad_norm": 0.012005079537630081, "learning_rate": 3.3175418677263696e-05, "loss": 0.0002109117805957794, "step": 235520 }, { "epoch": 66.85495316491627, "grad_norm": 1.156873106956482, "learning_rate": 3.317258018734034e-05, "loss": 0.0002911534160375595, "step": 235530 }, { "epoch": 66.85779165483963, "grad_norm": 0.01825658790767193, "learning_rate": 3.316974169741698e-05, "loss": 0.00010190289467573165, "step": 235540 }, { "epoch": 66.86063014476298, "grad_norm": 0.11198166757822037, "learning_rate": 3.316690320749361e-05, "loss": 0.0008565917611122132, "step": 235550 }, { "epoch": 66.86346863468634, "grad_norm": 0.011861014179885387, "learning_rate": 3.3164064717570254e-05, "loss": 0.00015586987137794495, "step": 235560 }, { "epoch": 66.8663071246097, "grad_norm": 0.008553036488592625, "learning_rate": 3.316122622764689e-05, "loss": 0.0001423928886651993, "step": 235570 }, { "epoch": 66.86914561453307, "grad_norm": 0.21452787518501282, "learning_rate": 3.315838773772354e-05, "loss": 0.0011643495410680771, "step": 235580 }, { "epoch": 66.87198410445643, "grad_norm": 0.028578972443938255, "learning_rate": 3.315554924780017e-05, "loss": 0.0001456761732697487, "step": 235590 }, { "epoch": 66.8748225943798, "grad_norm": 0.46401247382164, "learning_rate": 3.3152710757876806e-05, "loss": 0.0009338928386569024, "step": 235600 }, { "epoch": 66.87766108430316, "grad_norm": 0.020954176783561707, "learning_rate": 3.3149872267953454e-05, "loss": 0.00014038663357496262, "step": 235610 }, { "epoch": 66.8804995742265, "grad_norm": 0.4040793478488922, "learning_rate": 3.314703377803009e-05, "loss": 0.0003499394282698631, "step": 235620 }, { "epoch": 66.88333806414987, "grad_norm": 0.058987364172935486, "learning_rate": 3.314419528810673e-05, "loss": 0.00220202449709177, "step": 235630 }, { "epoch": 66.88617655407323, "grad_norm": 0.32006245851516724, "learning_rate": 3.314135679818337e-05, "loss": 0.00024179145693778992, "step": 235640 }, { "epoch": 66.8890150439966, "grad_norm": 0.19428128004074097, "learning_rate": 3.3138518308260006e-05, "loss": 0.0007002253085374832, "step": 235650 }, { "epoch": 66.89185353391996, "grad_norm": 0.13538797199726105, "learning_rate": 3.313567981833665e-05, "loss": 0.00021080709993839263, "step": 235660 }, { "epoch": 66.89469202384332, "grad_norm": 6.9469733238220215, "learning_rate": 3.313284132841328e-05, "loss": 0.005340106040239334, "step": 235670 }, { "epoch": 66.89753051376668, "grad_norm": 0.06877300143241882, "learning_rate": 3.3130002838489924e-05, "loss": 0.0005626648664474488, "step": 235680 }, { "epoch": 66.90036900369003, "grad_norm": 4.9698052406311035, "learning_rate": 3.3127164348566565e-05, "loss": 0.0007434243336319924, "step": 235690 }, { "epoch": 66.9032074936134, "grad_norm": 3.7375853061676025, "learning_rate": 3.31243258586432e-05, "loss": 0.0018354076892137527, "step": 235700 }, { "epoch": 66.90604598353676, "grad_norm": 0.010282821953296661, "learning_rate": 3.312148736871985e-05, "loss": 0.0051972895860672, "step": 235710 }, { "epoch": 66.90888447346012, "grad_norm": 0.3919256627559662, "learning_rate": 3.311864887879648e-05, "loss": 0.00022554751485586165, "step": 235720 }, { "epoch": 66.91172296338348, "grad_norm": 0.387600839138031, "learning_rate": 3.311581038887312e-05, "loss": 0.0003899676725268364, "step": 235730 }, { "epoch": 66.91456145330685, "grad_norm": 0.2507328391075134, "learning_rate": 3.3112971898949765e-05, "loss": 0.00034430380910634997, "step": 235740 }, { "epoch": 66.91739994323021, "grad_norm": 0.014165028929710388, "learning_rate": 3.31101334090264e-05, "loss": 0.0001960117369890213, "step": 235750 }, { "epoch": 66.92023843315356, "grad_norm": 0.029842572286725044, "learning_rate": 3.310729491910304e-05, "loss": 0.0002470238134264946, "step": 235760 }, { "epoch": 66.92307692307692, "grad_norm": 0.58656907081604, "learning_rate": 3.3104456429179676e-05, "loss": 0.0002323698252439499, "step": 235770 }, { "epoch": 66.92591541300028, "grad_norm": 0.1217036321759224, "learning_rate": 3.310161793925632e-05, "loss": 0.00032173618674278257, "step": 235780 }, { "epoch": 66.92875390292365, "grad_norm": 0.014387066476047039, "learning_rate": 3.309877944933296e-05, "loss": 0.006360673904418945, "step": 235790 }, { "epoch": 66.93159239284701, "grad_norm": 2.6611039638519287, "learning_rate": 3.309594095940959e-05, "loss": 0.000576053187251091, "step": 235800 }, { "epoch": 66.93443088277037, "grad_norm": 0.014019868336617947, "learning_rate": 3.3093102469486235e-05, "loss": 0.0005475856363773346, "step": 235810 }, { "epoch": 66.93726937269372, "grad_norm": 0.014015834778547287, "learning_rate": 3.3090263979562876e-05, "loss": 0.0019443454220890999, "step": 235820 }, { "epoch": 66.94010786261708, "grad_norm": 0.25374945998191833, "learning_rate": 3.308742548963951e-05, "loss": 0.000277322344481945, "step": 235830 }, { "epoch": 66.94294635254045, "grad_norm": 0.0697697252035141, "learning_rate": 3.308458699971615e-05, "loss": 0.0034619767218828203, "step": 235840 }, { "epoch": 66.94578484246381, "grad_norm": 0.02397332713007927, "learning_rate": 3.308174850979279e-05, "loss": 0.0021901439875364305, "step": 235850 }, { "epoch": 66.94862333238717, "grad_norm": 0.10184179991483688, "learning_rate": 3.307891001986943e-05, "loss": 0.0005130277946591377, "step": 235860 }, { "epoch": 66.95146182231053, "grad_norm": 0.00733168376609683, "learning_rate": 3.307607152994607e-05, "loss": 0.006280055642127991, "step": 235870 }, { "epoch": 66.9543003122339, "grad_norm": 16.944761276245117, "learning_rate": 3.307323304002271e-05, "loss": 0.0033344339579343798, "step": 235880 }, { "epoch": 66.95713880215725, "grad_norm": 1.4785228967666626, "learning_rate": 3.3070394550099345e-05, "loss": 0.001107693649828434, "step": 235890 }, { "epoch": 66.95997729208061, "grad_norm": 0.10607023537158966, "learning_rate": 3.3067556060175987e-05, "loss": 0.0003042459487915039, "step": 235900 }, { "epoch": 66.96281578200397, "grad_norm": 0.45717352628707886, "learning_rate": 3.306471757025263e-05, "loss": 0.00026686694473028184, "step": 235910 }, { "epoch": 66.96565427192733, "grad_norm": 0.029951220378279686, "learning_rate": 3.306187908032927e-05, "loss": 0.0001962263137102127, "step": 235920 }, { "epoch": 66.9684927618507, "grad_norm": 0.10945891588926315, "learning_rate": 3.3059040590405904e-05, "loss": 0.00030364785343408587, "step": 235930 }, { "epoch": 66.97133125177406, "grad_norm": 0.014496280811727047, "learning_rate": 3.3056202100482545e-05, "loss": 0.0002502577379345894, "step": 235940 }, { "epoch": 66.97416974169742, "grad_norm": 0.0050577437505126, "learning_rate": 3.305336361055919e-05, "loss": 0.005977569520473481, "step": 235950 }, { "epoch": 66.97700823162077, "grad_norm": 0.005154362879693508, "learning_rate": 3.305052512063582e-05, "loss": 0.0002639744430780411, "step": 235960 }, { "epoch": 66.97984672154413, "grad_norm": 0.027526360005140305, "learning_rate": 3.304768663071246e-05, "loss": 0.0016635648906230926, "step": 235970 }, { "epoch": 66.9826852114675, "grad_norm": 0.03489028289914131, "learning_rate": 3.3044848140789104e-05, "loss": 0.00024952106177806855, "step": 235980 }, { "epoch": 66.98552370139086, "grad_norm": 0.0067343455739319324, "learning_rate": 3.304200965086574e-05, "loss": 0.00011092647910118102, "step": 235990 }, { "epoch": 66.98836219131422, "grad_norm": 0.25556960701942444, "learning_rate": 3.303917116094238e-05, "loss": 0.00021039173007011413, "step": 236000 }, { "epoch": 66.98836219131422, "eval_accuracy": 0.9786990525847269, "eval_loss": 0.07723768800497055, "eval_runtime": 32.4536, "eval_samples_per_second": 484.6, "eval_steps_per_second": 7.58, "step": 236000 }, { "epoch": 66.99120068123759, "grad_norm": 0.006481558084487915, "learning_rate": 3.303633267101902e-05, "loss": 0.00045680683106184004, "step": 236010 }, { "epoch": 66.99403917116095, "grad_norm": 0.011692636646330357, "learning_rate": 3.3033494181095656e-05, "loss": 0.00029914453625679016, "step": 236020 }, { "epoch": 66.9968776610843, "grad_norm": 0.4017930030822754, "learning_rate": 3.30306556911723e-05, "loss": 0.0002335941419005394, "step": 236030 }, { "epoch": 66.99971615100766, "grad_norm": 0.0355517640709877, "learning_rate": 3.302781720124894e-05, "loss": 0.0002097446471452713, "step": 236040 }, { "epoch": 67.00255464093102, "grad_norm": 0.0060869003646075726, "learning_rate": 3.302497871132558e-05, "loss": 0.00012331365142017602, "step": 236050 }, { "epoch": 67.00539313085439, "grad_norm": 0.00845679547637701, "learning_rate": 3.3022140221402215e-05, "loss": 0.00012698248028755187, "step": 236060 }, { "epoch": 67.00823162077775, "grad_norm": 0.0244741328060627, "learning_rate": 3.301930173147885e-05, "loss": 0.0001370362937450409, "step": 236070 }, { "epoch": 67.01107011070111, "grad_norm": 0.003217400750145316, "learning_rate": 3.30164632415555e-05, "loss": 0.00016958098858594895, "step": 236080 }, { "epoch": 67.01390860062446, "grad_norm": 0.05049051344394684, "learning_rate": 3.301362475163213e-05, "loss": 9.31398943066597e-05, "step": 236090 }, { "epoch": 67.01674709054782, "grad_norm": 0.004134889226406813, "learning_rate": 3.3010786261708773e-05, "loss": 9.798165410757065e-05, "step": 236100 }, { "epoch": 67.01958558047119, "grad_norm": 0.09039821475744247, "learning_rate": 3.3007947771785415e-05, "loss": 9.245406836271286e-05, "step": 236110 }, { "epoch": 67.02242407039455, "grad_norm": 0.06599873304367065, "learning_rate": 3.300510928186205e-05, "loss": 8.301381021738052e-05, "step": 236120 }, { "epoch": 67.02526256031791, "grad_norm": 0.026155656203627586, "learning_rate": 3.300227079193869e-05, "loss": 0.00023772474378347397, "step": 236130 }, { "epoch": 67.02810105024128, "grad_norm": 0.06915438920259476, "learning_rate": 3.299943230201533e-05, "loss": 0.0001914042979478836, "step": 236140 }, { "epoch": 67.03093954016464, "grad_norm": 0.027031930163502693, "learning_rate": 3.299659381209197e-05, "loss": 0.00032555293291807174, "step": 236150 }, { "epoch": 67.03377803008799, "grad_norm": 0.014741223305463791, "learning_rate": 3.299375532216861e-05, "loss": 0.00104784294962883, "step": 236160 }, { "epoch": 67.03661652001135, "grad_norm": 0.0518384650349617, "learning_rate": 3.299091683224524e-05, "loss": 8.876565843820571e-05, "step": 236170 }, { "epoch": 67.03945500993471, "grad_norm": 0.012188301421701908, "learning_rate": 3.298807834232189e-05, "loss": 0.0017452221363782883, "step": 236180 }, { "epoch": 67.04229349985808, "grad_norm": 0.7081728577613831, "learning_rate": 3.2985239852398525e-05, "loss": 0.00017871428281068802, "step": 236190 }, { "epoch": 67.04513198978144, "grad_norm": 0.0620727464556694, "learning_rate": 3.298240136247516e-05, "loss": 0.00018891040235757828, "step": 236200 }, { "epoch": 67.0479704797048, "grad_norm": 0.0177723728120327, "learning_rate": 3.297956287255181e-05, "loss": 0.0001420043408870697, "step": 236210 }, { "epoch": 67.05080896962816, "grad_norm": 0.042667847126722336, "learning_rate": 3.297672438262844e-05, "loss": 0.00010299999266862869, "step": 236220 }, { "epoch": 67.05364745955151, "grad_norm": 0.0521695651113987, "learning_rate": 3.2973885892705084e-05, "loss": 0.00011220201849937439, "step": 236230 }, { "epoch": 67.05648594947488, "grad_norm": 0.016194134950637817, "learning_rate": 3.2971047402781726e-05, "loss": 7.477141916751861e-05, "step": 236240 }, { "epoch": 67.05932443939824, "grad_norm": 0.1353268027305603, "learning_rate": 3.296820891285836e-05, "loss": 8.58057290315628e-05, "step": 236250 }, { "epoch": 67.0621629293216, "grad_norm": 0.029034795239567757, "learning_rate": 3.2965370422935e-05, "loss": 0.00016581341624259948, "step": 236260 }, { "epoch": 67.06500141924496, "grad_norm": 0.09992945194244385, "learning_rate": 3.2962531933011636e-05, "loss": 0.0004752945154905319, "step": 236270 }, { "epoch": 67.06783990916833, "grad_norm": 1.022908329963684, "learning_rate": 3.295969344308828e-05, "loss": 0.00034801457077264785, "step": 236280 }, { "epoch": 67.07067839909168, "grad_norm": 0.0667208656668663, "learning_rate": 3.295685495316492e-05, "loss": 0.00013106036931276321, "step": 236290 }, { "epoch": 67.07351688901504, "grad_norm": 0.050853487104177475, "learning_rate": 3.2954016463241553e-05, "loss": 0.0003141326829791069, "step": 236300 }, { "epoch": 67.0763553789384, "grad_norm": 0.011382436379790306, "learning_rate": 3.2951177973318195e-05, "loss": 0.005402231961488724, "step": 236310 }, { "epoch": 67.07919386886176, "grad_norm": 0.0899774581193924, "learning_rate": 3.2948339483394836e-05, "loss": 0.0026313124224543573, "step": 236320 }, { "epoch": 67.08203235878513, "grad_norm": 0.010656670667231083, "learning_rate": 3.294550099347147e-05, "loss": 0.0004744578152894974, "step": 236330 }, { "epoch": 67.08487084870849, "grad_norm": 0.04786789417266846, "learning_rate": 3.294266250354812e-05, "loss": 0.00016459617763757705, "step": 236340 }, { "epoch": 67.08770933863185, "grad_norm": 0.023630335927009583, "learning_rate": 3.2939824013624754e-05, "loss": 0.002091119438409805, "step": 236350 }, { "epoch": 67.0905478285552, "grad_norm": 0.06522274017333984, "learning_rate": 3.293698552370139e-05, "loss": 0.0006758859381079674, "step": 236360 }, { "epoch": 67.09338631847857, "grad_norm": 0.04336344823241234, "learning_rate": 3.293414703377803e-05, "loss": 0.008488357812166215, "step": 236370 }, { "epoch": 67.09622480840193, "grad_norm": 0.06604571640491486, "learning_rate": 3.293130854385467e-05, "loss": 0.0002777904272079468, "step": 236380 }, { "epoch": 67.09906329832529, "grad_norm": 9.959754943847656, "learning_rate": 3.292847005393131e-05, "loss": 0.001907011680305004, "step": 236390 }, { "epoch": 67.10190178824865, "grad_norm": 0.0034141696523875, "learning_rate": 3.292563156400795e-05, "loss": 0.00022412482649087907, "step": 236400 }, { "epoch": 67.10474027817202, "grad_norm": 0.005250183399766684, "learning_rate": 3.292279307408459e-05, "loss": 0.0001677820459008217, "step": 236410 }, { "epoch": 67.10757876809538, "grad_norm": 0.004074434284120798, "learning_rate": 3.291995458416123e-05, "loss": 0.00010859258472919464, "step": 236420 }, { "epoch": 67.11041725801873, "grad_norm": 0.18865327537059784, "learning_rate": 3.2917116094237864e-05, "loss": 0.00025615859776735307, "step": 236430 }, { "epoch": 67.11325574794209, "grad_norm": 0.05772285908460617, "learning_rate": 3.2914277604314506e-05, "loss": 0.00013003647327423096, "step": 236440 }, { "epoch": 67.11609423786545, "grad_norm": 0.05750928446650505, "learning_rate": 3.291143911439115e-05, "loss": 0.0003446422517299652, "step": 236450 }, { "epoch": 67.11893272778882, "grad_norm": 0.001692999736405909, "learning_rate": 3.290860062446778e-05, "loss": 0.00010253842920064926, "step": 236460 }, { "epoch": 67.12177121771218, "grad_norm": 0.052037693560123444, "learning_rate": 3.290576213454443e-05, "loss": 0.00011726785451173782, "step": 236470 }, { "epoch": 67.12460970763554, "grad_norm": 0.11049552261829376, "learning_rate": 3.2902923644621064e-05, "loss": 7.203705608844757e-05, "step": 236480 }, { "epoch": 67.1274481975589, "grad_norm": 0.0294700525701046, "learning_rate": 3.29000851546977e-05, "loss": 0.00013865381479263305, "step": 236490 }, { "epoch": 67.13028668748225, "grad_norm": 0.006669621914625168, "learning_rate": 3.289724666477434e-05, "loss": 6.206240504980087e-05, "step": 236500 }, { "epoch": 67.13028668748225, "eval_accuracy": 0.9792713168436447, "eval_loss": 0.0756475180387497, "eval_runtime": 32.7558, "eval_samples_per_second": 480.129, "eval_steps_per_second": 7.51, "step": 236500 }, { "epoch": 67.13312517740562, "grad_norm": 0.01817850023508072, "learning_rate": 3.289440817485098e-05, "loss": 0.00012988485395908355, "step": 236510 }, { "epoch": 67.13596366732898, "grad_norm": 0.018373096361756325, "learning_rate": 3.289156968492762e-05, "loss": 9.049717336893082e-05, "step": 236520 }, { "epoch": 67.13880215725234, "grad_norm": 0.007237971760332584, "learning_rate": 3.288873119500426e-05, "loss": 9.212139993906022e-05, "step": 236530 }, { "epoch": 67.1416406471757, "grad_norm": 0.06076979637145996, "learning_rate": 3.28858927050809e-05, "loss": 0.00045496057718992234, "step": 236540 }, { "epoch": 67.14447913709907, "grad_norm": 0.01005479320883751, "learning_rate": 3.288305421515754e-05, "loss": 0.00016459915786981583, "step": 236550 }, { "epoch": 67.14731762702242, "grad_norm": 0.030428579077124596, "learning_rate": 3.2880215725234175e-05, "loss": 0.00015869475901126863, "step": 236560 }, { "epoch": 67.15015611694578, "grad_norm": 0.03179007023572922, "learning_rate": 3.2877377235310816e-05, "loss": 0.00010192859917879105, "step": 236570 }, { "epoch": 67.15299460686914, "grad_norm": 0.04424202814698219, "learning_rate": 3.287453874538746e-05, "loss": 7.434152066707612e-05, "step": 236580 }, { "epoch": 67.1558330967925, "grad_norm": 0.018560616299510002, "learning_rate": 3.287170025546409e-05, "loss": 9.98741015791893e-05, "step": 236590 }, { "epoch": 67.15867158671587, "grad_norm": 0.0035243912134319544, "learning_rate": 3.2868861765540734e-05, "loss": 4.1796080768108365e-05, "step": 236600 }, { "epoch": 67.16151007663923, "grad_norm": 0.004361484199762344, "learning_rate": 3.2866023275617375e-05, "loss": 7.771067321300506e-05, "step": 236610 }, { "epoch": 67.1643485665626, "grad_norm": 0.03471178188920021, "learning_rate": 3.286318478569401e-05, "loss": 5.748681724071503e-05, "step": 236620 }, { "epoch": 67.16718705648594, "grad_norm": 0.012607697397470474, "learning_rate": 3.286034629577065e-05, "loss": 5.701519548892975e-05, "step": 236630 }, { "epoch": 67.1700255464093, "grad_norm": 0.04695073142647743, "learning_rate": 3.285750780584729e-05, "loss": 0.00010181255638599395, "step": 236640 }, { "epoch": 67.17286403633267, "grad_norm": 0.022482389584183693, "learning_rate": 3.2854669315923934e-05, "loss": 0.00010940190404653549, "step": 236650 }, { "epoch": 67.17570252625603, "grad_norm": 0.011296030133962631, "learning_rate": 3.285183082600057e-05, "loss": 0.00041452422738075256, "step": 236660 }, { "epoch": 67.1785410161794, "grad_norm": 0.10751219093799591, "learning_rate": 3.284899233607721e-05, "loss": 0.00025536324828863145, "step": 236670 }, { "epoch": 67.18137950610276, "grad_norm": 0.026243533939123154, "learning_rate": 3.284615384615385e-05, "loss": 0.00016429182142019272, "step": 236680 }, { "epoch": 67.18421799602612, "grad_norm": 0.03632938116788864, "learning_rate": 3.2843315356230486e-05, "loss": 0.0002470681443810463, "step": 236690 }, { "epoch": 67.18705648594947, "grad_norm": 0.11133371293544769, "learning_rate": 3.284047686630713e-05, "loss": 0.00030075274407863616, "step": 236700 }, { "epoch": 67.18989497587283, "grad_norm": 0.005575345829129219, "learning_rate": 3.283763837638377e-05, "loss": 0.0004521509632468224, "step": 236710 }, { "epoch": 67.1927334657962, "grad_norm": 0.018629826605319977, "learning_rate": 3.283508373545274e-05, "loss": 0.0037490658462047575, "step": 236720 }, { "epoch": 67.19557195571956, "grad_norm": 0.7727771401405334, "learning_rate": 3.283224524552938e-05, "loss": 0.0008363824337720871, "step": 236730 }, { "epoch": 67.19841044564292, "grad_norm": 0.3189336061477661, "learning_rate": 3.282940675560602e-05, "loss": 0.004271965846419335, "step": 236740 }, { "epoch": 67.20124893556628, "grad_norm": 0.0867658331990242, "learning_rate": 3.282656826568265e-05, "loss": 0.005433711037039757, "step": 236750 }, { "epoch": 67.20408742548965, "grad_norm": 0.14835533499717712, "learning_rate": 3.28237297757593e-05, "loss": 0.0007791556417942047, "step": 236760 }, { "epoch": 67.206925915413, "grad_norm": 0.014016584493219852, "learning_rate": 3.2820891285835935e-05, "loss": 0.0014142265543341636, "step": 236770 }, { "epoch": 67.20976440533636, "grad_norm": 0.6574096083641052, "learning_rate": 3.2818052795912576e-05, "loss": 0.0007308794185519219, "step": 236780 }, { "epoch": 67.21260289525972, "grad_norm": 0.17764997482299805, "learning_rate": 3.281521430598922e-05, "loss": 0.00012448765337467194, "step": 236790 }, { "epoch": 67.21544138518308, "grad_norm": 1.0512490272521973, "learning_rate": 3.281237581606585e-05, "loss": 0.0016630355268716813, "step": 236800 }, { "epoch": 67.21827987510645, "grad_norm": 0.02039116434752941, "learning_rate": 3.2809537326142494e-05, "loss": 0.0005183152854442597, "step": 236810 }, { "epoch": 67.22111836502981, "grad_norm": 0.32698631286621094, "learning_rate": 3.2806698836219135e-05, "loss": 0.00025154203176498414, "step": 236820 }, { "epoch": 67.22395685495316, "grad_norm": 0.48315221071243286, "learning_rate": 3.280386034629577e-05, "loss": 0.0011653421446681024, "step": 236830 }, { "epoch": 67.22679534487652, "grad_norm": 0.033660952001810074, "learning_rate": 3.280102185637241e-05, "loss": 0.0005238119512796402, "step": 236840 }, { "epoch": 67.22963383479988, "grad_norm": 0.015927234664559364, "learning_rate": 3.279818336644905e-05, "loss": 0.0007678842172026634, "step": 236850 }, { "epoch": 67.23247232472325, "grad_norm": 17.38915252685547, "learning_rate": 3.2795344876525694e-05, "loss": 0.005421456322073937, "step": 236860 }, { "epoch": 67.23531081464661, "grad_norm": 0.11791999638080597, "learning_rate": 3.279250638660233e-05, "loss": 0.0041209693998098375, "step": 236870 }, { "epoch": 67.23814930456997, "grad_norm": 0.08793721348047256, "learning_rate": 3.278966789667896e-05, "loss": 0.00012165363878011704, "step": 236880 }, { "epoch": 67.24098779449334, "grad_norm": 0.020970268175005913, "learning_rate": 3.278682940675561e-05, "loss": 8.084066212177277e-05, "step": 236890 }, { "epoch": 67.24382628441668, "grad_norm": 0.05765959993004799, "learning_rate": 3.2783990916832246e-05, "loss": 0.00013734959065914154, "step": 236900 }, { "epoch": 67.24666477434005, "grad_norm": 0.00980284158140421, "learning_rate": 3.278115242690889e-05, "loss": 9.114164859056473e-05, "step": 236910 }, { "epoch": 67.24950326426341, "grad_norm": 0.308773398399353, "learning_rate": 3.277831393698553e-05, "loss": 0.00011258665472269058, "step": 236920 }, { "epoch": 67.25234175418677, "grad_norm": 0.07570528984069824, "learning_rate": 3.277547544706216e-05, "loss": 0.0001067792996764183, "step": 236930 }, { "epoch": 67.25518024411014, "grad_norm": 0.018651176244020462, "learning_rate": 3.2772636957138805e-05, "loss": 7.504243403673172e-05, "step": 236940 }, { "epoch": 67.2580187340335, "grad_norm": 0.0510127991437912, "learning_rate": 3.2769798467215446e-05, "loss": 0.00013084281235933303, "step": 236950 }, { "epoch": 67.26085722395686, "grad_norm": 0.011131802573800087, "learning_rate": 3.276695997729208e-05, "loss": 0.00013439562171697618, "step": 236960 }, { "epoch": 67.26369571388021, "grad_norm": 0.030319981276988983, "learning_rate": 3.276412148736872e-05, "loss": 8.776187896728516e-05, "step": 236970 }, { "epoch": 67.26653420380357, "grad_norm": 0.026257796213030815, "learning_rate": 3.2761282997445356e-05, "loss": 8.591786026954651e-05, "step": 236980 }, { "epoch": 67.26937269372694, "grad_norm": 0.050062086433172226, "learning_rate": 3.2758444507522005e-05, "loss": 0.00012008324265480041, "step": 236990 }, { "epoch": 67.2722111836503, "grad_norm": 0.04342683032155037, "learning_rate": 3.275560601759864e-05, "loss": 0.001168622449040413, "step": 237000 }, { "epoch": 67.2722111836503, "eval_accuracy": 0.9797164112672474, "eval_loss": 0.07643184810876846, "eval_runtime": 32.5734, "eval_samples_per_second": 482.817, "eval_steps_per_second": 7.552, "step": 237000 }, { "epoch": 67.27504967357366, "grad_norm": 0.092112235724926, "learning_rate": 3.2752767527675274e-05, "loss": 0.00023942440748214722, "step": 237010 }, { "epoch": 67.27788816349702, "grad_norm": 0.012762495316565037, "learning_rate": 3.274992903775192e-05, "loss": 0.00011216085404157639, "step": 237020 }, { "epoch": 67.28072665342037, "grad_norm": 0.04242315888404846, "learning_rate": 3.2747090547828557e-05, "loss": 0.0006746476516127587, "step": 237030 }, { "epoch": 67.28356514334374, "grad_norm": 0.01878080517053604, "learning_rate": 3.27442520579052e-05, "loss": 0.00019951723515987396, "step": 237040 }, { "epoch": 67.2864036332671, "grad_norm": 2.0583741664886475, "learning_rate": 3.274141356798184e-05, "loss": 0.0003763450309634209, "step": 237050 }, { "epoch": 67.28924212319046, "grad_norm": 0.07831819355487823, "learning_rate": 3.2738575078058474e-05, "loss": 0.0023138301447033884, "step": 237060 }, { "epoch": 67.29208061311382, "grad_norm": 0.2165016084909439, "learning_rate": 3.2735736588135115e-05, "loss": 0.00015714094042778014, "step": 237070 }, { "epoch": 67.29491910303719, "grad_norm": 0.04398176446557045, "learning_rate": 3.273289809821175e-05, "loss": 0.0009617276489734649, "step": 237080 }, { "epoch": 67.29775759296055, "grad_norm": 0.0849243700504303, "learning_rate": 3.273005960828839e-05, "loss": 0.00013422854244709014, "step": 237090 }, { "epoch": 67.3005960828839, "grad_norm": 0.03056160733103752, "learning_rate": 3.272722111836503e-05, "loss": 0.00042616799473762514, "step": 237100 }, { "epoch": 67.30343457280726, "grad_norm": 0.008664845488965511, "learning_rate": 3.272438262844167e-05, "loss": 0.00017385035753250122, "step": 237110 }, { "epoch": 67.30627306273063, "grad_norm": 0.29482415318489075, "learning_rate": 3.272154413851831e-05, "loss": 0.00025800149887800217, "step": 237120 }, { "epoch": 67.30911155265399, "grad_norm": 0.10153143107891083, "learning_rate": 3.271870564859495e-05, "loss": 0.00010078474879264832, "step": 237130 }, { "epoch": 67.31195004257735, "grad_norm": 0.021328706294298172, "learning_rate": 3.2715867158671585e-05, "loss": 9.536109864711761e-05, "step": 237140 }, { "epoch": 67.31478853250071, "grad_norm": 0.03882073611021042, "learning_rate": 3.271302866874823e-05, "loss": 0.00020025428384542465, "step": 237150 }, { "epoch": 67.31762702242408, "grad_norm": 0.049883902072906494, "learning_rate": 3.271019017882487e-05, "loss": 8.650124073028564e-05, "step": 237160 }, { "epoch": 67.32046551234743, "grad_norm": 0.022513367235660553, "learning_rate": 3.27073516889015e-05, "loss": 7.63261690735817e-05, "step": 237170 }, { "epoch": 67.32330400227079, "grad_norm": 0.031981728971004486, "learning_rate": 3.270451319897814e-05, "loss": 9.247064590454102e-05, "step": 237180 }, { "epoch": 67.32614249219415, "grad_norm": 0.1449345350265503, "learning_rate": 3.2701674709054785e-05, "loss": 0.0003418838605284691, "step": 237190 }, { "epoch": 67.32898098211751, "grad_norm": 0.02989945374429226, "learning_rate": 3.2698836219131426e-05, "loss": 0.00032199248671531676, "step": 237200 }, { "epoch": 67.33181947204088, "grad_norm": 0.02557341195642948, "learning_rate": 3.269599772920806e-05, "loss": 0.0047337919473648075, "step": 237210 }, { "epoch": 67.33465796196424, "grad_norm": 0.9418328404426575, "learning_rate": 3.26931592392847e-05, "loss": 0.0008564088493585587, "step": 237220 }, { "epoch": 67.3374964518876, "grad_norm": 0.0908663421869278, "learning_rate": 3.2690320749361343e-05, "loss": 0.003247436136007309, "step": 237230 }, { "epoch": 67.34033494181095, "grad_norm": 4.545805931091309, "learning_rate": 3.268748225943798e-05, "loss": 0.0019252188503742218, "step": 237240 }, { "epoch": 67.34317343173431, "grad_norm": 0.6400017142295837, "learning_rate": 3.268464376951462e-05, "loss": 0.0005923531949520111, "step": 237250 }, { "epoch": 67.34601192165768, "grad_norm": 0.15722693502902985, "learning_rate": 3.268180527959126e-05, "loss": 0.0003538699820637703, "step": 237260 }, { "epoch": 67.34885041158104, "grad_norm": 0.0171974990516901, "learning_rate": 3.2678966789667895e-05, "loss": 0.0005620758980512619, "step": 237270 }, { "epoch": 67.3516889015044, "grad_norm": 0.10647895187139511, "learning_rate": 3.267612829974454e-05, "loss": 0.0011112306267023087, "step": 237280 }, { "epoch": 67.35452739142777, "grad_norm": 0.028096171095967293, "learning_rate": 3.267328980982118e-05, "loss": 0.00046977773308753967, "step": 237290 }, { "epoch": 67.35736588135111, "grad_norm": 0.025773795321583748, "learning_rate": 3.267045131989781e-05, "loss": 0.004715617001056671, "step": 237300 }, { "epoch": 67.36020437127448, "grad_norm": 1.836323857307434, "learning_rate": 3.2667612829974454e-05, "loss": 0.0007401857525110245, "step": 237310 }, { "epoch": 67.36304286119784, "grad_norm": 0.15430571138858795, "learning_rate": 3.2664774340051095e-05, "loss": 0.0005004128441214561, "step": 237320 }, { "epoch": 67.3658813511212, "grad_norm": 0.03876098245382309, "learning_rate": 3.266193585012774e-05, "loss": 0.0006790626794099808, "step": 237330 }, { "epoch": 67.36871984104457, "grad_norm": 0.050470758229494095, "learning_rate": 3.265909736020437e-05, "loss": 0.00024403799325227736, "step": 237340 }, { "epoch": 67.37155833096793, "grad_norm": 0.012902499176561832, "learning_rate": 3.265625887028101e-05, "loss": 0.0016576904803514481, "step": 237350 }, { "epoch": 67.37439682089129, "grad_norm": 0.008782655000686646, "learning_rate": 3.2653420380357654e-05, "loss": 0.0021958498284220694, "step": 237360 }, { "epoch": 67.37723531081464, "grad_norm": 0.03203536942601204, "learning_rate": 3.265058189043429e-05, "loss": 0.0003221284598112106, "step": 237370 }, { "epoch": 67.380073800738, "grad_norm": 8.82541561126709, "learning_rate": 3.264774340051093e-05, "loss": 0.0018380090594291687, "step": 237380 }, { "epoch": 67.38291229066137, "grad_norm": 0.11666781455278397, "learning_rate": 3.264490491058757e-05, "loss": 0.00030213315039873123, "step": 237390 }, { "epoch": 67.38575078058473, "grad_norm": 0.07662409543991089, "learning_rate": 3.2642066420664206e-05, "loss": 0.0011859867721796037, "step": 237400 }, { "epoch": 67.38858927050809, "grad_norm": 0.23841720819473267, "learning_rate": 3.263922793074085e-05, "loss": 0.0003433629870414734, "step": 237410 }, { "epoch": 67.39142776043145, "grad_norm": 0.026715701445937157, "learning_rate": 3.263638944081749e-05, "loss": 0.0002550181001424789, "step": 237420 }, { "epoch": 67.39426625035482, "grad_norm": 0.18612638115882874, "learning_rate": 3.2633550950894123e-05, "loss": 0.00021160747855901717, "step": 237430 }, { "epoch": 67.39710474027817, "grad_norm": 0.08201069384813309, "learning_rate": 3.2630712460970765e-05, "loss": 0.002547897957265377, "step": 237440 }, { "epoch": 67.39994323020153, "grad_norm": 0.14940835535526276, "learning_rate": 3.2627873971047406e-05, "loss": 0.00025792531669139863, "step": 237450 }, { "epoch": 67.40278172012489, "grad_norm": 0.021731631830334663, "learning_rate": 3.262503548112405e-05, "loss": 0.00010238997638225555, "step": 237460 }, { "epoch": 67.40562021004826, "grad_norm": 0.00626297015696764, "learning_rate": 3.262219699120068e-05, "loss": 0.00019033048301935195, "step": 237470 }, { "epoch": 67.40845869997162, "grad_norm": 0.036828748881816864, "learning_rate": 3.261935850127732e-05, "loss": 0.0001714169979095459, "step": 237480 }, { "epoch": 67.41129718989498, "grad_norm": 0.19025029242038727, "learning_rate": 3.2616520011353965e-05, "loss": 0.0002760464325547218, "step": 237490 }, { "epoch": 67.41413567981834, "grad_norm": 0.013818745501339436, "learning_rate": 3.26136815214306e-05, "loss": 0.0005547253414988518, "step": 237500 }, { "epoch": 67.41413567981834, "eval_accuracy": 0.9779360335728365, "eval_loss": 0.08018671721220016, "eval_runtime": 33.0481, "eval_samples_per_second": 475.882, "eval_steps_per_second": 7.444, "step": 237500 }, { "epoch": 67.41697416974169, "grad_norm": 0.05217554420232773, "learning_rate": 3.261084303150724e-05, "loss": 0.00018909256905317307, "step": 237510 }, { "epoch": 67.41981265966506, "grad_norm": 0.36982622742652893, "learning_rate": 3.260800454158388e-05, "loss": 0.00015080366283655168, "step": 237520 }, { "epoch": 67.42265114958842, "grad_norm": 0.012583848088979721, "learning_rate": 3.260516605166052e-05, "loss": 0.00018460489809513092, "step": 237530 }, { "epoch": 67.42548963951178, "grad_norm": 0.003976361360400915, "learning_rate": 3.260232756173716e-05, "loss": 0.0025164172053337096, "step": 237540 }, { "epoch": 67.42832812943514, "grad_norm": 0.021744264289736748, "learning_rate": 3.25994890718138e-05, "loss": 0.00041102934628725054, "step": 237550 }, { "epoch": 67.4311666193585, "grad_norm": 0.06705716252326965, "learning_rate": 3.2596650581890434e-05, "loss": 0.00017867665737867356, "step": 237560 }, { "epoch": 67.43400510928186, "grad_norm": 0.010521214455366135, "learning_rate": 3.2593812091967076e-05, "loss": 0.00027242992073297503, "step": 237570 }, { "epoch": 67.43684359920522, "grad_norm": 0.019489815458655357, "learning_rate": 3.259097360204371e-05, "loss": 0.00027269795536994935, "step": 237580 }, { "epoch": 67.43968208912858, "grad_norm": 0.01643611676990986, "learning_rate": 3.258813511212035e-05, "loss": 0.00013379808515310288, "step": 237590 }, { "epoch": 67.44252057905194, "grad_norm": 0.08060578256845474, "learning_rate": 3.258529662219699e-05, "loss": 0.00011404510587453842, "step": 237600 }, { "epoch": 67.4453590689753, "grad_norm": 0.011957686394453049, "learning_rate": 3.258245813227363e-05, "loss": 0.00023435931652784347, "step": 237610 }, { "epoch": 67.44819755889867, "grad_norm": 3.2465033531188965, "learning_rate": 3.2579619642350276e-05, "loss": 0.0003473928198218346, "step": 237620 }, { "epoch": 67.45103604882203, "grad_norm": 0.042978618294000626, "learning_rate": 3.257678115242691e-05, "loss": 0.0008670385926961899, "step": 237630 }, { "epoch": 67.45387453874538, "grad_norm": 0.7776144742965698, "learning_rate": 3.2573942662503545e-05, "loss": 0.000506916269659996, "step": 237640 }, { "epoch": 67.45671302866874, "grad_norm": 3.677915334701538, "learning_rate": 3.257110417258019e-05, "loss": 0.0005712827667593956, "step": 237650 }, { "epoch": 67.45955151859211, "grad_norm": 0.02805529162287712, "learning_rate": 3.256826568265683e-05, "loss": 8.428264409303665e-05, "step": 237660 }, { "epoch": 67.46239000851547, "grad_norm": 0.021795719861984253, "learning_rate": 3.256542719273347e-05, "loss": 0.00023926720023155213, "step": 237670 }, { "epoch": 67.46522849843883, "grad_norm": 1.1590192317962646, "learning_rate": 3.2562588702810104e-05, "loss": 0.0003183698281645775, "step": 237680 }, { "epoch": 67.4680669883622, "grad_norm": 0.06659658253192902, "learning_rate": 3.2559750212886745e-05, "loss": 0.0001026008278131485, "step": 237690 }, { "epoch": 67.47090547828556, "grad_norm": 0.23256492614746094, "learning_rate": 3.2556911722963386e-05, "loss": 0.00030873361974954606, "step": 237700 }, { "epoch": 67.47374396820891, "grad_norm": 13.114998817443848, "learning_rate": 3.255407323304002e-05, "loss": 0.003718564659357071, "step": 237710 }, { "epoch": 67.47658245813227, "grad_norm": 0.1175011396408081, "learning_rate": 3.255123474311666e-05, "loss": 0.0004353182390332222, "step": 237720 }, { "epoch": 67.47942094805563, "grad_norm": 0.02598547376692295, "learning_rate": 3.2548396253193304e-05, "loss": 0.00028145574033260344, "step": 237730 }, { "epoch": 67.482259437979, "grad_norm": 0.08206906169652939, "learning_rate": 3.254555776326994e-05, "loss": 0.00013472083956003189, "step": 237740 }, { "epoch": 67.48509792790236, "grad_norm": 5.496104717254639, "learning_rate": 3.2542719273346586e-05, "loss": 0.0006842566654086113, "step": 237750 }, { "epoch": 67.48793641782572, "grad_norm": 0.03762378543615341, "learning_rate": 3.253988078342322e-05, "loss": 0.00013122893869876862, "step": 237760 }, { "epoch": 67.49077490774907, "grad_norm": 0.07827834784984589, "learning_rate": 3.2537042293499856e-05, "loss": 0.00046607572585344315, "step": 237770 }, { "epoch": 67.49361339767243, "grad_norm": 0.12565934658050537, "learning_rate": 3.25342038035765e-05, "loss": 0.00012675393372774124, "step": 237780 }, { "epoch": 67.4964518875958, "grad_norm": 0.42539021372795105, "learning_rate": 3.253136531365314e-05, "loss": 0.0003472644835710526, "step": 237790 }, { "epoch": 67.49929037751916, "grad_norm": 5.199791431427002, "learning_rate": 3.252852682372978e-05, "loss": 0.002033526450395584, "step": 237800 }, { "epoch": 67.50212886744252, "grad_norm": 9.261383056640625, "learning_rate": 3.2525688333806414e-05, "loss": 0.0019907845184206963, "step": 237810 }, { "epoch": 67.50496735736589, "grad_norm": 0.01654800772666931, "learning_rate": 3.2522849843883056e-05, "loss": 0.0001631900668144226, "step": 237820 }, { "epoch": 67.50780584728925, "grad_norm": 0.021316828206181526, "learning_rate": 3.25200113539597e-05, "loss": 0.00020019393414258957, "step": 237830 }, { "epoch": 67.5106443372126, "grad_norm": 0.03504329174757004, "learning_rate": 3.251717286403633e-05, "loss": 0.00022271033376455306, "step": 237840 }, { "epoch": 67.51348282713596, "grad_norm": 4.582546710968018, "learning_rate": 3.251433437411297e-05, "loss": 0.0007292604073882103, "step": 237850 }, { "epoch": 67.51632131705932, "grad_norm": 0.04747350886464119, "learning_rate": 3.2511495884189614e-05, "loss": 0.0002691950649023056, "step": 237860 }, { "epoch": 67.51915980698269, "grad_norm": 0.013321001082658768, "learning_rate": 3.250865739426625e-05, "loss": 0.0008635884150862694, "step": 237870 }, { "epoch": 67.52199829690605, "grad_norm": 0.016327951103448868, "learning_rate": 3.250581890434289e-05, "loss": 0.0001387348398566246, "step": 237880 }, { "epoch": 67.52483678682941, "grad_norm": 0.007938256487250328, "learning_rate": 3.250298041441953e-05, "loss": 0.0007561881095170975, "step": 237890 }, { "epoch": 67.52767527675277, "grad_norm": 0.34171441197395325, "learning_rate": 3.2500141924496166e-05, "loss": 0.0009125951677560806, "step": 237900 }, { "epoch": 67.53051376667612, "grad_norm": 0.06743182986974716, "learning_rate": 3.249730343457281e-05, "loss": 0.007697883248329163, "step": 237910 }, { "epoch": 67.53335225659949, "grad_norm": 0.018767064437270164, "learning_rate": 3.249446494464945e-05, "loss": 0.0007141321897506714, "step": 237920 }, { "epoch": 67.53619074652285, "grad_norm": 0.11455602198839188, "learning_rate": 3.249162645472609e-05, "loss": 0.00010518655180931091, "step": 237930 }, { "epoch": 67.53902923644621, "grad_norm": 0.04021434113383293, "learning_rate": 3.2488787964802725e-05, "loss": 0.0001988012343645096, "step": 237940 }, { "epoch": 67.54186772636957, "grad_norm": 0.02181648276746273, "learning_rate": 3.2485949474879367e-05, "loss": 0.00028188060969114304, "step": 237950 }, { "epoch": 67.54470621629294, "grad_norm": 0.038336578756570816, "learning_rate": 3.248311098495601e-05, "loss": 0.00033471304923295977, "step": 237960 }, { "epoch": 67.5475447062163, "grad_norm": 0.06564854085445404, "learning_rate": 3.248027249503264e-05, "loss": 0.00010924879461526871, "step": 237970 }, { "epoch": 67.55038319613965, "grad_norm": 6.99076509475708, "learning_rate": 3.2477434005109284e-05, "loss": 0.0008259061723947525, "step": 237980 }, { "epoch": 67.55322168606301, "grad_norm": 0.17916826903820038, "learning_rate": 3.2474595515185925e-05, "loss": 7.845032960176467e-05, "step": 237990 }, { "epoch": 67.55606017598637, "grad_norm": 0.2866898775100708, "learning_rate": 3.247175702526256e-05, "loss": 0.00046958867460489273, "step": 238000 }, { "epoch": 67.55606017598637, "eval_accuracy": 0.9800343358555351, "eval_loss": 0.07462452352046967, "eval_runtime": 32.847, "eval_samples_per_second": 478.796, "eval_steps_per_second": 7.489, "step": 238000 }, { "epoch": 67.55889866590974, "grad_norm": 0.028617478907108307, "learning_rate": 3.24689185353392e-05, "loss": 0.00017633382230997086, "step": 238010 }, { "epoch": 67.5617371558331, "grad_norm": 0.006552290637046099, "learning_rate": 3.246608004541584e-05, "loss": 0.0003404369577765465, "step": 238020 }, { "epoch": 67.56457564575646, "grad_norm": 3.0830724239349365, "learning_rate": 3.246324155549248e-05, "loss": 0.00047385673969984053, "step": 238030 }, { "epoch": 67.56741413567981, "grad_norm": 0.04404829069972038, "learning_rate": 3.246040306556912e-05, "loss": 0.0007031964138150215, "step": 238040 }, { "epoch": 67.57025262560317, "grad_norm": 0.5890796780586243, "learning_rate": 3.245756457564576e-05, "loss": 0.00034064315259456633, "step": 238050 }, { "epoch": 67.57309111552654, "grad_norm": 1.2230132818222046, "learning_rate": 3.2454726085722395e-05, "loss": 0.00346125066280365, "step": 238060 }, { "epoch": 67.5759296054499, "grad_norm": 0.010996498167514801, "learning_rate": 3.2451887595799036e-05, "loss": 0.00019328780472278596, "step": 238070 }, { "epoch": 67.57876809537326, "grad_norm": 0.2707473933696747, "learning_rate": 3.244904910587567e-05, "loss": 0.0001236187294125557, "step": 238080 }, { "epoch": 67.58160658529663, "grad_norm": 0.03466835618019104, "learning_rate": 3.244621061595232e-05, "loss": 0.0014740046113729476, "step": 238090 }, { "epoch": 67.58444507521999, "grad_norm": 8.493850708007812, "learning_rate": 3.244337212602895e-05, "loss": 0.0033713944256305696, "step": 238100 }, { "epoch": 67.58728356514334, "grad_norm": 0.0152988750487566, "learning_rate": 3.244053363610559e-05, "loss": 0.00224619023501873, "step": 238110 }, { "epoch": 67.5901220550667, "grad_norm": 0.15839128196239471, "learning_rate": 3.2437695146182236e-05, "loss": 0.0003886362537741661, "step": 238120 }, { "epoch": 67.59296054499006, "grad_norm": 5.970061302185059, "learning_rate": 3.243485665625887e-05, "loss": 0.0015026973560452461, "step": 238130 }, { "epoch": 67.59579903491343, "grad_norm": 0.044396527111530304, "learning_rate": 3.243201816633551e-05, "loss": 0.010679760575294494, "step": 238140 }, { "epoch": 67.59863752483679, "grad_norm": 0.16105493903160095, "learning_rate": 3.242917967641215e-05, "loss": 0.00022237971425056457, "step": 238150 }, { "epoch": 67.60147601476015, "grad_norm": 0.35511645674705505, "learning_rate": 3.242634118648879e-05, "loss": 0.00040302090346813204, "step": 238160 }, { "epoch": 67.60431450468351, "grad_norm": 0.021419376134872437, "learning_rate": 3.242350269656543e-05, "loss": 0.00015153884887695313, "step": 238170 }, { "epoch": 67.60715299460686, "grad_norm": 0.010115640237927437, "learning_rate": 3.242066420664207e-05, "loss": 0.0003530070185661316, "step": 238180 }, { "epoch": 67.60999148453023, "grad_norm": 0.7202189564704895, "learning_rate": 3.2417825716718705e-05, "loss": 0.0002615800127387047, "step": 238190 }, { "epoch": 67.61282997445359, "grad_norm": 0.181496262550354, "learning_rate": 3.241498722679535e-05, "loss": 0.0006333751603960991, "step": 238200 }, { "epoch": 67.61566846437695, "grad_norm": 0.028508830815553665, "learning_rate": 3.241214873687198e-05, "loss": 0.0011337369680404664, "step": 238210 }, { "epoch": 67.61850695430032, "grad_norm": 0.2563294470310211, "learning_rate": 3.240931024694863e-05, "loss": 0.0006260344758629799, "step": 238220 }, { "epoch": 67.62134544422368, "grad_norm": 0.005411229562014341, "learning_rate": 3.2406471757025264e-05, "loss": 0.00011811982840299606, "step": 238230 }, { "epoch": 67.62418393414703, "grad_norm": 0.026394879445433617, "learning_rate": 3.24036332671019e-05, "loss": 0.0001025468111038208, "step": 238240 }, { "epoch": 67.62702242407039, "grad_norm": 0.04029536247253418, "learning_rate": 3.240079477717855e-05, "loss": 0.0005399642512202262, "step": 238250 }, { "epoch": 67.62986091399375, "grad_norm": 0.07684516161680222, "learning_rate": 3.239795628725518e-05, "loss": 0.0011085314676165582, "step": 238260 }, { "epoch": 67.63269940391712, "grad_norm": 1.394189715385437, "learning_rate": 3.239511779733182e-05, "loss": 0.00030375011265277864, "step": 238270 }, { "epoch": 67.63553789384048, "grad_norm": 1.8145999908447266, "learning_rate": 3.2392279307408464e-05, "loss": 0.0004547718912363052, "step": 238280 }, { "epoch": 67.63837638376384, "grad_norm": 1.5817158222198486, "learning_rate": 3.23894408174851e-05, "loss": 0.00028676725924015045, "step": 238290 }, { "epoch": 67.6412148736872, "grad_norm": 0.2051060050725937, "learning_rate": 3.238660232756174e-05, "loss": 0.00020140688866376877, "step": 238300 }, { "epoch": 67.64405336361055, "grad_norm": 0.018723484128713608, "learning_rate": 3.2383763837638375e-05, "loss": 0.006840449571609497, "step": 238310 }, { "epoch": 67.64689185353392, "grad_norm": 0.039222218096256256, "learning_rate": 3.2380925347715016e-05, "loss": 0.0017077362164855004, "step": 238320 }, { "epoch": 67.64973034345728, "grad_norm": 0.08973008394241333, "learning_rate": 3.237808685779166e-05, "loss": 0.00044512040913105013, "step": 238330 }, { "epoch": 67.65256883338064, "grad_norm": 0.034558989107608795, "learning_rate": 3.237524836786829e-05, "loss": 0.00023877546191215516, "step": 238340 }, { "epoch": 67.655407323304, "grad_norm": 0.05780749395489693, "learning_rate": 3.237240987794494e-05, "loss": 0.014533048868179322, "step": 238350 }, { "epoch": 67.65824581322737, "grad_norm": 0.21600428223609924, "learning_rate": 3.2369571388021575e-05, "loss": 0.002072193473577499, "step": 238360 }, { "epoch": 67.66108430315073, "grad_norm": 0.6035264134407043, "learning_rate": 3.236673289809821e-05, "loss": 0.005184681713581085, "step": 238370 }, { "epoch": 67.66392279307408, "grad_norm": 6.263185501098633, "learning_rate": 3.236389440817486e-05, "loss": 0.0019714921712875367, "step": 238380 }, { "epoch": 67.66676128299744, "grad_norm": 0.07276687026023865, "learning_rate": 3.236105591825149e-05, "loss": 0.006644345819950104, "step": 238390 }, { "epoch": 67.6695997729208, "grad_norm": 0.9308815598487854, "learning_rate": 3.2358217428328134e-05, "loss": 0.002763892337679863, "step": 238400 }, { "epoch": 67.67243826284417, "grad_norm": 0.10474570095539093, "learning_rate": 3.235537893840477e-05, "loss": 0.006199084594845772, "step": 238410 }, { "epoch": 67.67527675276753, "grad_norm": 0.006782242562621832, "learning_rate": 3.235254044848141e-05, "loss": 0.0019528087228536605, "step": 238420 }, { "epoch": 67.6781152426909, "grad_norm": 8.153241157531738, "learning_rate": 3.234970195855805e-05, "loss": 0.0029659675434231757, "step": 238430 }, { "epoch": 67.68095373261426, "grad_norm": 0.15283608436584473, "learning_rate": 3.2346863468634685e-05, "loss": 0.006448875367641449, "step": 238440 }, { "epoch": 67.6837922225376, "grad_norm": 0.07770597189664841, "learning_rate": 3.234402497871133e-05, "loss": 0.0019217194989323617, "step": 238450 }, { "epoch": 67.68663071246097, "grad_norm": 0.01755458675324917, "learning_rate": 3.234118648878797e-05, "loss": 0.0003551799803972244, "step": 238460 }, { "epoch": 67.68946920238433, "grad_norm": 2.079482078552246, "learning_rate": 3.23383479988646e-05, "loss": 0.0006287086755037308, "step": 238470 }, { "epoch": 67.6923076923077, "grad_norm": 0.03817075118422508, "learning_rate": 3.2335509508941244e-05, "loss": 0.0002635115757584572, "step": 238480 }, { "epoch": 67.69514618223106, "grad_norm": 2.0910568237304688, "learning_rate": 3.2332671019017886e-05, "loss": 0.0007419925183057785, "step": 238490 }, { "epoch": 67.69798467215442, "grad_norm": 0.012498090043663979, "learning_rate": 3.232983252909452e-05, "loss": 0.0005417261272668839, "step": 238500 }, { "epoch": 67.69798467215442, "eval_accuracy": 0.9769822598079736, "eval_loss": 0.09116441011428833, "eval_runtime": 32.7187, "eval_samples_per_second": 480.673, "eval_steps_per_second": 7.519, "step": 238500 }, { "epoch": 67.70082316207777, "grad_norm": 0.04386280104517937, "learning_rate": 3.232699403917116e-05, "loss": 0.0015194511041045188, "step": 238510 }, { "epoch": 67.70366165200113, "grad_norm": 0.419534295797348, "learning_rate": 3.23241555492478e-05, "loss": 0.00018956176936626434, "step": 238520 }, { "epoch": 67.7065001419245, "grad_norm": 0.004012344870716333, "learning_rate": 3.232131705932444e-05, "loss": 0.00010520517826080322, "step": 238530 }, { "epoch": 67.70933863184786, "grad_norm": 0.01730985939502716, "learning_rate": 3.231847856940108e-05, "loss": 0.0001427123323082924, "step": 238540 }, { "epoch": 67.71217712177122, "grad_norm": 0.11343719065189362, "learning_rate": 3.231564007947772e-05, "loss": 0.00010906457901000976, "step": 238550 }, { "epoch": 67.71501561169458, "grad_norm": 0.032431695610284805, "learning_rate": 3.231280158955436e-05, "loss": 9.471699595451356e-05, "step": 238560 }, { "epoch": 67.71785410161795, "grad_norm": 0.028268298134207726, "learning_rate": 3.2309963099630996e-05, "loss": 0.00031718965619802474, "step": 238570 }, { "epoch": 67.7206925915413, "grad_norm": 0.04011308401823044, "learning_rate": 3.230712460970764e-05, "loss": 0.0008948517963290215, "step": 238580 }, { "epoch": 67.72353108146466, "grad_norm": 0.02729710005223751, "learning_rate": 3.230428611978428e-05, "loss": 0.0005768748000264168, "step": 238590 }, { "epoch": 67.72636957138802, "grad_norm": 0.009874220006167889, "learning_rate": 3.2301447629860914e-05, "loss": 0.00023008212447166442, "step": 238600 }, { "epoch": 67.72920806131138, "grad_norm": 0.1460953801870346, "learning_rate": 3.2298609139937555e-05, "loss": 0.00011670701205730438, "step": 238610 }, { "epoch": 67.73204655123475, "grad_norm": 0.0077936421148478985, "learning_rate": 3.2295770650014196e-05, "loss": 0.0011231679469347, "step": 238620 }, { "epoch": 67.73488504115811, "grad_norm": 1.9368336200714111, "learning_rate": 3.229293216009083e-05, "loss": 0.0004939043894410134, "step": 238630 }, { "epoch": 67.73772353108147, "grad_norm": 0.024576999247074127, "learning_rate": 3.229009367016747e-05, "loss": 0.0004694797098636627, "step": 238640 }, { "epoch": 67.74056202100482, "grad_norm": 11.660805702209473, "learning_rate": 3.2287255180244114e-05, "loss": 0.0031213566660881044, "step": 238650 }, { "epoch": 67.74340051092818, "grad_norm": 0.08496185392141342, "learning_rate": 3.228441669032075e-05, "loss": 0.005450227856636047, "step": 238660 }, { "epoch": 67.74623900085155, "grad_norm": 0.08500491827726364, "learning_rate": 3.228157820039739e-05, "loss": 0.00013103038072586058, "step": 238670 }, { "epoch": 67.74907749077491, "grad_norm": 0.009951264597475529, "learning_rate": 3.227873971047403e-05, "loss": 0.00030443388968706133, "step": 238680 }, { "epoch": 67.75191598069827, "grad_norm": 0.26333028078079224, "learning_rate": 3.227590122055067e-05, "loss": 0.0004402417689561844, "step": 238690 }, { "epoch": 67.75475447062163, "grad_norm": 0.03946924954652786, "learning_rate": 3.227306273062731e-05, "loss": 0.00012253690510988235, "step": 238700 }, { "epoch": 67.757592960545, "grad_norm": 0.007652658503502607, "learning_rate": 3.227022424070394e-05, "loss": 0.00025032274425029755, "step": 238710 }, { "epoch": 67.76043145046835, "grad_norm": 0.02098272740840912, "learning_rate": 3.226738575078059e-05, "loss": 0.0020718835294246674, "step": 238720 }, { "epoch": 67.76326994039171, "grad_norm": 0.0514928437769413, "learning_rate": 3.2264547260857224e-05, "loss": 0.00013334508985280992, "step": 238730 }, { "epoch": 67.76610843031507, "grad_norm": 0.012351004406809807, "learning_rate": 3.2261708770933866e-05, "loss": 0.001666281372308731, "step": 238740 }, { "epoch": 67.76894692023843, "grad_norm": 0.0037720007821917534, "learning_rate": 3.225887028101051e-05, "loss": 0.0009134652093052865, "step": 238750 }, { "epoch": 67.7717854101618, "grad_norm": 2.0431694984436035, "learning_rate": 3.225603179108714e-05, "loss": 0.000365748442709446, "step": 238760 }, { "epoch": 67.77462390008516, "grad_norm": 0.06703159213066101, "learning_rate": 3.225319330116378e-05, "loss": 0.0005766186863183975, "step": 238770 }, { "epoch": 67.77746239000851, "grad_norm": 17.542850494384766, "learning_rate": 3.2250354811240424e-05, "loss": 0.0034675572067499163, "step": 238780 }, { "epoch": 67.78030087993187, "grad_norm": 0.05222545936703682, "learning_rate": 3.224751632131706e-05, "loss": 0.0011237161234021187, "step": 238790 }, { "epoch": 67.78313936985523, "grad_norm": 0.6651024222373962, "learning_rate": 3.22446778313937e-05, "loss": 0.0012725075706839562, "step": 238800 }, { "epoch": 67.7859778597786, "grad_norm": 1.940011739730835, "learning_rate": 3.2241839341470335e-05, "loss": 0.0006208794191479682, "step": 238810 }, { "epoch": 67.78881634970196, "grad_norm": 0.12565436959266663, "learning_rate": 3.223900085154698e-05, "loss": 0.0002481468021869659, "step": 238820 }, { "epoch": 67.79165483962532, "grad_norm": 0.1693343073129654, "learning_rate": 3.223616236162362e-05, "loss": 0.00035248007625341413, "step": 238830 }, { "epoch": 67.79449332954869, "grad_norm": 0.23362968862056732, "learning_rate": 3.223332387170025e-05, "loss": 0.0004392711445689201, "step": 238840 }, { "epoch": 67.79733181947203, "grad_norm": 0.13409173488616943, "learning_rate": 3.22304853817769e-05, "loss": 0.001935061626136303, "step": 238850 }, { "epoch": 67.8001703093954, "grad_norm": 0.02549491636455059, "learning_rate": 3.2227646891853535e-05, "loss": 0.00033534429967403413, "step": 238860 }, { "epoch": 67.80300879931876, "grad_norm": 0.03381893038749695, "learning_rate": 3.2224808401930177e-05, "loss": 0.00023301038891077042, "step": 238870 }, { "epoch": 67.80584728924212, "grad_norm": 1.7774205207824707, "learning_rate": 3.222196991200682e-05, "loss": 0.00046814922243356707, "step": 238880 }, { "epoch": 67.80868577916549, "grad_norm": 0.019339250400662422, "learning_rate": 3.221913142208345e-05, "loss": 0.00023904722183942795, "step": 238890 }, { "epoch": 67.81152426908885, "grad_norm": 0.1288164258003235, "learning_rate": 3.2216292932160094e-05, "loss": 0.0018879311159253121, "step": 238900 }, { "epoch": 67.81436275901221, "grad_norm": 0.057192686945199966, "learning_rate": 3.221345444223673e-05, "loss": 0.0005377192050218582, "step": 238910 }, { "epoch": 67.81720124893556, "grad_norm": 0.6482744812965393, "learning_rate": 3.221061595231337e-05, "loss": 0.0004144478589296341, "step": 238920 }, { "epoch": 67.82003973885892, "grad_norm": 0.00415580440312624, "learning_rate": 3.220777746239001e-05, "loss": 0.00017345640808343887, "step": 238930 }, { "epoch": 67.82287822878229, "grad_norm": 0.0669969841837883, "learning_rate": 3.2204938972466646e-05, "loss": 0.0003446388989686966, "step": 238940 }, { "epoch": 67.82571671870565, "grad_norm": 0.016007406637072563, "learning_rate": 3.220210048254329e-05, "loss": 0.00019704941660165788, "step": 238950 }, { "epoch": 67.82855520862901, "grad_norm": 0.057914961129426956, "learning_rate": 3.219926199261993e-05, "loss": 0.00020583122968673707, "step": 238960 }, { "epoch": 67.83139369855238, "grad_norm": 0.06766346096992493, "learning_rate": 3.219642350269656e-05, "loss": 0.006274516880512238, "step": 238970 }, { "epoch": 67.83423218847572, "grad_norm": 0.046752460300922394, "learning_rate": 3.219358501277321e-05, "loss": 0.00026107355952262876, "step": 238980 }, { "epoch": 67.83707067839909, "grad_norm": 0.03323877230286598, "learning_rate": 3.2190746522849846e-05, "loss": 0.00024107899516820908, "step": 238990 }, { "epoch": 67.83990916832245, "grad_norm": 0.5278711915016174, "learning_rate": 3.218790803292648e-05, "loss": 0.0002707468345761299, "step": 239000 }, { "epoch": 67.83990916832245, "eval_accuracy": 0.9790169771730146, "eval_loss": 0.0726800188422203, "eval_runtime": 32.7351, "eval_samples_per_second": 480.432, "eval_steps_per_second": 7.515, "step": 239000 }, { "epoch": 67.84274765824581, "grad_norm": 0.08891092240810394, "learning_rate": 3.218506954300312e-05, "loss": 0.00024964120239019395, "step": 239010 }, { "epoch": 67.84558614816918, "grad_norm": 0.010999798774719238, "learning_rate": 3.218223105307976e-05, "loss": 0.0009563446044921875, "step": 239020 }, { "epoch": 67.84842463809254, "grad_norm": 0.09018875658512115, "learning_rate": 3.2179392563156405e-05, "loss": 0.0006124503910541534, "step": 239030 }, { "epoch": 67.8512631280159, "grad_norm": 0.11560098826885223, "learning_rate": 3.217655407323304e-05, "loss": 0.0005053102970123291, "step": 239040 }, { "epoch": 67.85410161793925, "grad_norm": 0.0025701841805130243, "learning_rate": 3.217371558330968e-05, "loss": 0.0006140125915408134, "step": 239050 }, { "epoch": 67.85694010786261, "grad_norm": 0.01077443640679121, "learning_rate": 3.217087709338632e-05, "loss": 8.740071207284928e-05, "step": 239060 }, { "epoch": 67.85977859778598, "grad_norm": 0.027358269318938255, "learning_rate": 3.2168038603462957e-05, "loss": 9.042751044034958e-05, "step": 239070 }, { "epoch": 67.86261708770934, "grad_norm": 0.049554865807294846, "learning_rate": 3.21652001135396e-05, "loss": 0.00022278353571891785, "step": 239080 }, { "epoch": 67.8654555776327, "grad_norm": 0.027202622964978218, "learning_rate": 3.216236162361624e-05, "loss": 6.805919110774994e-05, "step": 239090 }, { "epoch": 67.86829406755606, "grad_norm": 0.18732291460037231, "learning_rate": 3.2159523133692874e-05, "loss": 9.880848228931427e-05, "step": 239100 }, { "epoch": 67.87113255747943, "grad_norm": 0.08058620244264603, "learning_rate": 3.2156684643769515e-05, "loss": 9.300950914621353e-05, "step": 239110 }, { "epoch": 67.87397104740278, "grad_norm": 0.09692882746458054, "learning_rate": 3.215384615384616e-05, "loss": 0.00022635981440544128, "step": 239120 }, { "epoch": 67.87680953732614, "grad_norm": 0.03624165803194046, "learning_rate": 3.215100766392279e-05, "loss": 0.0003705183044075966, "step": 239130 }, { "epoch": 67.8796480272495, "grad_norm": 0.008550352416932583, "learning_rate": 3.214816917399943e-05, "loss": 0.0002673080191016197, "step": 239140 }, { "epoch": 67.88248651717286, "grad_norm": 0.022851472720503807, "learning_rate": 3.2145330684076074e-05, "loss": 0.00014623478055000305, "step": 239150 }, { "epoch": 67.88532500709623, "grad_norm": 0.032431501895189285, "learning_rate": 3.2142492194152715e-05, "loss": 0.0001826748251914978, "step": 239160 }, { "epoch": 67.88816349701959, "grad_norm": 0.0038551161997020245, "learning_rate": 3.213965370422935e-05, "loss": 0.0011141641065478326, "step": 239170 }, { "epoch": 67.89100198694295, "grad_norm": 0.055525630712509155, "learning_rate": 3.213681521430599e-05, "loss": 0.0006488323211669922, "step": 239180 }, { "epoch": 67.8938404768663, "grad_norm": 0.1834893822669983, "learning_rate": 3.213397672438263e-05, "loss": 0.0006844073534011841, "step": 239190 }, { "epoch": 67.89667896678966, "grad_norm": 0.025406358763575554, "learning_rate": 3.213113823445927e-05, "loss": 0.007703074812889099, "step": 239200 }, { "epoch": 67.89951745671303, "grad_norm": 0.056245721876621246, "learning_rate": 3.212829974453591e-05, "loss": 0.00015945956110954284, "step": 239210 }, { "epoch": 67.90235594663639, "grad_norm": 0.017768876627087593, "learning_rate": 3.212546125461255e-05, "loss": 0.00016051698476076125, "step": 239220 }, { "epoch": 67.90519443655975, "grad_norm": 0.010318760760128498, "learning_rate": 3.2122622764689185e-05, "loss": 0.00022235512733459473, "step": 239230 }, { "epoch": 67.90803292648312, "grad_norm": 0.12045524269342422, "learning_rate": 3.2119784274765826e-05, "loss": 0.0001407502219080925, "step": 239240 }, { "epoch": 67.91087141640647, "grad_norm": 0.020207002758979797, "learning_rate": 3.211694578484247e-05, "loss": 0.00024699252098798753, "step": 239250 }, { "epoch": 67.91370990632983, "grad_norm": 0.012565678916871548, "learning_rate": 3.21141072949191e-05, "loss": 0.00024842359125614165, "step": 239260 }, { "epoch": 67.91654839625319, "grad_norm": 0.015937281772494316, "learning_rate": 3.2111268804995743e-05, "loss": 0.0003131747245788574, "step": 239270 }, { "epoch": 67.91938688617655, "grad_norm": 0.015091205015778542, "learning_rate": 3.2108430315072385e-05, "loss": 0.00020168200135231018, "step": 239280 }, { "epoch": 67.92222537609992, "grad_norm": 1.3054227828979492, "learning_rate": 3.2105591825149026e-05, "loss": 0.00245161484926939, "step": 239290 }, { "epoch": 67.92506386602328, "grad_norm": 0.5125373601913452, "learning_rate": 3.210275333522566e-05, "loss": 0.00034980140626430514, "step": 239300 }, { "epoch": 67.92790235594664, "grad_norm": 0.2078283131122589, "learning_rate": 3.2099914845302295e-05, "loss": 0.0002550220116972923, "step": 239310 }, { "epoch": 67.93074084586999, "grad_norm": 0.011252552270889282, "learning_rate": 3.2097076355378943e-05, "loss": 0.0008575843647122383, "step": 239320 }, { "epoch": 67.93357933579335, "grad_norm": 0.05154822766780853, "learning_rate": 3.209423786545558e-05, "loss": 0.00016746986657381057, "step": 239330 }, { "epoch": 67.93641782571672, "grad_norm": 0.26395636796951294, "learning_rate": 3.209139937553222e-05, "loss": 0.0006748367100954056, "step": 239340 }, { "epoch": 67.93925631564008, "grad_norm": 0.018109094351530075, "learning_rate": 3.208856088560886e-05, "loss": 0.00014802105724811553, "step": 239350 }, { "epoch": 67.94209480556344, "grad_norm": 0.8936353325843811, "learning_rate": 3.2085722395685495e-05, "loss": 0.000566783919930458, "step": 239360 }, { "epoch": 67.9449332954868, "grad_norm": 5.948686599731445, "learning_rate": 3.208288390576214e-05, "loss": 0.0006142910569906235, "step": 239370 }, { "epoch": 67.94777178541017, "grad_norm": 2.0921225547790527, "learning_rate": 3.208004541583878e-05, "loss": 0.006413586437702179, "step": 239380 }, { "epoch": 67.95061027533352, "grad_norm": 0.41564705967903137, "learning_rate": 3.207720692591541e-05, "loss": 0.0048904623836278915, "step": 239390 }, { "epoch": 67.95344876525688, "grad_norm": 0.025140922516584396, "learning_rate": 3.2074368435992054e-05, "loss": 0.00023095104843378066, "step": 239400 }, { "epoch": 67.95628725518024, "grad_norm": 0.01304276566952467, "learning_rate": 3.2071529946068696e-05, "loss": 0.0035524383187294007, "step": 239410 }, { "epoch": 67.9591257451036, "grad_norm": 0.01920982077717781, "learning_rate": 3.206869145614533e-05, "loss": 0.0005229920148849488, "step": 239420 }, { "epoch": 67.96196423502697, "grad_norm": 0.024764643982052803, "learning_rate": 3.206585296622197e-05, "loss": 0.00040982067584991453, "step": 239430 }, { "epoch": 67.96480272495033, "grad_norm": 0.1697756052017212, "learning_rate": 3.2063014476298606e-05, "loss": 0.0008992278948426246, "step": 239440 }, { "epoch": 67.96764121487368, "grad_norm": 0.0502573698759079, "learning_rate": 3.2060175986375254e-05, "loss": 0.00046400073915719986, "step": 239450 }, { "epoch": 67.97047970479704, "grad_norm": 0.4333125352859497, "learning_rate": 3.205733749645189e-05, "loss": 0.00027176514267921446, "step": 239460 }, { "epoch": 67.9733181947204, "grad_norm": 0.27061450481414795, "learning_rate": 3.2054499006528523e-05, "loss": 0.00045074857771396635, "step": 239470 }, { "epoch": 67.97615668464377, "grad_norm": 0.020894957706332207, "learning_rate": 3.205166051660517e-05, "loss": 0.00021269712597131728, "step": 239480 }, { "epoch": 67.97899517456713, "grad_norm": 0.018411338329315186, "learning_rate": 3.2048822026681806e-05, "loss": 0.00020320788025856017, "step": 239490 }, { "epoch": 67.9818336644905, "grad_norm": 0.011295558884739876, "learning_rate": 3.204598353675845e-05, "loss": 0.00048033874481916427, "step": 239500 }, { "epoch": 67.9818336644905, "eval_accuracy": 0.9797164112672474, "eval_loss": 0.07567338645458221, "eval_runtime": 32.7689, "eval_samples_per_second": 479.937, "eval_steps_per_second": 7.507, "step": 239500 }, { "epoch": 67.98467215441386, "grad_norm": 0.03979369252920151, "learning_rate": 3.204314504683509e-05, "loss": 0.00019617974758148193, "step": 239510 }, { "epoch": 67.9875106443372, "grad_norm": 0.020957887172698975, "learning_rate": 3.2040306556911724e-05, "loss": 0.0007206477224826813, "step": 239520 }, { "epoch": 67.99034913426057, "grad_norm": 1.2131863832473755, "learning_rate": 3.2037468066988365e-05, "loss": 0.0008562853559851647, "step": 239530 }, { "epoch": 67.99318762418393, "grad_norm": 0.07259994745254517, "learning_rate": 3.2034629577065e-05, "loss": 0.0002510426566004753, "step": 239540 }, { "epoch": 67.9960261141073, "grad_norm": 0.10652611404657364, "learning_rate": 3.203179108714164e-05, "loss": 0.00014256089925765992, "step": 239550 }, { "epoch": 67.99886460403066, "grad_norm": 0.4638199806213379, "learning_rate": 3.202895259721828e-05, "loss": 0.00019073598086833955, "step": 239560 }, { "epoch": 68.00170309395402, "grad_norm": 0.01527088787406683, "learning_rate": 3.202611410729492e-05, "loss": 0.0014104208908975123, "step": 239570 }, { "epoch": 68.00454158387738, "grad_norm": 0.672959566116333, "learning_rate": 3.2023275617371565e-05, "loss": 0.0001778785139322281, "step": 239580 }, { "epoch": 68.00738007380073, "grad_norm": 0.14551571011543274, "learning_rate": 3.20204371274482e-05, "loss": 0.0003165684640407562, "step": 239590 }, { "epoch": 68.0102185637241, "grad_norm": 0.026756251230835915, "learning_rate": 3.2017598637524834e-05, "loss": 0.00011974945664405823, "step": 239600 }, { "epoch": 68.01305705364746, "grad_norm": 0.05024335905909538, "learning_rate": 3.201476014760148e-05, "loss": 0.0001674514263868332, "step": 239610 }, { "epoch": 68.01589554357082, "grad_norm": 0.03060646913945675, "learning_rate": 3.201192165767812e-05, "loss": 0.0003626551479101181, "step": 239620 }, { "epoch": 68.01873403349418, "grad_norm": 0.023660900071263313, "learning_rate": 3.200908316775476e-05, "loss": 0.0005819117650389671, "step": 239630 }, { "epoch": 68.02157252341755, "grad_norm": 0.10477801412343979, "learning_rate": 3.200624467783139e-05, "loss": 0.0010633833706378936, "step": 239640 }, { "epoch": 68.02441101334091, "grad_norm": 0.06702311336994171, "learning_rate": 3.2003406187908034e-05, "loss": 0.0002253001555800438, "step": 239650 }, { "epoch": 68.02724950326426, "grad_norm": 0.03948019817471504, "learning_rate": 3.2000567697984676e-05, "loss": 8.474420756101608e-05, "step": 239660 }, { "epoch": 68.03008799318762, "grad_norm": 0.0032265442423522472, "learning_rate": 3.199772920806131e-05, "loss": 0.00021944548934698104, "step": 239670 }, { "epoch": 68.03292648311098, "grad_norm": 0.11221322417259216, "learning_rate": 3.199489071813795e-05, "loss": 0.00021465197205543517, "step": 239680 }, { "epoch": 68.03576497303435, "grad_norm": 0.3614286184310913, "learning_rate": 3.199205222821459e-05, "loss": 0.0001493636518716812, "step": 239690 }, { "epoch": 68.03860346295771, "grad_norm": 0.016395343467593193, "learning_rate": 3.198921373829123e-05, "loss": 0.00014828331768512726, "step": 239700 }, { "epoch": 68.04144195288107, "grad_norm": 0.05956179276108742, "learning_rate": 3.1986375248367876e-05, "loss": 0.00017050504684448242, "step": 239710 }, { "epoch": 68.04428044280442, "grad_norm": 0.015231164172291756, "learning_rate": 3.198353675844451e-05, "loss": 9.020362049341201e-05, "step": 239720 }, { "epoch": 68.04711893272778, "grad_norm": 0.048278044909238815, "learning_rate": 3.1980698268521145e-05, "loss": 0.00017325934022665023, "step": 239730 }, { "epoch": 68.04995742265115, "grad_norm": 0.14237728714942932, "learning_rate": 3.1977859778597786e-05, "loss": 8.321087807416916e-05, "step": 239740 }, { "epoch": 68.05279591257451, "grad_norm": 0.02084796316921711, "learning_rate": 3.197502128867443e-05, "loss": 9.985268115997314e-05, "step": 239750 }, { "epoch": 68.05563440249787, "grad_norm": 0.0071514141745865345, "learning_rate": 3.197218279875107e-05, "loss": 9.639766067266464e-05, "step": 239760 }, { "epoch": 68.05847289242124, "grad_norm": 0.007235251367092133, "learning_rate": 3.1969344308827704e-05, "loss": 4.810839891433716e-05, "step": 239770 }, { "epoch": 68.0613113823446, "grad_norm": 0.36641374230384827, "learning_rate": 3.1966505818904345e-05, "loss": 0.00017156768590211868, "step": 239780 }, { "epoch": 68.06414987226795, "grad_norm": 0.3767191171646118, "learning_rate": 3.1963667328980986e-05, "loss": 0.00010190773755311966, "step": 239790 }, { "epoch": 68.06698836219131, "grad_norm": 0.01663331314921379, "learning_rate": 3.196082883905762e-05, "loss": 0.00015477370470762253, "step": 239800 }, { "epoch": 68.06982685211467, "grad_norm": 0.018213288858532906, "learning_rate": 3.195799034913426e-05, "loss": 6.615892052650452e-05, "step": 239810 }, { "epoch": 68.07266534203804, "grad_norm": 0.02156265266239643, "learning_rate": 3.1955151859210904e-05, "loss": 9.41479578614235e-05, "step": 239820 }, { "epoch": 68.0755038319614, "grad_norm": 0.009384107775986195, "learning_rate": 3.195231336928754e-05, "loss": 5.077756941318512e-05, "step": 239830 }, { "epoch": 68.07834232188476, "grad_norm": 0.009477132931351662, "learning_rate": 3.194947487936418e-05, "loss": 7.892083376646042e-05, "step": 239840 }, { "epoch": 68.08118081180812, "grad_norm": 0.019423970952630043, "learning_rate": 3.194663638944082e-05, "loss": 7.848497480154038e-05, "step": 239850 }, { "epoch": 68.08401930173147, "grad_norm": 0.029515594244003296, "learning_rate": 3.1943797899517456e-05, "loss": 8.597448468208312e-05, "step": 239860 }, { "epoch": 68.08685779165484, "grad_norm": 0.08134137094020844, "learning_rate": 3.19409594095941e-05, "loss": 6.95016235113144e-05, "step": 239870 }, { "epoch": 68.0896962815782, "grad_norm": 0.01796061173081398, "learning_rate": 3.193812091967074e-05, "loss": 9.283702820539474e-05, "step": 239880 }, { "epoch": 68.09253477150156, "grad_norm": 0.0036668998654931784, "learning_rate": 3.193528242974737e-05, "loss": 0.00021968837827444077, "step": 239890 }, { "epoch": 68.09537326142492, "grad_norm": 0.014884999953210354, "learning_rate": 3.1932443939824014e-05, "loss": 0.00020617879927158355, "step": 239900 }, { "epoch": 68.09821175134829, "grad_norm": 0.02411527745425701, "learning_rate": 3.1929605449900656e-05, "loss": 0.0003760108724236488, "step": 239910 }, { "epoch": 68.10105024127165, "grad_norm": 0.20173344016075134, "learning_rate": 3.19267669599773e-05, "loss": 0.00012201908975839615, "step": 239920 }, { "epoch": 68.103888731195, "grad_norm": 0.009099855087697506, "learning_rate": 3.192392847005393e-05, "loss": 0.00021886173635721207, "step": 239930 }, { "epoch": 68.10672722111836, "grad_norm": 0.025608666241168976, "learning_rate": 3.1921089980130566e-05, "loss": 0.00010616090148687362, "step": 239940 }, { "epoch": 68.10956571104172, "grad_norm": 0.08217760175466537, "learning_rate": 3.1918251490207215e-05, "loss": 0.0012155858799815177, "step": 239950 }, { "epoch": 68.11240420096509, "grad_norm": 0.11911110579967499, "learning_rate": 3.191541300028385e-05, "loss": 0.00010257884860038757, "step": 239960 }, { "epoch": 68.11524269088845, "grad_norm": 0.061949148774147034, "learning_rate": 3.191257451036049e-05, "loss": 0.0002506054937839508, "step": 239970 }, { "epoch": 68.11808118081181, "grad_norm": 0.009435508400201797, "learning_rate": 3.190973602043713e-05, "loss": 0.0008081628009676933, "step": 239980 }, { "epoch": 68.12091967073516, "grad_norm": 0.38836342096328735, "learning_rate": 3.1906897530513767e-05, "loss": 0.001847013644874096, "step": 239990 }, { "epoch": 68.12375816065853, "grad_norm": 0.40096041560173035, "learning_rate": 3.190405904059041e-05, "loss": 0.009953907877206802, "step": 240000 }, { "epoch": 68.12375816065853, "eval_accuracy": 0.9759649011254531, "eval_loss": 0.09003924578428268, "eval_runtime": 32.2303, "eval_samples_per_second": 487.957, "eval_steps_per_second": 7.633, "step": 240000 }, { "epoch": 68.12659665058189, "grad_norm": 0.03186274319887161, "learning_rate": 3.190122055066705e-05, "loss": 0.0025125518441200256, "step": 240010 }, { "epoch": 68.12943514050525, "grad_norm": 0.04875575378537178, "learning_rate": 3.1898382060743684e-05, "loss": 0.00014507118612527847, "step": 240020 }, { "epoch": 68.13227363042861, "grad_norm": 0.15903933346271515, "learning_rate": 3.1895543570820325e-05, "loss": 0.0008055312559008598, "step": 240030 }, { "epoch": 68.13511212035198, "grad_norm": 0.028924977406859398, "learning_rate": 3.189270508089696e-05, "loss": 0.000626072846353054, "step": 240040 }, { "epoch": 68.13795061027534, "grad_norm": 0.023683154955506325, "learning_rate": 3.188986659097361e-05, "loss": 0.00023929905146360397, "step": 240050 }, { "epoch": 68.14078910019869, "grad_norm": 0.09223537147045135, "learning_rate": 3.188702810105024e-05, "loss": 0.00028151758015155793, "step": 240060 }, { "epoch": 68.14362759012205, "grad_norm": 0.15968625247478485, "learning_rate": 3.188418961112688e-05, "loss": 0.00018699206411838533, "step": 240070 }, { "epoch": 68.14646608004541, "grad_norm": 0.03196307271718979, "learning_rate": 3.1881351121203525e-05, "loss": 0.0007780559360980987, "step": 240080 }, { "epoch": 68.14930456996878, "grad_norm": 0.3090411424636841, "learning_rate": 3.187851263128016e-05, "loss": 0.002107728458940983, "step": 240090 }, { "epoch": 68.15214305989214, "grad_norm": 0.04469125717878342, "learning_rate": 3.18756741413568e-05, "loss": 0.018842391669750214, "step": 240100 }, { "epoch": 68.1549815498155, "grad_norm": 0.04250493273139, "learning_rate": 3.187283565143344e-05, "loss": 0.0026277715340256693, "step": 240110 }, { "epoch": 68.15782003973887, "grad_norm": 0.22871221601963043, "learning_rate": 3.186999716151008e-05, "loss": 0.0003323571756482124, "step": 240120 }, { "epoch": 68.16065852966221, "grad_norm": 0.00623375503346324, "learning_rate": 3.186715867158672e-05, "loss": 0.00044661667197942734, "step": 240130 }, { "epoch": 68.16349701958558, "grad_norm": 0.06654512137174606, "learning_rate": 3.186432018166335e-05, "loss": 0.00046465154737234115, "step": 240140 }, { "epoch": 68.16633550950894, "grad_norm": 0.16486942768096924, "learning_rate": 3.1861481691739995e-05, "loss": 0.0005045639351010323, "step": 240150 }, { "epoch": 68.1691739994323, "grad_norm": 0.0629473403096199, "learning_rate": 3.1858643201816636e-05, "loss": 0.006023868173360825, "step": 240160 }, { "epoch": 68.17201248935567, "grad_norm": 0.04611983522772789, "learning_rate": 3.185580471189327e-05, "loss": 0.0003699101507663727, "step": 240170 }, { "epoch": 68.17485097927903, "grad_norm": 0.1408795416355133, "learning_rate": 3.185296622196992e-05, "loss": 0.0002438744530081749, "step": 240180 }, { "epoch": 68.17768946920238, "grad_norm": 0.02219211496412754, "learning_rate": 3.185012773204655e-05, "loss": 0.0004327865317463875, "step": 240190 }, { "epoch": 68.18052795912574, "grad_norm": 0.06677475571632385, "learning_rate": 3.184728924212319e-05, "loss": 0.0001890163868665695, "step": 240200 }, { "epoch": 68.1833664490491, "grad_norm": 0.035636577755212784, "learning_rate": 3.1844450752199836e-05, "loss": 0.00013316087424755096, "step": 240210 }, { "epoch": 68.18620493897247, "grad_norm": 0.03240849822759628, "learning_rate": 3.184161226227647e-05, "loss": 0.00017379093915224076, "step": 240220 }, { "epoch": 68.18904342889583, "grad_norm": 0.004642915446311235, "learning_rate": 3.183877377235311e-05, "loss": 0.0001903582364320755, "step": 240230 }, { "epoch": 68.19188191881919, "grad_norm": 0.05091605335474014, "learning_rate": 3.183593528242975e-05, "loss": 0.00011349134147167205, "step": 240240 }, { "epoch": 68.19472040874255, "grad_norm": 0.07790250331163406, "learning_rate": 3.183309679250639e-05, "loss": 0.00011327918618917466, "step": 240250 }, { "epoch": 68.1975588986659, "grad_norm": 0.015658201649785042, "learning_rate": 3.183025830258303e-05, "loss": 9.598899632692336e-05, "step": 240260 }, { "epoch": 68.20039738858927, "grad_norm": 0.020649883896112442, "learning_rate": 3.1827419812659664e-05, "loss": 0.0001398073509335518, "step": 240270 }, { "epoch": 68.20323587851263, "grad_norm": 0.019328974187374115, "learning_rate": 3.1824581322736305e-05, "loss": 0.00028530675917863845, "step": 240280 }, { "epoch": 68.20607436843599, "grad_norm": 0.045291125774383545, "learning_rate": 3.182174283281295e-05, "loss": 8.118730038404464e-05, "step": 240290 }, { "epoch": 68.20891285835935, "grad_norm": 0.03199334442615509, "learning_rate": 3.181890434288958e-05, "loss": 0.00014265794306993483, "step": 240300 }, { "epoch": 68.21175134828272, "grad_norm": 0.14129292964935303, "learning_rate": 3.181606585296622e-05, "loss": 0.00032215677201747895, "step": 240310 }, { "epoch": 68.21458983820608, "grad_norm": 0.007534525822848082, "learning_rate": 3.1813227363042864e-05, "loss": 9.383615106344223e-05, "step": 240320 }, { "epoch": 68.21742832812943, "grad_norm": 0.03280192241072655, "learning_rate": 3.18103888731195e-05, "loss": 0.00014167334884405137, "step": 240330 }, { "epoch": 68.22026681805279, "grad_norm": 0.016721315681934357, "learning_rate": 3.180755038319614e-05, "loss": 9.130574762821198e-05, "step": 240340 }, { "epoch": 68.22310530797616, "grad_norm": 0.0037161665968596935, "learning_rate": 3.180471189327278e-05, "loss": 0.00033390335738658905, "step": 240350 }, { "epoch": 68.22594379789952, "grad_norm": 0.011655728332698345, "learning_rate": 3.1801873403349416e-05, "loss": 0.00029209908097982407, "step": 240360 }, { "epoch": 68.22878228782288, "grad_norm": 0.2952776253223419, "learning_rate": 3.179903491342606e-05, "loss": 0.00014993790537118912, "step": 240370 }, { "epoch": 68.23162077774624, "grad_norm": 0.037166547030210495, "learning_rate": 3.17961964235027e-05, "loss": 0.0001272350549697876, "step": 240380 }, { "epoch": 68.2344592676696, "grad_norm": 0.21284136176109314, "learning_rate": 3.179335793357934e-05, "loss": 0.00010514315217733384, "step": 240390 }, { "epoch": 68.23729775759296, "grad_norm": 0.03443075343966484, "learning_rate": 3.1790519443655975e-05, "loss": 0.00011640992015600204, "step": 240400 }, { "epoch": 68.24013624751632, "grad_norm": 0.004973619244992733, "learning_rate": 3.1787680953732616e-05, "loss": 0.00010218154639005662, "step": 240410 }, { "epoch": 68.24297473743968, "grad_norm": 2.0718460083007812, "learning_rate": 3.178484246380926e-05, "loss": 0.00034552887082099913, "step": 240420 }, { "epoch": 68.24581322736304, "grad_norm": 0.029423242434859276, "learning_rate": 3.178200397388589e-05, "loss": 0.00013344772160053253, "step": 240430 }, { "epoch": 68.2486517172864, "grad_norm": 0.018500659614801407, "learning_rate": 3.1779165483962534e-05, "loss": 0.00037356726825237273, "step": 240440 }, { "epoch": 68.25149020720977, "grad_norm": 0.03547638654708862, "learning_rate": 3.1776326994039175e-05, "loss": 0.00010527763515710831, "step": 240450 }, { "epoch": 68.25432869713312, "grad_norm": 0.06997086852788925, "learning_rate": 3.177348850411581e-05, "loss": 0.00012017395347356797, "step": 240460 }, { "epoch": 68.25716718705648, "grad_norm": 0.05963843688368797, "learning_rate": 3.177065001419245e-05, "loss": 0.00015008337795734404, "step": 240470 }, { "epoch": 68.26000567697984, "grad_norm": 0.007205261383205652, "learning_rate": 3.176781152426909e-05, "loss": 0.00014366414397954942, "step": 240480 }, { "epoch": 68.26284416690321, "grad_norm": 0.08358176052570343, "learning_rate": 3.176497303434573e-05, "loss": 0.00010570883750915527, "step": 240490 }, { "epoch": 68.26568265682657, "grad_norm": 0.16327393054962158, "learning_rate": 3.176213454442237e-05, "loss": 0.00011135190725326538, "step": 240500 }, { "epoch": 68.26568265682657, "eval_accuracy": 0.9813696191263432, "eval_loss": 0.07210800796747208, "eval_runtime": 33.0941, "eval_samples_per_second": 475.22, "eval_steps_per_second": 7.433, "step": 240500 }, { "epoch": 68.26852114674993, "grad_norm": 0.2555639147758484, "learning_rate": 3.175929605449901e-05, "loss": 0.00024756919592618944, "step": 240510 }, { "epoch": 68.2713596366733, "grad_norm": 0.020155230537056923, "learning_rate": 3.175645756457565e-05, "loss": 0.006480222940444947, "step": 240520 }, { "epoch": 68.27419812659664, "grad_norm": 0.20619036257266998, "learning_rate": 3.1753619074652286e-05, "loss": 0.0003016818314790726, "step": 240530 }, { "epoch": 68.27703661652001, "grad_norm": 0.05673827975988388, "learning_rate": 3.175078058472892e-05, "loss": 7.147938013076782e-05, "step": 240540 }, { "epoch": 68.27987510644337, "grad_norm": 0.08694854378700256, "learning_rate": 3.174794209480557e-05, "loss": 0.00044311489909887314, "step": 240550 }, { "epoch": 68.28271359636673, "grad_norm": 0.08901243656873703, "learning_rate": 3.17451036048822e-05, "loss": 0.0002748033031821251, "step": 240560 }, { "epoch": 68.2855520862901, "grad_norm": 0.04600939527153969, "learning_rate": 3.1742265114958844e-05, "loss": 0.00024493541568517687, "step": 240570 }, { "epoch": 68.28839057621346, "grad_norm": 0.030282894149422646, "learning_rate": 3.1739426625035486e-05, "loss": 0.00032851714640855787, "step": 240580 }, { "epoch": 68.29122906613682, "grad_norm": 0.02183794043958187, "learning_rate": 3.173658813511212e-05, "loss": 0.00020386949181556703, "step": 240590 }, { "epoch": 68.29406755606017, "grad_norm": 0.016404390335083008, "learning_rate": 3.173374964518876e-05, "loss": 0.0004429707303643227, "step": 240600 }, { "epoch": 68.29690604598353, "grad_norm": 0.06766848266124725, "learning_rate": 3.17309111552654e-05, "loss": 0.003656989336013794, "step": 240610 }, { "epoch": 68.2997445359069, "grad_norm": 0.12801086902618408, "learning_rate": 3.172807266534204e-05, "loss": 0.0003150664269924164, "step": 240620 }, { "epoch": 68.30258302583026, "grad_norm": 0.014854386448860168, "learning_rate": 3.172523417541868e-05, "loss": 0.0001234179362654686, "step": 240630 }, { "epoch": 68.30542151575362, "grad_norm": 0.023155972361564636, "learning_rate": 3.1722395685495314e-05, "loss": 0.00023823492228984832, "step": 240640 }, { "epoch": 68.30826000567698, "grad_norm": 0.03179442882537842, "learning_rate": 3.171955719557196e-05, "loss": 0.004988441988825798, "step": 240650 }, { "epoch": 68.31109849560033, "grad_norm": 0.074126698076725, "learning_rate": 3.1716718705648596e-05, "loss": 0.002777617797255516, "step": 240660 }, { "epoch": 68.3139369855237, "grad_norm": 0.02923712134361267, "learning_rate": 3.171388021572523e-05, "loss": 0.00030777230858802795, "step": 240670 }, { "epoch": 68.31677547544706, "grad_norm": 0.0108087919652462, "learning_rate": 3.171104172580188e-05, "loss": 0.0021909184753894804, "step": 240680 }, { "epoch": 68.31961396537042, "grad_norm": 0.028870506212115288, "learning_rate": 3.1708203235878514e-05, "loss": 0.0001361273229122162, "step": 240690 }, { "epoch": 68.32245245529379, "grad_norm": 0.019778065383434296, "learning_rate": 3.1705364745955155e-05, "loss": 0.0007711024954915047, "step": 240700 }, { "epoch": 68.32529094521715, "grad_norm": 0.4539341628551483, "learning_rate": 3.1702526256031796e-05, "loss": 0.0001540055498480797, "step": 240710 }, { "epoch": 68.32812943514051, "grad_norm": 0.007098381407558918, "learning_rate": 3.169968776610843e-05, "loss": 0.00012554153800010682, "step": 240720 }, { "epoch": 68.33096792506386, "grad_norm": 0.05288795009255409, "learning_rate": 3.169684927618507e-05, "loss": 0.00014040619134902954, "step": 240730 }, { "epoch": 68.33380641498722, "grad_norm": 0.11315499246120453, "learning_rate": 3.1694010786261714e-05, "loss": 0.00018118899315595628, "step": 240740 }, { "epoch": 68.33664490491059, "grad_norm": 0.019296443089842796, "learning_rate": 3.169117229633835e-05, "loss": 0.00017240773886442184, "step": 240750 }, { "epoch": 68.33948339483395, "grad_norm": 0.04340608790516853, "learning_rate": 3.168833380641499e-05, "loss": 0.00013088025152683259, "step": 240760 }, { "epoch": 68.34232188475731, "grad_norm": 0.19458745419979095, "learning_rate": 3.1685495316491624e-05, "loss": 0.0001453593373298645, "step": 240770 }, { "epoch": 68.34516037468067, "grad_norm": 0.10309793055057526, "learning_rate": 3.1682656826568266e-05, "loss": 0.00020161587744951248, "step": 240780 }, { "epoch": 68.34799886460404, "grad_norm": 0.018246382474899292, "learning_rate": 3.167981833664491e-05, "loss": 7.169153541326523e-05, "step": 240790 }, { "epoch": 68.35083735452739, "grad_norm": 0.03828425705432892, "learning_rate": 3.167697984672154e-05, "loss": 0.0001765858381986618, "step": 240800 }, { "epoch": 68.35367584445075, "grad_norm": 0.015065059997141361, "learning_rate": 3.167414135679819e-05, "loss": 6.811358034610748e-05, "step": 240810 }, { "epoch": 68.35651433437411, "grad_norm": 0.10984756052494049, "learning_rate": 3.1671302866874824e-05, "loss": 9.066332131624222e-05, "step": 240820 }, { "epoch": 68.35935282429747, "grad_norm": 0.005068101920187473, "learning_rate": 3.166846437695146e-05, "loss": 0.00018775071948766707, "step": 240830 }, { "epoch": 68.36219131422084, "grad_norm": 0.019757352769374847, "learning_rate": 3.166562588702811e-05, "loss": 0.00018245410174131394, "step": 240840 }, { "epoch": 68.3650298041442, "grad_norm": 0.017005158588290215, "learning_rate": 3.166278739710474e-05, "loss": 9.020064026117325e-05, "step": 240850 }, { "epoch": 68.36786829406756, "grad_norm": 0.027766020968556404, "learning_rate": 3.165994890718138e-05, "loss": 0.0007189234718680381, "step": 240860 }, { "epoch": 68.37070678399091, "grad_norm": 0.04713226854801178, "learning_rate": 3.165711041725802e-05, "loss": 0.00016366541385650634, "step": 240870 }, { "epoch": 68.37354527391427, "grad_norm": 0.5719568729400635, "learning_rate": 3.165427192733466e-05, "loss": 0.0006589418277144432, "step": 240880 }, { "epoch": 68.37638376383764, "grad_norm": 0.011872733011841774, "learning_rate": 3.16514334374113e-05, "loss": 0.00018921252340078353, "step": 240890 }, { "epoch": 68.379222253761, "grad_norm": 0.03641471266746521, "learning_rate": 3.1648594947487935e-05, "loss": 0.0002554502338171005, "step": 240900 }, { "epoch": 68.38206074368436, "grad_norm": 0.013758386485278606, "learning_rate": 3.1645756457564576e-05, "loss": 0.0002008989453315735, "step": 240910 }, { "epoch": 68.38489923360773, "grad_norm": 0.007431195117533207, "learning_rate": 3.164291796764122e-05, "loss": 0.00019383709877729416, "step": 240920 }, { "epoch": 68.38773772353107, "grad_norm": 0.025080854073166847, "learning_rate": 3.164007947771785e-05, "loss": 8.591152727603913e-05, "step": 240930 }, { "epoch": 68.39057621345444, "grad_norm": 0.057088349014520645, "learning_rate": 3.16372409877945e-05, "loss": 0.00016024746000766754, "step": 240940 }, { "epoch": 68.3934147033778, "grad_norm": 0.008913598954677582, "learning_rate": 3.1634402497871135e-05, "loss": 0.0001309666782617569, "step": 240950 }, { "epoch": 68.39625319330116, "grad_norm": 0.38123971223831177, "learning_rate": 3.163156400794777e-05, "loss": 0.00014156270772218703, "step": 240960 }, { "epoch": 68.39909168322453, "grad_norm": 0.012061921879649162, "learning_rate": 3.162872551802441e-05, "loss": 8.814632892608642e-05, "step": 240970 }, { "epoch": 68.40193017314789, "grad_norm": 0.016239678487181664, "learning_rate": 3.162588702810105e-05, "loss": 0.0001998158171772957, "step": 240980 }, { "epoch": 68.40476866307125, "grad_norm": 2.3886466026306152, "learning_rate": 3.1623048538177694e-05, "loss": 0.000376242958009243, "step": 240990 }, { "epoch": 68.4076071529946, "grad_norm": 0.03412915766239166, "learning_rate": 3.162021004825433e-05, "loss": 0.0010751022025942802, "step": 241000 }, { "epoch": 68.4076071529946, "eval_accuracy": 0.9796528263495898, "eval_loss": 0.07736816257238388, "eval_runtime": 33.2466, "eval_samples_per_second": 473.041, "eval_steps_per_second": 7.399, "step": 241000 }, { "epoch": 68.41044564291796, "grad_norm": 0.0825410708785057, "learning_rate": 3.161737155833097e-05, "loss": 0.00025074072182178496, "step": 241010 }, { "epoch": 68.41328413284133, "grad_norm": 0.30977728962898254, "learning_rate": 3.161453306840761e-05, "loss": 0.0009983306750655173, "step": 241020 }, { "epoch": 68.41612262276469, "grad_norm": 0.13181768357753754, "learning_rate": 3.1611694578484246e-05, "loss": 0.0002491135150194168, "step": 241030 }, { "epoch": 68.41896111268805, "grad_norm": 0.011325988918542862, "learning_rate": 3.160885608856089e-05, "loss": 0.00018528103828430176, "step": 241040 }, { "epoch": 68.42179960261142, "grad_norm": 0.1627286672592163, "learning_rate": 3.160601759863753e-05, "loss": 0.0002881409600377083, "step": 241050 }, { "epoch": 68.42463809253478, "grad_norm": 0.05323462933301926, "learning_rate": 3.160317910871416e-05, "loss": 8.702389895915985e-05, "step": 241060 }, { "epoch": 68.42747658245813, "grad_norm": 0.06607991456985474, "learning_rate": 3.1600340618790805e-05, "loss": 0.00015744995325803757, "step": 241070 }, { "epoch": 68.43031507238149, "grad_norm": 0.1369789093732834, "learning_rate": 3.1597502128867446e-05, "loss": 0.00010715480893850327, "step": 241080 }, { "epoch": 68.43315356230485, "grad_norm": 0.012049149721860886, "learning_rate": 3.159466363894408e-05, "loss": 7.893908768892288e-05, "step": 241090 }, { "epoch": 68.43599205222822, "grad_norm": 0.1613868772983551, "learning_rate": 3.159182514902072e-05, "loss": 0.0005562428385019302, "step": 241100 }, { "epoch": 68.43883054215158, "grad_norm": 0.030495062470436096, "learning_rate": 3.158898665909736e-05, "loss": 0.0003192981705069542, "step": 241110 }, { "epoch": 68.44166903207494, "grad_norm": 0.00868602842092514, "learning_rate": 3.1586432018166336e-05, "loss": 0.01341680735349655, "step": 241120 }, { "epoch": 68.4445075219983, "grad_norm": 0.023221289739012718, "learning_rate": 3.158359352824298e-05, "loss": 0.0013208821415901183, "step": 241130 }, { "epoch": 68.44734601192165, "grad_norm": 0.03161539509892464, "learning_rate": 3.158075503831961e-05, "loss": 0.00045418161898851394, "step": 241140 }, { "epoch": 68.45018450184502, "grad_norm": 0.011472048237919807, "learning_rate": 3.1577916548396254e-05, "loss": 0.0008943324908614158, "step": 241150 }, { "epoch": 68.45302299176838, "grad_norm": 3.1632308959960938, "learning_rate": 3.1575078058472895e-05, "loss": 0.0006538273766636849, "step": 241160 }, { "epoch": 68.45586148169174, "grad_norm": 4.361955642700195, "learning_rate": 3.157223956854953e-05, "loss": 0.00293334499001503, "step": 241170 }, { "epoch": 68.4586999716151, "grad_norm": 0.5901647806167603, "learning_rate": 3.156940107862617e-05, "loss": 0.00047734398394823075, "step": 241180 }, { "epoch": 68.46153846153847, "grad_norm": 0.008668467402458191, "learning_rate": 3.156656258870281e-05, "loss": 0.002595830149948597, "step": 241190 }, { "epoch": 68.46437695146182, "grad_norm": 0.017537206411361694, "learning_rate": 3.1563724098779454e-05, "loss": 8.814036846160889e-05, "step": 241200 }, { "epoch": 68.46721544138518, "grad_norm": 0.04018620774149895, "learning_rate": 3.156088560885609e-05, "loss": 0.0002083677798509598, "step": 241210 }, { "epoch": 68.47005393130854, "grad_norm": 0.16401775181293488, "learning_rate": 3.155804711893273e-05, "loss": 0.0001417912542819977, "step": 241220 }, { "epoch": 68.4728924212319, "grad_norm": 0.002853651763871312, "learning_rate": 3.155520862900937e-05, "loss": 0.00016058720648288728, "step": 241230 }, { "epoch": 68.47573091115527, "grad_norm": 0.025632109493017197, "learning_rate": 3.1552370139086006e-05, "loss": 0.005535374209284782, "step": 241240 }, { "epoch": 68.47856940107863, "grad_norm": 0.0072481646202504635, "learning_rate": 3.154953164916265e-05, "loss": 0.00026509743183851243, "step": 241250 }, { "epoch": 68.481407891002, "grad_norm": 0.020258864387869835, "learning_rate": 3.154669315923929e-05, "loss": 8.84251669049263e-05, "step": 241260 }, { "epoch": 68.48424638092534, "grad_norm": 16.19149398803711, "learning_rate": 3.154385466931592e-05, "loss": 0.002504400722682476, "step": 241270 }, { "epoch": 68.4870848708487, "grad_norm": 0.016419917345046997, "learning_rate": 3.1541016179392565e-05, "loss": 7.138215005397797e-05, "step": 241280 }, { "epoch": 68.48992336077207, "grad_norm": 0.42231255769729614, "learning_rate": 3.1538177689469206e-05, "loss": 0.0002232946455478668, "step": 241290 }, { "epoch": 68.49276185069543, "grad_norm": 0.006525796838104725, "learning_rate": 3.153533919954584e-05, "loss": 7.52357766032219e-05, "step": 241300 }, { "epoch": 68.4956003406188, "grad_norm": 0.005107279401272535, "learning_rate": 3.153250070962248e-05, "loss": 0.0049756869673728945, "step": 241310 }, { "epoch": 68.49843883054216, "grad_norm": 0.03064819984138012, "learning_rate": 3.152966221969912e-05, "loss": 0.00026240907609462736, "step": 241320 }, { "epoch": 68.50127732046552, "grad_norm": 0.022127514705061913, "learning_rate": 3.1526823729775765e-05, "loss": 0.0006256481632590294, "step": 241330 }, { "epoch": 68.50411581038887, "grad_norm": 10.596150398254395, "learning_rate": 3.15239852398524e-05, "loss": 0.0017246780917048454, "step": 241340 }, { "epoch": 68.50695430031223, "grad_norm": 0.011785885319113731, "learning_rate": 3.1521146749929034e-05, "loss": 0.0106500044465065, "step": 241350 }, { "epoch": 68.5097927902356, "grad_norm": 0.05056037753820419, "learning_rate": 3.151830826000568e-05, "loss": 0.0004159990698099136, "step": 241360 }, { "epoch": 68.51263128015896, "grad_norm": 0.017659587785601616, "learning_rate": 3.151546977008232e-05, "loss": 0.0006187362596392632, "step": 241370 }, { "epoch": 68.51546977008232, "grad_norm": 0.04636266082525253, "learning_rate": 3.151263128015896e-05, "loss": 0.012124112248420716, "step": 241380 }, { "epoch": 68.51830826000568, "grad_norm": 0.020287949591875076, "learning_rate": 3.15097927902356e-05, "loss": 0.0002757444977760315, "step": 241390 }, { "epoch": 68.52114674992903, "grad_norm": 0.009766354225575924, "learning_rate": 3.1506954300312234e-05, "loss": 0.0006340218707919121, "step": 241400 }, { "epoch": 68.5239852398524, "grad_norm": 0.0530916228890419, "learning_rate": 3.1504115810388875e-05, "loss": 0.0010753296315670013, "step": 241410 }, { "epoch": 68.52682372977576, "grad_norm": 0.022510992363095284, "learning_rate": 3.150127732046552e-05, "loss": 0.0005237562581896782, "step": 241420 }, { "epoch": 68.52966221969912, "grad_norm": 0.017935626208782196, "learning_rate": 3.149843883054215e-05, "loss": 0.0001840416342020035, "step": 241430 }, { "epoch": 68.53250070962248, "grad_norm": 0.11937280744314194, "learning_rate": 3.149560034061879e-05, "loss": 0.00029893908649683, "step": 241440 }, { "epoch": 68.53533919954585, "grad_norm": 0.04870352894067764, "learning_rate": 3.149276185069543e-05, "loss": 0.0001819547265768051, "step": 241450 }, { "epoch": 68.53817768946921, "grad_norm": 0.026257047429680824, "learning_rate": 3.1489923360772075e-05, "loss": 0.0003120535984635353, "step": 241460 }, { "epoch": 68.54101617939256, "grad_norm": 0.019711166620254517, "learning_rate": 3.148708487084871e-05, "loss": 0.0002435309812426567, "step": 241470 }, { "epoch": 68.54385466931592, "grad_norm": 0.1013936772942543, "learning_rate": 3.1484246380925345e-05, "loss": 0.0008049558848142624, "step": 241480 }, { "epoch": 68.54669315923928, "grad_norm": 0.11699660867452621, "learning_rate": 3.148140789100199e-05, "loss": 0.0013045551255345344, "step": 241490 }, { "epoch": 68.54953164916265, "grad_norm": 0.5633865594863892, "learning_rate": 3.147856940107863e-05, "loss": 0.0005780719220638275, "step": 241500 }, { "epoch": 68.54953164916265, "eval_accuracy": 0.9786990525847269, "eval_loss": 0.08171876519918442, "eval_runtime": 33.3069, "eval_samples_per_second": 472.184, "eval_steps_per_second": 7.386, "step": 241500 }, { "epoch": 68.55237013908601, "grad_norm": 0.07555211335420609, "learning_rate": 3.147573091115527e-05, "loss": 0.0015536515042185784, "step": 241510 }, { "epoch": 68.55520862900937, "grad_norm": 0.06259137392044067, "learning_rate": 3.147289242123191e-05, "loss": 0.003832193836569786, "step": 241520 }, { "epoch": 68.55804711893273, "grad_norm": 0.007841531187295914, "learning_rate": 3.1470053931308545e-05, "loss": 0.0008786646649241447, "step": 241530 }, { "epoch": 68.56088560885608, "grad_norm": 0.04828483611345291, "learning_rate": 3.1467215441385186e-05, "loss": 0.00023389942944049836, "step": 241540 }, { "epoch": 68.56372409877945, "grad_norm": 0.02786937728524208, "learning_rate": 3.146437695146182e-05, "loss": 0.0003346838057041168, "step": 241550 }, { "epoch": 68.56656258870281, "grad_norm": 1.5541677474975586, "learning_rate": 3.146153846153846e-05, "loss": 0.0006526540964841843, "step": 241560 }, { "epoch": 68.56940107862617, "grad_norm": 0.018594902008771896, "learning_rate": 3.1458699971615103e-05, "loss": 0.000270833820104599, "step": 241570 }, { "epoch": 68.57223956854953, "grad_norm": 0.020332293584942818, "learning_rate": 3.145586148169174e-05, "loss": 0.00022978167980909349, "step": 241580 }, { "epoch": 68.5750780584729, "grad_norm": 0.008709891699254513, "learning_rate": 3.145302299176838e-05, "loss": 0.000669936090707779, "step": 241590 }, { "epoch": 68.57791654839626, "grad_norm": 0.20084045827388763, "learning_rate": 3.145018450184502e-05, "loss": 0.01737380027770996, "step": 241600 }, { "epoch": 68.58075503831961, "grad_norm": 0.003968459088355303, "learning_rate": 3.1447346011921655e-05, "loss": 0.00020779892802238464, "step": 241610 }, { "epoch": 68.58359352824297, "grad_norm": 0.010130992159247398, "learning_rate": 3.1444507521998304e-05, "loss": 0.0008029919117689133, "step": 241620 }, { "epoch": 68.58643201816633, "grad_norm": 0.008957494050264359, "learning_rate": 3.144166903207494e-05, "loss": 0.00015440434217453002, "step": 241630 }, { "epoch": 68.5892705080897, "grad_norm": 0.032504402101039886, "learning_rate": 3.143883054215157e-05, "loss": 6.893835961818695e-05, "step": 241640 }, { "epoch": 68.59210899801306, "grad_norm": 0.04427509382367134, "learning_rate": 3.1435992052228214e-05, "loss": 0.00017499253153800963, "step": 241650 }, { "epoch": 68.59494748793642, "grad_norm": 0.16161316633224487, "learning_rate": 3.1433153562304856e-05, "loss": 0.00047724880278110504, "step": 241660 }, { "epoch": 68.59778597785977, "grad_norm": 0.03222566843032837, "learning_rate": 3.14303150723815e-05, "loss": 0.0006421253085136414, "step": 241670 }, { "epoch": 68.60062446778313, "grad_norm": 0.007077581249177456, "learning_rate": 3.142747658245813e-05, "loss": 0.001435224898159504, "step": 241680 }, { "epoch": 68.6034629577065, "grad_norm": 0.010933981277048588, "learning_rate": 3.142463809253477e-05, "loss": 0.00019157901406288146, "step": 241690 }, { "epoch": 68.60630144762986, "grad_norm": 0.44706401228904724, "learning_rate": 3.1421799602611414e-05, "loss": 0.00028158985078334807, "step": 241700 }, { "epoch": 68.60913993755322, "grad_norm": 0.03889862820506096, "learning_rate": 3.141896111268805e-05, "loss": 0.001148807629942894, "step": 241710 }, { "epoch": 68.61197842747659, "grad_norm": 0.041643064469099045, "learning_rate": 3.141612262276469e-05, "loss": 0.0002703838050365448, "step": 241720 }, { "epoch": 68.61481691739995, "grad_norm": 0.018283188343048096, "learning_rate": 3.141328413284133e-05, "loss": 0.0004537126049399376, "step": 241730 }, { "epoch": 68.6176554073233, "grad_norm": 12.459197998046875, "learning_rate": 3.1410445642917966e-05, "loss": 0.004809238761663437, "step": 241740 }, { "epoch": 68.62049389724666, "grad_norm": 0.35728105902671814, "learning_rate": 3.140760715299461e-05, "loss": 0.00019841250032186507, "step": 241750 }, { "epoch": 68.62333238717002, "grad_norm": 0.018190165981650352, "learning_rate": 3.140476866307125e-05, "loss": 0.00012420006096363067, "step": 241760 }, { "epoch": 68.62617087709339, "grad_norm": 0.044130682945251465, "learning_rate": 3.1401930173147884e-05, "loss": 0.00015591122210025787, "step": 241770 }, { "epoch": 68.62900936701675, "grad_norm": 0.03572811931371689, "learning_rate": 3.1399091683224525e-05, "loss": 0.00026751793920993806, "step": 241780 }, { "epoch": 68.63184785694011, "grad_norm": 0.016564469784498215, "learning_rate": 3.1396253193301166e-05, "loss": 0.0001751726493239403, "step": 241790 }, { "epoch": 68.63468634686348, "grad_norm": 0.005307013634592295, "learning_rate": 3.139341470337781e-05, "loss": 0.00013709627091884613, "step": 241800 }, { "epoch": 68.63752483678682, "grad_norm": 0.03194558247923851, "learning_rate": 3.139057621345444e-05, "loss": 0.00025252960622310636, "step": 241810 }, { "epoch": 68.64036332671019, "grad_norm": 0.05026521161198616, "learning_rate": 3.1387737723531084e-05, "loss": 0.0004874167963862419, "step": 241820 }, { "epoch": 68.64320181663355, "grad_norm": 0.07109824568033218, "learning_rate": 3.1384899233607725e-05, "loss": 0.0002678379416465759, "step": 241830 }, { "epoch": 68.64604030655691, "grad_norm": 0.18799768388271332, "learning_rate": 3.138206074368436e-05, "loss": 0.0005527874454855919, "step": 241840 }, { "epoch": 68.64887879648028, "grad_norm": 0.013537528924643993, "learning_rate": 3.1379222253761e-05, "loss": 9.795837104320526e-05, "step": 241850 }, { "epoch": 68.65171728640364, "grad_norm": 0.025443900376558304, "learning_rate": 3.137638376383764e-05, "loss": 0.00013652704656124114, "step": 241860 }, { "epoch": 68.65455577632699, "grad_norm": 0.03835247829556465, "learning_rate": 3.137354527391428e-05, "loss": 0.0003083115443587303, "step": 241870 }, { "epoch": 68.65739426625035, "grad_norm": 0.007184120826423168, "learning_rate": 3.137070678399092e-05, "loss": 0.0001445794478058815, "step": 241880 }, { "epoch": 68.66023275617371, "grad_norm": 1.0054808855056763, "learning_rate": 3.136786829406756e-05, "loss": 0.00035395976155996325, "step": 241890 }, { "epoch": 68.66307124609708, "grad_norm": 0.18899841606616974, "learning_rate": 3.1365029804144194e-05, "loss": 0.0001590600237250328, "step": 241900 }, { "epoch": 68.66590973602044, "grad_norm": 0.011053543537855148, "learning_rate": 3.1362191314220836e-05, "loss": 0.002397079765796661, "step": 241910 }, { "epoch": 68.6687482259438, "grad_norm": 0.4623057544231415, "learning_rate": 3.135963667328981e-05, "loss": 0.004659312963485718, "step": 241920 }, { "epoch": 68.67158671586716, "grad_norm": 0.031379587948322296, "learning_rate": 3.135679818336645e-05, "loss": 0.0005584681406617164, "step": 241930 }, { "epoch": 68.67442520579051, "grad_norm": 0.3290727138519287, "learning_rate": 3.135395969344309e-05, "loss": 0.0001809835433959961, "step": 241940 }, { "epoch": 68.67726369571388, "grad_norm": 0.8122276663780212, "learning_rate": 3.1351121203519726e-05, "loss": 0.00046157632023096084, "step": 241950 }, { "epoch": 68.68010218563724, "grad_norm": 0.017355209216475487, "learning_rate": 3.134828271359637e-05, "loss": 0.0004881162196397781, "step": 241960 }, { "epoch": 68.6829406755606, "grad_norm": 1.5341253280639648, "learning_rate": 3.134544422367301e-05, "loss": 0.0007528502494096756, "step": 241970 }, { "epoch": 68.68577916548396, "grad_norm": 0.13411690294742584, "learning_rate": 3.134260573374965e-05, "loss": 0.0002561355009675026, "step": 241980 }, { "epoch": 68.68861765540733, "grad_norm": 0.15526454150676727, "learning_rate": 3.1339767243826285e-05, "loss": 0.0005258312448859215, "step": 241990 }, { "epoch": 68.69145614533069, "grad_norm": 0.042030785232782364, "learning_rate": 3.1336928753902926e-05, "loss": 0.0010858336463570595, "step": 242000 }, { "epoch": 68.69145614533069, "eval_accuracy": 0.9779360335728365, "eval_loss": 0.08356881886720657, "eval_runtime": 33.3327, "eval_samples_per_second": 471.819, "eval_steps_per_second": 7.38, "step": 242000 } ], "logging_steps": 10, "max_steps": 352300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6285092873819138e+21, "train_batch_size": 64, "trial_name": null, "trial_params": null }