{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 10017, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002994908655286014, "grad_norm": 11.265819549560547, "learning_rate": 1.7964071856287425e-07, "loss": 3.7097137451171873, "step": 10 }, { "epoch": 0.005989817310572028, "grad_norm": 10.610027313232422, "learning_rate": 3.792415169660679e-07, "loss": 3.7116737365722656, "step": 20 }, { "epoch": 0.008984725965858042, "grad_norm": 9.398868560791016, "learning_rate": 5.788423153692615e-07, "loss": 3.5568317413330077, "step": 30 }, { "epoch": 0.011979634621144056, "grad_norm": 8.574817657470703, "learning_rate": 7.784431137724552e-07, "loss": 3.5488380432128905, "step": 40 }, { "epoch": 0.01497454327643007, "grad_norm": 6.501334190368652, "learning_rate": 9.780439121756488e-07, "loss": 3.4215156555175783, "step": 50 }, { "epoch": 0.017969451931716084, "grad_norm": 5.397614479064941, "learning_rate": 1.1776447105788423e-06, "loss": 3.4414459228515626, "step": 60 }, { "epoch": 0.020964360587002098, "grad_norm": 5.161387920379639, "learning_rate": 1.377245508982036e-06, "loss": 3.2817943572998045, "step": 70 }, { "epoch": 0.02395926924228811, "grad_norm": 4.114301681518555, "learning_rate": 1.5768463073852298e-06, "loss": 3.046010208129883, "step": 80 }, { "epoch": 0.026954177897574125, "grad_norm": 3.7204360961914062, "learning_rate": 1.7764471057884233e-06, "loss": 2.8605175018310547, "step": 90 }, { "epoch": 0.02994908655286014, "grad_norm": 3.5608198642730713, "learning_rate": 1.976047904191617e-06, "loss": 2.6643051147460937, "step": 100 }, { "epoch": 0.03294399520814615, "grad_norm": 4.155009746551514, "learning_rate": 2.1756487025948105e-06, "loss": 2.725708770751953, "step": 110 }, { "epoch": 0.03593890386343217, "grad_norm": 3.600083112716675, "learning_rate": 2.3752495009980044e-06, "loss": 2.599940872192383, "step": 120 }, { "epoch": 0.03893381251871818, "grad_norm": 3.8976871967315674, "learning_rate": 2.5748502994011975e-06, "loss": 2.478201675415039, "step": 130 }, { "epoch": 0.041928721174004195, "grad_norm": 3.515232563018799, "learning_rate": 2.7744510978043914e-06, "loss": 2.3760326385498045, "step": 140 }, { "epoch": 0.044923629829290206, "grad_norm": 3.7067975997924805, "learning_rate": 2.974051896207585e-06, "loss": 2.2870811462402343, "step": 150 }, { "epoch": 0.04791853848457622, "grad_norm": 3.6703312397003174, "learning_rate": 3.173652694610779e-06, "loss": 2.2208534240722657, "step": 160 }, { "epoch": 0.050913447139862233, "grad_norm": 4.008607864379883, "learning_rate": 3.373253493013972e-06, "loss": 2.0857887268066406, "step": 170 }, { "epoch": 0.05390835579514825, "grad_norm": 3.115046977996826, "learning_rate": 3.572854291417166e-06, "loss": 2.0886693954467774, "step": 180 }, { "epoch": 0.05690326445043426, "grad_norm": 3.818377733230591, "learning_rate": 3.7724550898203594e-06, "loss": 2.0709930419921876, "step": 190 }, { "epoch": 0.05989817310572028, "grad_norm": 4.294528007507324, "learning_rate": 3.972055888223553e-06, "loss": 1.9934419631958007, "step": 200 }, { "epoch": 0.06289308176100629, "grad_norm": 3.16184663772583, "learning_rate": 4.171656686626747e-06, "loss": 1.9263175964355468, "step": 210 }, { "epoch": 0.0658879904162923, "grad_norm": 3.972487688064575, "learning_rate": 4.371257485029941e-06, "loss": 1.907310676574707, "step": 220 }, { "epoch": 0.06888289907157831, "grad_norm": 3.4438064098358154, "learning_rate": 4.570858283433134e-06, "loss": 1.8906272888183593, "step": 230 }, { "epoch": 0.07187780772686433, "grad_norm": 3.363638162612915, "learning_rate": 4.770459081836328e-06, "loss": 1.827865219116211, "step": 240 }, { "epoch": 0.07487271638215034, "grad_norm": 3.1280667781829834, "learning_rate": 4.970059880239521e-06, "loss": 1.7821989059448242, "step": 250 }, { "epoch": 0.07786762503743636, "grad_norm": 3.62811279296875, "learning_rate": 5.169660678642715e-06, "loss": 1.7012821197509767, "step": 260 }, { "epoch": 0.08086253369272237, "grad_norm": 3.6099653244018555, "learning_rate": 5.369261477045909e-06, "loss": 1.7187646865844726, "step": 270 }, { "epoch": 0.08385744234800839, "grad_norm": 3.0062944889068604, "learning_rate": 5.568862275449102e-06, "loss": 1.706169891357422, "step": 280 }, { "epoch": 0.0868523510032944, "grad_norm": 3.3500194549560547, "learning_rate": 5.7684630738522965e-06, "loss": 1.600248146057129, "step": 290 }, { "epoch": 0.08984725965858041, "grad_norm": 3.580355167388916, "learning_rate": 5.96806387225549e-06, "loss": 1.6295143127441407, "step": 300 }, { "epoch": 0.09284216831386642, "grad_norm": 3.1939895153045654, "learning_rate": 6.167664670658683e-06, "loss": 1.635610580444336, "step": 310 }, { "epoch": 0.09583707696915245, "grad_norm": 2.898219585418701, "learning_rate": 6.367265469061877e-06, "loss": 1.5857232093811036, "step": 320 }, { "epoch": 0.09883198562443846, "grad_norm": 3.525341033935547, "learning_rate": 6.5668662674650705e-06, "loss": 1.5980493545532226, "step": 330 }, { "epoch": 0.10182689427972447, "grad_norm": 3.246626615524292, "learning_rate": 6.7664670658682645e-06, "loss": 1.5958646774291991, "step": 340 }, { "epoch": 0.10482180293501048, "grad_norm": 3.128176212310791, "learning_rate": 6.9660678642714575e-06, "loss": 1.4537940979003907, "step": 350 }, { "epoch": 0.1078167115902965, "grad_norm": 3.4397571086883545, "learning_rate": 7.165668662674651e-06, "loss": 1.5209310531616211, "step": 360 }, { "epoch": 0.11081162024558251, "grad_norm": 2.7937843799591064, "learning_rate": 7.365269461077845e-06, "loss": 1.5005243301391602, "step": 370 }, { "epoch": 0.11380652890086852, "grad_norm": 3.246121406555176, "learning_rate": 7.5648702594810385e-06, "loss": 1.453624153137207, "step": 380 }, { "epoch": 0.11680143755615453, "grad_norm": 2.984189748764038, "learning_rate": 7.764471057884232e-06, "loss": 1.4527925491333007, "step": 390 }, { "epoch": 0.11979634621144056, "grad_norm": 3.031707286834717, "learning_rate": 7.964071856287425e-06, "loss": 1.48089656829834, "step": 400 }, { "epoch": 0.12279125486672657, "grad_norm": 3.1862800121307373, "learning_rate": 8.16367265469062e-06, "loss": 1.4173410415649415, "step": 410 }, { "epoch": 0.12578616352201258, "grad_norm": 2.731788396835327, "learning_rate": 8.363273453093813e-06, "loss": 1.4160845756530762, "step": 420 }, { "epoch": 0.1287810721772986, "grad_norm": 2.994575262069702, "learning_rate": 8.562874251497007e-06, "loss": 1.3780389785766602, "step": 430 }, { "epoch": 0.1317759808325846, "grad_norm": 3.6287078857421875, "learning_rate": 8.7624750499002e-06, "loss": 1.3902482986450195, "step": 440 }, { "epoch": 0.1347708894878706, "grad_norm": 3.0350985527038574, "learning_rate": 8.962075848303395e-06, "loss": 1.3507318496704102, "step": 450 }, { "epoch": 0.13776579814315662, "grad_norm": 3.261089563369751, "learning_rate": 9.161676646706587e-06, "loss": 1.3652063369750977, "step": 460 }, { "epoch": 0.14076070679844266, "grad_norm": 2.7072596549987793, "learning_rate": 9.361277445109781e-06, "loss": 1.3391490936279298, "step": 470 }, { "epoch": 0.14375561545372867, "grad_norm": 2.9523279666900635, "learning_rate": 9.560878243512974e-06, "loss": 1.3071959495544434, "step": 480 }, { "epoch": 0.14675052410901468, "grad_norm": 2.713932752609253, "learning_rate": 9.760479041916169e-06, "loss": 1.3630129814147949, "step": 490 }, { "epoch": 0.1497454327643007, "grad_norm": 3.003009557723999, "learning_rate": 9.960079840319361e-06, "loss": 1.2988483428955078, "step": 500 }, { "epoch": 0.1527403414195867, "grad_norm": 3.2941436767578125, "learning_rate": 1.0159680638722555e-05, "loss": 1.309105110168457, "step": 510 }, { "epoch": 0.1557352500748727, "grad_norm": 2.7856335639953613, "learning_rate": 1.035928143712575e-05, "loss": 1.2957303047180175, "step": 520 }, { "epoch": 0.15873015873015872, "grad_norm": 3.1381421089172363, "learning_rate": 1.0558882235528941e-05, "loss": 1.3377592086791992, "step": 530 }, { "epoch": 0.16172506738544473, "grad_norm": 2.874908924102783, "learning_rate": 1.0758483033932137e-05, "loss": 1.3037958145141602, "step": 540 }, { "epoch": 0.16471997604073077, "grad_norm": 2.5848164558410645, "learning_rate": 1.0958083832335331e-05, "loss": 1.2732759475708009, "step": 550 }, { "epoch": 0.16771488469601678, "grad_norm": 2.7429795265197754, "learning_rate": 1.1157684630738523e-05, "loss": 1.255198860168457, "step": 560 }, { "epoch": 0.1707097933513028, "grad_norm": 2.91156268119812, "learning_rate": 1.1357285429141717e-05, "loss": 1.2870652198791503, "step": 570 }, { "epoch": 0.1737047020065888, "grad_norm": 3.2635698318481445, "learning_rate": 1.155688622754491e-05, "loss": 1.2783642768859864, "step": 580 }, { "epoch": 0.1766996106618748, "grad_norm": 2.566370725631714, "learning_rate": 1.1756487025948105e-05, "loss": 1.2557472229003905, "step": 590 }, { "epoch": 0.17969451931716082, "grad_norm": 2.617335796356201, "learning_rate": 1.1956087824351299e-05, "loss": 1.2616601943969727, "step": 600 }, { "epoch": 0.18268942797244683, "grad_norm": 3.0253615379333496, "learning_rate": 1.2155688622754491e-05, "loss": 1.2677306175231933, "step": 610 }, { "epoch": 0.18568433662773284, "grad_norm": 2.745788335800171, "learning_rate": 1.2355289421157685e-05, "loss": 1.2631946563720704, "step": 620 }, { "epoch": 0.18867924528301888, "grad_norm": 2.8516855239868164, "learning_rate": 1.255489021956088e-05, "loss": 1.207695484161377, "step": 630 }, { "epoch": 0.1916741539383049, "grad_norm": 2.7953591346740723, "learning_rate": 1.2754491017964073e-05, "loss": 1.1995798110961915, "step": 640 }, { "epoch": 0.1946690625935909, "grad_norm": 2.414024591445923, "learning_rate": 1.2954091816367267e-05, "loss": 1.1361705780029296, "step": 650 }, { "epoch": 0.1976639712488769, "grad_norm": 2.6588900089263916, "learning_rate": 1.3153692614770459e-05, "loss": 1.2122285842895508, "step": 660 }, { "epoch": 0.20065887990416292, "grad_norm": 2.770174980163574, "learning_rate": 1.3353293413173653e-05, "loss": 1.2334482192993164, "step": 670 }, { "epoch": 0.20365378855944893, "grad_norm": 3.297915458679199, "learning_rate": 1.3552894211576849e-05, "loss": 1.2045844078063965, "step": 680 }, { "epoch": 0.20664869721473494, "grad_norm": 2.676650285720825, "learning_rate": 1.3752495009980041e-05, "loss": 1.181098747253418, "step": 690 }, { "epoch": 0.20964360587002095, "grad_norm": 2.871534824371338, "learning_rate": 1.3952095808383235e-05, "loss": 1.2326663970947265, "step": 700 }, { "epoch": 0.21263851452530697, "grad_norm": 2.8828704357147217, "learning_rate": 1.4151696606786429e-05, "loss": 1.198216152191162, "step": 710 }, { "epoch": 0.215633423180593, "grad_norm": 2.532287120819092, "learning_rate": 1.4351297405189621e-05, "loss": 1.1558509826660157, "step": 720 }, { "epoch": 0.21862833183587901, "grad_norm": 2.6054844856262207, "learning_rate": 1.4550898203592817e-05, "loss": 1.1505748748779296, "step": 730 }, { "epoch": 0.22162324049116502, "grad_norm": 2.5080556869506836, "learning_rate": 1.4750499001996009e-05, "loss": 1.1882616043090821, "step": 740 }, { "epoch": 0.22461814914645103, "grad_norm": 2.641071081161499, "learning_rate": 1.4950099800399203e-05, "loss": 1.14835844039917, "step": 750 }, { "epoch": 0.22761305780173705, "grad_norm": 2.80320405960083, "learning_rate": 1.5149700598802397e-05, "loss": 1.1875015258789063, "step": 760 }, { "epoch": 0.23060796645702306, "grad_norm": 2.6086411476135254, "learning_rate": 1.534930139720559e-05, "loss": 1.1698853492736816, "step": 770 }, { "epoch": 0.23360287511230907, "grad_norm": 2.6112313270568848, "learning_rate": 1.5548902195608783e-05, "loss": 1.1667849540710449, "step": 780 }, { "epoch": 0.23659778376759508, "grad_norm": 2.603092670440674, "learning_rate": 1.5748502994011977e-05, "loss": 1.1800429344177246, "step": 790 }, { "epoch": 0.23959269242288112, "grad_norm": 2.5828044414520264, "learning_rate": 1.594810379241517e-05, "loss": 1.1859128952026368, "step": 800 }, { "epoch": 0.24258760107816713, "grad_norm": 2.3855056762695312, "learning_rate": 1.6147704590818365e-05, "loss": 1.1251150131225587, "step": 810 }, { "epoch": 0.24558250973345314, "grad_norm": 2.433763265609741, "learning_rate": 1.634730538922156e-05, "loss": 1.152597141265869, "step": 820 }, { "epoch": 0.24857741838873915, "grad_norm": 2.6475374698638916, "learning_rate": 1.6546906187624752e-05, "loss": 1.1266324996948243, "step": 830 }, { "epoch": 0.25157232704402516, "grad_norm": 2.4123408794403076, "learning_rate": 1.6746506986027946e-05, "loss": 1.1574108123779296, "step": 840 }, { "epoch": 0.25456723569931117, "grad_norm": 2.3260607719421387, "learning_rate": 1.6946107784431137e-05, "loss": 1.1445627212524414, "step": 850 }, { "epoch": 0.2575621443545972, "grad_norm": 2.6214170455932617, "learning_rate": 1.7145708582834334e-05, "loss": 1.1470834732055664, "step": 860 }, { "epoch": 0.2605570530098832, "grad_norm": 2.3086888790130615, "learning_rate": 1.7345309381237528e-05, "loss": 1.064506721496582, "step": 870 }, { "epoch": 0.2635519616651692, "grad_norm": 2.7607717514038086, "learning_rate": 1.754491017964072e-05, "loss": 1.0883188247680664, "step": 880 }, { "epoch": 0.2665468703204552, "grad_norm": 2.9048917293548584, "learning_rate": 1.7744510978043913e-05, "loss": 1.135384750366211, "step": 890 }, { "epoch": 0.2695417789757412, "grad_norm": 2.61525559425354, "learning_rate": 1.7944111776447107e-05, "loss": 1.1149521827697755, "step": 900 }, { "epoch": 0.27253668763102723, "grad_norm": 2.410212278366089, "learning_rate": 1.81437125748503e-05, "loss": 1.0511359214782714, "step": 910 }, { "epoch": 0.27553159628631324, "grad_norm": 2.789114475250244, "learning_rate": 1.8343313373253494e-05, "loss": 1.0592556953430177, "step": 920 }, { "epoch": 0.2785265049415993, "grad_norm": 2.6519744396209717, "learning_rate": 1.854291417165669e-05, "loss": 1.0614826202392578, "step": 930 }, { "epoch": 0.2815214135968853, "grad_norm": 2.6041579246520996, "learning_rate": 1.8742514970059882e-05, "loss": 1.1006576538085937, "step": 940 }, { "epoch": 0.2845163222521713, "grad_norm": 2.300130844116211, "learning_rate": 1.8942115768463076e-05, "loss": 1.092987632751465, "step": 950 }, { "epoch": 0.28751123090745734, "grad_norm": 2.857870101928711, "learning_rate": 1.914171656686627e-05, "loss": 1.1021804809570312, "step": 960 }, { "epoch": 0.29050613956274335, "grad_norm": 2.5659523010253906, "learning_rate": 1.9341317365269464e-05, "loss": 1.1216981887817383, "step": 970 }, { "epoch": 0.29350104821802936, "grad_norm": 2.596017599105835, "learning_rate": 1.9540918163672655e-05, "loss": 1.1237613677978515, "step": 980 }, { "epoch": 0.29649595687331537, "grad_norm": 2.391162395477295, "learning_rate": 1.974051896207585e-05, "loss": 1.0967981338500976, "step": 990 }, { "epoch": 0.2994908655286014, "grad_norm": 2.5012688636779785, "learning_rate": 1.9940119760479046e-05, "loss": 1.06771240234375, "step": 1000 }, { "epoch": 0.3024857741838874, "grad_norm": 2.5918989181518555, "learning_rate": 1.9999970246767755e-05, "loss": 1.0973945617675782, "step": 1010 }, { "epoch": 0.3054806828391734, "grad_norm": 2.2349612712860107, "learning_rate": 1.9999824517076846e-05, "loss": 1.1063728332519531, "step": 1020 }, { "epoch": 0.3084755914944594, "grad_norm": 2.0478508472442627, "learning_rate": 1.999955734781544e-05, "loss": 1.070663070678711, "step": 1030 }, { "epoch": 0.3114705001497454, "grad_norm": 2.709059953689575, "learning_rate": 1.9999168742228082e-05, "loss": 1.0881099700927734, "step": 1040 }, { "epoch": 0.31446540880503143, "grad_norm": 2.1217539310455322, "learning_rate": 1.9998658705034068e-05, "loss": 1.0525678634643554, "step": 1050 }, { "epoch": 0.31746031746031744, "grad_norm": 2.183957099914551, "learning_rate": 1.9998027242427373e-05, "loss": 1.0776740074157716, "step": 1060 }, { "epoch": 0.32045522611560345, "grad_norm": 2.300778388977051, "learning_rate": 1.9997274362076588e-05, "loss": 0.9790191650390625, "step": 1070 }, { "epoch": 0.32345013477088946, "grad_norm": 2.1335058212280273, "learning_rate": 1.9996400073124822e-05, "loss": 1.0659798622131347, "step": 1080 }, { "epoch": 0.3264450434261755, "grad_norm": 2.397620916366577, "learning_rate": 1.9995404386189584e-05, "loss": 1.0076449394226075, "step": 1090 }, { "epoch": 0.32943995208146154, "grad_norm": 2.2613308429718018, "learning_rate": 1.999428731336267e-05, "loss": 1.0323925971984864, "step": 1100 }, { "epoch": 0.33243486073674755, "grad_norm": 2.229064702987671, "learning_rate": 1.999304886821e-05, "loss": 1.015975570678711, "step": 1110 }, { "epoch": 0.33542976939203356, "grad_norm": 2.0292208194732666, "learning_rate": 1.9991689065771465e-05, "loss": 1.034627342224121, "step": 1120 }, { "epoch": 0.33842467804731957, "grad_norm": 2.0535295009613037, "learning_rate": 1.9990207922560733e-05, "loss": 1.0778848648071289, "step": 1130 }, { "epoch": 0.3414195867026056, "grad_norm": 2.31610107421875, "learning_rate": 1.9988605456565064e-05, "loss": 1.0127778053283691, "step": 1140 }, { "epoch": 0.3444144953578916, "grad_norm": 2.2170019149780273, "learning_rate": 1.9986881687245076e-05, "loss": 1.0534976959228515, "step": 1150 }, { "epoch": 0.3474094040131776, "grad_norm": 2.850485324859619, "learning_rate": 1.9985036635534513e-05, "loss": 1.0433968544006347, "step": 1160 }, { "epoch": 0.3504043126684636, "grad_norm": 2.169513702392578, "learning_rate": 1.9983070323840004e-05, "loss": 1.0182857513427734, "step": 1170 }, { "epoch": 0.3533992213237496, "grad_norm": 2.373967170715332, "learning_rate": 1.998098277604077e-05, "loss": 1.0277379989624023, "step": 1180 }, { "epoch": 0.35639412997903563, "grad_norm": 2.0569117069244385, "learning_rate": 1.9978774017488345e-05, "loss": 1.0588248252868653, "step": 1190 }, { "epoch": 0.35938903863432164, "grad_norm": 2.008612632751465, "learning_rate": 1.997644407500627e-05, "loss": 1.0627038955688477, "step": 1200 }, { "epoch": 0.36238394728960766, "grad_norm": 2.0754430294036865, "learning_rate": 1.9973992976889763e-05, "loss": 0.9568704605102539, "step": 1210 }, { "epoch": 0.36537885594489367, "grad_norm": 2.1991281509399414, "learning_rate": 1.9971420752905372e-05, "loss": 1.0216650009155273, "step": 1220 }, { "epoch": 0.3683737646001797, "grad_norm": 2.127699375152588, "learning_rate": 1.9968727434290632e-05, "loss": 1.0075697898864746, "step": 1230 }, { "epoch": 0.3713686732554657, "grad_norm": 2.0525062084198, "learning_rate": 1.9965913053753656e-05, "loss": 1.0119134902954101, "step": 1240 }, { "epoch": 0.3743635819107517, "grad_norm": 2.035109519958496, "learning_rate": 1.9962977645472762e-05, "loss": 0.9841228485107422, "step": 1250 }, { "epoch": 0.37735849056603776, "grad_norm": 1.8049895763397217, "learning_rate": 1.9959921245096047e-05, "loss": 0.9867215156555176, "step": 1260 }, { "epoch": 0.3803533992213238, "grad_norm": 1.9257926940917969, "learning_rate": 1.995674388974096e-05, "loss": 1.0146930694580079, "step": 1270 }, { "epoch": 0.3833483078766098, "grad_norm": 1.982143759727478, "learning_rate": 1.995344561799384e-05, "loss": 1.0018959045410156, "step": 1280 }, { "epoch": 0.3863432165318958, "grad_norm": 2.125669240951538, "learning_rate": 1.9950026469909462e-05, "loss": 0.9433177947998047, "step": 1290 }, { "epoch": 0.3893381251871818, "grad_norm": 2.0401179790496826, "learning_rate": 1.9946486487010546e-05, "loss": 0.9733993530273437, "step": 1300 }, { "epoch": 0.3923330338424678, "grad_norm": 2.141977071762085, "learning_rate": 1.994282571228724e-05, "loss": 0.9609798431396485, "step": 1310 }, { "epoch": 0.3953279424977538, "grad_norm": 2.166964054107666, "learning_rate": 1.9939044190196624e-05, "loss": 1.0087587356567382, "step": 1320 }, { "epoch": 0.39832285115303984, "grad_norm": 2.3837554454803467, "learning_rate": 1.9935141966662138e-05, "loss": 0.9748960494995117, "step": 1330 }, { "epoch": 0.40131775980832585, "grad_norm": 2.0114309787750244, "learning_rate": 1.993111908907305e-05, "loss": 0.9450881004333496, "step": 1340 }, { "epoch": 0.40431266846361186, "grad_norm": 1.914791226387024, "learning_rate": 1.9926975606283875e-05, "loss": 1.0085538864135741, "step": 1350 }, { "epoch": 0.40730757711889787, "grad_norm": 2.1861987113952637, "learning_rate": 1.9922711568613765e-05, "loss": 1.0055445671081542, "step": 1360 }, { "epoch": 0.4103024857741839, "grad_norm": 2.173232316970825, "learning_rate": 1.9918327027845926e-05, "loss": 1.003979778289795, "step": 1370 }, { "epoch": 0.4132973944294699, "grad_norm": 1.997961163520813, "learning_rate": 1.9913822037226965e-05, "loss": 0.9993978500366211, "step": 1380 }, { "epoch": 0.4162923030847559, "grad_norm": 1.9082618951797485, "learning_rate": 1.9909196651466255e-05, "loss": 0.9729861259460449, "step": 1390 }, { "epoch": 0.4192872117400419, "grad_norm": 2.1016769409179688, "learning_rate": 1.9904450926735267e-05, "loss": 0.9694742202758789, "step": 1400 }, { "epoch": 0.4222821203953279, "grad_norm": 2.0774240493774414, "learning_rate": 1.9899584920666885e-05, "loss": 1.0101737976074219, "step": 1410 }, { "epoch": 0.42527702905061393, "grad_norm": 1.939172625541687, "learning_rate": 1.989459869235472e-05, "loss": 0.9967576026916504, "step": 1420 }, { "epoch": 0.4282719377059, "grad_norm": 1.7587170600891113, "learning_rate": 1.988949230235238e-05, "loss": 0.9855384826660156, "step": 1430 }, { "epoch": 0.431266846361186, "grad_norm": 2.190992832183838, "learning_rate": 1.988426581267273e-05, "loss": 0.9569406509399414, "step": 1440 }, { "epoch": 0.434261755016472, "grad_norm": 2.0716278553009033, "learning_rate": 1.9878919286787147e-05, "loss": 0.9796838760375977, "step": 1450 }, { "epoch": 0.43725666367175803, "grad_norm": 1.9944990873336792, "learning_rate": 1.9873452789624758e-05, "loss": 0.9943093299865723, "step": 1460 }, { "epoch": 0.44025157232704404, "grad_norm": 1.7507660388946533, "learning_rate": 1.986786638757163e-05, "loss": 1.0118515014648437, "step": 1470 }, { "epoch": 0.44324648098233005, "grad_norm": 1.824371099472046, "learning_rate": 1.9862160148469983e-05, "loss": 0.9747288703918457, "step": 1480 }, { "epoch": 0.44624138963761606, "grad_norm": 1.956614375114441, "learning_rate": 1.9856334141617354e-05, "loss": 0.9127725601196289, "step": 1490 }, { "epoch": 0.44923629829290207, "grad_norm": 1.730757474899292, "learning_rate": 1.985038843776576e-05, "loss": 0.9908318519592285, "step": 1500 }, { "epoch": 0.4522312069481881, "grad_norm": 1.7902590036392212, "learning_rate": 1.984432310912084e-05, "loss": 0.9776639938354492, "step": 1510 }, { "epoch": 0.4552261156034741, "grad_norm": 1.738864779472351, "learning_rate": 1.9838138229340984e-05, "loss": 0.9166988372802735, "step": 1520 }, { "epoch": 0.4582210242587601, "grad_norm": 1.9358223676681519, "learning_rate": 1.9831833873536417e-05, "loss": 0.9750303268432617, "step": 1530 }, { "epoch": 0.4612159329140461, "grad_norm": 2.1201319694519043, "learning_rate": 1.9825410118268313e-05, "loss": 0.937105655670166, "step": 1540 }, { "epoch": 0.4642108415693321, "grad_norm": 2.094879388809204, "learning_rate": 1.981886704154784e-05, "loss": 0.9664621353149414, "step": 1550 }, { "epoch": 0.46720575022461813, "grad_norm": 1.9292011260986328, "learning_rate": 1.9812204722835248e-05, "loss": 0.9639430999755859, "step": 1560 }, { "epoch": 0.47020065887990414, "grad_norm": 1.9128199815750122, "learning_rate": 1.9805423243038863e-05, "loss": 0.9359722137451172, "step": 1570 }, { "epoch": 0.47319556753519015, "grad_norm": 1.8651394844055176, "learning_rate": 1.979852268451413e-05, "loss": 0.9354040145874023, "step": 1580 }, { "epoch": 0.47619047619047616, "grad_norm": 2.0249080657958984, "learning_rate": 1.9791503131062604e-05, "loss": 0.9166906356811524, "step": 1590 }, { "epoch": 0.47918538484576223, "grad_norm": 1.9980604648590088, "learning_rate": 1.978436466793094e-05, "loss": 0.9285884857177734, "step": 1600 }, { "epoch": 0.48218029350104824, "grad_norm": 2.2854161262512207, "learning_rate": 1.9777107381809845e-05, "loss": 0.9732503890991211, "step": 1610 }, { "epoch": 0.48517520215633425, "grad_norm": 2.344902992248535, "learning_rate": 1.9769731360833043e-05, "loss": 0.893382453918457, "step": 1620 }, { "epoch": 0.48817011081162026, "grad_norm": 1.9881426095962524, "learning_rate": 1.976223669457618e-05, "loss": 0.9103918075561523, "step": 1630 }, { "epoch": 0.49116501946690627, "grad_norm": 1.8509609699249268, "learning_rate": 1.9754623474055764e-05, "loss": 0.8984537124633789, "step": 1640 }, { "epoch": 0.4941599281221923, "grad_norm": 1.8210080862045288, "learning_rate": 1.974689179172804e-05, "loss": 0.9495397567749023, "step": 1650 }, { "epoch": 0.4971548367774783, "grad_norm": 2.0903966426849365, "learning_rate": 1.973904174148787e-05, "loss": 0.9040670394897461, "step": 1660 }, { "epoch": 0.5001497454327642, "grad_norm": 2.109459638595581, "learning_rate": 1.97310734186676e-05, "loss": 0.9156620025634765, "step": 1670 }, { "epoch": 0.5031446540880503, "grad_norm": 2.0825653076171875, "learning_rate": 1.9722986920035904e-05, "loss": 0.9217489242553711, "step": 1680 }, { "epoch": 0.5061395627433364, "grad_norm": 1.6379085779190063, "learning_rate": 1.9714782343796593e-05, "loss": 0.8876156806945801, "step": 1690 }, { "epoch": 0.5091344713986223, "grad_norm": 1.769353985786438, "learning_rate": 1.9706459789587437e-05, "loss": 0.9461544036865235, "step": 1700 }, { "epoch": 0.5121293800539084, "grad_norm": 1.868715763092041, "learning_rate": 1.9698019358478948e-05, "loss": 0.9041751861572266, "step": 1710 }, { "epoch": 0.5151242887091944, "grad_norm": 1.946820616722107, "learning_rate": 1.9689461152973166e-05, "loss": 0.9117437362670898, "step": 1720 }, { "epoch": 0.5181191973644804, "grad_norm": 1.759941577911377, "learning_rate": 1.9680785277002388e-05, "loss": 0.9326802253723144, "step": 1730 }, { "epoch": 0.5211141060197664, "grad_norm": 1.9923816919326782, "learning_rate": 1.9671991835927928e-05, "loss": 0.9163030624389649, "step": 1740 }, { "epoch": 0.5241090146750524, "grad_norm": 1.6598330736160278, "learning_rate": 1.9663080936538834e-05, "loss": 0.9651662826538085, "step": 1750 }, { "epoch": 0.5271039233303384, "grad_norm": 1.7775046825408936, "learning_rate": 1.9654052687050583e-05, "loss": 0.9174107551574707, "step": 1760 }, { "epoch": 0.5300988319856245, "grad_norm": 1.663625717163086, "learning_rate": 1.9644907197103772e-05, "loss": 0.9102935791015625, "step": 1770 }, { "epoch": 0.5330937406409104, "grad_norm": 1.7679104804992676, "learning_rate": 1.9635644577762792e-05, "loss": 0.8674448013305665, "step": 1780 }, { "epoch": 0.5360886492961965, "grad_norm": 1.7731411457061768, "learning_rate": 1.962626494151446e-05, "loss": 0.9251704216003418, "step": 1790 }, { "epoch": 0.5390835579514824, "grad_norm": 1.5716463327407837, "learning_rate": 1.961676840226668e-05, "loss": 0.9307645797729492, "step": 1800 }, { "epoch": 0.5420784666067685, "grad_norm": 1.789790391921997, "learning_rate": 1.9607155075347038e-05, "loss": 0.8879091262817382, "step": 1810 }, { "epoch": 0.5450733752620545, "grad_norm": 1.888209342956543, "learning_rate": 1.9597425077501416e-05, "loss": 0.9431265830993653, "step": 1820 }, { "epoch": 0.5480682839173405, "grad_norm": 1.672878384590149, "learning_rate": 1.958757852689256e-05, "loss": 0.9204288482666015, "step": 1830 }, { "epoch": 0.5510631925726265, "grad_norm": 1.5364495515823364, "learning_rate": 1.957761554309866e-05, "loss": 0.9210444450378418, "step": 1840 }, { "epoch": 0.5540581012279125, "grad_norm": 1.8222407102584839, "learning_rate": 1.9567536247111878e-05, "loss": 0.8770456314086914, "step": 1850 }, { "epoch": 0.5570530098831986, "grad_norm": 1.7476502656936646, "learning_rate": 1.955734076133691e-05, "loss": 0.8903690338134765, "step": 1860 }, { "epoch": 0.5600479185384846, "grad_norm": 1.86436927318573, "learning_rate": 1.9547029209589464e-05, "loss": 0.9007977485656739, "step": 1870 }, { "epoch": 0.5630428271937706, "grad_norm": 1.5122853517532349, "learning_rate": 1.9536601717094778e-05, "loss": 0.882940673828125, "step": 1880 }, { "epoch": 0.5660377358490566, "grad_norm": 1.75053071975708, "learning_rate": 1.95260584104861e-05, "loss": 0.8975667953491211, "step": 1890 }, { "epoch": 0.5690326445043427, "grad_norm": 1.824499487876892, "learning_rate": 1.9515399417803135e-05, "loss": 0.8656953811645508, "step": 1900 }, { "epoch": 0.5720275531596286, "grad_norm": 1.7241740226745605, "learning_rate": 1.9504624868490506e-05, "loss": 0.889422607421875, "step": 1910 }, { "epoch": 0.5750224618149147, "grad_norm": 1.6877175569534302, "learning_rate": 1.9493734893396176e-05, "loss": 0.8777622222900391, "step": 1920 }, { "epoch": 0.5780173704702006, "grad_norm": 1.7374579906463623, "learning_rate": 1.948272962476985e-05, "loss": 0.8936216354370117, "step": 1930 }, { "epoch": 0.5810122791254867, "grad_norm": 1.9554184675216675, "learning_rate": 1.9471609196261386e-05, "loss": 0.9030414581298828, "step": 1940 }, { "epoch": 0.5840071877807727, "grad_norm": 2.0266647338867188, "learning_rate": 1.9460373742919158e-05, "loss": 0.8939972877502441, "step": 1950 }, { "epoch": 0.5870020964360587, "grad_norm": 1.811481237411499, "learning_rate": 1.9449023401188427e-05, "loss": 0.9280128479003906, "step": 1960 }, { "epoch": 0.5899970050913447, "grad_norm": 1.7992744445800781, "learning_rate": 1.9437558308909674e-05, "loss": 0.893956470489502, "step": 1970 }, { "epoch": 0.5929919137466307, "grad_norm": 1.6678317785263062, "learning_rate": 1.9425978605316924e-05, "loss": 0.9255929946899414, "step": 1980 }, { "epoch": 0.5959868224019167, "grad_norm": 1.6106699705123901, "learning_rate": 1.9414284431036074e-05, "loss": 0.8314929962158203, "step": 1990 }, { "epoch": 0.5989817310572028, "grad_norm": 1.576809287071228, "learning_rate": 1.9402475928083166e-05, "loss": 0.9036288261413574, "step": 2000 }, { "epoch": 0.6019766397124887, "grad_norm": 1.662400484085083, "learning_rate": 1.9390553239862666e-05, "loss": 0.8632070541381835, "step": 2010 }, { "epoch": 0.6049715483677748, "grad_norm": 2.1808533668518066, "learning_rate": 1.9378516511165733e-05, "loss": 0.8544286727905274, "step": 2020 }, { "epoch": 0.6079664570230608, "grad_norm": 1.6702479124069214, "learning_rate": 1.9366365888168444e-05, "loss": 0.8870140075683594, "step": 2030 }, { "epoch": 0.6109613656783468, "grad_norm": 1.699459433555603, "learning_rate": 1.9354101518430033e-05, "loss": 0.8339980125427247, "step": 2040 }, { "epoch": 0.6139562743336329, "grad_norm": 1.767531394958496, "learning_rate": 1.9341723550891097e-05, "loss": 0.8794610977172852, "step": 2050 }, { "epoch": 0.6169511829889188, "grad_norm": 1.5786960124969482, "learning_rate": 1.9329232135871775e-05, "loss": 0.8840433120727539, "step": 2060 }, { "epoch": 0.6199460916442049, "grad_norm": 1.7652090787887573, "learning_rate": 1.931662742506994e-05, "loss": 0.8960956573486328, "step": 2070 }, { "epoch": 0.6229410002994908, "grad_norm": 1.8086838722229004, "learning_rate": 1.930390957155934e-05, "loss": 0.8429698944091797, "step": 2080 }, { "epoch": 0.6259359089547769, "grad_norm": 1.7018482685089111, "learning_rate": 1.9291078729787764e-05, "loss": 0.8314028739929199, "step": 2090 }, { "epoch": 0.6289308176100629, "grad_norm": 1.4827744960784912, "learning_rate": 1.9278135055575126e-05, "loss": 0.8367262840270996, "step": 2100 }, { "epoch": 0.6319257262653489, "grad_norm": 1.7840453386306763, "learning_rate": 1.9265078706111608e-05, "loss": 0.8233530044555664, "step": 2110 }, { "epoch": 0.6349206349206349, "grad_norm": 1.9676882028579712, "learning_rate": 1.9251909839955742e-05, "loss": 0.8611278533935547, "step": 2120 }, { "epoch": 0.637915543575921, "grad_norm": 1.8171685934066772, "learning_rate": 1.9238628617032483e-05, "loss": 0.8771775245666504, "step": 2130 }, { "epoch": 0.6409104522312069, "grad_norm": 1.8074554204940796, "learning_rate": 1.922523519863126e-05, "loss": 0.884306526184082, "step": 2140 }, { "epoch": 0.643905360886493, "grad_norm": 1.8622629642486572, "learning_rate": 1.9211729747404028e-05, "loss": 0.8650590896606445, "step": 2150 }, { "epoch": 0.6469002695417789, "grad_norm": 1.6245149374008179, "learning_rate": 1.9198112427363275e-05, "loss": 0.8924369812011719, "step": 2160 }, { "epoch": 0.649895178197065, "grad_norm": 1.6202753782272339, "learning_rate": 1.918438340388006e-05, "loss": 0.8587064743041992, "step": 2170 }, { "epoch": 0.652890086852351, "grad_norm": 1.955124020576477, "learning_rate": 1.9170542843681984e-05, "loss": 0.8569240570068359, "step": 2180 }, { "epoch": 0.655884995507637, "grad_norm": 1.6515233516693115, "learning_rate": 1.9156590914851157e-05, "loss": 0.8982840538024902, "step": 2190 }, { "epoch": 0.6588799041629231, "grad_norm": 1.8015732765197754, "learning_rate": 1.9142527786822182e-05, "loss": 0.8220060348510743, "step": 2200 }, { "epoch": 0.661874812818209, "grad_norm": 1.583517074584961, "learning_rate": 1.9128353630380076e-05, "loss": 0.847236442565918, "step": 2210 }, { "epoch": 0.6648697214734951, "grad_norm": 1.8341610431671143, "learning_rate": 1.9114068617658207e-05, "loss": 0.869542121887207, "step": 2220 }, { "epoch": 0.6678646301287811, "grad_norm": 1.5468474626541138, "learning_rate": 1.90996729221362e-05, "loss": 0.8361228942871094, "step": 2230 }, { "epoch": 0.6708595387840671, "grad_norm": 1.6672273874282837, "learning_rate": 1.9085166718637835e-05, "loss": 0.8854676246643066, "step": 2240 }, { "epoch": 0.6738544474393531, "grad_norm": 1.6331413984298706, "learning_rate": 1.907055018332891e-05, "loss": 0.8510169982910156, "step": 2250 }, { "epoch": 0.6768493560946391, "grad_norm": 1.8924387693405151, "learning_rate": 1.9055823493715123e-05, "loss": 0.8499082565307617, "step": 2260 }, { "epoch": 0.6798442647499251, "grad_norm": 1.6619211435317993, "learning_rate": 1.9040986828639892e-05, "loss": 0.8638698577880859, "step": 2270 }, { "epoch": 0.6828391734052112, "grad_norm": 1.6199733018875122, "learning_rate": 1.9026040368282207e-05, "loss": 0.8675064086914063, "step": 2280 }, { "epoch": 0.6858340820604971, "grad_norm": 1.7399113178253174, "learning_rate": 1.901098429415442e-05, "loss": 0.8387475967407226, "step": 2290 }, { "epoch": 0.6888289907157832, "grad_norm": 1.667663812637329, "learning_rate": 1.8995818789100066e-05, "loss": 0.8783481597900391, "step": 2300 }, { "epoch": 0.6918238993710691, "grad_norm": 1.7032908201217651, "learning_rate": 1.8980544037291614e-05, "loss": 0.8562976837158203, "step": 2310 }, { "epoch": 0.6948188080263552, "grad_norm": 1.9778879880905151, "learning_rate": 1.896516022422825e-05, "loss": 0.8344745635986328, "step": 2320 }, { "epoch": 0.6978137166816412, "grad_norm": 1.8774316310882568, "learning_rate": 1.8949667536733614e-05, "loss": 0.8148428916931152, "step": 2330 }, { "epoch": 0.7008086253369272, "grad_norm": 1.6596498489379883, "learning_rate": 1.8934066162953543e-05, "loss": 0.8398752212524414, "step": 2340 }, { "epoch": 0.7038035339922133, "grad_norm": 1.6424564123153687, "learning_rate": 1.8918356292353775e-05, "loss": 0.8635367393493653, "step": 2350 }, { "epoch": 0.7067984426474992, "grad_norm": 1.7502802610397339, "learning_rate": 1.890253811571765e-05, "loss": 0.8395760536193848, "step": 2360 }, { "epoch": 0.7097933513027853, "grad_norm": 1.7914620637893677, "learning_rate": 1.8886611825143796e-05, "loss": 0.809751319885254, "step": 2370 }, { "epoch": 0.7127882599580713, "grad_norm": 1.614733099937439, "learning_rate": 1.88705776140438e-05, "loss": 0.8426692962646485, "step": 2380 }, { "epoch": 0.7157831686133573, "grad_norm": 1.6434929370880127, "learning_rate": 1.885443567713985e-05, "loss": 0.8461108207702637, "step": 2390 }, { "epoch": 0.7187780772686433, "grad_norm": 1.5342862606048584, "learning_rate": 1.8838186210462365e-05, "loss": 0.8506370544433594, "step": 2400 }, { "epoch": 0.7217729859239294, "grad_norm": 1.7060281038284302, "learning_rate": 1.8821829411347642e-05, "loss": 0.8571641921997071, "step": 2410 }, { "epoch": 0.7247678945792153, "grad_norm": 1.5919580459594727, "learning_rate": 1.8805365478435432e-05, "loss": 0.8273300170898438, "step": 2420 }, { "epoch": 0.7277628032345014, "grad_norm": 1.7425918579101562, "learning_rate": 1.8788794611666536e-05, "loss": 0.8230342864990234, "step": 2430 }, { "epoch": 0.7307577118897873, "grad_norm": 1.6442525386810303, "learning_rate": 1.877211701228038e-05, "loss": 0.8350908279418945, "step": 2440 }, { "epoch": 0.7337526205450734, "grad_norm": 1.6684730052947998, "learning_rate": 1.875533288281257e-05, "loss": 0.8504384994506836, "step": 2450 }, { "epoch": 0.7367475292003594, "grad_norm": 1.6235419511795044, "learning_rate": 1.8738442427092428e-05, "loss": 0.8609309196472168, "step": 2460 }, { "epoch": 0.7397424378556454, "grad_norm": 1.45209801197052, "learning_rate": 1.8721445850240522e-05, "loss": 0.8582953453063965, "step": 2470 }, { "epoch": 0.7427373465109314, "grad_norm": 1.640078067779541, "learning_rate": 1.870434335866618e-05, "loss": 0.8432114601135254, "step": 2480 }, { "epoch": 0.7457322551662174, "grad_norm": 1.6859060525894165, "learning_rate": 1.8687135160064956e-05, "loss": 0.8331222534179688, "step": 2490 }, { "epoch": 0.7487271638215034, "grad_norm": 1.6803628206253052, "learning_rate": 1.8669821463416157e-05, "loss": 0.829715633392334, "step": 2500 }, { "epoch": 0.7517220724767895, "grad_norm": 1.5660089254379272, "learning_rate": 1.8652402478980255e-05, "loss": 0.8638070106506348, "step": 2510 }, { "epoch": 0.7547169811320755, "grad_norm": 1.5286768674850464, "learning_rate": 1.8634878418296362e-05, "loss": 0.7757655143737793, "step": 2520 }, { "epoch": 0.7577118897873615, "grad_norm": 1.5210187435150146, "learning_rate": 1.8617249494179644e-05, "loss": 0.7913604736328125, "step": 2530 }, { "epoch": 0.7607067984426475, "grad_norm": 1.6953132152557373, "learning_rate": 1.859951592071877e-05, "loss": 0.7964819431304931, "step": 2540 }, { "epoch": 0.7637017070979335, "grad_norm": 1.8429316282272339, "learning_rate": 1.8581677913273267e-05, "loss": 0.7994976043701172, "step": 2550 }, { "epoch": 0.7666966157532196, "grad_norm": 1.6222190856933594, "learning_rate": 1.856373568847093e-05, "loss": 0.7941509246826172, "step": 2560 }, { "epoch": 0.7696915244085055, "grad_norm": 1.379274845123291, "learning_rate": 1.8545689464205193e-05, "loss": 0.8207425117492676, "step": 2570 }, { "epoch": 0.7726864330637916, "grad_norm": 1.6088320016860962, "learning_rate": 1.8527539459632473e-05, "loss": 0.8137792587280274, "step": 2580 }, { "epoch": 0.7756813417190775, "grad_norm": 1.5247526168823242, "learning_rate": 1.8509285895169516e-05, "loss": 0.863805103302002, "step": 2590 }, { "epoch": 0.7786762503743636, "grad_norm": 1.5615347623825073, "learning_rate": 1.849092899249071e-05, "loss": 0.828615379333496, "step": 2600 }, { "epoch": 0.7816711590296496, "grad_norm": 1.8369919061660767, "learning_rate": 1.847246897452541e-05, "loss": 0.8638320922851562, "step": 2610 }, { "epoch": 0.7846660676849356, "grad_norm": 1.694499135017395, "learning_rate": 1.8453906065455212e-05, "loss": 0.8065310478210449, "step": 2620 }, { "epoch": 0.7876609763402216, "grad_norm": 1.6315211057662964, "learning_rate": 1.8435240490711247e-05, "loss": 0.8603771209716797, "step": 2630 }, { "epoch": 0.7906558849955077, "grad_norm": 1.7139288187026978, "learning_rate": 1.8416472476971424e-05, "loss": 0.8373805999755859, "step": 2640 }, { "epoch": 0.7936507936507936, "grad_norm": 1.6665681600570679, "learning_rate": 1.8397602252157704e-05, "loss": 0.8208301544189454, "step": 2650 }, { "epoch": 0.7966457023060797, "grad_norm": 1.6627062559127808, "learning_rate": 1.8378630045433298e-05, "loss": 0.8139615058898926, "step": 2660 }, { "epoch": 0.7996406109613656, "grad_norm": 1.4767072200775146, "learning_rate": 1.835955608719992e-05, "loss": 0.8225536346435547, "step": 2670 }, { "epoch": 0.8026355196166517, "grad_norm": 1.6780694723129272, "learning_rate": 1.8340380609094962e-05, "loss": 0.842643928527832, "step": 2680 }, { "epoch": 0.8056304282719378, "grad_norm": 1.7037646770477295, "learning_rate": 1.8321103843988695e-05, "loss": 0.8341219902038575, "step": 2690 }, { "epoch": 0.8086253369272237, "grad_norm": 1.53456449508667, "learning_rate": 1.8301726025981427e-05, "loss": 0.814063835144043, "step": 2700 }, { "epoch": 0.8116202455825098, "grad_norm": 1.7273889780044556, "learning_rate": 1.828224739040069e-05, "loss": 0.7797497749328614, "step": 2710 }, { "epoch": 0.8146151542377957, "grad_norm": 1.6220978498458862, "learning_rate": 1.8262668173798336e-05, "loss": 0.8151215553283692, "step": 2720 }, { "epoch": 0.8176100628930818, "grad_norm": 1.5839869976043701, "learning_rate": 1.8242988613947714e-05, "loss": 0.8338854789733887, "step": 2730 }, { "epoch": 0.8206049715483678, "grad_norm": 1.6918904781341553, "learning_rate": 1.822320894984074e-05, "loss": 0.8016552925109863, "step": 2740 }, { "epoch": 0.8235998802036538, "grad_norm": 1.532352328300476, "learning_rate": 1.8203329421685024e-05, "loss": 0.7859272956848145, "step": 2750 }, { "epoch": 0.8265947888589398, "grad_norm": 1.5221245288848877, "learning_rate": 1.8183350270900936e-05, "loss": 0.8459560394287109, "step": 2760 }, { "epoch": 0.8295896975142258, "grad_norm": 1.7007802724838257, "learning_rate": 1.8163271740118687e-05, "loss": 0.8190437316894531, "step": 2770 }, { "epoch": 0.8325846061695118, "grad_norm": 1.5608899593353271, "learning_rate": 1.8143094073175365e-05, "loss": 0.8271324157714843, "step": 2780 }, { "epoch": 0.8355795148247979, "grad_norm": 1.753761649131775, "learning_rate": 1.8122817515112e-05, "loss": 0.8533936500549316, "step": 2790 }, { "epoch": 0.8385744234800838, "grad_norm": 1.615286946296692, "learning_rate": 1.8102442312170553e-05, "loss": 0.8588766098022461, "step": 2800 }, { "epoch": 0.8415693321353699, "grad_norm": 1.587636113166809, "learning_rate": 1.8081968711790964e-05, "loss": 0.8210906028747559, "step": 2810 }, { "epoch": 0.8445642407906558, "grad_norm": 1.455330491065979, "learning_rate": 1.8061396962608115e-05, "loss": 0.8196340560913086, "step": 2820 }, { "epoch": 0.8475591494459419, "grad_norm": 1.492753267288208, "learning_rate": 1.804072731444883e-05, "loss": 0.8112252235412598, "step": 2830 }, { "epoch": 0.8505540581012279, "grad_norm": 1.5428729057312012, "learning_rate": 1.801996001832883e-05, "loss": 0.7767475128173829, "step": 2840 }, { "epoch": 0.8535489667565139, "grad_norm": 1.4639486074447632, "learning_rate": 1.79990953264497e-05, "loss": 0.7648550987243652, "step": 2850 }, { "epoch": 0.8565438754118, "grad_norm": 1.5121395587921143, "learning_rate": 1.7978133492195802e-05, "loss": 0.8193672180175782, "step": 2860 }, { "epoch": 0.859538784067086, "grad_norm": 1.4281710386276245, "learning_rate": 1.7957074770131226e-05, "loss": 0.8272466659545898, "step": 2870 }, { "epoch": 0.862533692722372, "grad_norm": 1.834460973739624, "learning_rate": 1.7935919415996665e-05, "loss": 0.7895036697387695, "step": 2880 }, { "epoch": 0.865528601377658, "grad_norm": 1.4872559309005737, "learning_rate": 1.7914667686706347e-05, "loss": 0.8052210807800293, "step": 2890 }, { "epoch": 0.868523510032944, "grad_norm": 1.609100580215454, "learning_rate": 1.7893319840344886e-05, "loss": 0.8197463989257813, "step": 2900 }, { "epoch": 0.87151841868823, "grad_norm": 1.4353001117706299, "learning_rate": 1.787187613616416e-05, "loss": 0.8479232788085938, "step": 2910 }, { "epoch": 0.8745133273435161, "grad_norm": 1.6909197568893433, "learning_rate": 1.7850336834580166e-05, "loss": 0.8297075271606446, "step": 2920 }, { "epoch": 0.877508235998802, "grad_norm": 1.90815007686615, "learning_rate": 1.7828702197169842e-05, "loss": 0.8151211738586426, "step": 2930 }, { "epoch": 0.8805031446540881, "grad_norm": 1.6685028076171875, "learning_rate": 1.7806972486667914e-05, "loss": 0.8078549385070801, "step": 2940 }, { "epoch": 0.883498053309374, "grad_norm": 1.5216376781463623, "learning_rate": 1.778514796696367e-05, "loss": 0.7762706279754639, "step": 2950 }, { "epoch": 0.8864929619646601, "grad_norm": 1.4812510013580322, "learning_rate": 1.7763228903097807e-05, "loss": 0.8366207122802735, "step": 2960 }, { "epoch": 0.889487870619946, "grad_norm": 1.5821658372879028, "learning_rate": 1.7741215561259155e-05, "loss": 0.8076998710632324, "step": 2970 }, { "epoch": 0.8924827792752321, "grad_norm": 1.7257674932479858, "learning_rate": 1.7719108208781488e-05, "loss": 0.7889442443847656, "step": 2980 }, { "epoch": 0.8954776879305181, "grad_norm": 1.514564871788025, "learning_rate": 1.7696907114140254e-05, "loss": 0.800442123413086, "step": 2990 }, { "epoch": 0.8984725965858041, "grad_norm": 1.5025800466537476, "learning_rate": 1.7674612546949325e-05, "loss": 0.8127084732055664, "step": 3000 }, { "epoch": 0.9014675052410901, "grad_norm": 1.5609087944030762, "learning_rate": 1.7652224777957714e-05, "loss": 0.8040850639343262, "step": 3010 }, { "epoch": 0.9044624138963762, "grad_norm": 1.493508219718933, "learning_rate": 1.762974407904631e-05, "loss": 0.7854836463928223, "step": 3020 }, { "epoch": 0.9074573225516622, "grad_norm": 1.6819945573806763, "learning_rate": 1.7607170723224534e-05, "loss": 0.7625170707702636, "step": 3030 }, { "epoch": 0.9104522312069482, "grad_norm": 1.6430078744888306, "learning_rate": 1.758450498462706e-05, "loss": 0.8078180313110351, "step": 3040 }, { "epoch": 0.9134471398622342, "grad_norm": 1.4024254083633423, "learning_rate": 1.7561747138510487e-05, "loss": 0.7755331516265869, "step": 3050 }, { "epoch": 0.9164420485175202, "grad_norm": 1.6579421758651733, "learning_rate": 1.7538897461249956e-05, "loss": 0.825098991394043, "step": 3060 }, { "epoch": 0.9194369571728063, "grad_norm": 1.4785950183868408, "learning_rate": 1.7515956230335844e-05, "loss": 0.7357244491577148, "step": 3070 }, { "epoch": 0.9224318658280922, "grad_norm": 1.4858529567718506, "learning_rate": 1.7492923724370355e-05, "loss": 0.7988026142120361, "step": 3080 }, { "epoch": 0.9254267744833783, "grad_norm": 1.3988063335418701, "learning_rate": 1.7469800223064172e-05, "loss": 0.8232571601867675, "step": 3090 }, { "epoch": 0.9284216831386642, "grad_norm": 1.4646496772766113, "learning_rate": 1.744658600723302e-05, "loss": 0.8289719581604004, "step": 3100 }, { "epoch": 0.9314165917939503, "grad_norm": 1.9083791971206665, "learning_rate": 1.742328135879429e-05, "loss": 0.7919368743896484, "step": 3110 }, { "epoch": 0.9344115004492363, "grad_norm": 1.7467341423034668, "learning_rate": 1.7399886560763598e-05, "loss": 0.7916288375854492, "step": 3120 }, { "epoch": 0.9374064091045223, "grad_norm": 1.7119777202606201, "learning_rate": 1.7376401897251357e-05, "loss": 0.757789134979248, "step": 3130 }, { "epoch": 0.9404013177598083, "grad_norm": 1.4119514226913452, "learning_rate": 1.7352827653459307e-05, "loss": 0.7901122093200683, "step": 3140 }, { "epoch": 0.9433962264150944, "grad_norm": 1.2908947467803955, "learning_rate": 1.732916411567708e-05, "loss": 0.7934576988220214, "step": 3150 }, { "epoch": 0.9463911350703803, "grad_norm": 1.5804283618927002, "learning_rate": 1.730541157127871e-05, "loss": 0.7917113304138184, "step": 3160 }, { "epoch": 0.9493860437256664, "grad_norm": 1.706937313079834, "learning_rate": 1.728157030871913e-05, "loss": 0.7889931678771973, "step": 3170 }, { "epoch": 0.9523809523809523, "grad_norm": 1.4798022508621216, "learning_rate": 1.7257640617530697e-05, "loss": 0.8395463943481445, "step": 3180 }, { "epoch": 0.9553758610362384, "grad_norm": 1.4213306903839111, "learning_rate": 1.7233622788319646e-05, "loss": 0.8060663223266602, "step": 3190 }, { "epoch": 0.9583707696915245, "grad_norm": 1.4119057655334473, "learning_rate": 1.7209517112762588e-05, "loss": 0.7896999835968017, "step": 3200 }, { "epoch": 0.9613656783468104, "grad_norm": 1.5566002130508423, "learning_rate": 1.7185323883602943e-05, "loss": 0.8031165122985839, "step": 3210 }, { "epoch": 0.9643605870020965, "grad_norm": 1.4153759479522705, "learning_rate": 1.7161043394647407e-05, "loss": 0.759066104888916, "step": 3220 }, { "epoch": 0.9673554956573824, "grad_norm": 1.9067392349243164, "learning_rate": 1.7136675940762367e-05, "loss": 0.7777122497558594, "step": 3230 }, { "epoch": 0.9703504043126685, "grad_norm": 1.4964123964309692, "learning_rate": 1.711222181787033e-05, "loss": 0.7858468055725097, "step": 3240 }, { "epoch": 0.9733453129679545, "grad_norm": 1.4618581533432007, "learning_rate": 1.7087681322946328e-05, "loss": 0.7849390983581543, "step": 3250 }, { "epoch": 0.9763402216232405, "grad_norm": 1.601406216621399, "learning_rate": 1.7063054754014303e-05, "loss": 0.7938404560089112, "step": 3260 }, { "epoch": 0.9793351302785265, "grad_norm": 1.4281142950057983, "learning_rate": 1.70383424101435e-05, "loss": 0.7438766479492187, "step": 3270 }, { "epoch": 0.9823300389338125, "grad_norm": 1.5444836616516113, "learning_rate": 1.7013544591444827e-05, "loss": 0.7451802730560303, "step": 3280 }, { "epoch": 0.9853249475890985, "grad_norm": 1.4732089042663574, "learning_rate": 1.698866159906722e-05, "loss": 0.8167963027954102, "step": 3290 }, { "epoch": 0.9883198562443846, "grad_norm": 1.870408535003662, "learning_rate": 1.6963693735193962e-05, "loss": 0.8137873649597168, "step": 3300 }, { "epoch": 0.9913147648996705, "grad_norm": 1.4198145866394043, "learning_rate": 1.693864130303905e-05, "loss": 0.770867919921875, "step": 3310 }, { "epoch": 0.9943096735549566, "grad_norm": 1.639811635017395, "learning_rate": 1.6913504606843474e-05, "loss": 0.8095382690429688, "step": 3320 }, { "epoch": 0.9973045822102425, "grad_norm": 1.4918293952941895, "learning_rate": 1.688828395187156e-05, "loss": 0.7985510349273681, "step": 3330 }, { "epoch": 1.0002994908655285, "grad_norm": 1.197721004486084, "learning_rate": 1.6862979644407227e-05, "loss": 0.7350101470947266, "step": 3340 }, { "epoch": 1.0032943995208146, "grad_norm": 1.5143485069274902, "learning_rate": 1.6837591991750293e-05, "loss": 0.7106464385986329, "step": 3350 }, { "epoch": 1.0062893081761006, "grad_norm": 1.7330113649368286, "learning_rate": 1.6812121302212728e-05, "loss": 0.7185450553894043, "step": 3360 }, { "epoch": 1.0092842168313867, "grad_norm": 1.672824740409851, "learning_rate": 1.6786567885114924e-05, "loss": 0.700438404083252, "step": 3370 }, { "epoch": 1.0122791254866728, "grad_norm": 1.5598795413970947, "learning_rate": 1.6760932050781927e-05, "loss": 0.6911828994750977, "step": 3380 }, { "epoch": 1.0152740341419586, "grad_norm": 1.6879022121429443, "learning_rate": 1.6735214110539667e-05, "loss": 0.7052880764007569, "step": 3390 }, { "epoch": 1.0182689427972447, "grad_norm": 1.764460802078247, "learning_rate": 1.670941437671119e-05, "loss": 0.731821346282959, "step": 3400 }, { "epoch": 1.0212638514525307, "grad_norm": 1.6526857614517212, "learning_rate": 1.668353316261285e-05, "loss": 0.7477367877960205, "step": 3410 }, { "epoch": 1.0242587601078168, "grad_norm": 1.6531809568405151, "learning_rate": 1.665757078255052e-05, "loss": 0.7096085548400879, "step": 3420 }, { "epoch": 1.0272536687631026, "grad_norm": 1.4806548357009888, "learning_rate": 1.6631527551815757e-05, "loss": 0.711548137664795, "step": 3430 }, { "epoch": 1.0302485774183887, "grad_norm": 1.67629075050354, "learning_rate": 1.6605403786681992e-05, "loss": 0.7366076946258545, "step": 3440 }, { "epoch": 1.0332434860736748, "grad_norm": 1.3547171354293823, "learning_rate": 1.6579199804400667e-05, "loss": 0.6797126770019531, "step": 3450 }, { "epoch": 1.0362383947289608, "grad_norm": 1.343342900276184, "learning_rate": 1.6552915923197404e-05, "loss": 0.6926548480987549, "step": 3460 }, { "epoch": 1.0392333033842467, "grad_norm": 1.5611882209777832, "learning_rate": 1.652655246226813e-05, "loss": 0.7226381778717041, "step": 3470 }, { "epoch": 1.0422282120395328, "grad_norm": 1.3495250940322876, "learning_rate": 1.65001097417752e-05, "loss": 0.6564831733703613, "step": 3480 }, { "epoch": 1.0452231206948188, "grad_norm": 1.4157185554504395, "learning_rate": 1.6473588082843513e-05, "loss": 0.659664249420166, "step": 3490 }, { "epoch": 1.0482180293501049, "grad_norm": 1.4454340934753418, "learning_rate": 1.6446987807556605e-05, "loss": 0.735554313659668, "step": 3500 }, { "epoch": 1.0512129380053907, "grad_norm": 1.6114473342895508, "learning_rate": 1.642030923895275e-05, "loss": 0.7064272880554199, "step": 3510 }, { "epoch": 1.0542078466606768, "grad_norm": 1.393011450767517, "learning_rate": 1.639355270102102e-05, "loss": 0.7071351051330567, "step": 3520 }, { "epoch": 1.0572027553159629, "grad_norm": 1.4447367191314697, "learning_rate": 1.6366718518697366e-05, "loss": 0.6948044776916504, "step": 3530 }, { "epoch": 1.060197663971249, "grad_norm": 1.5719486474990845, "learning_rate": 1.633980701786066e-05, "loss": 0.6962141036987305, "step": 3540 }, { "epoch": 1.063192572626535, "grad_norm": 1.5820865631103516, "learning_rate": 1.6312818525328756e-05, "loss": 0.7146442413330079, "step": 3550 }, { "epoch": 1.0661874812818208, "grad_norm": 1.5105618238449097, "learning_rate": 1.628575336885449e-05, "loss": 0.6941755771636963, "step": 3560 }, { "epoch": 1.069182389937107, "grad_norm": 1.6371991634368896, "learning_rate": 1.6258611877121737e-05, "loss": 0.6982086658477783, "step": 3570 }, { "epoch": 1.072177298592393, "grad_norm": 1.484971523284912, "learning_rate": 1.6231394379741386e-05, "loss": 0.7136051177978515, "step": 3580 }, { "epoch": 1.075172207247679, "grad_norm": 1.6102113723754883, "learning_rate": 1.620410120724736e-05, "loss": 0.699164342880249, "step": 3590 }, { "epoch": 1.0781671159029649, "grad_norm": 1.6524529457092285, "learning_rate": 1.6176732691092584e-05, "loss": 0.6819294929504395, "step": 3600 }, { "epoch": 1.081162024558251, "grad_norm": 1.552896499633789, "learning_rate": 1.6149289163644978e-05, "loss": 0.6616555213928222, "step": 3610 }, { "epoch": 1.084156933213537, "grad_norm": 1.5037239789962769, "learning_rate": 1.612177095818341e-05, "loss": 0.7165458679199219, "step": 3620 }, { "epoch": 1.087151841868823, "grad_norm": 1.4005266427993774, "learning_rate": 1.6094178408893648e-05, "loss": 0.6788459777832031, "step": 3630 }, { "epoch": 1.090146750524109, "grad_norm": 1.5649514198303223, "learning_rate": 1.606651185086431e-05, "loss": 0.6839639663696289, "step": 3640 }, { "epoch": 1.093141659179395, "grad_norm": 1.4369744062423706, "learning_rate": 1.603877162008278e-05, "loss": 0.6825023651123047, "step": 3650 }, { "epoch": 1.096136567834681, "grad_norm": 1.4755173921585083, "learning_rate": 1.601095805343114e-05, "loss": 0.7037545204162597, "step": 3660 }, { "epoch": 1.0991314764899671, "grad_norm": 1.3595280647277832, "learning_rate": 1.598307148868208e-05, "loss": 0.6997042655944824, "step": 3670 }, { "epoch": 1.1021263851452532, "grad_norm": 1.6197599172592163, "learning_rate": 1.5955112264494784e-05, "loss": 0.7151602745056153, "step": 3680 }, { "epoch": 1.105121293800539, "grad_norm": 1.5634205341339111, "learning_rate": 1.5927080720410836e-05, "loss": 0.6688960075378418, "step": 3690 }, { "epoch": 1.108116202455825, "grad_norm": 1.6595895290374756, "learning_rate": 1.5898977196850066e-05, "loss": 0.7106626510620118, "step": 3700 }, { "epoch": 1.1111111111111112, "grad_norm": 1.4825279712677002, "learning_rate": 1.5870802035106452e-05, "loss": 0.7196572303771973, "step": 3710 }, { "epoch": 1.1141060197663972, "grad_norm": 1.4523088932037354, "learning_rate": 1.584255557734395e-05, "loss": 0.7004715442657471, "step": 3720 }, { "epoch": 1.117100928421683, "grad_norm": 1.4041231870651245, "learning_rate": 1.5814238166592352e-05, "loss": 0.7263636112213134, "step": 3730 }, { "epoch": 1.1200958370769691, "grad_norm": 1.5008774995803833, "learning_rate": 1.5785850146743112e-05, "loss": 0.6979952812194824, "step": 3740 }, { "epoch": 1.1230907457322552, "grad_norm": 1.6422677040100098, "learning_rate": 1.5757391862545175e-05, "loss": 0.6974923133850097, "step": 3750 }, { "epoch": 1.1260856543875413, "grad_norm": 1.6295057535171509, "learning_rate": 1.5728863659600785e-05, "loss": 0.6878085136413574, "step": 3760 }, { "epoch": 1.1290805630428271, "grad_norm": 1.5771390199661255, "learning_rate": 1.570026588436129e-05, "loss": 0.7069286823272705, "step": 3770 }, { "epoch": 1.1320754716981132, "grad_norm": 1.5606156587600708, "learning_rate": 1.5671598884122943e-05, "loss": 0.7105122566223144, "step": 3780 }, { "epoch": 1.1350703803533992, "grad_norm": 1.573263168334961, "learning_rate": 1.5642863007022673e-05, "loss": 0.6617315292358399, "step": 3790 }, { "epoch": 1.1380652890086853, "grad_norm": 1.581919550895691, "learning_rate": 1.561405860203386e-05, "loss": 0.6750922679901123, "step": 3800 }, { "epoch": 1.1410601976639712, "grad_norm": 1.6347953081130981, "learning_rate": 1.5585186018962096e-05, "loss": 0.6865742683410645, "step": 3810 }, { "epoch": 1.1440551063192572, "grad_norm": 1.6192823648452759, "learning_rate": 1.555624560844095e-05, "loss": 0.6836994647979736, "step": 3820 }, { "epoch": 1.1470500149745433, "grad_norm": 1.49833345413208, "learning_rate": 1.5527237721927682e-05, "loss": 0.7058408737182618, "step": 3830 }, { "epoch": 1.1500449236298294, "grad_norm": 1.5722405910491943, "learning_rate": 1.5498162711699013e-05, "loss": 0.6902894973754883, "step": 3840 }, { "epoch": 1.1530398322851152, "grad_norm": 1.4945104122161865, "learning_rate": 1.546902093084681e-05, "loss": 0.6969739437103272, "step": 3850 }, { "epoch": 1.1560347409404013, "grad_norm": 1.5300406217575073, "learning_rate": 1.5439812733273814e-05, "loss": 0.6966294288635254, "step": 3860 }, { "epoch": 1.1590296495956873, "grad_norm": 1.4756025075912476, "learning_rate": 1.541053847368935e-05, "loss": 0.6721511840820312, "step": 3870 }, { "epoch": 1.1620245582509734, "grad_norm": 1.3550491333007812, "learning_rate": 1.5381198507605008e-05, "loss": 0.6645829200744628, "step": 3880 }, { "epoch": 1.1650194669062595, "grad_norm": 1.5267562866210938, "learning_rate": 1.5351793191330328e-05, "loss": 0.7032648086547851, "step": 3890 }, { "epoch": 1.1680143755615453, "grad_norm": 1.5716965198516846, "learning_rate": 1.5322322881968476e-05, "loss": 0.7047882556915284, "step": 3900 }, { "epoch": 1.1710092842168314, "grad_norm": 1.3805803060531616, "learning_rate": 1.5292787937411903e-05, "loss": 0.6749917030334472, "step": 3910 }, { "epoch": 1.1740041928721174, "grad_norm": 1.4217103719711304, "learning_rate": 1.5263188716338e-05, "loss": 0.6801820755004883, "step": 3920 }, { "epoch": 1.1769991015274035, "grad_norm": 1.5605568885803223, "learning_rate": 1.5233525578204745e-05, "loss": 0.6716075897216797, "step": 3930 }, { "epoch": 1.1799940101826893, "grad_norm": 1.5446428060531616, "learning_rate": 1.5203798883246334e-05, "loss": 0.6891654968261719, "step": 3940 }, { "epoch": 1.1829889188379754, "grad_norm": 1.4253835678100586, "learning_rate": 1.517400899246881e-05, "loss": 0.702687931060791, "step": 3950 }, { "epoch": 1.1859838274932615, "grad_norm": 1.7337391376495361, "learning_rate": 1.5144156267645675e-05, "loss": 0.6723766326904297, "step": 3960 }, { "epoch": 1.1889787361485475, "grad_norm": 1.723059892654419, "learning_rate": 1.51142410713135e-05, "loss": 0.6978803634643554, "step": 3970 }, { "epoch": 1.1919736448038334, "grad_norm": 1.748765230178833, "learning_rate": 1.5084263766767522e-05, "loss": 0.6807281494140625, "step": 3980 }, { "epoch": 1.1949685534591195, "grad_norm": 1.592793583869934, "learning_rate": 1.505422471805722e-05, "loss": 0.6684311866760254, "step": 3990 }, { "epoch": 1.1979634621144055, "grad_norm": 1.5449707508087158, "learning_rate": 1.502412428998192e-05, "loss": 0.682776689529419, "step": 4000 }, { "epoch": 1.2009583707696916, "grad_norm": 1.5067434310913086, "learning_rate": 1.4993962848086341e-05, "loss": 0.6774695873260498, "step": 4010 }, { "epoch": 1.2039532794249777, "grad_norm": 1.4419279098510742, "learning_rate": 1.4963740758656167e-05, "loss": 0.6701112270355225, "step": 4020 }, { "epoch": 1.2069481880802635, "grad_norm": 1.4293246269226074, "learning_rate": 1.4933458388713591e-05, "loss": 0.6676129341125489, "step": 4030 }, { "epoch": 1.2099430967355496, "grad_norm": 1.5989415645599365, "learning_rate": 1.4903116106012867e-05, "loss": 0.7103249549865722, "step": 4040 }, { "epoch": 1.2129380053908356, "grad_norm": 1.6779661178588867, "learning_rate": 1.4872714279035842e-05, "loss": 0.660029125213623, "step": 4050 }, { "epoch": 1.2159329140461215, "grad_norm": 1.4901732206344604, "learning_rate": 1.4842253276987475e-05, "loss": 0.6614209175109863, "step": 4060 }, { "epoch": 1.2189278227014075, "grad_norm": 1.4033241271972656, "learning_rate": 1.4811733469791357e-05, "loss": 0.7145218849182129, "step": 4070 }, { "epoch": 1.2219227313566936, "grad_norm": 1.7574478387832642, "learning_rate": 1.478115522808522e-05, "loss": 0.6761277675628662, "step": 4080 }, { "epoch": 1.2249176400119797, "grad_norm": 1.482534408569336, "learning_rate": 1.4750518923216435e-05, "loss": 0.6484230041503907, "step": 4090 }, { "epoch": 1.2279125486672657, "grad_norm": 1.5970929861068726, "learning_rate": 1.4719824927237497e-05, "loss": 0.6735719680786133, "step": 4100 }, { "epoch": 1.2309074573225516, "grad_norm": 1.5238401889801025, "learning_rate": 1.4689073612901525e-05, "loss": 0.7009137153625489, "step": 4110 }, { "epoch": 1.2339023659778376, "grad_norm": 1.491351842880249, "learning_rate": 1.4658265353657708e-05, "loss": 0.6697447776794434, "step": 4120 }, { "epoch": 1.2368972746331237, "grad_norm": 1.6152558326721191, "learning_rate": 1.4627400523646788e-05, "loss": 0.7037046909332275, "step": 4130 }, { "epoch": 1.2398921832884098, "grad_norm": 1.5510462522506714, "learning_rate": 1.4596479497696515e-05, "loss": 0.6818698883056641, "step": 4140 }, { "epoch": 1.2428870919436956, "grad_norm": 1.4972020387649536, "learning_rate": 1.4565502651317084e-05, "loss": 0.7084139823913574, "step": 4150 }, { "epoch": 1.2458820005989817, "grad_norm": 1.5813428163528442, "learning_rate": 1.4534470360696596e-05, "loss": 0.6693055152893066, "step": 4160 }, { "epoch": 1.2488769092542678, "grad_norm": 1.3404837846755981, "learning_rate": 1.4503383002696463e-05, "loss": 0.6707363128662109, "step": 4170 }, { "epoch": 1.2518718179095538, "grad_norm": 1.5015774965286255, "learning_rate": 1.4472240954846853e-05, "loss": 0.6856432914733886, "step": 4180 }, { "epoch": 1.2548667265648397, "grad_norm": 1.8516919612884521, "learning_rate": 1.4441044595342092e-05, "loss": 0.6972317218780517, "step": 4190 }, { "epoch": 1.2578616352201257, "grad_norm": 1.5744614601135254, "learning_rate": 1.4409794303036083e-05, "loss": 0.6880950927734375, "step": 4200 }, { "epoch": 1.2608565438754118, "grad_norm": 1.3624542951583862, "learning_rate": 1.4378490457437687e-05, "loss": 0.6077318668365479, "step": 4210 }, { "epoch": 1.2638514525306979, "grad_norm": 1.6590559482574463, "learning_rate": 1.4347133438706138e-05, "loss": 0.676889705657959, "step": 4220 }, { "epoch": 1.266846361185984, "grad_norm": 1.4079532623291016, "learning_rate": 1.4315723627646403e-05, "loss": 0.6575328350067139, "step": 4230 }, { "epoch": 1.2698412698412698, "grad_norm": 1.3833630084991455, "learning_rate": 1.4284261405704572e-05, "loss": 0.6833572387695312, "step": 4240 }, { "epoch": 1.2728361784965558, "grad_norm": 1.5398093461990356, "learning_rate": 1.4252747154963223e-05, "loss": 0.7134138584136963, "step": 4250 }, { "epoch": 1.275831087151842, "grad_norm": 1.440406322479248, "learning_rate": 1.4221181258136779e-05, "loss": 0.6839028835296631, "step": 4260 }, { "epoch": 1.2788259958071277, "grad_norm": 1.5981833934783936, "learning_rate": 1.4189564098566861e-05, "loss": 0.6973752975463867, "step": 4270 }, { "epoch": 1.281820904462414, "grad_norm": 1.3795045614242554, "learning_rate": 1.415789606021764e-05, "loss": 0.6336652278900147, "step": 4280 }, { "epoch": 1.2848158131176999, "grad_norm": 1.555843472480774, "learning_rate": 1.4126177527671157e-05, "loss": 0.7054344654083252, "step": 4290 }, { "epoch": 1.287810721772986, "grad_norm": 1.805177092552185, "learning_rate": 1.4094408886122671e-05, "loss": 0.7191495895385742, "step": 4300 }, { "epoch": 1.290805630428272, "grad_norm": 1.6533230543136597, "learning_rate": 1.406259052137597e-05, "loss": 0.6862345695495605, "step": 4310 }, { "epoch": 1.2938005390835579, "grad_norm": 1.4151486158370972, "learning_rate": 1.4030722819838686e-05, "loss": 0.6652461528778076, "step": 4320 }, { "epoch": 1.296795447738844, "grad_norm": 1.5951013565063477, "learning_rate": 1.3998806168517618e-05, "loss": 0.6539525508880615, "step": 4330 }, { "epoch": 1.29979035639413, "grad_norm": 1.359511375427246, "learning_rate": 1.3966840955014001e-05, "loss": 0.6631481170654296, "step": 4340 }, { "epoch": 1.302785265049416, "grad_norm": 1.619667649269104, "learning_rate": 1.3934827567518832e-05, "loss": 0.6702329635620117, "step": 4350 }, { "epoch": 1.3057801737047021, "grad_norm": 1.466314673423767, "learning_rate": 1.3902766394808135e-05, "loss": 0.6989962100982666, "step": 4360 }, { "epoch": 1.308775082359988, "grad_norm": 1.456148386001587, "learning_rate": 1.387065782623825e-05, "loss": 0.6782450199127197, "step": 4370 }, { "epoch": 1.311769991015274, "grad_norm": 1.3850840330123901, "learning_rate": 1.383850225174109e-05, "loss": 0.6970182418823242, "step": 4380 }, { "epoch": 1.31476489967056, "grad_norm": 1.3463149070739746, "learning_rate": 1.3806300061819431e-05, "loss": 0.6578661441802979, "step": 4390 }, { "epoch": 1.317759808325846, "grad_norm": 1.5610235929489136, "learning_rate": 1.3774051647542143e-05, "loss": 0.6358757019042969, "step": 4400 }, { "epoch": 1.320754716981132, "grad_norm": 1.673736572265625, "learning_rate": 1.374175740053946e-05, "loss": 0.661113166809082, "step": 4410 }, { "epoch": 1.323749625636418, "grad_norm": 1.4001795053482056, "learning_rate": 1.3709417712998206e-05, "loss": 0.6617262363433838, "step": 4420 }, { "epoch": 1.3267445342917041, "grad_norm": 1.2105085849761963, "learning_rate": 1.3677032977657051e-05, "loss": 0.6960249900817871, "step": 4430 }, { "epoch": 1.3297394429469902, "grad_norm": 1.409833312034607, "learning_rate": 1.3644603587801737e-05, "loss": 0.69888334274292, "step": 4440 }, { "epoch": 1.332734351602276, "grad_norm": 1.5228267908096313, "learning_rate": 1.3612129937260288e-05, "loss": 0.6690874099731445, "step": 4450 }, { "epoch": 1.3357292602575621, "grad_norm": 1.4134560823440552, "learning_rate": 1.3579612420398245e-05, "loss": 0.686200761795044, "step": 4460 }, { "epoch": 1.3387241689128482, "grad_norm": 1.491080641746521, "learning_rate": 1.3547051432113862e-05, "loss": 0.687087869644165, "step": 4470 }, { "epoch": 1.3417190775681342, "grad_norm": 1.3831361532211304, "learning_rate": 1.3514447367833325e-05, "loss": 0.6945667266845703, "step": 4480 }, { "epoch": 1.3447139862234203, "grad_norm": 1.3812413215637207, "learning_rate": 1.3481800623505937e-05, "loss": 0.701347827911377, "step": 4490 }, { "epoch": 1.3477088948787062, "grad_norm": 1.4374605417251587, "learning_rate": 1.3449111595599316e-05, "loss": 0.711556339263916, "step": 4500 }, { "epoch": 1.3507038035339922, "grad_norm": 1.4586987495422363, "learning_rate": 1.3416380681094578e-05, "loss": 0.6958023071289062, "step": 4510 }, { "epoch": 1.3536987121892783, "grad_norm": 1.4740244150161743, "learning_rate": 1.338360827748152e-05, "loss": 0.6627859115600586, "step": 4520 }, { "epoch": 1.3566936208445641, "grad_norm": 1.3693221807479858, "learning_rate": 1.3350794782753788e-05, "loss": 0.6928750038146972, "step": 4530 }, { "epoch": 1.3596885294998502, "grad_norm": 1.3615459203720093, "learning_rate": 1.3317940595404046e-05, "loss": 0.7074526786804199, "step": 4540 }, { "epoch": 1.3626834381551363, "grad_norm": 1.4550254344940186, "learning_rate": 1.3285046114419133e-05, "loss": 0.6495938301086426, "step": 4550 }, { "epoch": 1.3656783468104223, "grad_norm": 1.4986083507537842, "learning_rate": 1.3252111739275226e-05, "loss": 0.6903128623962402, "step": 4560 }, { "epoch": 1.3686732554657084, "grad_norm": 1.5021880865097046, "learning_rate": 1.321913786993298e-05, "loss": 0.6855093955993652, "step": 4570 }, { "epoch": 1.3716681641209942, "grad_norm": 1.435672402381897, "learning_rate": 1.3186124906832678e-05, "loss": 0.6734979629516602, "step": 4580 }, { "epoch": 1.3746630727762803, "grad_norm": 1.5334513187408447, "learning_rate": 1.3153073250889354e-05, "loss": 0.6375434398651123, "step": 4590 }, { "epoch": 1.3776579814315664, "grad_norm": 1.5921248197555542, "learning_rate": 1.311998330348795e-05, "loss": 0.6622870445251465, "step": 4600 }, { "epoch": 1.3806528900868522, "grad_norm": 1.2265634536743164, "learning_rate": 1.308685546647841e-05, "loss": 0.6761940002441407, "step": 4610 }, { "epoch": 1.3836477987421385, "grad_norm": 1.3404898643493652, "learning_rate": 1.3053690142170827e-05, "loss": 0.696360969543457, "step": 4620 }, { "epoch": 1.3866427073974243, "grad_norm": 1.4971781969070435, "learning_rate": 1.3020487733330547e-05, "loss": 0.6800951480865478, "step": 4630 }, { "epoch": 1.3896376160527104, "grad_norm": 1.3999356031417847, "learning_rate": 1.2987248643173267e-05, "loss": 0.673220443725586, "step": 4640 }, { "epoch": 1.3926325247079965, "grad_norm": 1.6020697355270386, "learning_rate": 1.2953973275360156e-05, "loss": 0.6516348838806152, "step": 4650 }, { "epoch": 1.3956274333632823, "grad_norm": 1.36565101146698, "learning_rate": 1.2920662033992946e-05, "loss": 0.6564604759216308, "step": 4660 }, { "epoch": 1.3986223420185684, "grad_norm": 1.6752657890319824, "learning_rate": 1.2887315323609016e-05, "loss": 0.6703821182250976, "step": 4670 }, { "epoch": 1.4016172506738545, "grad_norm": 1.5048459768295288, "learning_rate": 1.2853933549176492e-05, "loss": 0.6673481464385986, "step": 4680 }, { "epoch": 1.4046121593291405, "grad_norm": 1.5359078645706177, "learning_rate": 1.2820517116089321e-05, "loss": 0.6794118881225586, "step": 4690 }, { "epoch": 1.4076070679844266, "grad_norm": 1.432215690612793, "learning_rate": 1.2787066430162355e-05, "loss": 0.6648625373840332, "step": 4700 }, { "epoch": 1.4106019766397124, "grad_norm": 1.418803334236145, "learning_rate": 1.2753581897626419e-05, "loss": 0.6332767486572266, "step": 4710 }, { "epoch": 1.4135968852949985, "grad_norm": 1.5143288373947144, "learning_rate": 1.2720063925123367e-05, "loss": 0.7153759002685547, "step": 4720 }, { "epoch": 1.4165917939502846, "grad_norm": 1.3891148567199707, "learning_rate": 1.2686512919701167e-05, "loss": 0.63809814453125, "step": 4730 }, { "epoch": 1.4195867026055704, "grad_norm": 1.3475419282913208, "learning_rate": 1.2652929288808933e-05, "loss": 0.6704463958740234, "step": 4740 }, { "epoch": 1.4225816112608565, "grad_norm": 1.290427327156067, "learning_rate": 1.2619313440291995e-05, "loss": 0.6674720764160156, "step": 4750 }, { "epoch": 1.4255765199161425, "grad_norm": 1.653327465057373, "learning_rate": 1.2585665782386938e-05, "loss": 0.6292222499847412, "step": 4760 }, { "epoch": 1.4285714285714286, "grad_norm": 1.650072455406189, "learning_rate": 1.2551986723716642e-05, "loss": 0.6493176460266114, "step": 4770 }, { "epoch": 1.4315663372267147, "grad_norm": 1.5939024686813354, "learning_rate": 1.2518276673285332e-05, "loss": 0.7008792877197265, "step": 4780 }, { "epoch": 1.4345612458820005, "grad_norm": 1.2825090885162354, "learning_rate": 1.2484536040473593e-05, "loss": 0.6760101318359375, "step": 4790 }, { "epoch": 1.4375561545372866, "grad_norm": 1.2787253856658936, "learning_rate": 1.245076523503341e-05, "loss": 0.6585366249084472, "step": 4800 }, { "epoch": 1.4405510631925726, "grad_norm": 1.4136161804199219, "learning_rate": 1.2416964667083193e-05, "loss": 0.6975108623504639, "step": 4810 }, { "epoch": 1.4435459718478587, "grad_norm": 1.698377013206482, "learning_rate": 1.238313474710279e-05, "loss": 0.6626195430755615, "step": 4820 }, { "epoch": 1.4465408805031448, "grad_norm": 1.4335144758224487, "learning_rate": 1.2349275885928504e-05, "loss": 0.6314863204956055, "step": 4830 }, { "epoch": 1.4495357891584306, "grad_norm": 1.373105764389038, "learning_rate": 1.2315388494748109e-05, "loss": 0.6849304676055908, "step": 4840 }, { "epoch": 1.4525306978137167, "grad_norm": 1.415645718574524, "learning_rate": 1.2281472985095848e-05, "loss": 0.6753826141357422, "step": 4850 }, { "epoch": 1.4555256064690028, "grad_norm": 1.2998967170715332, "learning_rate": 1.2247529768847439e-05, "loss": 0.6722857475280761, "step": 4860 }, { "epoch": 1.4585205151242886, "grad_norm": 1.4371323585510254, "learning_rate": 1.2213559258215084e-05, "loss": 0.646511459350586, "step": 4870 }, { "epoch": 1.4615154237795747, "grad_norm": 1.6259355545043945, "learning_rate": 1.2179561865742437e-05, "loss": 0.6791155815124512, "step": 4880 }, { "epoch": 1.4645103324348607, "grad_norm": 1.5392216444015503, "learning_rate": 1.214553800429962e-05, "loss": 0.6901945114135742, "step": 4890 }, { "epoch": 1.4675052410901468, "grad_norm": 1.5144057273864746, "learning_rate": 1.2111488087078195e-05, "loss": 0.6945788860321045, "step": 4900 }, { "epoch": 1.4705001497454329, "grad_norm": 1.3844542503356934, "learning_rate": 1.2077412527586152e-05, "loss": 0.7096900939941406, "step": 4910 }, { "epoch": 1.4734950584007187, "grad_norm": 1.5048848390579224, "learning_rate": 1.2043311739642882e-05, "loss": 0.6696764945983886, "step": 4920 }, { "epoch": 1.4764899670560048, "grad_norm": 1.4695067405700684, "learning_rate": 1.2009186137374158e-05, "loss": 0.670767879486084, "step": 4930 }, { "epoch": 1.4794848757112908, "grad_norm": 1.2492382526397705, "learning_rate": 1.19750361352071e-05, "loss": 0.6456597328186036, "step": 4940 }, { "epoch": 1.482479784366577, "grad_norm": 1.5186883211135864, "learning_rate": 1.1940862147865145e-05, "loss": 0.6502896308898926, "step": 4950 }, { "epoch": 1.485474693021863, "grad_norm": 1.5641230344772339, "learning_rate": 1.1906664590363008e-05, "loss": 0.6687553405761719, "step": 4960 }, { "epoch": 1.4884696016771488, "grad_norm": 1.5437209606170654, "learning_rate": 1.1872443878001652e-05, "loss": 0.6531869411468506, "step": 4970 }, { "epoch": 1.4914645103324349, "grad_norm": 1.342319369316101, "learning_rate": 1.1838200426363227e-05, "loss": 0.6570711135864258, "step": 4980 }, { "epoch": 1.494459418987721, "grad_norm": 1.4218707084655762, "learning_rate": 1.1803934651306037e-05, "loss": 0.6730245590209961, "step": 4990 }, { "epoch": 1.4974543276430068, "grad_norm": 1.4281178712844849, "learning_rate": 1.1769646968959485e-05, "loss": 0.7147689819335937, "step": 5000 }, { "epoch": 1.5004492362982929, "grad_norm": 1.3982033729553223, "learning_rate": 1.1735337795719018e-05, "loss": 0.6860141754150391, "step": 5010 }, { "epoch": 1.503444144953579, "grad_norm": 1.3819836378097534, "learning_rate": 1.1701007548241077e-05, "loss": 0.6726783752441406, "step": 5020 }, { "epoch": 1.5064390536088648, "grad_norm": 1.347074270248413, "learning_rate": 1.1666656643438029e-05, "loss": 0.6863351821899414, "step": 5030 }, { "epoch": 1.509433962264151, "grad_norm": 1.556456208229065, "learning_rate": 1.1632285498473104e-05, "loss": 0.6616711616516113, "step": 5040 }, { "epoch": 1.512428870919437, "grad_norm": 1.3579554557800293, "learning_rate": 1.1597894530755339e-05, "loss": 0.6517277717590332, "step": 5050 }, { "epoch": 1.515423779574723, "grad_norm": 1.2416077852249146, "learning_rate": 1.1563484157934495e-05, "loss": 0.6717746734619141, "step": 5060 }, { "epoch": 1.518418688230009, "grad_norm": 1.5442560911178589, "learning_rate": 1.1529054797895995e-05, "loss": 0.634144401550293, "step": 5070 }, { "epoch": 1.5214135968852949, "grad_norm": 1.5588805675506592, "learning_rate": 1.1494606868755847e-05, "loss": 0.6501172065734864, "step": 5080 }, { "epoch": 1.5244085055405812, "grad_norm": 1.4256302118301392, "learning_rate": 1.1460140788855563e-05, "loss": 0.6600000858306885, "step": 5090 }, { "epoch": 1.527403414195867, "grad_norm": 1.5612784624099731, "learning_rate": 1.1425656976757083e-05, "loss": 0.6572963237762451, "step": 5100 }, { "epoch": 1.530398322851153, "grad_norm": 1.3370282649993896, "learning_rate": 1.1391155851237687e-05, "loss": 0.6684782028198242, "step": 5110 }, { "epoch": 1.5333932315064391, "grad_norm": 1.5403549671173096, "learning_rate": 1.1356637831284918e-05, "loss": 0.6727892875671386, "step": 5120 }, { "epoch": 1.536388140161725, "grad_norm": 1.370840072631836, "learning_rate": 1.1322103336091479e-05, "loss": 0.6451261520385743, "step": 5130 }, { "epoch": 1.539383048817011, "grad_norm": 1.3316929340362549, "learning_rate": 1.128755278505016e-05, "loss": 0.6273011207580567, "step": 5140 }, { "epoch": 1.5423779574722971, "grad_norm": 1.4078290462493896, "learning_rate": 1.1252986597748726e-05, "loss": 0.6177189826965332, "step": 5150 }, { "epoch": 1.545372866127583, "grad_norm": 1.5209804773330688, "learning_rate": 1.1218405193964846e-05, "loss": 0.6376583576202393, "step": 5160 }, { "epoch": 1.5483677747828692, "grad_norm": 1.4470043182373047, "learning_rate": 1.1183808993660966e-05, "loss": 0.6674811363220214, "step": 5170 }, { "epoch": 1.551362683438155, "grad_norm": 1.353264570236206, "learning_rate": 1.114919841697923e-05, "loss": 0.6187152862548828, "step": 5180 }, { "epoch": 1.5543575920934412, "grad_norm": 1.4463789463043213, "learning_rate": 1.111457388423637e-05, "loss": 0.6588546752929687, "step": 5190 }, { "epoch": 1.5573525007487272, "grad_norm": 1.3931549787521362, "learning_rate": 1.1079935815918608e-05, "loss": 0.6881397247314454, "step": 5200 }, { "epoch": 1.560347409404013, "grad_norm": 1.535813331604004, "learning_rate": 1.1045284632676535e-05, "loss": 0.6458590507507325, "step": 5210 }, { "epoch": 1.5633423180592994, "grad_norm": 1.4285786151885986, "learning_rate": 1.1010620755320018e-05, "loss": 0.6613713264465332, "step": 5220 }, { "epoch": 1.5663372267145852, "grad_norm": 1.361226201057434, "learning_rate": 1.0975944604813083e-05, "loss": 0.6590459823608399, "step": 5230 }, { "epoch": 1.5693321353698713, "grad_norm": 1.4913057088851929, "learning_rate": 1.0941256602268799e-05, "loss": 0.6750634193420411, "step": 5240 }, { "epoch": 1.5723270440251573, "grad_norm": 1.4666550159454346, "learning_rate": 1.0906557168944174e-05, "loss": 0.6445255279541016, "step": 5250 }, { "epoch": 1.5753219526804432, "grad_norm": 1.287156105041504, "learning_rate": 1.0871846726235031e-05, "loss": 0.6598057270050048, "step": 5260 }, { "epoch": 1.5783168613357292, "grad_norm": 1.3992464542388916, "learning_rate": 1.0837125695670892e-05, "loss": 0.6409515380859375, "step": 5270 }, { "epoch": 1.5813117699910153, "grad_norm": 1.4923102855682373, "learning_rate": 1.0802394498909859e-05, "loss": 0.629506254196167, "step": 5280 }, { "epoch": 1.5843066786463011, "grad_norm": 1.6271188259124756, "learning_rate": 1.0767653557733494e-05, "loss": 0.6570216655731201, "step": 5290 }, { "epoch": 1.5873015873015874, "grad_norm": 1.6705671548843384, "learning_rate": 1.0732903294041702e-05, "loss": 0.6791990280151368, "step": 5300 }, { "epoch": 1.5902964959568733, "grad_norm": 1.477299451828003, "learning_rate": 1.0698144129847598e-05, "loss": 0.6323776721954346, "step": 5310 }, { "epoch": 1.5932914046121593, "grad_norm": 1.4466052055358887, "learning_rate": 1.0663376487272386e-05, "loss": 0.635925006866455, "step": 5320 }, { "epoch": 1.5962863132674454, "grad_norm": 1.3402115106582642, "learning_rate": 1.0628600788540232e-05, "loss": 0.6522153377532959, "step": 5330 }, { "epoch": 1.5992812219227313, "grad_norm": 1.5251848697662354, "learning_rate": 1.059381745597314e-05, "loss": 0.6985126495361328, "step": 5340 }, { "epoch": 1.6022761305780173, "grad_norm": 1.3335204124450684, "learning_rate": 1.0559026911985817e-05, "loss": 0.6422924041748047, "step": 5350 }, { "epoch": 1.6052710392333034, "grad_norm": 1.3140618801116943, "learning_rate": 1.0524229579080553e-05, "loss": 0.6553333759307861, "step": 5360 }, { "epoch": 1.6082659478885895, "grad_norm": 1.5221872329711914, "learning_rate": 1.0489425879842079e-05, "loss": 0.6545061588287353, "step": 5370 }, { "epoch": 1.6112608565438755, "grad_norm": 1.257545828819275, "learning_rate": 1.0454616236932437e-05, "loss": 0.6448293209075928, "step": 5380 }, { "epoch": 1.6142557651991614, "grad_norm": 1.5430147647857666, "learning_rate": 1.0419801073085856e-05, "loss": 0.6655144691467285, "step": 5390 }, { "epoch": 1.6172506738544474, "grad_norm": 1.528615117073059, "learning_rate": 1.0384980811103614e-05, "loss": 0.6634177207946778, "step": 5400 }, { "epoch": 1.6202455825097335, "grad_norm": 1.294121503829956, "learning_rate": 1.035015587384889e-05, "loss": 0.6833911895751953, "step": 5410 }, { "epoch": 1.6232404911650193, "grad_norm": 1.3020493984222412, "learning_rate": 1.0315326684241655e-05, "loss": 0.6578948020935058, "step": 5420 }, { "epoch": 1.6262353998203056, "grad_norm": 1.599776268005371, "learning_rate": 1.028049366525351e-05, "loss": 0.6559863567352295, "step": 5430 }, { "epoch": 1.6292303084755915, "grad_norm": 1.5496463775634766, "learning_rate": 1.0245657239902565e-05, "loss": 0.6398555755615234, "step": 5440 }, { "epoch": 1.6322252171308775, "grad_norm": 1.4625442028045654, "learning_rate": 1.0210817831248299e-05, "loss": 0.653658676147461, "step": 5450 }, { "epoch": 1.6352201257861636, "grad_norm": 1.425125241279602, "learning_rate": 1.0175975862386416e-05, "loss": 0.652650260925293, "step": 5460 }, { "epoch": 1.6382150344414494, "grad_norm": 1.4808628559112549, "learning_rate": 1.0141131756443715e-05, "loss": 0.6396486282348632, "step": 5470 }, { "epoch": 1.6412099430967355, "grad_norm": 1.3888201713562012, "learning_rate": 1.0106285936572953e-05, "loss": 0.6401126861572266, "step": 5480 }, { "epoch": 1.6442048517520216, "grad_norm": 1.3387247323989868, "learning_rate": 1.0071438825947689e-05, "loss": 0.623372745513916, "step": 5490 }, { "epoch": 1.6471997604073074, "grad_norm": 1.3711042404174805, "learning_rate": 1.0036590847757166e-05, "loss": 0.653053617477417, "step": 5500 }, { "epoch": 1.6501946690625937, "grad_norm": 1.4318101406097412, "learning_rate": 1.0001742425201164e-05, "loss": 0.6399904727935791, "step": 5510 }, { "epoch": 1.6531895777178796, "grad_norm": 1.3473331928253174, "learning_rate": 9.966893981484852e-06, "loss": 0.6013195037841796, "step": 5520 }, { "epoch": 1.6561844863731656, "grad_norm": 1.404439091682434, "learning_rate": 9.932045939813662e-06, "loss": 0.6521830558776855, "step": 5530 }, { "epoch": 1.6591793950284517, "grad_norm": 1.6330413818359375, "learning_rate": 9.897198723388143e-06, "loss": 0.6607831001281739, "step": 5540 }, { "epoch": 1.6621743036837375, "grad_norm": 1.4696617126464844, "learning_rate": 9.86235275539882e-06, "loss": 0.666562557220459, "step": 5550 }, { "epoch": 1.6651692123390238, "grad_norm": 1.3956501483917236, "learning_rate": 9.827508459021056e-06, "loss": 0.6431893348693848, "step": 5560 }, { "epoch": 1.6681641209943097, "grad_norm": 1.3600293397903442, "learning_rate": 9.792666257409917e-06, "loss": 0.6427026271820069, "step": 5570 }, { "epoch": 1.6711590296495957, "grad_norm": 1.393192172050476, "learning_rate": 9.75782657369503e-06, "loss": 0.6541417598724365, "step": 5580 }, { "epoch": 1.6741539383048818, "grad_norm": 1.4253222942352295, "learning_rate": 9.722989830975439e-06, "loss": 0.6254150867462158, "step": 5590 }, { "epoch": 1.6771488469601676, "grad_norm": 1.3802683353424072, "learning_rate": 9.688156452314475e-06, "loss": 0.6401287078857422, "step": 5600 }, { "epoch": 1.6801437556154537, "grad_norm": 1.3087486028671265, "learning_rate": 9.653326860734617e-06, "loss": 0.6138454437255859, "step": 5610 }, { "epoch": 1.6831386642707398, "grad_norm": 1.7111173868179321, "learning_rate": 9.618501479212355e-06, "loss": 0.6142902851104737, "step": 5620 }, { "epoch": 1.6861335729260256, "grad_norm": 1.510345458984375, "learning_rate": 9.58368073067304e-06, "loss": 0.6585430145263672, "step": 5630 }, { "epoch": 1.689128481581312, "grad_norm": 1.454209804534912, "learning_rate": 9.548865037985776e-06, "loss": 0.6655298233032226, "step": 5640 }, { "epoch": 1.6921233902365977, "grad_norm": 1.5720434188842773, "learning_rate": 9.514054823958254e-06, "loss": 0.6410290718078613, "step": 5650 }, { "epoch": 1.6951182988918838, "grad_norm": 1.4291045665740967, "learning_rate": 9.47925051133164e-06, "loss": 0.6853228569030761, "step": 5660 }, { "epoch": 1.6981132075471699, "grad_norm": 1.4933050870895386, "learning_rate": 9.444452522775424e-06, "loss": 0.6341513633728028, "step": 5670 }, { "epoch": 1.7011081162024557, "grad_norm": 1.4111416339874268, "learning_rate": 9.409661280882306e-06, "loss": 0.6149447441101075, "step": 5680 }, { "epoch": 1.7041030248577418, "grad_norm": 1.4768991470336914, "learning_rate": 9.374877208163042e-06, "loss": 0.6374067306518555, "step": 5690 }, { "epoch": 1.7070979335130279, "grad_norm": 1.5744390487670898, "learning_rate": 9.340100727041334e-06, "loss": 0.6614315986633301, "step": 5700 }, { "epoch": 1.710092842168314, "grad_norm": 1.5393071174621582, "learning_rate": 9.305332259848685e-06, "loss": 0.6411947250366211, "step": 5710 }, { "epoch": 1.7130877508236, "grad_norm": 1.2625375986099243, "learning_rate": 9.270572228819277e-06, "loss": 0.6521016120910644, "step": 5720 }, { "epoch": 1.7160826594788858, "grad_norm": 1.4381405115127563, "learning_rate": 9.235821056084841e-06, "loss": 0.6407829761505127, "step": 5730 }, { "epoch": 1.719077568134172, "grad_norm": 1.542965292930603, "learning_rate": 9.20107916366953e-06, "loss": 0.6585879325866699, "step": 5740 }, { "epoch": 1.722072476789458, "grad_norm": 1.7274558544158936, "learning_rate": 9.166346973484802e-06, "loss": 0.678370475769043, "step": 5750 }, { "epoch": 1.7250673854447438, "grad_norm": 1.3161629438400269, "learning_rate": 9.131624907324281e-06, "loss": 0.6508775234222413, "step": 5760 }, { "epoch": 1.72806229410003, "grad_norm": 1.4791046380996704, "learning_rate": 9.096913386858648e-06, "loss": 0.6735451221466064, "step": 5770 }, { "epoch": 1.731057202755316, "grad_norm": 1.4756172895431519, "learning_rate": 9.062212833630513e-06, "loss": 0.6588196754455566, "step": 5780 }, { "epoch": 1.734052111410602, "grad_norm": 1.4064671993255615, "learning_rate": 9.0275236690493e-06, "loss": 0.6659040451049805, "step": 5790 }, { "epoch": 1.737047020065888, "grad_norm": 1.5382051467895508, "learning_rate": 8.992846314386125e-06, "loss": 0.6591670036315918, "step": 5800 }, { "epoch": 1.740041928721174, "grad_norm": 1.3774088621139526, "learning_rate": 8.958181190768686e-06, "loss": 0.6008991241455078, "step": 5810 }, { "epoch": 1.74303683737646, "grad_norm": 1.5064748525619507, "learning_rate": 8.923528719176141e-06, "loss": 0.6617294311523437, "step": 5820 }, { "epoch": 1.746031746031746, "grad_norm": 1.3881192207336426, "learning_rate": 8.888889320434003e-06, "loss": 0.669343090057373, "step": 5830 }, { "epoch": 1.749026654687032, "grad_norm": 1.4678955078125, "learning_rate": 8.854263415209022e-06, "loss": 0.6319092750549317, "step": 5840 }, { "epoch": 1.7520215633423182, "grad_norm": 1.4094629287719727, "learning_rate": 8.81965142400408e-06, "loss": 0.6414087772369385, "step": 5850 }, { "epoch": 1.755016471997604, "grad_norm": 1.392703652381897, "learning_rate": 8.785053767153098e-06, "loss": 0.6597569942474365, "step": 5860 }, { "epoch": 1.75801138065289, "grad_norm": 1.23224937915802, "learning_rate": 8.7504708648159e-06, "loss": 0.6733821868896485, "step": 5870 }, { "epoch": 1.7610062893081762, "grad_norm": 1.5176384449005127, "learning_rate": 8.715903136973141e-06, "loss": 0.6722299098968506, "step": 5880 }, { "epoch": 1.764001197963462, "grad_norm": 1.4998729228973389, "learning_rate": 8.681351003421189e-06, "loss": 0.6153835773468017, "step": 5890 }, { "epoch": 1.7669961066187483, "grad_norm": 1.5615732669830322, "learning_rate": 8.646814883767028e-06, "loss": 0.6614401340484619, "step": 5900 }, { "epoch": 1.7699910152740341, "grad_norm": 1.4429715871810913, "learning_rate": 8.612295197423178e-06, "loss": 0.6637703895568847, "step": 5910 }, { "epoch": 1.7729859239293202, "grad_norm": 1.5154484510421753, "learning_rate": 8.577792363602582e-06, "loss": 0.6937406539916993, "step": 5920 }, { "epoch": 1.7759808325846063, "grad_norm": 1.523045301437378, "learning_rate": 8.543306801313522e-06, "loss": 0.6500541210174561, "step": 5930 }, { "epoch": 1.778975741239892, "grad_norm": 1.5260372161865234, "learning_rate": 8.508838929354539e-06, "loss": 0.6453513622283935, "step": 5940 }, { "epoch": 1.7819706498951782, "grad_norm": 1.5336843729019165, "learning_rate": 8.474389166309332e-06, "loss": 0.6776984214782715, "step": 5950 }, { "epoch": 1.7849655585504642, "grad_norm": 1.4582264423370361, "learning_rate": 8.439957930541686e-06, "loss": 0.6503573417663574, "step": 5960 }, { "epoch": 1.78796046720575, "grad_norm": 1.3314224481582642, "learning_rate": 8.405545640190387e-06, "loss": 0.5925717353820801, "step": 5970 }, { "epoch": 1.7909553758610364, "grad_norm": 1.3872936964035034, "learning_rate": 8.371152713164146e-06, "loss": 0.6113157272338867, "step": 5980 }, { "epoch": 1.7939502845163222, "grad_norm": 1.4577404260635376, "learning_rate": 8.33677956713652e-06, "loss": 0.665937089920044, "step": 5990 }, { "epoch": 1.7969451931716083, "grad_norm": 1.5423346757888794, "learning_rate": 8.302426619540843e-06, "loss": 0.6487864017486572, "step": 6000 }, { "epoch": 1.7999401018268943, "grad_norm": 1.4728710651397705, "learning_rate": 8.268094287565156e-06, "loss": 0.6546504020690918, "step": 6010 }, { "epoch": 1.8029350104821802, "grad_norm": 1.6181608438491821, "learning_rate": 8.23378298814714e-06, "loss": 0.6671038150787354, "step": 6020 }, { "epoch": 1.8059299191374663, "grad_norm": 1.3112537860870361, "learning_rate": 8.199493137969056e-06, "loss": 0.6506411552429199, "step": 6030 }, { "epoch": 1.8089248277927523, "grad_norm": 1.6379741430282593, "learning_rate": 8.165225153452678e-06, "loss": 0.6582574844360352, "step": 6040 }, { "epoch": 1.8119197364480384, "grad_norm": 1.3852412700653076, "learning_rate": 8.13097945075424e-06, "loss": 0.6609588623046875, "step": 6050 }, { "epoch": 1.8149146451033245, "grad_norm": 1.4980980157852173, "learning_rate": 8.096756445759382e-06, "loss": 0.6495426177978516, "step": 6060 }, { "epoch": 1.8179095537586103, "grad_norm": 1.430310845375061, "learning_rate": 8.062556554078103e-06, "loss": 0.6681442260742188, "step": 6070 }, { "epoch": 1.8209044624138964, "grad_norm": 1.4171323776245117, "learning_rate": 8.028380191039704e-06, "loss": 0.632663631439209, "step": 6080 }, { "epoch": 1.8238993710691824, "grad_norm": 1.4187473058700562, "learning_rate": 7.994227771687757e-06, "loss": 0.6560873031616211, "step": 6090 }, { "epoch": 1.8268942797244683, "grad_norm": 1.5951778888702393, "learning_rate": 7.960099710775049e-06, "loss": 0.6672462940216064, "step": 6100 }, { "epoch": 1.8298891883797546, "grad_norm": 1.2696975469589233, "learning_rate": 7.925996422758561e-06, "loss": 0.6479342937469482, "step": 6110 }, { "epoch": 1.8328840970350404, "grad_norm": 1.590598702430725, "learning_rate": 7.891918321794428e-06, "loss": 0.6272913932800293, "step": 6120 }, { "epoch": 1.8358790056903265, "grad_norm": 1.3780035972595215, "learning_rate": 7.857865821732906e-06, "loss": 0.659095048904419, "step": 6130 }, { "epoch": 1.8388739143456125, "grad_norm": 1.4691357612609863, "learning_rate": 7.823839336113347e-06, "loss": 0.6268105506896973, "step": 6140 }, { "epoch": 1.8418688230008984, "grad_norm": 1.4001188278198242, "learning_rate": 7.789839278159185e-06, "loss": 0.6448341369628906, "step": 6150 }, { "epoch": 1.8448637316561844, "grad_norm": 1.4039057493209839, "learning_rate": 7.75586606077291e-06, "loss": 0.6525530815124512, "step": 6160 }, { "epoch": 1.8478586403114705, "grad_norm": 1.2402323484420776, "learning_rate": 7.721920096531052e-06, "loss": 0.6459396362304688, "step": 6170 }, { "epoch": 1.8508535489667564, "grad_norm": 1.4664818048477173, "learning_rate": 7.688001797679178e-06, "loss": 0.6386150360107422, "step": 6180 }, { "epoch": 1.8538484576220426, "grad_norm": 1.3676190376281738, "learning_rate": 7.654111576126881e-06, "loss": 0.6291984558105469, "step": 6190 }, { "epoch": 1.8568433662773285, "grad_norm": 1.3386749029159546, "learning_rate": 7.620249843442777e-06, "loss": 0.6123722076416016, "step": 6200 }, { "epoch": 1.8598382749326146, "grad_norm": 1.6316471099853516, "learning_rate": 7.5864170108495135e-06, "loss": 0.6253969669342041, "step": 6210 }, { "epoch": 1.8628331835879006, "grad_norm": 1.3244318962097168, "learning_rate": 7.552613489218763e-06, "loss": 0.6519149303436279, "step": 6220 }, { "epoch": 1.8658280922431865, "grad_norm": 1.618364691734314, "learning_rate": 7.518839689066247e-06, "loss": 0.6438776016235351, "step": 6230 }, { "epoch": 1.8688230008984728, "grad_norm": 1.4920052289962769, "learning_rate": 7.485096020546738e-06, "loss": 0.6367332458496093, "step": 6240 }, { "epoch": 1.8718179095537586, "grad_norm": 1.5802627801895142, "learning_rate": 7.451382893449091e-06, "loss": 0.6220839023590088, "step": 6250 }, { "epoch": 1.8748128182090447, "grad_norm": 1.504412293434143, "learning_rate": 7.417700717191255e-06, "loss": 0.6164268493652344, "step": 6260 }, { "epoch": 1.8778077268643307, "grad_norm": 1.4522355794906616, "learning_rate": 7.384049900815313e-06, "loss": 0.624882984161377, "step": 6270 }, { "epoch": 1.8808026355196166, "grad_norm": 1.5092509984970093, "learning_rate": 7.3504308529825045e-06, "loss": 0.630027961730957, "step": 6280 }, { "epoch": 1.8837975441749026, "grad_norm": 1.6086957454681396, "learning_rate": 7.316843981968267e-06, "loss": 0.6275941371917725, "step": 6290 }, { "epoch": 1.8867924528301887, "grad_norm": 1.7092756032943726, "learning_rate": 7.283289695657275e-06, "loss": 0.6458075523376465, "step": 6300 }, { "epoch": 1.8897873614854745, "grad_norm": 1.4698508977890015, "learning_rate": 7.249768401538493e-06, "loss": 0.5995992660522461, "step": 6310 }, { "epoch": 1.8927822701407608, "grad_norm": 1.3332494497299194, "learning_rate": 7.216280506700222e-06, "loss": 0.5948431968688965, "step": 6320 }, { "epoch": 1.8957771787960467, "grad_norm": 1.5438693761825562, "learning_rate": 7.182826417825152e-06, "loss": 0.6605867385864258, "step": 6330 }, { "epoch": 1.8987720874513327, "grad_norm": 1.2717972993850708, "learning_rate": 7.149406541185433e-06, "loss": 0.6348017692565918, "step": 6340 }, { "epoch": 1.9017669961066188, "grad_norm": 1.5023244619369507, "learning_rate": 7.116021282637732e-06, "loss": 0.6453000068664551, "step": 6350 }, { "epoch": 1.9047619047619047, "grad_norm": 1.4482067823410034, "learning_rate": 7.082671047618312e-06, "loss": 0.6501484870910644, "step": 6360 }, { "epoch": 1.9077568134171907, "grad_norm": 1.2380962371826172, "learning_rate": 7.049356241138099e-06, "loss": 0.6227757453918457, "step": 6370 }, { "epoch": 1.9107517220724768, "grad_norm": 1.361412763595581, "learning_rate": 7.016077267777775e-06, "loss": 0.6514645576477051, "step": 6380 }, { "epoch": 1.9137466307277629, "grad_norm": 1.6207096576690674, "learning_rate": 6.982834531682853e-06, "loss": 0.6655488967895508, "step": 6390 }, { "epoch": 1.916741539383049, "grad_norm": 1.3296849727630615, "learning_rate": 6.949628436558777e-06, "loss": 0.6586191177368164, "step": 6400 }, { "epoch": 1.9197364480383348, "grad_norm": 1.333168864250183, "learning_rate": 6.916459385666017e-06, "loss": 0.6382019996643067, "step": 6410 }, { "epoch": 1.9227313566936208, "grad_norm": 1.3755735158920288, "learning_rate": 6.88332778181517e-06, "loss": 0.6329482078552247, "step": 6420 }, { "epoch": 1.925726265348907, "grad_norm": 1.3939225673675537, "learning_rate": 6.850234027362073e-06, "loss": 0.6204883575439453, "step": 6430 }, { "epoch": 1.9287211740041927, "grad_norm": 1.3705857992172241, "learning_rate": 6.817178524202907e-06, "loss": 0.6589064598083496, "step": 6440 }, { "epoch": 1.931716082659479, "grad_norm": 1.3137234449386597, "learning_rate": 6.784161673769332e-06, "loss": 0.6426548004150391, "step": 6450 }, { "epoch": 1.9347109913147649, "grad_norm": 1.5095393657684326, "learning_rate": 6.751183877023595e-06, "loss": 0.6177249908447265, "step": 6460 }, { "epoch": 1.937705899970051, "grad_norm": 1.268416166305542, "learning_rate": 6.718245534453673e-06, "loss": 0.6130592823028564, "step": 6470 }, { "epoch": 1.940700808625337, "grad_norm": 1.2933331727981567, "learning_rate": 6.685347046068402e-06, "loss": 0.6383994579315185, "step": 6480 }, { "epoch": 1.9436957172806228, "grad_norm": 1.5103631019592285, "learning_rate": 6.652488811392622e-06, "loss": 0.6300495147705079, "step": 6490 }, { "epoch": 1.946690625935909, "grad_norm": 1.4330319166183472, "learning_rate": 6.6196712294623276e-06, "loss": 0.631505012512207, "step": 6500 }, { "epoch": 1.949685534591195, "grad_norm": 1.2874356508255005, "learning_rate": 6.5868946988198165e-06, "loss": 0.5962014198303223, "step": 6510 }, { "epoch": 1.9526804432464808, "grad_norm": 1.4397952556610107, "learning_rate": 6.554159617508856e-06, "loss": 0.641713809967041, "step": 6520 }, { "epoch": 1.9556753519017671, "grad_norm": 1.519711971282959, "learning_rate": 6.521466383069841e-06, "loss": 0.6155229568481445, "step": 6530 }, { "epoch": 1.958670260557053, "grad_norm": 1.5349974632263184, "learning_rate": 6.488815392534977e-06, "loss": 0.6498642921447754, "step": 6540 }, { "epoch": 1.961665169212339, "grad_norm": 1.6318473815917969, "learning_rate": 6.456207042423445e-06, "loss": 0.6179317474365235, "step": 6550 }, { "epoch": 1.964660077867625, "grad_norm": 1.4348818063735962, "learning_rate": 6.4236417287366006e-06, "loss": 0.6067376136779785, "step": 6560 }, { "epoch": 1.967654986522911, "grad_norm": 1.6722396612167358, "learning_rate": 6.391119846953153e-06, "loss": 0.6432971000671387, "step": 6570 }, { "epoch": 1.9706498951781972, "grad_norm": 1.5022844076156616, "learning_rate": 6.3586417920243695e-06, "loss": 0.6031882762908936, "step": 6580 }, { "epoch": 1.973644803833483, "grad_norm": 1.2844231128692627, "learning_rate": 6.326207958369273e-06, "loss": 0.661934232711792, "step": 6590 }, { "epoch": 1.9766397124887691, "grad_norm": 1.338340163230896, "learning_rate": 6.2938187398698614e-06, "loss": 0.6218274116516114, "step": 6600 }, { "epoch": 1.9796346211440552, "grad_norm": 1.4525477886199951, "learning_rate": 6.261474529866315e-06, "loss": 0.6390564441680908, "step": 6610 }, { "epoch": 1.982629529799341, "grad_norm": 1.5783872604370117, "learning_rate": 6.229175721152222e-06, "loss": 0.6509233951568604, "step": 6620 }, { "epoch": 1.985624438454627, "grad_norm": 1.5055947303771973, "learning_rate": 6.1969227059698125e-06, "loss": 0.5991942405700683, "step": 6630 }, { "epoch": 1.9886193471099132, "grad_norm": 1.4864978790283203, "learning_rate": 6.1647158760051915e-06, "loss": 0.6327694892883301, "step": 6640 }, { "epoch": 1.991614255765199, "grad_norm": 1.352718710899353, "learning_rate": 6.132555622383581e-06, "loss": 0.6443804740905762, "step": 6650 }, { "epoch": 1.9946091644204853, "grad_norm": 1.567016363143921, "learning_rate": 6.1004423356645744e-06, "loss": 0.6319258689880372, "step": 6660 }, { "epoch": 1.9976040730757711, "grad_norm": 1.4297220706939697, "learning_rate": 6.06837640583739e-06, "loss": 0.6520418167114258, "step": 6670 }, { "epoch": 2.000598981731057, "grad_norm": 1.1015812158584595, "learning_rate": 6.0363582223161345e-06, "loss": 0.5972274303436279, "step": 6680 }, { "epoch": 2.0035938903863433, "grad_norm": 1.2543084621429443, "learning_rate": 6.0043881739350785e-06, "loss": 0.5129719734191894, "step": 6690 }, { "epoch": 2.006588799041629, "grad_norm": 1.2929192781448364, "learning_rate": 5.972466648943929e-06, "loss": 0.5194722652435303, "step": 6700 }, { "epoch": 2.0095837076969154, "grad_norm": 1.4669725894927979, "learning_rate": 5.940594035003119e-06, "loss": 0.5150233268737793, "step": 6710 }, { "epoch": 2.0125786163522013, "grad_norm": 1.4361999034881592, "learning_rate": 5.9087707191790935e-06, "loss": 0.5015038967132568, "step": 6720 }, { "epoch": 2.015573525007487, "grad_norm": 1.3732558488845825, "learning_rate": 5.876997087939614e-06, "loss": 0.5028849601745605, "step": 6730 }, { "epoch": 2.0185684336627734, "grad_norm": 1.3214187622070312, "learning_rate": 5.845273527149067e-06, "loss": 0.5087246894836426, "step": 6740 }, { "epoch": 2.0215633423180592, "grad_norm": 1.564650058746338, "learning_rate": 5.8136004220637746e-06, "loss": 0.5178554058074951, "step": 6750 }, { "epoch": 2.0245582509733455, "grad_norm": 1.4164608716964722, "learning_rate": 5.7819781573273055e-06, "loss": 0.5236745834350586, "step": 6760 }, { "epoch": 2.0275531596286314, "grad_norm": 1.5281411409378052, "learning_rate": 5.750407116965835e-06, "loss": 0.5004557609558106, "step": 6770 }, { "epoch": 2.030548068283917, "grad_norm": 1.6436216831207275, "learning_rate": 5.718887684383441e-06, "loss": 0.5178097248077392, "step": 6780 }, { "epoch": 2.0335429769392035, "grad_norm": 1.497834324836731, "learning_rate": 5.687420242357482e-06, "loss": 0.5175156593322754, "step": 6790 }, { "epoch": 2.0365378855944893, "grad_norm": 1.2958300113677979, "learning_rate": 5.6560051730339226e-06, "loss": 0.5054145336151123, "step": 6800 }, { "epoch": 2.039532794249775, "grad_norm": 1.5535317659378052, "learning_rate": 5.624642857922713e-06, "loss": 0.5114497184753418, "step": 6810 }, { "epoch": 2.0425277029050615, "grad_norm": 1.7876501083374023, "learning_rate": 5.593333677893149e-06, "loss": 0.49465479850769045, "step": 6820 }, { "epoch": 2.0455226115603473, "grad_norm": 1.4220184087753296, "learning_rate": 5.562078013169232e-06, "loss": 0.4663191795349121, "step": 6830 }, { "epoch": 2.0485175202156336, "grad_norm": 1.4079233407974243, "learning_rate": 5.53087624332508e-06, "loss": 0.5256112575531006, "step": 6840 }, { "epoch": 2.0515124288709194, "grad_norm": 1.4762578010559082, "learning_rate": 5.499728747280291e-06, "loss": 0.49046692848205564, "step": 6850 }, { "epoch": 2.0545073375262053, "grad_norm": 1.4836058616638184, "learning_rate": 5.4686359032953595e-06, "loss": 0.5131683349609375, "step": 6860 }, { "epoch": 2.0575022461814916, "grad_norm": 1.5787184238433838, "learning_rate": 5.4375980889670695e-06, "loss": 0.5437060832977295, "step": 6870 }, { "epoch": 2.0604971548367774, "grad_norm": 1.5268107652664185, "learning_rate": 5.406615681223926e-06, "loss": 0.4896749496459961, "step": 6880 }, { "epoch": 2.0634920634920633, "grad_norm": 1.6514142751693726, "learning_rate": 5.375689056321555e-06, "loss": 0.5036890983581543, "step": 6890 }, { "epoch": 2.0664869721473496, "grad_norm": 1.392620325088501, "learning_rate": 5.3448185898381565e-06, "loss": 0.5152482986450195, "step": 6900 }, { "epoch": 2.0694818808026354, "grad_norm": 1.3546650409698486, "learning_rate": 5.314004656669922e-06, "loss": 0.48453149795532224, "step": 6910 }, { "epoch": 2.0724767894579217, "grad_norm": 1.6225578784942627, "learning_rate": 5.283247631026507e-06, "loss": 0.5099971771240235, "step": 6920 }, { "epoch": 2.0754716981132075, "grad_norm": 1.5903658866882324, "learning_rate": 5.252547886426455e-06, "loss": 0.5065332412719726, "step": 6930 }, { "epoch": 2.0784666067684934, "grad_norm": 1.3701387643814087, "learning_rate": 5.2219057956927e-06, "loss": 0.5079869270324707, "step": 6940 }, { "epoch": 2.0814615154237797, "grad_norm": 1.5331077575683594, "learning_rate": 5.191321730947995e-06, "loss": 0.5096177577972412, "step": 6950 }, { "epoch": 2.0844564240790655, "grad_norm": 1.5454105138778687, "learning_rate": 5.160796063610433e-06, "loss": 0.5231037139892578, "step": 6960 }, { "epoch": 2.087451332734352, "grad_norm": 1.5711345672607422, "learning_rate": 5.130329164388909e-06, "loss": 0.4980916023254395, "step": 6970 }, { "epoch": 2.0904462413896376, "grad_norm": 1.5762660503387451, "learning_rate": 5.099921403278631e-06, "loss": 0.5110610008239747, "step": 6980 }, { "epoch": 2.0934411500449235, "grad_norm": 1.6697578430175781, "learning_rate": 5.069573149556628e-06, "loss": 0.5102407455444335, "step": 6990 }, { "epoch": 2.0964360587002098, "grad_norm": 1.7491651773452759, "learning_rate": 5.039284771777258e-06, "loss": 0.5234197616577149, "step": 7000 }, { "epoch": 2.0994309673554956, "grad_norm": 1.546948790550232, "learning_rate": 5.009056637767727e-06, "loss": 0.4833499908447266, "step": 7010 }, { "epoch": 2.1024258760107815, "grad_norm": 1.356787919998169, "learning_rate": 4.9788891146236475e-06, "loss": 0.5116095542907715, "step": 7020 }, { "epoch": 2.1054207846660677, "grad_norm": 1.4842000007629395, "learning_rate": 4.948782568704545e-06, "loss": 0.502721643447876, "step": 7030 }, { "epoch": 2.1084156933213536, "grad_norm": 1.5620037317276, "learning_rate": 4.918737365629444e-06, "loss": 0.508421802520752, "step": 7040 }, { "epoch": 2.11141060197664, "grad_norm": 1.4998836517333984, "learning_rate": 4.888753870272395e-06, "loss": 0.4805330276489258, "step": 7050 }, { "epoch": 2.1144055106319257, "grad_norm": 1.5066370964050293, "learning_rate": 4.858832446758076e-06, "loss": 0.5161166191101074, "step": 7060 }, { "epoch": 2.1174004192872116, "grad_norm": 1.3664103746414185, "learning_rate": 4.8289734584573376e-06, "loss": 0.4755040168762207, "step": 7070 }, { "epoch": 2.120395327942498, "grad_norm": 1.5773537158966064, "learning_rate": 4.799177267982822e-06, "loss": 0.5325294494628906, "step": 7080 }, { "epoch": 2.1233902365977837, "grad_norm": 1.4924392700195312, "learning_rate": 4.769444237184529e-06, "loss": 0.512051773071289, "step": 7090 }, { "epoch": 2.12638514525307, "grad_norm": 1.5971511602401733, "learning_rate": 4.739774727145452e-06, "loss": 0.4878090858459473, "step": 7100 }, { "epoch": 2.129380053908356, "grad_norm": 1.288095474243164, "learning_rate": 4.710169098177161e-06, "loss": 0.4998618125915527, "step": 7110 }, { "epoch": 2.1323749625636417, "grad_norm": 1.4811124801635742, "learning_rate": 4.68062770981546e-06, "loss": 0.4985170364379883, "step": 7120 }, { "epoch": 2.135369871218928, "grad_norm": 1.5706632137298584, "learning_rate": 4.651150920815988e-06, "loss": 0.4773625373840332, "step": 7130 }, { "epoch": 2.138364779874214, "grad_norm": 1.532424807548523, "learning_rate": 4.62173908914989e-06, "loss": 0.490186882019043, "step": 7140 }, { "epoch": 2.1413596885294996, "grad_norm": 1.5899869203567505, "learning_rate": 4.592392571999459e-06, "loss": 0.48595681190490725, "step": 7150 }, { "epoch": 2.144354597184786, "grad_norm": 1.5332787036895752, "learning_rate": 4.563111725753785e-06, "loss": 0.5245419502258301, "step": 7160 }, { "epoch": 2.147349505840072, "grad_norm": 1.5711026191711426, "learning_rate": 4.533896906004455e-06, "loss": 0.47621469497680663, "step": 7170 }, { "epoch": 2.150344414495358, "grad_norm": 1.784626841545105, "learning_rate": 4.504748467541202e-06, "loss": 0.49512577056884766, "step": 7180 }, { "epoch": 2.153339323150644, "grad_norm": 1.442130208015442, "learning_rate": 4.475666764347634e-06, "loss": 0.4948512077331543, "step": 7190 }, { "epoch": 2.1563342318059298, "grad_norm": 1.3904801607131958, "learning_rate": 4.446652149596891e-06, "loss": 0.5106653690338134, "step": 7200 }, { "epoch": 2.159329140461216, "grad_norm": 1.4889200925827026, "learning_rate": 4.4177049756474025e-06, "loss": 0.4727304935455322, "step": 7210 }, { "epoch": 2.162324049116502, "grad_norm": 1.5404807329177856, "learning_rate": 4.388825594038565e-06, "loss": 0.46900529861450196, "step": 7220 }, { "epoch": 2.165318957771788, "grad_norm": 1.5615664720535278, "learning_rate": 4.360014355486511e-06, "loss": 0.5268836975097656, "step": 7230 }, { "epoch": 2.168313866427074, "grad_norm": 1.4813424348831177, "learning_rate": 4.331271609879817e-06, "loss": 0.4924919605255127, "step": 7240 }, { "epoch": 2.17130877508236, "grad_norm": 1.4291577339172363, "learning_rate": 4.302597706275283e-06, "loss": 0.49208860397338866, "step": 7250 }, { "epoch": 2.174303683737646, "grad_norm": 1.6226706504821777, "learning_rate": 4.273992992893667e-06, "loss": 0.47493915557861327, "step": 7260 }, { "epoch": 2.177298592392932, "grad_norm": 1.4558225870132446, "learning_rate": 4.245457817115484e-06, "loss": 0.5071091651916504, "step": 7270 }, { "epoch": 2.180293501048218, "grad_norm": 1.4675588607788086, "learning_rate": 4.216992525476754e-06, "loss": 0.5064915180206299, "step": 7280 }, { "epoch": 2.183288409703504, "grad_norm": 1.6159210205078125, "learning_rate": 4.188597463664832e-06, "loss": 0.5045362949371338, "step": 7290 }, { "epoch": 2.18628331835879, "grad_norm": 1.5918903350830078, "learning_rate": 4.160272976514171e-06, "loss": 0.5072110652923584, "step": 7300 }, { "epoch": 2.1892782270140763, "grad_norm": 1.513226866722107, "learning_rate": 4.132019408002172e-06, "loss": 0.4595001220703125, "step": 7310 }, { "epoch": 2.192273135669362, "grad_norm": 1.323989987373352, "learning_rate": 4.103837101244971e-06, "loss": 0.5201524257659912, "step": 7320 }, { "epoch": 2.195268044324648, "grad_norm": 1.5404670238494873, "learning_rate": 4.075726398493303e-06, "loss": 0.47367110252380373, "step": 7330 }, { "epoch": 2.1982629529799342, "grad_norm": 1.8998429775238037, "learning_rate": 4.0476876411283185e-06, "loss": 0.4952116012573242, "step": 7340 }, { "epoch": 2.20125786163522, "grad_norm": 1.5175492763519287, "learning_rate": 4.019721169657466e-06, "loss": 0.5057971954345704, "step": 7350 }, { "epoch": 2.2042527702905064, "grad_norm": 1.5638384819030762, "learning_rate": 3.991827323710326e-06, "loss": 0.5098119258880616, "step": 7360 }, { "epoch": 2.207247678945792, "grad_norm": 1.5194815397262573, "learning_rate": 3.964006442034514e-06, "loss": 0.5505454063415527, "step": 7370 }, { "epoch": 2.210242587601078, "grad_norm": 1.4322032928466797, "learning_rate": 3.9362588624915535e-06, "loss": 0.5155088424682617, "step": 7380 }, { "epoch": 2.2132374962563643, "grad_norm": 1.5685886144638062, "learning_rate": 3.908584922052766e-06, "loss": 0.4836409568786621, "step": 7390 }, { "epoch": 2.21623240491165, "grad_norm": 1.6502667665481567, "learning_rate": 3.8809849567951994e-06, "loss": 0.49752092361450195, "step": 7400 }, { "epoch": 2.219227313566936, "grad_norm": 1.5383046865463257, "learning_rate": 3.853459301897523e-06, "loss": 0.49851369857788086, "step": 7410 }, { "epoch": 2.2222222222222223, "grad_norm": 1.550384521484375, "learning_rate": 3.826008291635979e-06, "loss": 0.48642563819885254, "step": 7420 }, { "epoch": 2.225217130877508, "grad_norm": 1.7033337354660034, "learning_rate": 3.7986322593803006e-06, "loss": 0.48472137451171876, "step": 7430 }, { "epoch": 2.2282120395327945, "grad_norm": 1.6340550184249878, "learning_rate": 3.7713315375896876e-06, "loss": 0.533723258972168, "step": 7440 }, { "epoch": 2.2312069481880803, "grad_norm": 1.429850459098816, "learning_rate": 3.744106457808746e-06, "loss": 0.4909144401550293, "step": 7450 }, { "epoch": 2.234201856843366, "grad_norm": 1.4978691339492798, "learning_rate": 3.7169573506634824e-06, "loss": 0.4724015235900879, "step": 7460 }, { "epoch": 2.2371967654986524, "grad_norm": 1.7704259157180786, "learning_rate": 3.6898845458572674e-06, "loss": 0.5028561592102051, "step": 7470 }, { "epoch": 2.2401916741539383, "grad_norm": 1.5223942995071411, "learning_rate": 3.6628883721668573e-06, "loss": 0.5258946895599366, "step": 7480 }, { "epoch": 2.243186582809224, "grad_norm": 1.3769805431365967, "learning_rate": 3.6359691574383703e-06, "loss": 0.48286190032958987, "step": 7490 }, { "epoch": 2.2461814914645104, "grad_norm": 1.669264316558838, "learning_rate": 3.609127228583338e-06, "loss": 0.4988402366638184, "step": 7500 }, { "epoch": 2.2491764001197962, "grad_norm": 1.5799616575241089, "learning_rate": 3.582362911574706e-06, "loss": 0.48703784942626954, "step": 7510 }, { "epoch": 2.2521713087750825, "grad_norm": 1.6424161195755005, "learning_rate": 3.5556765314428998e-06, "loss": 0.5006259918212891, "step": 7520 }, { "epoch": 2.2551662174303684, "grad_norm": 1.7363072633743286, "learning_rate": 3.5290684122718544e-06, "loss": 0.5261609554290771, "step": 7530 }, { "epoch": 2.2581611260856542, "grad_norm": 1.6819044351577759, "learning_rate": 3.502538877195104e-06, "loss": 0.48314647674560546, "step": 7540 }, { "epoch": 2.2611560347409405, "grad_norm": 1.7765800952911377, "learning_rate": 3.476088248391829e-06, "loss": 0.5151649475097656, "step": 7550 }, { "epoch": 2.2641509433962264, "grad_norm": 1.5972343683242798, "learning_rate": 3.4497168470829732e-06, "loss": 0.5179537773132324, "step": 7560 }, { "epoch": 2.267145852051512, "grad_norm": 1.4898275136947632, "learning_rate": 3.4234249935273157e-06, "loss": 0.4999067306518555, "step": 7570 }, { "epoch": 2.2701407607067985, "grad_norm": 1.584760069847107, "learning_rate": 3.3972130070176057e-06, "loss": 0.5141147613525391, "step": 7580 }, { "epoch": 2.2731356693620843, "grad_norm": 1.454837441444397, "learning_rate": 3.371081205876662e-06, "loss": 0.5008669376373291, "step": 7590 }, { "epoch": 2.2761305780173706, "grad_norm": 1.274971842765808, "learning_rate": 3.3450299074535297e-06, "loss": 0.48927507400512693, "step": 7600 }, { "epoch": 2.2791254866726565, "grad_norm": 1.7367416620254517, "learning_rate": 3.319059428119603e-06, "loss": 0.4955023765563965, "step": 7610 }, { "epoch": 2.2821203953279423, "grad_norm": 1.5526654720306396, "learning_rate": 3.2931700832648063e-06, "loss": 0.4893807411193848, "step": 7620 }, { "epoch": 2.2851153039832286, "grad_norm": 1.5649467706680298, "learning_rate": 3.267362187293751e-06, "loss": 0.4851066112518311, "step": 7630 }, { "epoch": 2.2881102126385144, "grad_norm": 1.5863004922866821, "learning_rate": 3.2416360536219126e-06, "loss": 0.4791616439819336, "step": 7640 }, { "epoch": 2.2911051212938007, "grad_norm": 1.6295742988586426, "learning_rate": 3.21599199467184e-06, "loss": 0.47314839363098143, "step": 7650 }, { "epoch": 2.2941000299490866, "grad_norm": 1.6049524545669556, "learning_rate": 3.1904303218693444e-06, "loss": 0.5069909572601319, "step": 7660 }, { "epoch": 2.2970949386043724, "grad_norm": 1.652060627937317, "learning_rate": 3.164951345639735e-06, "loss": 0.48629279136657716, "step": 7670 }, { "epoch": 2.3000898472596587, "grad_norm": 1.4900766611099243, "learning_rate": 3.1395553754040275e-06, "loss": 0.4977739334106445, "step": 7680 }, { "epoch": 2.3030847559149445, "grad_norm": 1.5940581560134888, "learning_rate": 3.1142427195752144e-06, "loss": 0.5253914833068848, "step": 7690 }, { "epoch": 2.3060796645702304, "grad_norm": 1.4190114736557007, "learning_rate": 3.0890136855544872e-06, "loss": 0.5198238849639892, "step": 7700 }, { "epoch": 2.3090745732255167, "grad_norm": 1.5904440879821777, "learning_rate": 3.0638685797275357e-06, "loss": 0.5105954170227051, "step": 7710 }, { "epoch": 2.3120694818808025, "grad_norm": 1.4529443979263306, "learning_rate": 3.038807707460796e-06, "loss": 0.4947354316711426, "step": 7720 }, { "epoch": 2.315064390536089, "grad_norm": 1.8188318014144897, "learning_rate": 3.0138313730977718e-06, "loss": 0.5178883075714111, "step": 7730 }, { "epoch": 2.3180592991913747, "grad_norm": 1.6044124364852905, "learning_rate": 2.9889398799553128e-06, "loss": 0.4920680522918701, "step": 7740 }, { "epoch": 2.3210542078466605, "grad_norm": 1.4917323589324951, "learning_rate": 2.9641335303199514e-06, "loss": 0.5030588626861572, "step": 7750 }, { "epoch": 2.324049116501947, "grad_norm": 1.668168544769287, "learning_rate": 2.9394126254442134e-06, "loss": 0.5082870960235596, "step": 7760 }, { "epoch": 2.3270440251572326, "grad_norm": 1.444399118423462, "learning_rate": 2.9147774655429794e-06, "loss": 0.47826013565063474, "step": 7770 }, { "epoch": 2.330038933812519, "grad_norm": 1.5026638507843018, "learning_rate": 2.8902283497898185e-06, "loss": 0.5042776107788086, "step": 7780 }, { "epoch": 2.3330338424678048, "grad_norm": 1.4492841958999634, "learning_rate": 2.865765576313376e-06, "loss": 0.4668389320373535, "step": 7790 }, { "epoch": 2.3360287511230906, "grad_norm": 1.5469386577606201, "learning_rate": 2.841389442193727e-06, "loss": 0.4765936851501465, "step": 7800 }, { "epoch": 2.339023659778377, "grad_norm": 1.667075514793396, "learning_rate": 2.817100243458801e-06, "loss": 0.49718523025512695, "step": 7810 }, { "epoch": 2.3420185684336627, "grad_norm": 1.7210171222686768, "learning_rate": 2.792898275080752e-06, "loss": 0.4858196258544922, "step": 7820 }, { "epoch": 2.3450134770889486, "grad_norm": 1.4533110857009888, "learning_rate": 2.7687838309724104e-06, "loss": 0.5015253543853759, "step": 7830 }, { "epoch": 2.348008385744235, "grad_norm": 1.409629464149475, "learning_rate": 2.7447572039836812e-06, "loss": 0.49271488189697266, "step": 7840 }, { "epoch": 2.3510032943995207, "grad_norm": 1.7250934839248657, "learning_rate": 2.7208186858980148e-06, "loss": 0.5015377998352051, "step": 7850 }, { "epoch": 2.353998203054807, "grad_norm": 1.521403193473816, "learning_rate": 2.696968567428849e-06, "loss": 0.513665771484375, "step": 7860 }, { "epoch": 2.356993111710093, "grad_norm": 1.5374770164489746, "learning_rate": 2.6732071382160785e-06, "loss": 0.5035372734069824, "step": 7870 }, { "epoch": 2.3599880203653787, "grad_norm": 1.53484308719635, "learning_rate": 2.649534686822547e-06, "loss": 0.47299823760986326, "step": 7880 }, { "epoch": 2.362982929020665, "grad_norm": 1.3899140357971191, "learning_rate": 2.6259515007305246e-06, "loss": 0.47503366470336916, "step": 7890 }, { "epoch": 2.365977837675951, "grad_norm": 1.7831887006759644, "learning_rate": 2.6024578663382447e-06, "loss": 0.5038399696350098, "step": 7900 }, { "epoch": 2.368972746331237, "grad_norm": 1.4920977354049683, "learning_rate": 2.579054068956395e-06, "loss": 0.4970669746398926, "step": 7910 }, { "epoch": 2.371967654986523, "grad_norm": 1.6569092273712158, "learning_rate": 2.5557403928046774e-06, "loss": 0.5047991752624512, "step": 7920 }, { "epoch": 2.374962563641809, "grad_norm": 1.3544354438781738, "learning_rate": 2.532517121008338e-06, "loss": 0.4772444248199463, "step": 7930 }, { "epoch": 2.377957472297095, "grad_norm": 1.6597788333892822, "learning_rate": 2.5093845355947446e-06, "loss": 0.4818833351135254, "step": 7940 }, { "epoch": 2.380952380952381, "grad_norm": 1.5855315923690796, "learning_rate": 2.486342917489948e-06, "loss": 0.4844215393066406, "step": 7950 }, { "epoch": 2.3839472896076668, "grad_norm": 1.562936782836914, "learning_rate": 2.463392546515283e-06, "loss": 0.5058174133300781, "step": 7960 }, { "epoch": 2.386942198262953, "grad_norm": 1.5709084272384644, "learning_rate": 2.4405337013839536e-06, "loss": 0.5061359405517578, "step": 7970 }, { "epoch": 2.389937106918239, "grad_norm": 1.5790047645568848, "learning_rate": 2.4177666596976725e-06, "loss": 0.4824088096618652, "step": 7980 }, { "epoch": 2.392932015573525, "grad_norm": 1.5179102420806885, "learning_rate": 2.3950916979432614e-06, "loss": 0.47690744400024415, "step": 7990 }, { "epoch": 2.395926924228811, "grad_norm": 1.661934494972229, "learning_rate": 2.372509091489319e-06, "loss": 0.5297951221466064, "step": 8000 }, { "epoch": 2.398921832884097, "grad_norm": 1.5934616327285767, "learning_rate": 2.3500191145828565e-06, "loss": 0.5042305946350097, "step": 8010 }, { "epoch": 2.401916741539383, "grad_norm": 1.7664084434509277, "learning_rate": 2.327622040345985e-06, "loss": 0.4854135513305664, "step": 8020 }, { "epoch": 2.404911650194669, "grad_norm": 1.6327743530273438, "learning_rate": 2.30531814077258e-06, "loss": 0.5051285743713378, "step": 8030 }, { "epoch": 2.4079065588499553, "grad_norm": 1.6960318088531494, "learning_rate": 2.283107686724998e-06, "loss": 0.48496303558349607, "step": 8040 }, { "epoch": 2.410901467505241, "grad_norm": 1.7003093957901, "learning_rate": 2.2609909479307667e-06, "loss": 0.4897914886474609, "step": 8050 }, { "epoch": 2.413896376160527, "grad_norm": 1.5663912296295166, "learning_rate": 2.2389681929793326e-06, "loss": 0.5030606269836426, "step": 8060 }, { "epoch": 2.4168912848158133, "grad_norm": 1.4921340942382812, "learning_rate": 2.217039689318772e-06, "loss": 0.47916498184204104, "step": 8070 }, { "epoch": 2.419886193471099, "grad_norm": 1.6380950212478638, "learning_rate": 2.195205703252571e-06, "loss": 0.5335843563079834, "step": 8080 }, { "epoch": 2.422881102126385, "grad_norm": 1.5958120822906494, "learning_rate": 2.1734664999363654e-06, "loss": 0.4860078811645508, "step": 8090 }, { "epoch": 2.4258760107816713, "grad_norm": 1.3979578018188477, "learning_rate": 2.151822343374742e-06, "loss": 0.47577743530273436, "step": 8100 }, { "epoch": 2.428870919436957, "grad_norm": 1.4820301532745361, "learning_rate": 2.1302734964180228e-06, "loss": 0.5121123313903808, "step": 8110 }, { "epoch": 2.431865828092243, "grad_norm": 1.49030339717865, "learning_rate": 2.1088202207590725e-06, "loss": 0.5002717018127442, "step": 8120 }, { "epoch": 2.4348607367475292, "grad_norm": 1.5989124774932861, "learning_rate": 2.087462776930117e-06, "loss": 0.4997716903686523, "step": 8130 }, { "epoch": 2.437855645402815, "grad_norm": 1.2835077047348022, "learning_rate": 2.066201424299594e-06, "loss": 0.5007314205169677, "step": 8140 }, { "epoch": 2.4408505540581014, "grad_norm": 1.5404707193374634, "learning_rate": 2.045036421068982e-06, "loss": 0.5124270439147949, "step": 8150 }, { "epoch": 2.443845462713387, "grad_norm": 1.587632417678833, "learning_rate": 2.023968024269687e-06, "loss": 0.4949374198913574, "step": 8160 }, { "epoch": 2.4468403713686735, "grad_norm": 1.4403496980667114, "learning_rate": 2.0029964897598974e-06, "loss": 0.48112049102783205, "step": 8170 }, { "epoch": 2.4498352800239593, "grad_norm": 1.6329265832901, "learning_rate": 1.9821220722215064e-06, "loss": 0.5166867733001709, "step": 8180 }, { "epoch": 2.452830188679245, "grad_norm": 1.4010143280029297, "learning_rate": 1.961345025156983e-06, "loss": 0.4915262222290039, "step": 8190 }, { "epoch": 2.4558250973345315, "grad_norm": 1.7108557224273682, "learning_rate": 1.940665600886327e-06, "loss": 0.503018569946289, "step": 8200 }, { "epoch": 2.4588200059898173, "grad_norm": 1.5818506479263306, "learning_rate": 1.920084050543988e-06, "loss": 0.48973965644836426, "step": 8210 }, { "epoch": 2.461814914645103, "grad_norm": 1.5593360662460327, "learning_rate": 1.8996006240758092e-06, "loss": 0.49617342948913573, "step": 8220 }, { "epoch": 2.4648098233003894, "grad_norm": 1.470671534538269, "learning_rate": 1.8792155702360138e-06, "loss": 0.49179978370666505, "step": 8230 }, { "epoch": 2.4678047319556753, "grad_norm": 1.7106302976608276, "learning_rate": 1.858929136584159e-06, "loss": 0.5138489723205566, "step": 8240 }, { "epoch": 2.470799640610961, "grad_norm": 1.413886547088623, "learning_rate": 1.8387415694821508e-06, "loss": 0.47958765029907224, "step": 8250 }, { "epoch": 2.4737945492662474, "grad_norm": 1.5908961296081543, "learning_rate": 1.8186531140912344e-06, "loss": 0.5095817089080811, "step": 8260 }, { "epoch": 2.4767894579215333, "grad_norm": 1.5044485330581665, "learning_rate": 1.798664014369037e-06, "loss": 0.48099870681762696, "step": 8270 }, { "epoch": 2.4797843665768196, "grad_norm": 1.4462517499923706, "learning_rate": 1.7787745130665802e-06, "loss": 0.47478313446044923, "step": 8280 }, { "epoch": 2.4827792752321054, "grad_norm": 1.5918395519256592, "learning_rate": 1.758984851725357e-06, "loss": 0.5039488792419433, "step": 8290 }, { "epoch": 2.4857741838873912, "grad_norm": 1.4895800352096558, "learning_rate": 1.7392952706743793e-06, "loss": 0.5190446853637696, "step": 8300 }, { "epoch": 2.4887690925426775, "grad_norm": 1.590948462486267, "learning_rate": 1.719706009027272e-06, "loss": 0.48047447204589844, "step": 8310 }, { "epoch": 2.4917640011979634, "grad_norm": 1.5965536832809448, "learning_rate": 1.700217304679359e-06, "loss": 0.49631290435791015, "step": 8320 }, { "epoch": 2.4947589098532497, "grad_norm": 1.6916544437408447, "learning_rate": 1.680829394304786e-06, "loss": 0.48668642044067384, "step": 8330 }, { "epoch": 2.4977538185085355, "grad_norm": 1.553376317024231, "learning_rate": 1.6615425133536312e-06, "loss": 0.4995077133178711, "step": 8340 }, { "epoch": 2.5007487271638213, "grad_norm": 1.5679926872253418, "learning_rate": 1.6423568960490632e-06, "loss": 0.505252456665039, "step": 8350 }, { "epoch": 2.5037436358191076, "grad_norm": 1.6989234685897827, "learning_rate": 1.623272775384479e-06, "loss": 0.49995737075805663, "step": 8360 }, { "epoch": 2.5067385444743935, "grad_norm": 1.5071887969970703, "learning_rate": 1.6042903831206914e-06, "loss": 0.48708696365356446, "step": 8370 }, { "epoch": 2.5097334531296793, "grad_norm": 1.587327003479004, "learning_rate": 1.5854099497830967e-06, "loss": 0.4981412887573242, "step": 8380 }, { "epoch": 2.5127283617849656, "grad_norm": 1.3233060836791992, "learning_rate": 1.5666317046588963e-06, "loss": 0.46573629379272463, "step": 8390 }, { "epoch": 2.5157232704402515, "grad_norm": 1.5084353685379028, "learning_rate": 1.5479558757942882e-06, "loss": 0.47600607872009276, "step": 8400 }, { "epoch": 2.5187181790955377, "grad_norm": 1.765870213508606, "learning_rate": 1.529382689991722e-06, "loss": 0.5007995128631592, "step": 8410 }, { "epoch": 2.5217130877508236, "grad_norm": 1.6081740856170654, "learning_rate": 1.5109123728071208e-06, "loss": 0.5044775009155273, "step": 8420 }, { "epoch": 2.52470799640611, "grad_norm": 1.2260218858718872, "learning_rate": 1.492545148547161e-06, "loss": 0.4685311794281006, "step": 8430 }, { "epoch": 2.5277029050613957, "grad_norm": 1.5107367038726807, "learning_rate": 1.474281240266544e-06, "loss": 0.5116828918457031, "step": 8440 }, { "epoch": 2.5306978137166816, "grad_norm": 1.6438366174697876, "learning_rate": 1.456120869765274e-06, "loss": 0.49548845291137694, "step": 8450 }, { "epoch": 2.533692722371968, "grad_norm": 1.506015658378601, "learning_rate": 1.4380642575859838e-06, "loss": 0.4876260757446289, "step": 8460 }, { "epoch": 2.5366876310272537, "grad_norm": 1.6380552053451538, "learning_rate": 1.4201116230112421e-06, "loss": 0.5124927520751953, "step": 8470 }, { "epoch": 2.5396825396825395, "grad_norm": 1.4299864768981934, "learning_rate": 1.4022631840609002e-06, "loss": 0.5214046955108642, "step": 8480 }, { "epoch": 2.542677448337826, "grad_norm": 1.6097781658172607, "learning_rate": 1.3845191574894345e-06, "loss": 0.48117785453796386, "step": 8490 }, { "epoch": 2.5456723569931117, "grad_norm": 1.8168413639068604, "learning_rate": 1.3668797587833283e-06, "loss": 0.4974005699157715, "step": 8500 }, { "epoch": 2.5486672656483975, "grad_norm": 1.5539957284927368, "learning_rate": 1.3493452021584341e-06, "loss": 0.48285598754882814, "step": 8510 }, { "epoch": 2.551662174303684, "grad_norm": 1.7035820484161377, "learning_rate": 1.331915700557398e-06, "loss": 0.5152078628540039, "step": 8520 }, { "epoch": 2.5546570829589696, "grad_norm": 1.435754418373108, "learning_rate": 1.3145914656470471e-06, "loss": 0.4732780456542969, "step": 8530 }, { "epoch": 2.5576519916142555, "grad_norm": 1.7325928211212158, "learning_rate": 1.2973727078158438e-06, "loss": 0.49252891540527344, "step": 8540 }, { "epoch": 2.560646900269542, "grad_norm": 1.6093993186950684, "learning_rate": 1.2802596361713081e-06, "loss": 0.4925223350524902, "step": 8550 }, { "epoch": 2.563641808924828, "grad_norm": 1.5473220348358154, "learning_rate": 1.2632524585374983e-06, "loss": 0.4907097816467285, "step": 8560 }, { "epoch": 2.566636717580114, "grad_norm": 1.743654727935791, "learning_rate": 1.2463513814524697e-06, "loss": 0.5052000999450683, "step": 8570 }, { "epoch": 2.5696316262353998, "grad_norm": 1.5247002840042114, "learning_rate": 1.229556610165782e-06, "loss": 0.4836299419403076, "step": 8580 }, { "epoch": 2.572626534890686, "grad_norm": 1.6155225038528442, "learning_rate": 1.2128683486359915e-06, "loss": 0.49276161193847656, "step": 8590 }, { "epoch": 2.575621443545972, "grad_norm": 1.6852004528045654, "learning_rate": 1.1962867995281902e-06, "loss": 0.49987125396728516, "step": 8600 }, { "epoch": 2.5786163522012577, "grad_norm": 1.6533702611923218, "learning_rate": 1.1798121642115278e-06, "loss": 0.5120342254638672, "step": 8610 }, { "epoch": 2.581611260856544, "grad_norm": 1.2716710567474365, "learning_rate": 1.1634446427567825e-06, "loss": 0.49103879928588867, "step": 8620 }, { "epoch": 2.58460616951183, "grad_norm": 1.5219924449920654, "learning_rate": 1.1471844339339167e-06, "loss": 0.48928394317626955, "step": 8630 }, { "epoch": 2.5876010781671157, "grad_norm": 1.5612694025039673, "learning_rate": 1.1310317352096757e-06, "loss": 0.5022396087646485, "step": 8640 }, { "epoch": 2.590595986822402, "grad_norm": 1.7304942607879639, "learning_rate": 1.1149867427451788e-06, "loss": 0.47353377342224123, "step": 8650 }, { "epoch": 2.593590895477688, "grad_norm": 1.6413494348526, "learning_rate": 1.0990496513935467e-06, "loss": 0.49196691513061525, "step": 8660 }, { "epoch": 2.5965858041329737, "grad_norm": 1.4566773176193237, "learning_rate": 1.08322065469753e-06, "loss": 0.5039726257324219, "step": 8670 }, { "epoch": 2.59958071278826, "grad_norm": 1.6829912662506104, "learning_rate": 1.0674999448871547e-06, "loss": 0.4889340877532959, "step": 8680 }, { "epoch": 2.602575621443546, "grad_norm": 1.6049522161483765, "learning_rate": 1.0518877128773986e-06, "loss": 0.5027350425720215, "step": 8690 }, { "epoch": 2.605570530098832, "grad_norm": 1.6159955263137817, "learning_rate": 1.036384148265861e-06, "loss": 0.493180513381958, "step": 8700 }, { "epoch": 2.608565438754118, "grad_norm": 1.5600866079330444, "learning_rate": 1.020989439330471e-06, "loss": 0.48960447311401367, "step": 8710 }, { "epoch": 2.6115603474094042, "grad_norm": 1.615113377571106, "learning_rate": 1.0057037730271912e-06, "loss": 0.4720893859863281, "step": 8720 }, { "epoch": 2.61455525606469, "grad_norm": 1.373274803161621, "learning_rate": 9.905273349877574e-07, "loss": 0.4918712615966797, "step": 8730 }, { "epoch": 2.617550164719976, "grad_norm": 1.7302544116973877, "learning_rate": 9.754603095174132e-07, "loss": 0.5329276084899902, "step": 8740 }, { "epoch": 2.620545073375262, "grad_norm": 1.5376338958740234, "learning_rate": 9.605028795926807e-07, "loss": 0.4895726203918457, "step": 8750 }, { "epoch": 2.623539982030548, "grad_norm": 1.4624214172363281, "learning_rate": 9.456552268591312e-07, "loss": 0.4909614086151123, "step": 8760 }, { "epoch": 2.626534890685834, "grad_norm": 1.4393444061279297, "learning_rate": 9.309175316291919e-07, "loss": 0.4822981834411621, "step": 8770 }, { "epoch": 2.62952979934112, "grad_norm": 1.6177499294281006, "learning_rate": 9.162899728799346e-07, "loss": 0.49910993576049806, "step": 8780 }, { "epoch": 2.632524707996406, "grad_norm": 1.9156888723373413, "learning_rate": 9.01772728250927e-07, "loss": 0.4932182788848877, "step": 8790 }, { "epoch": 2.635519616651692, "grad_norm": 1.5032786130905151, "learning_rate": 8.873659740420549e-07, "loss": 0.467896556854248, "step": 8800 }, { "epoch": 2.638514525306978, "grad_norm": 1.5643095970153809, "learning_rate": 8.73069885211395e-07, "loss": 0.5180371284484864, "step": 8810 }, { "epoch": 2.641509433962264, "grad_norm": 1.4355653524398804, "learning_rate": 8.588846353730806e-07, "loss": 0.49057998657226565, "step": 8820 }, { "epoch": 2.6445043426175503, "grad_norm": 1.3352596759796143, "learning_rate": 8.448103967952026e-07, "loss": 0.5041935920715332, "step": 8830 }, { "epoch": 2.647499251272836, "grad_norm": 1.458169937133789, "learning_rate": 8.308473403977057e-07, "loss": 0.5041525840759278, "step": 8840 }, { "epoch": 2.6504941599281224, "grad_norm": 1.5872334241867065, "learning_rate": 8.169956357503262e-07, "loss": 0.5054915428161622, "step": 8850 }, { "epoch": 2.6534890685834083, "grad_norm": 1.5991028547286987, "learning_rate": 8.03255451070517e-07, "loss": 0.49498138427734373, "step": 8860 }, { "epoch": 2.656483977238694, "grad_norm": 1.606046199798584, "learning_rate": 7.896269532214262e-07, "loss": 0.4944211483001709, "step": 8870 }, { "epoch": 2.6594788858939804, "grad_norm": 1.575856328010559, "learning_rate": 7.761103077098431e-07, "loss": 0.46777868270874023, "step": 8880 }, { "epoch": 2.6624737945492662, "grad_norm": 1.7411667108535767, "learning_rate": 7.627056786842169e-07, "loss": 0.48057379722595217, "step": 8890 }, { "epoch": 2.665468703204552, "grad_norm": 1.583531141281128, "learning_rate": 7.494132289326395e-07, "loss": 0.48141913414001464, "step": 8900 }, { "epoch": 2.6684636118598384, "grad_norm": 1.4123398065567017, "learning_rate": 7.362331198808837e-07, "loss": 0.49846878051757815, "step": 8910 }, { "epoch": 2.6714585205151242, "grad_norm": 1.5866191387176514, "learning_rate": 7.23165511590439e-07, "loss": 0.4861551284790039, "step": 8920 }, { "epoch": 2.67445342917041, "grad_norm": 1.664759635925293, "learning_rate": 7.102105627565603e-07, "loss": 0.48258323669433595, "step": 8930 }, { "epoch": 2.6774483378256964, "grad_norm": 1.4542200565338135, "learning_rate": 6.973684307063533e-07, "loss": 0.505281639099121, "step": 8940 }, { "epoch": 2.680443246480982, "grad_norm": 1.619585394859314, "learning_rate": 6.846392713968519e-07, "loss": 0.474824333190918, "step": 8950 }, { "epoch": 2.6834381551362685, "grad_norm": 1.551127552986145, "learning_rate": 6.720232394131365e-07, "loss": 0.49350528717041015, "step": 8960 }, { "epoch": 2.6864330637915543, "grad_norm": 1.3840203285217285, "learning_rate": 6.59520487966443e-07, "loss": 0.47249841690063477, "step": 8970 }, { "epoch": 2.6894279724468406, "grad_norm": 1.509779453277588, "learning_rate": 6.471311688923143e-07, "loss": 0.45883750915527344, "step": 8980 }, { "epoch": 2.6924228811021265, "grad_norm": 1.4412727355957031, "learning_rate": 6.348554326487477e-07, "loss": 0.4847591400146484, "step": 8990 }, { "epoch": 2.6954177897574123, "grad_norm": 1.7219630479812622, "learning_rate": 6.226934283143759e-07, "loss": 0.468625545501709, "step": 9000 }, { "epoch": 2.6984126984126986, "grad_norm": 1.4819846153259277, "learning_rate": 6.106453035866467e-07, "loss": 0.4852116584777832, "step": 9010 }, { "epoch": 2.7014076070679844, "grad_norm": 1.6448779106140137, "learning_rate": 5.987112047800381e-07, "loss": 0.5112523078918457, "step": 9020 }, { "epoch": 2.7044025157232703, "grad_norm": 1.6351248025894165, "learning_rate": 5.868912768242741e-07, "loss": 0.48743634223937987, "step": 9030 }, { "epoch": 2.7073974243785566, "grad_norm": 1.5035450458526611, "learning_rate": 5.751856632625752e-07, "loss": 0.4918667793273926, "step": 9040 }, { "epoch": 2.7103923330338424, "grad_norm": 1.6064460277557373, "learning_rate": 5.635945062499004e-07, "loss": 0.4841439247131348, "step": 9050 }, { "epoch": 2.7133872416891283, "grad_norm": 1.6321065425872803, "learning_rate": 5.521179465512349e-07, "loss": 0.4781044006347656, "step": 9060 }, { "epoch": 2.7163821503444145, "grad_norm": 1.3715041875839233, "learning_rate": 5.407561235398717e-07, "loss": 0.4643882751464844, "step": 9070 }, { "epoch": 2.7193770589997004, "grad_norm": 1.6115351915359497, "learning_rate": 5.295091751957249e-07, "loss": 0.4926904678344727, "step": 9080 }, { "epoch": 2.7223719676549867, "grad_norm": 1.6327753067016602, "learning_rate": 5.183772381036456e-07, "loss": 0.4796736717224121, "step": 9090 }, { "epoch": 2.7253668763102725, "grad_norm": 1.459547519683838, "learning_rate": 5.073604474517757e-07, "loss": 0.4696746826171875, "step": 9100 }, { "epoch": 2.728361784965559, "grad_norm": 1.628287672996521, "learning_rate": 4.964589370298911e-07, "loss": 0.5150102138519287, "step": 9110 }, { "epoch": 2.7313566936208447, "grad_norm": 1.540307641029358, "learning_rate": 4.856728392277943e-07, "loss": 0.4660166263580322, "step": 9120 }, { "epoch": 2.7343516022761305, "grad_norm": 1.4227973222732544, "learning_rate": 4.7500228503368775e-07, "loss": 0.4942507266998291, "step": 9130 }, { "epoch": 2.737346510931417, "grad_norm": 1.383530855178833, "learning_rate": 4.644474040325986e-07, "loss": 0.47557845115661623, "step": 9140 }, { "epoch": 2.7403414195867026, "grad_norm": 1.4156097173690796, "learning_rate": 4.5400832440480105e-07, "loss": 0.48183841705322267, "step": 9150 }, { "epoch": 2.7433363282419885, "grad_norm": 1.4534227848052979, "learning_rate": 4.4368517292425083e-07, "loss": 0.49446706771850585, "step": 9160 }, { "epoch": 2.7463312368972748, "grad_norm": 1.9273598194122314, "learning_rate": 4.3347807495705775e-07, "loss": 0.49333763122558594, "step": 9170 }, { "epoch": 2.7493261455525606, "grad_norm": 1.5973845720291138, "learning_rate": 4.233871544599544e-07, "loss": 0.494874095916748, "step": 9180 }, { "epoch": 2.7523210542078465, "grad_norm": 1.454573154449463, "learning_rate": 4.1341253397879863e-07, "loss": 0.49959392547607423, "step": 9190 }, { "epoch": 2.7553159628631327, "grad_norm": 1.6529881954193115, "learning_rate": 4.0355433464707714e-07, "loss": 0.521381425857544, "step": 9200 }, { "epoch": 2.7583108715184186, "grad_norm": 1.604905605316162, "learning_rate": 3.9381267618444187e-07, "loss": 0.47714853286743164, "step": 9210 }, { "epoch": 2.7613057801737044, "grad_norm": 1.4911741018295288, "learning_rate": 3.8418767689524907e-07, "loss": 0.4643728256225586, "step": 9220 }, { "epoch": 2.7643006888289907, "grad_norm": 1.6951920986175537, "learning_rate": 3.7467945366712833e-07, "loss": 0.49946084022521975, "step": 9230 }, { "epoch": 2.767295597484277, "grad_norm": 1.7115346193313599, "learning_rate": 3.652881219695603e-07, "loss": 0.5134757041931153, "step": 9240 }, { "epoch": 2.770290506139563, "grad_norm": 1.7767481803894043, "learning_rate": 3.5601379585247786e-07, "loss": 0.47066478729248046, "step": 9250 }, { "epoch": 2.7732854147948487, "grad_norm": 1.2878029346466064, "learning_rate": 3.4685658794487153e-07, "loss": 0.5043159484863281, "step": 9260 }, { "epoch": 2.776280323450135, "grad_norm": 1.5294309854507446, "learning_rate": 3.378166094534352e-07, "loss": 0.4968732357025146, "step": 9270 }, { "epoch": 2.779275232105421, "grad_norm": 1.6949282884597778, "learning_rate": 3.2889397016120263e-07, "loss": 0.47728781700134276, "step": 9280 }, { "epoch": 2.7822701407607067, "grad_norm": 1.5146561861038208, "learning_rate": 3.2008877842622853e-07, "loss": 0.4853658676147461, "step": 9290 }, { "epoch": 2.785265049415993, "grad_norm": 1.520474910736084, "learning_rate": 3.1140114118025423e-07, "loss": 0.49115581512451173, "step": 9300 }, { "epoch": 2.788259958071279, "grad_norm": 1.590360164642334, "learning_rate": 3.028311639274295e-07, "loss": 0.5119152545928956, "step": 9310 }, { "epoch": 2.7912548667265646, "grad_norm": 1.6626989841461182, "learning_rate": 2.943789507430128e-07, "loss": 0.481568431854248, "step": 9320 }, { "epoch": 2.794249775381851, "grad_norm": 1.5911420583724976, "learning_rate": 2.86044604272121e-07, "loss": 0.47126045227050783, "step": 9330 }, { "epoch": 2.7972446840371368, "grad_norm": 1.529649257659912, "learning_rate": 2.7782822572847477e-07, "loss": 0.4906682014465332, "step": 9340 }, { "epoch": 2.8002395926924226, "grad_norm": 1.548509120941162, "learning_rate": 2.6972991489317536e-07, "loss": 0.4783353328704834, "step": 9350 }, { "epoch": 2.803234501347709, "grad_norm": 1.6628776788711548, "learning_rate": 2.6174977011348525e-07, "loss": 0.4788211345672607, "step": 9360 }, { "epoch": 2.8062294100029948, "grad_norm": 1.4077582359313965, "learning_rate": 2.538878883016416e-07, "loss": 0.5038501739501953, "step": 9370 }, { "epoch": 2.809224318658281, "grad_norm": 1.5272634029388428, "learning_rate": 2.461443649336748e-07, "loss": 0.5057891845703125, "step": 9380 }, { "epoch": 2.812219227313567, "grad_norm": 1.5372052192687988, "learning_rate": 2.3851929404825057e-07, "loss": 0.5056065559387207, "step": 9390 }, { "epoch": 2.815214135968853, "grad_norm": 1.774721622467041, "learning_rate": 2.3101276824552543e-07, "loss": 0.5029263019561767, "step": 9400 }, { "epoch": 2.818209044624139, "grad_norm": 1.470628261566162, "learning_rate": 2.2362487868602956e-07, "loss": 0.49965524673461914, "step": 9410 }, { "epoch": 2.821203953279425, "grad_norm": 1.3716038465499878, "learning_rate": 2.1635571508954677e-07, "loss": 0.4652759552001953, "step": 9420 }, { "epoch": 2.824198861934711, "grad_norm": 1.5807504653930664, "learning_rate": 2.092053657340398e-07, "loss": 0.49729557037353517, "step": 9430 }, { "epoch": 2.827193770589997, "grad_norm": 1.7162376642227173, "learning_rate": 2.0217391745456673e-07, "loss": 0.49972996711730955, "step": 9440 }, { "epoch": 2.830188679245283, "grad_norm": 1.643149971961975, "learning_rate": 1.9526145564223166e-07, "loss": 0.5318907737731934, "step": 9450 }, { "epoch": 2.833183587900569, "grad_norm": 1.4969817399978638, "learning_rate": 1.884680642431469e-07, "loss": 0.4856600761413574, "step": 9460 }, { "epoch": 2.836178496555855, "grad_norm": 1.4547873735427856, "learning_rate": 1.8179382575741588e-07, "loss": 0.49111547470092776, "step": 9470 }, { "epoch": 2.839173405211141, "grad_norm": 1.4986448287963867, "learning_rate": 1.7523882123812286e-07, "loss": 0.4579866886138916, "step": 9480 }, { "epoch": 2.842168313866427, "grad_norm": 1.484006643295288, "learning_rate": 1.6880313029036033e-07, "loss": 0.48090057373046874, "step": 9490 }, { "epoch": 2.845163222521713, "grad_norm": 1.4930380582809448, "learning_rate": 1.624868310702543e-07, "loss": 0.48984603881835936, "step": 9500 }, { "epoch": 2.8481581311769992, "grad_norm": 1.7047784328460693, "learning_rate": 1.562900002840162e-07, "loss": 0.5113039016723633, "step": 9510 }, { "epoch": 2.851153039832285, "grad_norm": 1.4931029081344604, "learning_rate": 1.502127131870146e-07, "loss": 0.47187018394470215, "step": 9520 }, { "epoch": 2.8541479484875714, "grad_norm": 1.479155421257019, "learning_rate": 1.4425504358285712e-07, "loss": 0.5145605087280274, "step": 9530 }, { "epoch": 2.857142857142857, "grad_norm": 1.5331019163131714, "learning_rate": 1.3841706382249798e-07, "loss": 0.5134733200073243, "step": 9540 }, { "epoch": 2.860137765798143, "grad_norm": 1.461573839187622, "learning_rate": 1.3269884480335726e-07, "loss": 0.5031049728393555, "step": 9550 }, { "epoch": 2.8631326744534293, "grad_norm": 1.4485571384429932, "learning_rate": 1.2710045596845854e-07, "loss": 0.5210261344909668, "step": 9560 }, { "epoch": 2.866127583108715, "grad_norm": 1.7255455255508423, "learning_rate": 1.2162196530558835e-07, "loss": 0.47707977294921877, "step": 9570 }, { "epoch": 2.869122491764001, "grad_norm": 1.5349946022033691, "learning_rate": 1.1626343934647122e-07, "loss": 0.4857036590576172, "step": 9580 }, { "epoch": 2.8721174004192873, "grad_norm": 1.5755033493041992, "learning_rate": 1.1102494316595602e-07, "loss": 0.5038958549499511, "step": 9590 }, { "epoch": 2.875112309074573, "grad_norm": 1.4248707294464111, "learning_rate": 1.0590654038123315e-07, "loss": 0.49042625427246095, "step": 9600 }, { "epoch": 2.878107217729859, "grad_norm": 1.3369414806365967, "learning_rate": 1.0090829315105632e-07, "loss": 0.46802678108215334, "step": 9610 }, { "epoch": 2.8811021263851453, "grad_norm": 1.5832442045211792, "learning_rate": 9.603026217499201e-08, "loss": 0.5051434516906739, "step": 9620 }, { "epoch": 2.884097035040431, "grad_norm": 1.734362244606018, "learning_rate": 9.127250669267563e-08, "loss": 0.513918685913086, "step": 9630 }, { "epoch": 2.8870919436957174, "grad_norm": 1.5473873615264893, "learning_rate": 8.663508448310099e-08, "loss": 0.49352059364318845, "step": 9640 }, { "epoch": 2.8900868523510033, "grad_norm": 1.4340280294418335, "learning_rate": 8.211805186391309e-08, "loss": 0.5073667526245117, "step": 9650 }, { "epoch": 2.8930817610062896, "grad_norm": 1.456013560295105, "learning_rate": 7.772146369072309e-08, "loss": 0.5166115283966064, "step": 9660 }, { "epoch": 2.8960766696615754, "grad_norm": 1.5547895431518555, "learning_rate": 7.344537335644664e-08, "loss": 0.4813490867614746, "step": 9670 }, { "epoch": 2.8990715783168612, "grad_norm": 1.4897866249084473, "learning_rate": 6.928983279065326e-08, "loss": 0.4835244655609131, "step": 9680 }, { "epoch": 2.9020664869721475, "grad_norm": 1.8342303037643433, "learning_rate": 6.525489245893357e-08, "loss": 0.5087207794189453, "step": 9690 }, { "epoch": 2.9050613956274334, "grad_norm": 1.3465903997421265, "learning_rate": 6.134060136228969e-08, "loss": 0.4804349899291992, "step": 9700 }, { "epoch": 2.908056304282719, "grad_norm": 1.667733073234558, "learning_rate": 5.7547007036539146e-08, "loss": 0.5204250335693359, "step": 9710 }, { "epoch": 2.9110512129380055, "grad_norm": 1.7703502178192139, "learning_rate": 5.3874155551735255e-08, "loss": 0.5016478538513184, "step": 9720 }, { "epoch": 2.9140461215932913, "grad_norm": 1.7993836402893066, "learning_rate": 5.0322091511615376e-08, "loss": 0.49267988204956054, "step": 9730 }, { "epoch": 2.917041030248577, "grad_norm": 1.4773681163787842, "learning_rate": 4.689085805304472e-08, "loss": 0.4893134117126465, "step": 9740 }, { "epoch": 2.9200359389038635, "grad_norm": 1.754645586013794, "learning_rate": 4.3580496845510025e-08, "loss": 0.49738759994506837, "step": 9750 }, { "epoch": 2.9230308475591493, "grad_norm": 1.4270977973937988, "learning_rate": 4.039104809060002e-08, "loss": 0.4880670070648193, "step": 9760 }, { "epoch": 2.9260257562144356, "grad_norm": 1.5123835802078247, "learning_rate": 3.732255052152245e-08, "loss": 0.5063368797302246, "step": 9770 }, { "epoch": 2.9290206648697215, "grad_norm": 1.5368832349777222, "learning_rate": 3.437504140263337e-08, "loss": 0.5073044776916504, "step": 9780 }, { "epoch": 2.9320155735250077, "grad_norm": 1.5552453994750977, "learning_rate": 3.154855652898636e-08, "loss": 0.5064907073974609, "step": 9790 }, { "epoch": 2.9350104821802936, "grad_norm": 1.4740110635757446, "learning_rate": 2.884313022589513e-08, "loss": 0.47931804656982424, "step": 9800 }, { "epoch": 2.9380053908355794, "grad_norm": 1.6172736883163452, "learning_rate": 2.6258795348516052e-08, "loss": 0.5590275764465332, "step": 9810 }, { "epoch": 2.9410002994908657, "grad_norm": 1.8231149911880493, "learning_rate": 2.3795583281450708e-08, "loss": 0.49012956619262693, "step": 9820 }, { "epoch": 2.9439952081461516, "grad_norm": 1.6394718885421753, "learning_rate": 2.1453523938367304e-08, "loss": 0.4913629531860352, "step": 9830 }, { "epoch": 2.9469901168014374, "grad_norm": 1.902896761894226, "learning_rate": 1.9232645761633196e-08, "loss": 0.4783756256103516, "step": 9840 }, { "epoch": 2.9499850254567237, "grad_norm": 1.6252793073654175, "learning_rate": 1.713297572196848e-08, "loss": 0.4900232791900635, "step": 9850 }, { "epoch": 2.9529799341120095, "grad_norm": 1.6165684461593628, "learning_rate": 1.515453931812627e-08, "loss": 0.4839812755584717, "step": 9860 }, { "epoch": 2.9559748427672954, "grad_norm": 1.573724389076233, "learning_rate": 1.3297360576572937e-08, "loss": 0.489408016204834, "step": 9870 }, { "epoch": 2.9589697514225817, "grad_norm": 1.5624017715454102, "learning_rate": 1.1561462051203898e-08, "loss": 0.528309440612793, "step": 9880 }, { "epoch": 2.9619646600778675, "grad_norm": 1.8057657480239868, "learning_rate": 9.94686482306606e-09, "loss": 0.492403507232666, "step": 9890 }, { "epoch": 2.964959568733154, "grad_norm": 1.463015079498291, "learning_rate": 8.453588500103582e-09, "loss": 0.48632245063781737, "step": 9900 }, { "epoch": 2.9679544773884396, "grad_norm": 1.603826880455017, "learning_rate": 7.081651216916952e-09, "loss": 0.4464372158050537, "step": 9910 }, { "epoch": 2.970949386043726, "grad_norm": 2.058971405029297, "learning_rate": 5.831069634546494e-09, "loss": 0.4887089729309082, "step": 9920 }, { "epoch": 2.973944294699012, "grad_norm": 1.491803526878357, "learning_rate": 4.7018589402692e-09, "loss": 0.4791905879974365, "step": 9930 }, { "epoch": 2.9769392033542976, "grad_norm": 1.6646156311035156, "learning_rate": 3.6940328474088795e-09, "loss": 0.48751044273376465, "step": 9940 }, { "epoch": 2.979934112009584, "grad_norm": 1.6473972797393799, "learning_rate": 2.80760359517962e-09, "loss": 0.4723640441894531, "step": 9950 }, { "epoch": 2.9829290206648698, "grad_norm": 1.679745078086853, "learning_rate": 2.042581948528133e-09, "loss": 0.4800722122192383, "step": 9960 }, { "epoch": 2.9859239293201556, "grad_norm": 1.5107241868972778, "learning_rate": 1.3989771980083e-09, "loss": 0.49669723510742186, "step": 9970 }, { "epoch": 2.988918837975442, "grad_norm": 1.6462763547897339, "learning_rate": 8.767971596634895e-10, "loss": 0.5274055480957032, "step": 9980 }, { "epoch": 2.9919137466307277, "grad_norm": 1.6557178497314453, "learning_rate": 4.760481749399581e-10, "loss": 0.5069705963134765, "step": 9990 }, { "epoch": 2.9949086552860136, "grad_norm": 1.4907699823379517, "learning_rate": 1.967351105991444e-10, "loss": 0.47740840911865234, "step": 10000 }, { "epoch": 2.9979035639413, "grad_norm": 1.614367961883545, "learning_rate": 3.8861358667707794e-11, "loss": 0.48827524185180665, "step": 10010 }, { "epoch": 3.0, "step": 10017, "total_flos": 6.540774321348936e+17, "train_loss": 0.7543762712715699, "train_runtime": 30332.4713, "train_samples_per_second": 5.284, "train_steps_per_second": 0.33 } ], "logging_steps": 10, "max_steps": 10017, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.540774321348936e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }