| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 10017, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002994908655286014, |
| "grad_norm": 11.265819549560547, |
| "learning_rate": 1.7964071856287425e-07, |
| "loss": 3.7097137451171873, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005989817310572028, |
| "grad_norm": 10.610027313232422, |
| "learning_rate": 3.792415169660679e-07, |
| "loss": 3.7116737365722656, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.008984725965858042, |
| "grad_norm": 9.398868560791016, |
| "learning_rate": 5.788423153692615e-07, |
| "loss": 3.5568317413330077, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011979634621144056, |
| "grad_norm": 8.574817657470703, |
| "learning_rate": 7.784431137724552e-07, |
| "loss": 3.5488380432128905, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01497454327643007, |
| "grad_norm": 6.501334190368652, |
| "learning_rate": 9.780439121756488e-07, |
| "loss": 3.4215156555175783, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.017969451931716084, |
| "grad_norm": 5.397614479064941, |
| "learning_rate": 1.1776447105788423e-06, |
| "loss": 3.4414459228515626, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.020964360587002098, |
| "grad_norm": 5.161387920379639, |
| "learning_rate": 1.377245508982036e-06, |
| "loss": 3.2817943572998045, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02395926924228811, |
| "grad_norm": 4.114301681518555, |
| "learning_rate": 1.5768463073852298e-06, |
| "loss": 3.046010208129883, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.026954177897574125, |
| "grad_norm": 3.7204360961914062, |
| "learning_rate": 1.7764471057884233e-06, |
| "loss": 2.8605175018310547, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02994908655286014, |
| "grad_norm": 3.5608198642730713, |
| "learning_rate": 1.976047904191617e-06, |
| "loss": 2.6643051147460937, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03294399520814615, |
| "grad_norm": 4.155009746551514, |
| "learning_rate": 2.1756487025948105e-06, |
| "loss": 2.725708770751953, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03593890386343217, |
| "grad_norm": 3.600083112716675, |
| "learning_rate": 2.3752495009980044e-06, |
| "loss": 2.599940872192383, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03893381251871818, |
| "grad_norm": 3.8976871967315674, |
| "learning_rate": 2.5748502994011975e-06, |
| "loss": 2.478201675415039, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.041928721174004195, |
| "grad_norm": 3.515232563018799, |
| "learning_rate": 2.7744510978043914e-06, |
| "loss": 2.3760326385498045, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.044923629829290206, |
| "grad_norm": 3.7067975997924805, |
| "learning_rate": 2.974051896207585e-06, |
| "loss": 2.2870811462402343, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04791853848457622, |
| "grad_norm": 3.6703312397003174, |
| "learning_rate": 3.173652694610779e-06, |
| "loss": 2.2208534240722657, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.050913447139862233, |
| "grad_norm": 4.008607864379883, |
| "learning_rate": 3.373253493013972e-06, |
| "loss": 2.0857887268066406, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05390835579514825, |
| "grad_norm": 3.115046977996826, |
| "learning_rate": 3.572854291417166e-06, |
| "loss": 2.0886693954467774, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.05690326445043426, |
| "grad_norm": 3.818377733230591, |
| "learning_rate": 3.7724550898203594e-06, |
| "loss": 2.0709930419921876, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.05989817310572028, |
| "grad_norm": 4.294528007507324, |
| "learning_rate": 3.972055888223553e-06, |
| "loss": 1.9934419631958007, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06289308176100629, |
| "grad_norm": 3.16184663772583, |
| "learning_rate": 4.171656686626747e-06, |
| "loss": 1.9263175964355468, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0658879904162923, |
| "grad_norm": 3.972487688064575, |
| "learning_rate": 4.371257485029941e-06, |
| "loss": 1.907310676574707, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.06888289907157831, |
| "grad_norm": 3.4438064098358154, |
| "learning_rate": 4.570858283433134e-06, |
| "loss": 1.8906272888183593, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07187780772686433, |
| "grad_norm": 3.363638162612915, |
| "learning_rate": 4.770459081836328e-06, |
| "loss": 1.827865219116211, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.07487271638215034, |
| "grad_norm": 3.1280667781829834, |
| "learning_rate": 4.970059880239521e-06, |
| "loss": 1.7821989059448242, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.07786762503743636, |
| "grad_norm": 3.62811279296875, |
| "learning_rate": 5.169660678642715e-06, |
| "loss": 1.7012821197509767, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08086253369272237, |
| "grad_norm": 3.6099653244018555, |
| "learning_rate": 5.369261477045909e-06, |
| "loss": 1.7187646865844726, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.08385744234800839, |
| "grad_norm": 3.0062944889068604, |
| "learning_rate": 5.568862275449102e-06, |
| "loss": 1.706169891357422, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0868523510032944, |
| "grad_norm": 3.3500194549560547, |
| "learning_rate": 5.7684630738522965e-06, |
| "loss": 1.600248146057129, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.08984725965858041, |
| "grad_norm": 3.580355167388916, |
| "learning_rate": 5.96806387225549e-06, |
| "loss": 1.6295143127441407, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.09284216831386642, |
| "grad_norm": 3.1939895153045654, |
| "learning_rate": 6.167664670658683e-06, |
| "loss": 1.635610580444336, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.09583707696915245, |
| "grad_norm": 2.898219585418701, |
| "learning_rate": 6.367265469061877e-06, |
| "loss": 1.5857232093811036, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.09883198562443846, |
| "grad_norm": 3.525341033935547, |
| "learning_rate": 6.5668662674650705e-06, |
| "loss": 1.5980493545532226, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.10182689427972447, |
| "grad_norm": 3.246626615524292, |
| "learning_rate": 6.7664670658682645e-06, |
| "loss": 1.5958646774291991, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.10482180293501048, |
| "grad_norm": 3.128176212310791, |
| "learning_rate": 6.9660678642714575e-06, |
| "loss": 1.4537940979003907, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1078167115902965, |
| "grad_norm": 3.4397571086883545, |
| "learning_rate": 7.165668662674651e-06, |
| "loss": 1.5209310531616211, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.11081162024558251, |
| "grad_norm": 2.7937843799591064, |
| "learning_rate": 7.365269461077845e-06, |
| "loss": 1.5005243301391602, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.11380652890086852, |
| "grad_norm": 3.246121406555176, |
| "learning_rate": 7.5648702594810385e-06, |
| "loss": 1.453624153137207, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.11680143755615453, |
| "grad_norm": 2.984189748764038, |
| "learning_rate": 7.764471057884232e-06, |
| "loss": 1.4527925491333007, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.11979634621144056, |
| "grad_norm": 3.031707286834717, |
| "learning_rate": 7.964071856287425e-06, |
| "loss": 1.48089656829834, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.12279125486672657, |
| "grad_norm": 3.1862800121307373, |
| "learning_rate": 8.16367265469062e-06, |
| "loss": 1.4173410415649415, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.12578616352201258, |
| "grad_norm": 2.731788396835327, |
| "learning_rate": 8.363273453093813e-06, |
| "loss": 1.4160845756530762, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1287810721772986, |
| "grad_norm": 2.994575262069702, |
| "learning_rate": 8.562874251497007e-06, |
| "loss": 1.3780389785766602, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1317759808325846, |
| "grad_norm": 3.6287078857421875, |
| "learning_rate": 8.7624750499002e-06, |
| "loss": 1.3902482986450195, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1347708894878706, |
| "grad_norm": 3.0350985527038574, |
| "learning_rate": 8.962075848303395e-06, |
| "loss": 1.3507318496704102, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.13776579814315662, |
| "grad_norm": 3.261089563369751, |
| "learning_rate": 9.161676646706587e-06, |
| "loss": 1.3652063369750977, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.14076070679844266, |
| "grad_norm": 2.7072596549987793, |
| "learning_rate": 9.361277445109781e-06, |
| "loss": 1.3391490936279298, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.14375561545372867, |
| "grad_norm": 2.9523279666900635, |
| "learning_rate": 9.560878243512974e-06, |
| "loss": 1.3071959495544434, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.14675052410901468, |
| "grad_norm": 2.713932752609253, |
| "learning_rate": 9.760479041916169e-06, |
| "loss": 1.3630129814147949, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1497454327643007, |
| "grad_norm": 3.003009557723999, |
| "learning_rate": 9.960079840319361e-06, |
| "loss": 1.2988483428955078, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1527403414195867, |
| "grad_norm": 3.2941436767578125, |
| "learning_rate": 1.0159680638722555e-05, |
| "loss": 1.309105110168457, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1557352500748727, |
| "grad_norm": 2.7856335639953613, |
| "learning_rate": 1.035928143712575e-05, |
| "loss": 1.2957303047180175, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 3.1381421089172363, |
| "learning_rate": 1.0558882235528941e-05, |
| "loss": 1.3377592086791992, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.16172506738544473, |
| "grad_norm": 2.874908924102783, |
| "learning_rate": 1.0758483033932137e-05, |
| "loss": 1.3037958145141602, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.16471997604073077, |
| "grad_norm": 2.5848164558410645, |
| "learning_rate": 1.0958083832335331e-05, |
| "loss": 1.2732759475708009, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.16771488469601678, |
| "grad_norm": 2.7429795265197754, |
| "learning_rate": 1.1157684630738523e-05, |
| "loss": 1.255198860168457, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.1707097933513028, |
| "grad_norm": 2.91156268119812, |
| "learning_rate": 1.1357285429141717e-05, |
| "loss": 1.2870652198791503, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.1737047020065888, |
| "grad_norm": 3.2635698318481445, |
| "learning_rate": 1.155688622754491e-05, |
| "loss": 1.2783642768859864, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1766996106618748, |
| "grad_norm": 2.566370725631714, |
| "learning_rate": 1.1756487025948105e-05, |
| "loss": 1.2557472229003905, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.17969451931716082, |
| "grad_norm": 2.617335796356201, |
| "learning_rate": 1.1956087824351299e-05, |
| "loss": 1.2616601943969727, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.18268942797244683, |
| "grad_norm": 3.0253615379333496, |
| "learning_rate": 1.2155688622754491e-05, |
| "loss": 1.2677306175231933, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.18568433662773284, |
| "grad_norm": 2.745788335800171, |
| "learning_rate": 1.2355289421157685e-05, |
| "loss": 1.2631946563720704, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.18867924528301888, |
| "grad_norm": 2.8516855239868164, |
| "learning_rate": 1.255489021956088e-05, |
| "loss": 1.207695484161377, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.1916741539383049, |
| "grad_norm": 2.7953591346740723, |
| "learning_rate": 1.2754491017964073e-05, |
| "loss": 1.1995798110961915, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1946690625935909, |
| "grad_norm": 2.414024591445923, |
| "learning_rate": 1.2954091816367267e-05, |
| "loss": 1.1361705780029296, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.1976639712488769, |
| "grad_norm": 2.6588900089263916, |
| "learning_rate": 1.3153692614770459e-05, |
| "loss": 1.2122285842895508, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.20065887990416292, |
| "grad_norm": 2.770174980163574, |
| "learning_rate": 1.3353293413173653e-05, |
| "loss": 1.2334482192993164, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.20365378855944893, |
| "grad_norm": 3.297915458679199, |
| "learning_rate": 1.3552894211576849e-05, |
| "loss": 1.2045844078063965, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.20664869721473494, |
| "grad_norm": 2.676650285720825, |
| "learning_rate": 1.3752495009980041e-05, |
| "loss": 1.181098747253418, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.20964360587002095, |
| "grad_norm": 2.871534824371338, |
| "learning_rate": 1.3952095808383235e-05, |
| "loss": 1.2326663970947265, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.21263851452530697, |
| "grad_norm": 2.8828704357147217, |
| "learning_rate": 1.4151696606786429e-05, |
| "loss": 1.198216152191162, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.215633423180593, |
| "grad_norm": 2.532287120819092, |
| "learning_rate": 1.4351297405189621e-05, |
| "loss": 1.1558509826660157, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.21862833183587901, |
| "grad_norm": 2.6054844856262207, |
| "learning_rate": 1.4550898203592817e-05, |
| "loss": 1.1505748748779296, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.22162324049116502, |
| "grad_norm": 2.5080556869506836, |
| "learning_rate": 1.4750499001996009e-05, |
| "loss": 1.1882616043090821, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.22461814914645103, |
| "grad_norm": 2.641071081161499, |
| "learning_rate": 1.4950099800399203e-05, |
| "loss": 1.14835844039917, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.22761305780173705, |
| "grad_norm": 2.80320405960083, |
| "learning_rate": 1.5149700598802397e-05, |
| "loss": 1.1875015258789063, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.23060796645702306, |
| "grad_norm": 2.6086411476135254, |
| "learning_rate": 1.534930139720559e-05, |
| "loss": 1.1698853492736816, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.23360287511230907, |
| "grad_norm": 2.6112313270568848, |
| "learning_rate": 1.5548902195608783e-05, |
| "loss": 1.1667849540710449, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.23659778376759508, |
| "grad_norm": 2.603092670440674, |
| "learning_rate": 1.5748502994011977e-05, |
| "loss": 1.1800429344177246, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.23959269242288112, |
| "grad_norm": 2.5828044414520264, |
| "learning_rate": 1.594810379241517e-05, |
| "loss": 1.1859128952026368, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.24258760107816713, |
| "grad_norm": 2.3855056762695312, |
| "learning_rate": 1.6147704590818365e-05, |
| "loss": 1.1251150131225587, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.24558250973345314, |
| "grad_norm": 2.433763265609741, |
| "learning_rate": 1.634730538922156e-05, |
| "loss": 1.152597141265869, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.24857741838873915, |
| "grad_norm": 2.6475374698638916, |
| "learning_rate": 1.6546906187624752e-05, |
| "loss": 1.1266324996948243, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.25157232704402516, |
| "grad_norm": 2.4123408794403076, |
| "learning_rate": 1.6746506986027946e-05, |
| "loss": 1.1574108123779296, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.25456723569931117, |
| "grad_norm": 2.3260607719421387, |
| "learning_rate": 1.6946107784431137e-05, |
| "loss": 1.1445627212524414, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2575621443545972, |
| "grad_norm": 2.6214170455932617, |
| "learning_rate": 1.7145708582834334e-05, |
| "loss": 1.1470834732055664, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.2605570530098832, |
| "grad_norm": 2.3086888790130615, |
| "learning_rate": 1.7345309381237528e-05, |
| "loss": 1.064506721496582, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.2635519616651692, |
| "grad_norm": 2.7607717514038086, |
| "learning_rate": 1.754491017964072e-05, |
| "loss": 1.0883188247680664, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.2665468703204552, |
| "grad_norm": 2.9048917293548584, |
| "learning_rate": 1.7744510978043913e-05, |
| "loss": 1.135384750366211, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.2695417789757412, |
| "grad_norm": 2.61525559425354, |
| "learning_rate": 1.7944111776447107e-05, |
| "loss": 1.1149521827697755, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.27253668763102723, |
| "grad_norm": 2.410212278366089, |
| "learning_rate": 1.81437125748503e-05, |
| "loss": 1.0511359214782714, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.27553159628631324, |
| "grad_norm": 2.789114475250244, |
| "learning_rate": 1.8343313373253494e-05, |
| "loss": 1.0592556953430177, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2785265049415993, |
| "grad_norm": 2.6519744396209717, |
| "learning_rate": 1.854291417165669e-05, |
| "loss": 1.0614826202392578, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.2815214135968853, |
| "grad_norm": 2.6041579246520996, |
| "learning_rate": 1.8742514970059882e-05, |
| "loss": 1.1006576538085937, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.2845163222521713, |
| "grad_norm": 2.300130844116211, |
| "learning_rate": 1.8942115768463076e-05, |
| "loss": 1.092987632751465, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.28751123090745734, |
| "grad_norm": 2.857870101928711, |
| "learning_rate": 1.914171656686627e-05, |
| "loss": 1.1021804809570312, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.29050613956274335, |
| "grad_norm": 2.5659523010253906, |
| "learning_rate": 1.9341317365269464e-05, |
| "loss": 1.1216981887817383, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.29350104821802936, |
| "grad_norm": 2.596017599105835, |
| "learning_rate": 1.9540918163672655e-05, |
| "loss": 1.1237613677978515, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.29649595687331537, |
| "grad_norm": 2.391162395477295, |
| "learning_rate": 1.974051896207585e-05, |
| "loss": 1.0967981338500976, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2994908655286014, |
| "grad_norm": 2.5012688636779785, |
| "learning_rate": 1.9940119760479046e-05, |
| "loss": 1.06771240234375, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3024857741838874, |
| "grad_norm": 2.5918989181518555, |
| "learning_rate": 1.9999970246767755e-05, |
| "loss": 1.0973945617675782, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3054806828391734, |
| "grad_norm": 2.2349612712860107, |
| "learning_rate": 1.9999824517076846e-05, |
| "loss": 1.1063728332519531, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3084755914944594, |
| "grad_norm": 2.0478508472442627, |
| "learning_rate": 1.999955734781544e-05, |
| "loss": 1.070663070678711, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3114705001497454, |
| "grad_norm": 2.709059953689575, |
| "learning_rate": 1.9999168742228082e-05, |
| "loss": 1.0881099700927734, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.31446540880503143, |
| "grad_norm": 2.1217539310455322, |
| "learning_rate": 1.9998658705034068e-05, |
| "loss": 1.0525678634643554, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.31746031746031744, |
| "grad_norm": 2.183957099914551, |
| "learning_rate": 1.9998027242427373e-05, |
| "loss": 1.0776740074157716, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.32045522611560345, |
| "grad_norm": 2.300778388977051, |
| "learning_rate": 1.9997274362076588e-05, |
| "loss": 0.9790191650390625, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.32345013477088946, |
| "grad_norm": 2.1335058212280273, |
| "learning_rate": 1.9996400073124822e-05, |
| "loss": 1.0659798622131347, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.3264450434261755, |
| "grad_norm": 2.397620916366577, |
| "learning_rate": 1.9995404386189584e-05, |
| "loss": 1.0076449394226075, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.32943995208146154, |
| "grad_norm": 2.2613308429718018, |
| "learning_rate": 1.999428731336267e-05, |
| "loss": 1.0323925971984864, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.33243486073674755, |
| "grad_norm": 2.229064702987671, |
| "learning_rate": 1.999304886821e-05, |
| "loss": 1.015975570678711, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.33542976939203356, |
| "grad_norm": 2.0292208194732666, |
| "learning_rate": 1.9991689065771465e-05, |
| "loss": 1.034627342224121, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.33842467804731957, |
| "grad_norm": 2.0535295009613037, |
| "learning_rate": 1.9990207922560733e-05, |
| "loss": 1.0778848648071289, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3414195867026056, |
| "grad_norm": 2.31610107421875, |
| "learning_rate": 1.9988605456565064e-05, |
| "loss": 1.0127778053283691, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.3444144953578916, |
| "grad_norm": 2.2170019149780273, |
| "learning_rate": 1.9986881687245076e-05, |
| "loss": 1.0534976959228515, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.3474094040131776, |
| "grad_norm": 2.850485324859619, |
| "learning_rate": 1.9985036635534513e-05, |
| "loss": 1.0433968544006347, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.3504043126684636, |
| "grad_norm": 2.169513702392578, |
| "learning_rate": 1.9983070323840004e-05, |
| "loss": 1.0182857513427734, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.3533992213237496, |
| "grad_norm": 2.373967170715332, |
| "learning_rate": 1.998098277604077e-05, |
| "loss": 1.0277379989624023, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.35639412997903563, |
| "grad_norm": 2.0569117069244385, |
| "learning_rate": 1.9978774017488345e-05, |
| "loss": 1.0588248252868653, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.35938903863432164, |
| "grad_norm": 2.008612632751465, |
| "learning_rate": 1.997644407500627e-05, |
| "loss": 1.0627038955688477, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.36238394728960766, |
| "grad_norm": 2.0754430294036865, |
| "learning_rate": 1.9973992976889763e-05, |
| "loss": 0.9568704605102539, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.36537885594489367, |
| "grad_norm": 2.1991281509399414, |
| "learning_rate": 1.9971420752905372e-05, |
| "loss": 1.0216650009155273, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.3683737646001797, |
| "grad_norm": 2.127699375152588, |
| "learning_rate": 1.9968727434290632e-05, |
| "loss": 1.0075697898864746, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.3713686732554657, |
| "grad_norm": 2.0525062084198, |
| "learning_rate": 1.9965913053753656e-05, |
| "loss": 1.0119134902954101, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.3743635819107517, |
| "grad_norm": 2.035109519958496, |
| "learning_rate": 1.9962977645472762e-05, |
| "loss": 0.9841228485107422, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.37735849056603776, |
| "grad_norm": 1.8049895763397217, |
| "learning_rate": 1.9959921245096047e-05, |
| "loss": 0.9867215156555176, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.3803533992213238, |
| "grad_norm": 1.9257926940917969, |
| "learning_rate": 1.995674388974096e-05, |
| "loss": 1.0146930694580079, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.3833483078766098, |
| "grad_norm": 1.982143759727478, |
| "learning_rate": 1.995344561799384e-05, |
| "loss": 1.0018959045410156, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.3863432165318958, |
| "grad_norm": 2.125669240951538, |
| "learning_rate": 1.9950026469909462e-05, |
| "loss": 0.9433177947998047, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.3893381251871818, |
| "grad_norm": 2.0401179790496826, |
| "learning_rate": 1.9946486487010546e-05, |
| "loss": 0.9733993530273437, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.3923330338424678, |
| "grad_norm": 2.141977071762085, |
| "learning_rate": 1.994282571228724e-05, |
| "loss": 0.9609798431396485, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.3953279424977538, |
| "grad_norm": 2.166964054107666, |
| "learning_rate": 1.9939044190196624e-05, |
| "loss": 1.0087587356567382, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.39832285115303984, |
| "grad_norm": 2.3837554454803467, |
| "learning_rate": 1.9935141966662138e-05, |
| "loss": 0.9748960494995117, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.40131775980832585, |
| "grad_norm": 2.0114309787750244, |
| "learning_rate": 1.993111908907305e-05, |
| "loss": 0.9450881004333496, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.40431266846361186, |
| "grad_norm": 1.914791226387024, |
| "learning_rate": 1.9926975606283875e-05, |
| "loss": 1.0085538864135741, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.40730757711889787, |
| "grad_norm": 2.1861987113952637, |
| "learning_rate": 1.9922711568613765e-05, |
| "loss": 1.0055445671081542, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.4103024857741839, |
| "grad_norm": 2.173232316970825, |
| "learning_rate": 1.9918327027845926e-05, |
| "loss": 1.003979778289795, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.4132973944294699, |
| "grad_norm": 1.997961163520813, |
| "learning_rate": 1.9913822037226965e-05, |
| "loss": 0.9993978500366211, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.4162923030847559, |
| "grad_norm": 1.9082618951797485, |
| "learning_rate": 1.9909196651466255e-05, |
| "loss": 0.9729861259460449, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.4192872117400419, |
| "grad_norm": 2.1016769409179688, |
| "learning_rate": 1.9904450926735267e-05, |
| "loss": 0.9694742202758789, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4222821203953279, |
| "grad_norm": 2.0774240493774414, |
| "learning_rate": 1.9899584920666885e-05, |
| "loss": 1.0101737976074219, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.42527702905061393, |
| "grad_norm": 1.939172625541687, |
| "learning_rate": 1.989459869235472e-05, |
| "loss": 0.9967576026916504, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.4282719377059, |
| "grad_norm": 1.7587170600891113, |
| "learning_rate": 1.988949230235238e-05, |
| "loss": 0.9855384826660156, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.431266846361186, |
| "grad_norm": 2.190992832183838, |
| "learning_rate": 1.988426581267273e-05, |
| "loss": 0.9569406509399414, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.434261755016472, |
| "grad_norm": 2.0716278553009033, |
| "learning_rate": 1.9878919286787147e-05, |
| "loss": 0.9796838760375977, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.43725666367175803, |
| "grad_norm": 1.9944990873336792, |
| "learning_rate": 1.9873452789624758e-05, |
| "loss": 0.9943093299865723, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.44025157232704404, |
| "grad_norm": 1.7507660388946533, |
| "learning_rate": 1.986786638757163e-05, |
| "loss": 1.0118515014648437, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.44324648098233005, |
| "grad_norm": 1.824371099472046, |
| "learning_rate": 1.9862160148469983e-05, |
| "loss": 0.9747288703918457, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.44624138963761606, |
| "grad_norm": 1.956614375114441, |
| "learning_rate": 1.9856334141617354e-05, |
| "loss": 0.9127725601196289, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.44923629829290207, |
| "grad_norm": 1.730757474899292, |
| "learning_rate": 1.985038843776576e-05, |
| "loss": 0.9908318519592285, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4522312069481881, |
| "grad_norm": 1.7902590036392212, |
| "learning_rate": 1.984432310912084e-05, |
| "loss": 0.9776639938354492, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.4552261156034741, |
| "grad_norm": 1.738864779472351, |
| "learning_rate": 1.9838138229340984e-05, |
| "loss": 0.9166988372802735, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.4582210242587601, |
| "grad_norm": 1.9358223676681519, |
| "learning_rate": 1.9831833873536417e-05, |
| "loss": 0.9750303268432617, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.4612159329140461, |
| "grad_norm": 2.1201319694519043, |
| "learning_rate": 1.9825410118268313e-05, |
| "loss": 0.937105655670166, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.4642108415693321, |
| "grad_norm": 2.094879388809204, |
| "learning_rate": 1.981886704154784e-05, |
| "loss": 0.9664621353149414, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.46720575022461813, |
| "grad_norm": 1.9292011260986328, |
| "learning_rate": 1.9812204722835248e-05, |
| "loss": 0.9639430999755859, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.47020065887990414, |
| "grad_norm": 1.9128199815750122, |
| "learning_rate": 1.9805423243038863e-05, |
| "loss": 0.9359722137451172, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.47319556753519015, |
| "grad_norm": 1.8651394844055176, |
| "learning_rate": 1.979852268451413e-05, |
| "loss": 0.9354040145874023, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 2.0249080657958984, |
| "learning_rate": 1.9791503131062604e-05, |
| "loss": 0.9166906356811524, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.47918538484576223, |
| "grad_norm": 1.9980604648590088, |
| "learning_rate": 1.978436466793094e-05, |
| "loss": 0.9285884857177734, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.48218029350104824, |
| "grad_norm": 2.2854161262512207, |
| "learning_rate": 1.9777107381809845e-05, |
| "loss": 0.9732503890991211, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.48517520215633425, |
| "grad_norm": 2.344902992248535, |
| "learning_rate": 1.9769731360833043e-05, |
| "loss": 0.893382453918457, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.48817011081162026, |
| "grad_norm": 1.9881426095962524, |
| "learning_rate": 1.976223669457618e-05, |
| "loss": 0.9103918075561523, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.49116501946690627, |
| "grad_norm": 1.8509609699249268, |
| "learning_rate": 1.9754623474055764e-05, |
| "loss": 0.8984537124633789, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.4941599281221923, |
| "grad_norm": 1.8210080862045288, |
| "learning_rate": 1.974689179172804e-05, |
| "loss": 0.9495397567749023, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.4971548367774783, |
| "grad_norm": 2.0903966426849365, |
| "learning_rate": 1.973904174148787e-05, |
| "loss": 0.9040670394897461, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5001497454327642, |
| "grad_norm": 2.109459638595581, |
| "learning_rate": 1.97310734186676e-05, |
| "loss": 0.9156620025634765, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.5031446540880503, |
| "grad_norm": 2.0825653076171875, |
| "learning_rate": 1.9722986920035904e-05, |
| "loss": 0.9217489242553711, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5061395627433364, |
| "grad_norm": 1.6379085779190063, |
| "learning_rate": 1.9714782343796593e-05, |
| "loss": 0.8876156806945801, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.5091344713986223, |
| "grad_norm": 1.769353985786438, |
| "learning_rate": 1.9706459789587437e-05, |
| "loss": 0.9461544036865235, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5121293800539084, |
| "grad_norm": 1.868715763092041, |
| "learning_rate": 1.9698019358478948e-05, |
| "loss": 0.9041751861572266, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.5151242887091944, |
| "grad_norm": 1.946820616722107, |
| "learning_rate": 1.9689461152973166e-05, |
| "loss": 0.9117437362670898, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.5181191973644804, |
| "grad_norm": 1.759941577911377, |
| "learning_rate": 1.9680785277002388e-05, |
| "loss": 0.9326802253723144, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.5211141060197664, |
| "grad_norm": 1.9923816919326782, |
| "learning_rate": 1.9671991835927928e-05, |
| "loss": 0.9163030624389649, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.5241090146750524, |
| "grad_norm": 1.6598330736160278, |
| "learning_rate": 1.9663080936538834e-05, |
| "loss": 0.9651662826538085, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5271039233303384, |
| "grad_norm": 1.7775046825408936, |
| "learning_rate": 1.9654052687050583e-05, |
| "loss": 0.9174107551574707, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.5300988319856245, |
| "grad_norm": 1.663625717163086, |
| "learning_rate": 1.9644907197103772e-05, |
| "loss": 0.9102935791015625, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.5330937406409104, |
| "grad_norm": 1.7679104804992676, |
| "learning_rate": 1.9635644577762792e-05, |
| "loss": 0.8674448013305665, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.5360886492961965, |
| "grad_norm": 1.7731411457061768, |
| "learning_rate": 1.962626494151446e-05, |
| "loss": 0.9251704216003418, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.5390835579514824, |
| "grad_norm": 1.5716463327407837, |
| "learning_rate": 1.961676840226668e-05, |
| "loss": 0.9307645797729492, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5420784666067685, |
| "grad_norm": 1.789790391921997, |
| "learning_rate": 1.9607155075347038e-05, |
| "loss": 0.8879091262817382, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.5450733752620545, |
| "grad_norm": 1.888209342956543, |
| "learning_rate": 1.9597425077501416e-05, |
| "loss": 0.9431265830993653, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.5480682839173405, |
| "grad_norm": 1.672878384590149, |
| "learning_rate": 1.958757852689256e-05, |
| "loss": 0.9204288482666015, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.5510631925726265, |
| "grad_norm": 1.5364495515823364, |
| "learning_rate": 1.957761554309866e-05, |
| "loss": 0.9210444450378418, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.5540581012279125, |
| "grad_norm": 1.8222407102584839, |
| "learning_rate": 1.9567536247111878e-05, |
| "loss": 0.8770456314086914, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.5570530098831986, |
| "grad_norm": 1.7476502656936646, |
| "learning_rate": 1.955734076133691e-05, |
| "loss": 0.8903690338134765, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.5600479185384846, |
| "grad_norm": 1.86436927318573, |
| "learning_rate": 1.9547029209589464e-05, |
| "loss": 0.9007977485656739, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.5630428271937706, |
| "grad_norm": 1.5122853517532349, |
| "learning_rate": 1.9536601717094778e-05, |
| "loss": 0.882940673828125, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.5660377358490566, |
| "grad_norm": 1.75053071975708, |
| "learning_rate": 1.95260584104861e-05, |
| "loss": 0.8975667953491211, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.5690326445043427, |
| "grad_norm": 1.824499487876892, |
| "learning_rate": 1.9515399417803135e-05, |
| "loss": 0.8656953811645508, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5720275531596286, |
| "grad_norm": 1.7241740226745605, |
| "learning_rate": 1.9504624868490506e-05, |
| "loss": 0.889422607421875, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.5750224618149147, |
| "grad_norm": 1.6877175569534302, |
| "learning_rate": 1.9493734893396176e-05, |
| "loss": 0.8777622222900391, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.5780173704702006, |
| "grad_norm": 1.7374579906463623, |
| "learning_rate": 1.948272962476985e-05, |
| "loss": 0.8936216354370117, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.5810122791254867, |
| "grad_norm": 1.9554184675216675, |
| "learning_rate": 1.9471609196261386e-05, |
| "loss": 0.9030414581298828, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.5840071877807727, |
| "grad_norm": 2.0266647338867188, |
| "learning_rate": 1.9460373742919158e-05, |
| "loss": 0.8939972877502441, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.5870020964360587, |
| "grad_norm": 1.811481237411499, |
| "learning_rate": 1.9449023401188427e-05, |
| "loss": 0.9280128479003906, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.5899970050913447, |
| "grad_norm": 1.7992744445800781, |
| "learning_rate": 1.9437558308909674e-05, |
| "loss": 0.893956470489502, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.5929919137466307, |
| "grad_norm": 1.6678317785263062, |
| "learning_rate": 1.9425978605316924e-05, |
| "loss": 0.9255929946899414, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.5959868224019167, |
| "grad_norm": 1.6106699705123901, |
| "learning_rate": 1.9414284431036074e-05, |
| "loss": 0.8314929962158203, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.5989817310572028, |
| "grad_norm": 1.576809287071228, |
| "learning_rate": 1.9402475928083166e-05, |
| "loss": 0.9036288261413574, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6019766397124887, |
| "grad_norm": 1.662400484085083, |
| "learning_rate": 1.9390553239862666e-05, |
| "loss": 0.8632070541381835, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.6049715483677748, |
| "grad_norm": 2.1808533668518066, |
| "learning_rate": 1.9378516511165733e-05, |
| "loss": 0.8544286727905274, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.6079664570230608, |
| "grad_norm": 1.6702479124069214, |
| "learning_rate": 1.9366365888168444e-05, |
| "loss": 0.8870140075683594, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.6109613656783468, |
| "grad_norm": 1.699459433555603, |
| "learning_rate": 1.9354101518430033e-05, |
| "loss": 0.8339980125427247, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.6139562743336329, |
| "grad_norm": 1.767531394958496, |
| "learning_rate": 1.9341723550891097e-05, |
| "loss": 0.8794610977172852, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.6169511829889188, |
| "grad_norm": 1.5786960124969482, |
| "learning_rate": 1.9329232135871775e-05, |
| "loss": 0.8840433120727539, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.6199460916442049, |
| "grad_norm": 1.7652090787887573, |
| "learning_rate": 1.931662742506994e-05, |
| "loss": 0.8960956573486328, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.6229410002994908, |
| "grad_norm": 1.8086838722229004, |
| "learning_rate": 1.930390957155934e-05, |
| "loss": 0.8429698944091797, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.6259359089547769, |
| "grad_norm": 1.7018482685089111, |
| "learning_rate": 1.9291078729787764e-05, |
| "loss": 0.8314028739929199, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.6289308176100629, |
| "grad_norm": 1.4827744960784912, |
| "learning_rate": 1.9278135055575126e-05, |
| "loss": 0.8367262840270996, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.6319257262653489, |
| "grad_norm": 1.7840453386306763, |
| "learning_rate": 1.9265078706111608e-05, |
| "loss": 0.8233530044555664, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 1.9676882028579712, |
| "learning_rate": 1.9251909839955742e-05, |
| "loss": 0.8611278533935547, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.637915543575921, |
| "grad_norm": 1.8171685934066772, |
| "learning_rate": 1.9238628617032483e-05, |
| "loss": 0.8771775245666504, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.6409104522312069, |
| "grad_norm": 1.8074554204940796, |
| "learning_rate": 1.922523519863126e-05, |
| "loss": 0.884306526184082, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.643905360886493, |
| "grad_norm": 1.8622629642486572, |
| "learning_rate": 1.9211729747404028e-05, |
| "loss": 0.8650590896606445, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.6469002695417789, |
| "grad_norm": 1.6245149374008179, |
| "learning_rate": 1.9198112427363275e-05, |
| "loss": 0.8924369812011719, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.649895178197065, |
| "grad_norm": 1.6202753782272339, |
| "learning_rate": 1.918438340388006e-05, |
| "loss": 0.8587064743041992, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.652890086852351, |
| "grad_norm": 1.955124020576477, |
| "learning_rate": 1.9170542843681984e-05, |
| "loss": 0.8569240570068359, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.655884995507637, |
| "grad_norm": 1.6515233516693115, |
| "learning_rate": 1.9156590914851157e-05, |
| "loss": 0.8982840538024902, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.6588799041629231, |
| "grad_norm": 1.8015732765197754, |
| "learning_rate": 1.9142527786822182e-05, |
| "loss": 0.8220060348510743, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.661874812818209, |
| "grad_norm": 1.583517074584961, |
| "learning_rate": 1.9128353630380076e-05, |
| "loss": 0.847236442565918, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.6648697214734951, |
| "grad_norm": 1.8341610431671143, |
| "learning_rate": 1.9114068617658207e-05, |
| "loss": 0.869542121887207, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.6678646301287811, |
| "grad_norm": 1.5468474626541138, |
| "learning_rate": 1.90996729221362e-05, |
| "loss": 0.8361228942871094, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.6708595387840671, |
| "grad_norm": 1.6672273874282837, |
| "learning_rate": 1.9085166718637835e-05, |
| "loss": 0.8854676246643066, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.6738544474393531, |
| "grad_norm": 1.6331413984298706, |
| "learning_rate": 1.907055018332891e-05, |
| "loss": 0.8510169982910156, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.6768493560946391, |
| "grad_norm": 1.8924387693405151, |
| "learning_rate": 1.9055823493715123e-05, |
| "loss": 0.8499082565307617, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.6798442647499251, |
| "grad_norm": 1.6619211435317993, |
| "learning_rate": 1.9040986828639892e-05, |
| "loss": 0.8638698577880859, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.6828391734052112, |
| "grad_norm": 1.6199733018875122, |
| "learning_rate": 1.9026040368282207e-05, |
| "loss": 0.8675064086914063, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.6858340820604971, |
| "grad_norm": 1.7399113178253174, |
| "learning_rate": 1.901098429415442e-05, |
| "loss": 0.8387475967407226, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.6888289907157832, |
| "grad_norm": 1.667663812637329, |
| "learning_rate": 1.8995818789100066e-05, |
| "loss": 0.8783481597900391, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6918238993710691, |
| "grad_norm": 1.7032908201217651, |
| "learning_rate": 1.8980544037291614e-05, |
| "loss": 0.8562976837158203, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.6948188080263552, |
| "grad_norm": 1.9778879880905151, |
| "learning_rate": 1.896516022422825e-05, |
| "loss": 0.8344745635986328, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.6978137166816412, |
| "grad_norm": 1.8774316310882568, |
| "learning_rate": 1.8949667536733614e-05, |
| "loss": 0.8148428916931152, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.7008086253369272, |
| "grad_norm": 1.6596498489379883, |
| "learning_rate": 1.8934066162953543e-05, |
| "loss": 0.8398752212524414, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.7038035339922133, |
| "grad_norm": 1.6424564123153687, |
| "learning_rate": 1.8918356292353775e-05, |
| "loss": 0.8635367393493653, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.7067984426474992, |
| "grad_norm": 1.7502802610397339, |
| "learning_rate": 1.890253811571765e-05, |
| "loss": 0.8395760536193848, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.7097933513027853, |
| "grad_norm": 1.7914620637893677, |
| "learning_rate": 1.8886611825143796e-05, |
| "loss": 0.809751319885254, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.7127882599580713, |
| "grad_norm": 1.614733099937439, |
| "learning_rate": 1.88705776140438e-05, |
| "loss": 0.8426692962646485, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.7157831686133573, |
| "grad_norm": 1.6434929370880127, |
| "learning_rate": 1.885443567713985e-05, |
| "loss": 0.8461108207702637, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.7187780772686433, |
| "grad_norm": 1.5342862606048584, |
| "learning_rate": 1.8838186210462365e-05, |
| "loss": 0.8506370544433594, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7217729859239294, |
| "grad_norm": 1.7060281038284302, |
| "learning_rate": 1.8821829411347642e-05, |
| "loss": 0.8571641921997071, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.7247678945792153, |
| "grad_norm": 1.5919580459594727, |
| "learning_rate": 1.8805365478435432e-05, |
| "loss": 0.8273300170898438, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.7277628032345014, |
| "grad_norm": 1.7425918579101562, |
| "learning_rate": 1.8788794611666536e-05, |
| "loss": 0.8230342864990234, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.7307577118897873, |
| "grad_norm": 1.6442525386810303, |
| "learning_rate": 1.877211701228038e-05, |
| "loss": 0.8350908279418945, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.7337526205450734, |
| "grad_norm": 1.6684730052947998, |
| "learning_rate": 1.875533288281257e-05, |
| "loss": 0.8504384994506836, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.7367475292003594, |
| "grad_norm": 1.6235419511795044, |
| "learning_rate": 1.8738442427092428e-05, |
| "loss": 0.8609309196472168, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.7397424378556454, |
| "grad_norm": 1.45209801197052, |
| "learning_rate": 1.8721445850240522e-05, |
| "loss": 0.8582953453063965, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.7427373465109314, |
| "grad_norm": 1.640078067779541, |
| "learning_rate": 1.870434335866618e-05, |
| "loss": 0.8432114601135254, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.7457322551662174, |
| "grad_norm": 1.6859060525894165, |
| "learning_rate": 1.8687135160064956e-05, |
| "loss": 0.8331222534179688, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.7487271638215034, |
| "grad_norm": 1.6803628206253052, |
| "learning_rate": 1.8669821463416157e-05, |
| "loss": 0.829715633392334, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7517220724767895, |
| "grad_norm": 1.5660089254379272, |
| "learning_rate": 1.8652402478980255e-05, |
| "loss": 0.8638070106506348, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.7547169811320755, |
| "grad_norm": 1.5286768674850464, |
| "learning_rate": 1.8634878418296362e-05, |
| "loss": 0.7757655143737793, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.7577118897873615, |
| "grad_norm": 1.5210187435150146, |
| "learning_rate": 1.8617249494179644e-05, |
| "loss": 0.7913604736328125, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.7607067984426475, |
| "grad_norm": 1.6953132152557373, |
| "learning_rate": 1.859951592071877e-05, |
| "loss": 0.7964819431304931, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.7637017070979335, |
| "grad_norm": 1.8429316282272339, |
| "learning_rate": 1.8581677913273267e-05, |
| "loss": 0.7994976043701172, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.7666966157532196, |
| "grad_norm": 1.6222190856933594, |
| "learning_rate": 1.856373568847093e-05, |
| "loss": 0.7941509246826172, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.7696915244085055, |
| "grad_norm": 1.379274845123291, |
| "learning_rate": 1.8545689464205193e-05, |
| "loss": 0.8207425117492676, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.7726864330637916, |
| "grad_norm": 1.6088320016860962, |
| "learning_rate": 1.8527539459632473e-05, |
| "loss": 0.8137792587280274, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.7756813417190775, |
| "grad_norm": 1.5247526168823242, |
| "learning_rate": 1.8509285895169516e-05, |
| "loss": 0.863805103302002, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.7786762503743636, |
| "grad_norm": 1.5615347623825073, |
| "learning_rate": 1.849092899249071e-05, |
| "loss": 0.828615379333496, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7816711590296496, |
| "grad_norm": 1.8369919061660767, |
| "learning_rate": 1.847246897452541e-05, |
| "loss": 0.8638320922851562, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.7846660676849356, |
| "grad_norm": 1.694499135017395, |
| "learning_rate": 1.8453906065455212e-05, |
| "loss": 0.8065310478210449, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.7876609763402216, |
| "grad_norm": 1.6315211057662964, |
| "learning_rate": 1.8435240490711247e-05, |
| "loss": 0.8603771209716797, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.7906558849955077, |
| "grad_norm": 1.7139288187026978, |
| "learning_rate": 1.8416472476971424e-05, |
| "loss": 0.8373805999755859, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.7936507936507936, |
| "grad_norm": 1.6665681600570679, |
| "learning_rate": 1.8397602252157704e-05, |
| "loss": 0.8208301544189454, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.7966457023060797, |
| "grad_norm": 1.6627062559127808, |
| "learning_rate": 1.8378630045433298e-05, |
| "loss": 0.8139615058898926, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.7996406109613656, |
| "grad_norm": 1.4767072200775146, |
| "learning_rate": 1.835955608719992e-05, |
| "loss": 0.8225536346435547, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.8026355196166517, |
| "grad_norm": 1.6780694723129272, |
| "learning_rate": 1.8340380609094962e-05, |
| "loss": 0.842643928527832, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.8056304282719378, |
| "grad_norm": 1.7037646770477295, |
| "learning_rate": 1.8321103843988695e-05, |
| "loss": 0.8341219902038575, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.8086253369272237, |
| "grad_norm": 1.53456449508667, |
| "learning_rate": 1.8301726025981427e-05, |
| "loss": 0.814063835144043, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8116202455825098, |
| "grad_norm": 1.7273889780044556, |
| "learning_rate": 1.828224739040069e-05, |
| "loss": 0.7797497749328614, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.8146151542377957, |
| "grad_norm": 1.6220978498458862, |
| "learning_rate": 1.8262668173798336e-05, |
| "loss": 0.8151215553283692, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.8176100628930818, |
| "grad_norm": 1.5839869976043701, |
| "learning_rate": 1.8242988613947714e-05, |
| "loss": 0.8338854789733887, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.8206049715483678, |
| "grad_norm": 1.6918904781341553, |
| "learning_rate": 1.822320894984074e-05, |
| "loss": 0.8016552925109863, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.8235998802036538, |
| "grad_norm": 1.532352328300476, |
| "learning_rate": 1.8203329421685024e-05, |
| "loss": 0.7859272956848145, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.8265947888589398, |
| "grad_norm": 1.5221245288848877, |
| "learning_rate": 1.8183350270900936e-05, |
| "loss": 0.8459560394287109, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.8295896975142258, |
| "grad_norm": 1.7007802724838257, |
| "learning_rate": 1.8163271740118687e-05, |
| "loss": 0.8190437316894531, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.8325846061695118, |
| "grad_norm": 1.5608899593353271, |
| "learning_rate": 1.8143094073175365e-05, |
| "loss": 0.8271324157714843, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.8355795148247979, |
| "grad_norm": 1.753761649131775, |
| "learning_rate": 1.8122817515112e-05, |
| "loss": 0.8533936500549316, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.8385744234800838, |
| "grad_norm": 1.615286946296692, |
| "learning_rate": 1.8102442312170553e-05, |
| "loss": 0.8588766098022461, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.8415693321353699, |
| "grad_norm": 1.587636113166809, |
| "learning_rate": 1.8081968711790964e-05, |
| "loss": 0.8210906028747559, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.8445642407906558, |
| "grad_norm": 1.455330491065979, |
| "learning_rate": 1.8061396962608115e-05, |
| "loss": 0.8196340560913086, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.8475591494459419, |
| "grad_norm": 1.492753267288208, |
| "learning_rate": 1.804072731444883e-05, |
| "loss": 0.8112252235412598, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.8505540581012279, |
| "grad_norm": 1.5428729057312012, |
| "learning_rate": 1.801996001832883e-05, |
| "loss": 0.7767475128173829, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.8535489667565139, |
| "grad_norm": 1.4639486074447632, |
| "learning_rate": 1.79990953264497e-05, |
| "loss": 0.7648550987243652, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.8565438754118, |
| "grad_norm": 1.5121395587921143, |
| "learning_rate": 1.7978133492195802e-05, |
| "loss": 0.8193672180175782, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.859538784067086, |
| "grad_norm": 1.4281710386276245, |
| "learning_rate": 1.7957074770131226e-05, |
| "loss": 0.8272466659545898, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.862533692722372, |
| "grad_norm": 1.834460973739624, |
| "learning_rate": 1.7935919415996665e-05, |
| "loss": 0.7895036697387695, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.865528601377658, |
| "grad_norm": 1.4872559309005737, |
| "learning_rate": 1.7914667686706347e-05, |
| "loss": 0.8052210807800293, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.868523510032944, |
| "grad_norm": 1.609100580215454, |
| "learning_rate": 1.7893319840344886e-05, |
| "loss": 0.8197463989257813, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.87151841868823, |
| "grad_norm": 1.4353001117706299, |
| "learning_rate": 1.787187613616416e-05, |
| "loss": 0.8479232788085938, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.8745133273435161, |
| "grad_norm": 1.6909197568893433, |
| "learning_rate": 1.7850336834580166e-05, |
| "loss": 0.8297075271606446, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.877508235998802, |
| "grad_norm": 1.90815007686615, |
| "learning_rate": 1.7828702197169842e-05, |
| "loss": 0.8151211738586426, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.8805031446540881, |
| "grad_norm": 1.6685028076171875, |
| "learning_rate": 1.7806972486667914e-05, |
| "loss": 0.8078549385070801, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.883498053309374, |
| "grad_norm": 1.5216376781463623, |
| "learning_rate": 1.778514796696367e-05, |
| "loss": 0.7762706279754639, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.8864929619646601, |
| "grad_norm": 1.4812510013580322, |
| "learning_rate": 1.7763228903097807e-05, |
| "loss": 0.8366207122802735, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.889487870619946, |
| "grad_norm": 1.5821658372879028, |
| "learning_rate": 1.7741215561259155e-05, |
| "loss": 0.8076998710632324, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.8924827792752321, |
| "grad_norm": 1.7257674932479858, |
| "learning_rate": 1.7719108208781488e-05, |
| "loss": 0.7889442443847656, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.8954776879305181, |
| "grad_norm": 1.514564871788025, |
| "learning_rate": 1.7696907114140254e-05, |
| "loss": 0.800442123413086, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.8984725965858041, |
| "grad_norm": 1.5025800466537476, |
| "learning_rate": 1.7674612546949325e-05, |
| "loss": 0.8127084732055664, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9014675052410901, |
| "grad_norm": 1.5609087944030762, |
| "learning_rate": 1.7652224777957714e-05, |
| "loss": 0.8040850639343262, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.9044624138963762, |
| "grad_norm": 1.493508219718933, |
| "learning_rate": 1.762974407904631e-05, |
| "loss": 0.7854836463928223, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.9074573225516622, |
| "grad_norm": 1.6819945573806763, |
| "learning_rate": 1.7607170723224534e-05, |
| "loss": 0.7625170707702636, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.9104522312069482, |
| "grad_norm": 1.6430078744888306, |
| "learning_rate": 1.758450498462706e-05, |
| "loss": 0.8078180313110351, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.9134471398622342, |
| "grad_norm": 1.4024254083633423, |
| "learning_rate": 1.7561747138510487e-05, |
| "loss": 0.7755331516265869, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.9164420485175202, |
| "grad_norm": 1.6579421758651733, |
| "learning_rate": 1.7538897461249956e-05, |
| "loss": 0.825098991394043, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.9194369571728063, |
| "grad_norm": 1.4785950183868408, |
| "learning_rate": 1.7515956230335844e-05, |
| "loss": 0.7357244491577148, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.9224318658280922, |
| "grad_norm": 1.4858529567718506, |
| "learning_rate": 1.7492923724370355e-05, |
| "loss": 0.7988026142120361, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.9254267744833783, |
| "grad_norm": 1.3988063335418701, |
| "learning_rate": 1.7469800223064172e-05, |
| "loss": 0.8232571601867675, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.9284216831386642, |
| "grad_norm": 1.4646496772766113, |
| "learning_rate": 1.744658600723302e-05, |
| "loss": 0.8289719581604004, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.9314165917939503, |
| "grad_norm": 1.9083791971206665, |
| "learning_rate": 1.742328135879429e-05, |
| "loss": 0.7919368743896484, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.9344115004492363, |
| "grad_norm": 1.7467341423034668, |
| "learning_rate": 1.7399886560763598e-05, |
| "loss": 0.7916288375854492, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.9374064091045223, |
| "grad_norm": 1.7119777202606201, |
| "learning_rate": 1.7376401897251357e-05, |
| "loss": 0.757789134979248, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.9404013177598083, |
| "grad_norm": 1.4119514226913452, |
| "learning_rate": 1.7352827653459307e-05, |
| "loss": 0.7901122093200683, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.9433962264150944, |
| "grad_norm": 1.2908947467803955, |
| "learning_rate": 1.732916411567708e-05, |
| "loss": 0.7934576988220214, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.9463911350703803, |
| "grad_norm": 1.5804283618927002, |
| "learning_rate": 1.730541157127871e-05, |
| "loss": 0.7917113304138184, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.9493860437256664, |
| "grad_norm": 1.706937313079834, |
| "learning_rate": 1.728157030871913e-05, |
| "loss": 0.7889931678771973, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 1.4798022508621216, |
| "learning_rate": 1.7257640617530697e-05, |
| "loss": 0.8395463943481445, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.9553758610362384, |
| "grad_norm": 1.4213306903839111, |
| "learning_rate": 1.7233622788319646e-05, |
| "loss": 0.8060663223266602, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.9583707696915245, |
| "grad_norm": 1.4119057655334473, |
| "learning_rate": 1.7209517112762588e-05, |
| "loss": 0.7896999835968017, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.9613656783468104, |
| "grad_norm": 1.5566002130508423, |
| "learning_rate": 1.7185323883602943e-05, |
| "loss": 0.8031165122985839, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.9643605870020965, |
| "grad_norm": 1.4153759479522705, |
| "learning_rate": 1.7161043394647407e-05, |
| "loss": 0.759066104888916, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.9673554956573824, |
| "grad_norm": 1.9067392349243164, |
| "learning_rate": 1.7136675940762367e-05, |
| "loss": 0.7777122497558594, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.9703504043126685, |
| "grad_norm": 1.4964123964309692, |
| "learning_rate": 1.711222181787033e-05, |
| "loss": 0.7858468055725097, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.9733453129679545, |
| "grad_norm": 1.4618581533432007, |
| "learning_rate": 1.7087681322946328e-05, |
| "loss": 0.7849390983581543, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.9763402216232405, |
| "grad_norm": 1.601406216621399, |
| "learning_rate": 1.7063054754014303e-05, |
| "loss": 0.7938404560089112, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.9793351302785265, |
| "grad_norm": 1.4281142950057983, |
| "learning_rate": 1.70383424101435e-05, |
| "loss": 0.7438766479492187, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.9823300389338125, |
| "grad_norm": 1.5444836616516113, |
| "learning_rate": 1.7013544591444827e-05, |
| "loss": 0.7451802730560303, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.9853249475890985, |
| "grad_norm": 1.4732089042663574, |
| "learning_rate": 1.698866159906722e-05, |
| "loss": 0.8167963027954102, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.9883198562443846, |
| "grad_norm": 1.870408535003662, |
| "learning_rate": 1.6963693735193962e-05, |
| "loss": 0.8137873649597168, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.9913147648996705, |
| "grad_norm": 1.4198145866394043, |
| "learning_rate": 1.693864130303905e-05, |
| "loss": 0.770867919921875, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.9943096735549566, |
| "grad_norm": 1.639811635017395, |
| "learning_rate": 1.6913504606843474e-05, |
| "loss": 0.8095382690429688, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.9973045822102425, |
| "grad_norm": 1.4918293952941895, |
| "learning_rate": 1.688828395187156e-05, |
| "loss": 0.7985510349273681, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.0002994908655285, |
| "grad_norm": 1.197721004486084, |
| "learning_rate": 1.6862979644407227e-05, |
| "loss": 0.7350101470947266, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.0032943995208146, |
| "grad_norm": 1.5143485069274902, |
| "learning_rate": 1.6837591991750293e-05, |
| "loss": 0.7106464385986329, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.0062893081761006, |
| "grad_norm": 1.7330113649368286, |
| "learning_rate": 1.6812121302212728e-05, |
| "loss": 0.7185450553894043, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.0092842168313867, |
| "grad_norm": 1.672824740409851, |
| "learning_rate": 1.6786567885114924e-05, |
| "loss": 0.700438404083252, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.0122791254866728, |
| "grad_norm": 1.5598795413970947, |
| "learning_rate": 1.6760932050781927e-05, |
| "loss": 0.6911828994750977, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.0152740341419586, |
| "grad_norm": 1.6879022121429443, |
| "learning_rate": 1.6735214110539667e-05, |
| "loss": 0.7052880764007569, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.0182689427972447, |
| "grad_norm": 1.764460802078247, |
| "learning_rate": 1.670941437671119e-05, |
| "loss": 0.731821346282959, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.0212638514525307, |
| "grad_norm": 1.6526857614517212, |
| "learning_rate": 1.668353316261285e-05, |
| "loss": 0.7477367877960205, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.0242587601078168, |
| "grad_norm": 1.6531809568405151, |
| "learning_rate": 1.665757078255052e-05, |
| "loss": 0.7096085548400879, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.0272536687631026, |
| "grad_norm": 1.4806548357009888, |
| "learning_rate": 1.6631527551815757e-05, |
| "loss": 0.711548137664795, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.0302485774183887, |
| "grad_norm": 1.67629075050354, |
| "learning_rate": 1.6605403786681992e-05, |
| "loss": 0.7366076946258545, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.0332434860736748, |
| "grad_norm": 1.3547171354293823, |
| "learning_rate": 1.6579199804400667e-05, |
| "loss": 0.6797126770019531, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.0362383947289608, |
| "grad_norm": 1.343342900276184, |
| "learning_rate": 1.6552915923197404e-05, |
| "loss": 0.6926548480987549, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.0392333033842467, |
| "grad_norm": 1.5611882209777832, |
| "learning_rate": 1.652655246226813e-05, |
| "loss": 0.7226381778717041, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.0422282120395328, |
| "grad_norm": 1.3495250940322876, |
| "learning_rate": 1.65001097417752e-05, |
| "loss": 0.6564831733703613, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.0452231206948188, |
| "grad_norm": 1.4157185554504395, |
| "learning_rate": 1.6473588082843513e-05, |
| "loss": 0.659664249420166, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.0482180293501049, |
| "grad_norm": 1.4454340934753418, |
| "learning_rate": 1.6446987807556605e-05, |
| "loss": 0.735554313659668, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0512129380053907, |
| "grad_norm": 1.6114473342895508, |
| "learning_rate": 1.642030923895275e-05, |
| "loss": 0.7064272880554199, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.0542078466606768, |
| "grad_norm": 1.393011450767517, |
| "learning_rate": 1.639355270102102e-05, |
| "loss": 0.7071351051330567, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.0572027553159629, |
| "grad_norm": 1.4447367191314697, |
| "learning_rate": 1.6366718518697366e-05, |
| "loss": 0.6948044776916504, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.060197663971249, |
| "grad_norm": 1.5719486474990845, |
| "learning_rate": 1.633980701786066e-05, |
| "loss": 0.6962141036987305, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.063192572626535, |
| "grad_norm": 1.5820865631103516, |
| "learning_rate": 1.6312818525328756e-05, |
| "loss": 0.7146442413330079, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.0661874812818208, |
| "grad_norm": 1.5105618238449097, |
| "learning_rate": 1.628575336885449e-05, |
| "loss": 0.6941755771636963, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.069182389937107, |
| "grad_norm": 1.6371991634368896, |
| "learning_rate": 1.6258611877121737e-05, |
| "loss": 0.6982086658477783, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.072177298592393, |
| "grad_norm": 1.484971523284912, |
| "learning_rate": 1.6231394379741386e-05, |
| "loss": 0.7136051177978515, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.075172207247679, |
| "grad_norm": 1.6102113723754883, |
| "learning_rate": 1.620410120724736e-05, |
| "loss": 0.699164342880249, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.0781671159029649, |
| "grad_norm": 1.6524529457092285, |
| "learning_rate": 1.6176732691092584e-05, |
| "loss": 0.6819294929504395, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.081162024558251, |
| "grad_norm": 1.552896499633789, |
| "learning_rate": 1.6149289163644978e-05, |
| "loss": 0.6616555213928222, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.084156933213537, |
| "grad_norm": 1.5037239789962769, |
| "learning_rate": 1.612177095818341e-05, |
| "loss": 0.7165458679199219, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.087151841868823, |
| "grad_norm": 1.4005266427993774, |
| "learning_rate": 1.6094178408893648e-05, |
| "loss": 0.6788459777832031, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.090146750524109, |
| "grad_norm": 1.5649514198303223, |
| "learning_rate": 1.606651185086431e-05, |
| "loss": 0.6839639663696289, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.093141659179395, |
| "grad_norm": 1.4369744062423706, |
| "learning_rate": 1.603877162008278e-05, |
| "loss": 0.6825023651123047, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.096136567834681, |
| "grad_norm": 1.4755173921585083, |
| "learning_rate": 1.601095805343114e-05, |
| "loss": 0.7037545204162597, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.0991314764899671, |
| "grad_norm": 1.3595280647277832, |
| "learning_rate": 1.598307148868208e-05, |
| "loss": 0.6997042655944824, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.1021263851452532, |
| "grad_norm": 1.6197599172592163, |
| "learning_rate": 1.5955112264494784e-05, |
| "loss": 0.7151602745056153, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.105121293800539, |
| "grad_norm": 1.5634205341339111, |
| "learning_rate": 1.5927080720410836e-05, |
| "loss": 0.6688960075378418, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.108116202455825, |
| "grad_norm": 1.6595895290374756, |
| "learning_rate": 1.5898977196850066e-05, |
| "loss": 0.7106626510620118, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 1.4825279712677002, |
| "learning_rate": 1.5870802035106452e-05, |
| "loss": 0.7196572303771973, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.1141060197663972, |
| "grad_norm": 1.4523088932037354, |
| "learning_rate": 1.584255557734395e-05, |
| "loss": 0.7004715442657471, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.117100928421683, |
| "grad_norm": 1.4041231870651245, |
| "learning_rate": 1.5814238166592352e-05, |
| "loss": 0.7263636112213134, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.1200958370769691, |
| "grad_norm": 1.5008774995803833, |
| "learning_rate": 1.5785850146743112e-05, |
| "loss": 0.6979952812194824, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.1230907457322552, |
| "grad_norm": 1.6422677040100098, |
| "learning_rate": 1.5757391862545175e-05, |
| "loss": 0.6974923133850097, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.1260856543875413, |
| "grad_norm": 1.6295057535171509, |
| "learning_rate": 1.5728863659600785e-05, |
| "loss": 0.6878085136413574, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.1290805630428271, |
| "grad_norm": 1.5771390199661255, |
| "learning_rate": 1.570026588436129e-05, |
| "loss": 0.7069286823272705, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.1320754716981132, |
| "grad_norm": 1.5606156587600708, |
| "learning_rate": 1.5671598884122943e-05, |
| "loss": 0.7105122566223144, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.1350703803533992, |
| "grad_norm": 1.573263168334961, |
| "learning_rate": 1.5642863007022673e-05, |
| "loss": 0.6617315292358399, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.1380652890086853, |
| "grad_norm": 1.581919550895691, |
| "learning_rate": 1.561405860203386e-05, |
| "loss": 0.6750922679901123, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.1410601976639712, |
| "grad_norm": 1.6347953081130981, |
| "learning_rate": 1.5585186018962096e-05, |
| "loss": 0.6865742683410645, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.1440551063192572, |
| "grad_norm": 1.6192823648452759, |
| "learning_rate": 1.555624560844095e-05, |
| "loss": 0.6836994647979736, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.1470500149745433, |
| "grad_norm": 1.49833345413208, |
| "learning_rate": 1.5527237721927682e-05, |
| "loss": 0.7058408737182618, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.1500449236298294, |
| "grad_norm": 1.5722405910491943, |
| "learning_rate": 1.5498162711699013e-05, |
| "loss": 0.6902894973754883, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.1530398322851152, |
| "grad_norm": 1.4945104122161865, |
| "learning_rate": 1.546902093084681e-05, |
| "loss": 0.6969739437103272, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.1560347409404013, |
| "grad_norm": 1.5300406217575073, |
| "learning_rate": 1.5439812733273814e-05, |
| "loss": 0.6966294288635254, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.1590296495956873, |
| "grad_norm": 1.4756025075912476, |
| "learning_rate": 1.541053847368935e-05, |
| "loss": 0.6721511840820312, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.1620245582509734, |
| "grad_norm": 1.3550491333007812, |
| "learning_rate": 1.5381198507605008e-05, |
| "loss": 0.6645829200744628, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.1650194669062595, |
| "grad_norm": 1.5267562866210938, |
| "learning_rate": 1.5351793191330328e-05, |
| "loss": 0.7032648086547851, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.1680143755615453, |
| "grad_norm": 1.5716965198516846, |
| "learning_rate": 1.5322322881968476e-05, |
| "loss": 0.7047882556915284, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.1710092842168314, |
| "grad_norm": 1.3805803060531616, |
| "learning_rate": 1.5292787937411903e-05, |
| "loss": 0.6749917030334472, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.1740041928721174, |
| "grad_norm": 1.4217103719711304, |
| "learning_rate": 1.5263188716338e-05, |
| "loss": 0.6801820755004883, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.1769991015274035, |
| "grad_norm": 1.5605568885803223, |
| "learning_rate": 1.5233525578204745e-05, |
| "loss": 0.6716075897216797, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.1799940101826893, |
| "grad_norm": 1.5446428060531616, |
| "learning_rate": 1.5203798883246334e-05, |
| "loss": 0.6891654968261719, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.1829889188379754, |
| "grad_norm": 1.4253835678100586, |
| "learning_rate": 1.517400899246881e-05, |
| "loss": 0.702687931060791, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.1859838274932615, |
| "grad_norm": 1.7337391376495361, |
| "learning_rate": 1.5144156267645675e-05, |
| "loss": 0.6723766326904297, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.1889787361485475, |
| "grad_norm": 1.723059892654419, |
| "learning_rate": 1.51142410713135e-05, |
| "loss": 0.6978803634643554, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.1919736448038334, |
| "grad_norm": 1.748765230178833, |
| "learning_rate": 1.5084263766767522e-05, |
| "loss": 0.6807281494140625, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.1949685534591195, |
| "grad_norm": 1.592793583869934, |
| "learning_rate": 1.505422471805722e-05, |
| "loss": 0.6684311866760254, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.1979634621144055, |
| "grad_norm": 1.5449707508087158, |
| "learning_rate": 1.502412428998192e-05, |
| "loss": 0.682776689529419, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.2009583707696916, |
| "grad_norm": 1.5067434310913086, |
| "learning_rate": 1.4993962848086341e-05, |
| "loss": 0.6774695873260498, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.2039532794249777, |
| "grad_norm": 1.4419279098510742, |
| "learning_rate": 1.4963740758656167e-05, |
| "loss": 0.6701112270355225, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.2069481880802635, |
| "grad_norm": 1.4293246269226074, |
| "learning_rate": 1.4933458388713591e-05, |
| "loss": 0.6676129341125489, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.2099430967355496, |
| "grad_norm": 1.5989415645599365, |
| "learning_rate": 1.4903116106012867e-05, |
| "loss": 0.7103249549865722, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.2129380053908356, |
| "grad_norm": 1.6779661178588867, |
| "learning_rate": 1.4872714279035842e-05, |
| "loss": 0.660029125213623, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.2159329140461215, |
| "grad_norm": 1.4901732206344604, |
| "learning_rate": 1.4842253276987475e-05, |
| "loss": 0.6614209175109863, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.2189278227014075, |
| "grad_norm": 1.4033241271972656, |
| "learning_rate": 1.4811733469791357e-05, |
| "loss": 0.7145218849182129, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.2219227313566936, |
| "grad_norm": 1.7574478387832642, |
| "learning_rate": 1.478115522808522e-05, |
| "loss": 0.6761277675628662, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.2249176400119797, |
| "grad_norm": 1.482534408569336, |
| "learning_rate": 1.4750518923216435e-05, |
| "loss": 0.6484230041503907, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.2279125486672657, |
| "grad_norm": 1.5970929861068726, |
| "learning_rate": 1.4719824927237497e-05, |
| "loss": 0.6735719680786133, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.2309074573225516, |
| "grad_norm": 1.5238401889801025, |
| "learning_rate": 1.4689073612901525e-05, |
| "loss": 0.7009137153625489, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.2339023659778376, |
| "grad_norm": 1.491351842880249, |
| "learning_rate": 1.4658265353657708e-05, |
| "loss": 0.6697447776794434, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.2368972746331237, |
| "grad_norm": 1.6152558326721191, |
| "learning_rate": 1.4627400523646788e-05, |
| "loss": 0.7037046909332275, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.2398921832884098, |
| "grad_norm": 1.5510462522506714, |
| "learning_rate": 1.4596479497696515e-05, |
| "loss": 0.6818698883056641, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.2428870919436956, |
| "grad_norm": 1.4972020387649536, |
| "learning_rate": 1.4565502651317084e-05, |
| "loss": 0.7084139823913574, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.2458820005989817, |
| "grad_norm": 1.5813428163528442, |
| "learning_rate": 1.4534470360696596e-05, |
| "loss": 0.6693055152893066, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.2488769092542678, |
| "grad_norm": 1.3404837846755981, |
| "learning_rate": 1.4503383002696463e-05, |
| "loss": 0.6707363128662109, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.2518718179095538, |
| "grad_norm": 1.5015774965286255, |
| "learning_rate": 1.4472240954846853e-05, |
| "loss": 0.6856432914733886, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.2548667265648397, |
| "grad_norm": 1.8516919612884521, |
| "learning_rate": 1.4441044595342092e-05, |
| "loss": 0.6972317218780517, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.2578616352201257, |
| "grad_norm": 1.5744614601135254, |
| "learning_rate": 1.4409794303036083e-05, |
| "loss": 0.6880950927734375, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.2608565438754118, |
| "grad_norm": 1.3624542951583862, |
| "learning_rate": 1.4378490457437687e-05, |
| "loss": 0.6077318668365479, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.2638514525306979, |
| "grad_norm": 1.6590559482574463, |
| "learning_rate": 1.4347133438706138e-05, |
| "loss": 0.676889705657959, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.266846361185984, |
| "grad_norm": 1.4079532623291016, |
| "learning_rate": 1.4315723627646403e-05, |
| "loss": 0.6575328350067139, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.2698412698412698, |
| "grad_norm": 1.3833630084991455, |
| "learning_rate": 1.4284261405704572e-05, |
| "loss": 0.6833572387695312, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.2728361784965558, |
| "grad_norm": 1.5398093461990356, |
| "learning_rate": 1.4252747154963223e-05, |
| "loss": 0.7134138584136963, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.275831087151842, |
| "grad_norm": 1.440406322479248, |
| "learning_rate": 1.4221181258136779e-05, |
| "loss": 0.6839028835296631, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.2788259958071277, |
| "grad_norm": 1.5981833934783936, |
| "learning_rate": 1.4189564098566861e-05, |
| "loss": 0.6973752975463867, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.281820904462414, |
| "grad_norm": 1.3795045614242554, |
| "learning_rate": 1.415789606021764e-05, |
| "loss": 0.6336652278900147, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.2848158131176999, |
| "grad_norm": 1.555843472480774, |
| "learning_rate": 1.4126177527671157e-05, |
| "loss": 0.7054344654083252, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.287810721772986, |
| "grad_norm": 1.805177092552185, |
| "learning_rate": 1.4094408886122671e-05, |
| "loss": 0.7191495895385742, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.290805630428272, |
| "grad_norm": 1.6533230543136597, |
| "learning_rate": 1.406259052137597e-05, |
| "loss": 0.6862345695495605, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.2938005390835579, |
| "grad_norm": 1.4151486158370972, |
| "learning_rate": 1.4030722819838686e-05, |
| "loss": 0.6652461528778076, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.296795447738844, |
| "grad_norm": 1.5951013565063477, |
| "learning_rate": 1.3998806168517618e-05, |
| "loss": 0.6539525508880615, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.29979035639413, |
| "grad_norm": 1.359511375427246, |
| "learning_rate": 1.3966840955014001e-05, |
| "loss": 0.6631481170654296, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.302785265049416, |
| "grad_norm": 1.619667649269104, |
| "learning_rate": 1.3934827567518832e-05, |
| "loss": 0.6702329635620117, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.3057801737047021, |
| "grad_norm": 1.466314673423767, |
| "learning_rate": 1.3902766394808135e-05, |
| "loss": 0.6989962100982666, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.308775082359988, |
| "grad_norm": 1.456148386001587, |
| "learning_rate": 1.387065782623825e-05, |
| "loss": 0.6782450199127197, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.311769991015274, |
| "grad_norm": 1.3850840330123901, |
| "learning_rate": 1.383850225174109e-05, |
| "loss": 0.6970182418823242, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.31476489967056, |
| "grad_norm": 1.3463149070739746, |
| "learning_rate": 1.3806300061819431e-05, |
| "loss": 0.6578661441802979, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.317759808325846, |
| "grad_norm": 1.5610235929489136, |
| "learning_rate": 1.3774051647542143e-05, |
| "loss": 0.6358757019042969, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.320754716981132, |
| "grad_norm": 1.673736572265625, |
| "learning_rate": 1.374175740053946e-05, |
| "loss": 0.661113166809082, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.323749625636418, |
| "grad_norm": 1.4001795053482056, |
| "learning_rate": 1.3709417712998206e-05, |
| "loss": 0.6617262363433838, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.3267445342917041, |
| "grad_norm": 1.2105085849761963, |
| "learning_rate": 1.3677032977657051e-05, |
| "loss": 0.6960249900817871, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.3297394429469902, |
| "grad_norm": 1.409833312034607, |
| "learning_rate": 1.3644603587801737e-05, |
| "loss": 0.69888334274292, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.332734351602276, |
| "grad_norm": 1.5228267908096313, |
| "learning_rate": 1.3612129937260288e-05, |
| "loss": 0.6690874099731445, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.3357292602575621, |
| "grad_norm": 1.4134560823440552, |
| "learning_rate": 1.3579612420398245e-05, |
| "loss": 0.686200761795044, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.3387241689128482, |
| "grad_norm": 1.491080641746521, |
| "learning_rate": 1.3547051432113862e-05, |
| "loss": 0.687087869644165, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.3417190775681342, |
| "grad_norm": 1.3831361532211304, |
| "learning_rate": 1.3514447367833325e-05, |
| "loss": 0.6945667266845703, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.3447139862234203, |
| "grad_norm": 1.3812413215637207, |
| "learning_rate": 1.3481800623505937e-05, |
| "loss": 0.701347827911377, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.3477088948787062, |
| "grad_norm": 1.4374605417251587, |
| "learning_rate": 1.3449111595599316e-05, |
| "loss": 0.711556339263916, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.3507038035339922, |
| "grad_norm": 1.4586987495422363, |
| "learning_rate": 1.3416380681094578e-05, |
| "loss": 0.6958023071289062, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.3536987121892783, |
| "grad_norm": 1.4740244150161743, |
| "learning_rate": 1.338360827748152e-05, |
| "loss": 0.6627859115600586, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.3566936208445641, |
| "grad_norm": 1.3693221807479858, |
| "learning_rate": 1.3350794782753788e-05, |
| "loss": 0.6928750038146972, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.3596885294998502, |
| "grad_norm": 1.3615459203720093, |
| "learning_rate": 1.3317940595404046e-05, |
| "loss": 0.7074526786804199, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.3626834381551363, |
| "grad_norm": 1.4550254344940186, |
| "learning_rate": 1.3285046114419133e-05, |
| "loss": 0.6495938301086426, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.3656783468104223, |
| "grad_norm": 1.4986083507537842, |
| "learning_rate": 1.3252111739275226e-05, |
| "loss": 0.6903128623962402, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.3686732554657084, |
| "grad_norm": 1.5021880865097046, |
| "learning_rate": 1.321913786993298e-05, |
| "loss": 0.6855093955993652, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.3716681641209942, |
| "grad_norm": 1.435672402381897, |
| "learning_rate": 1.3186124906832678e-05, |
| "loss": 0.6734979629516602, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.3746630727762803, |
| "grad_norm": 1.5334513187408447, |
| "learning_rate": 1.3153073250889354e-05, |
| "loss": 0.6375434398651123, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.3776579814315664, |
| "grad_norm": 1.5921248197555542, |
| "learning_rate": 1.311998330348795e-05, |
| "loss": 0.6622870445251465, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.3806528900868522, |
| "grad_norm": 1.2265634536743164, |
| "learning_rate": 1.308685546647841e-05, |
| "loss": 0.6761940002441407, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.3836477987421385, |
| "grad_norm": 1.3404898643493652, |
| "learning_rate": 1.3053690142170827e-05, |
| "loss": 0.696360969543457, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.3866427073974243, |
| "grad_norm": 1.4971781969070435, |
| "learning_rate": 1.3020487733330547e-05, |
| "loss": 0.6800951480865478, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.3896376160527104, |
| "grad_norm": 1.3999356031417847, |
| "learning_rate": 1.2987248643173267e-05, |
| "loss": 0.673220443725586, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.3926325247079965, |
| "grad_norm": 1.6020697355270386, |
| "learning_rate": 1.2953973275360156e-05, |
| "loss": 0.6516348838806152, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.3956274333632823, |
| "grad_norm": 1.36565101146698, |
| "learning_rate": 1.2920662033992946e-05, |
| "loss": 0.6564604759216308, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.3986223420185684, |
| "grad_norm": 1.6752657890319824, |
| "learning_rate": 1.2887315323609016e-05, |
| "loss": 0.6703821182250976, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.4016172506738545, |
| "grad_norm": 1.5048459768295288, |
| "learning_rate": 1.2853933549176492e-05, |
| "loss": 0.6673481464385986, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.4046121593291405, |
| "grad_norm": 1.5359078645706177, |
| "learning_rate": 1.2820517116089321e-05, |
| "loss": 0.6794118881225586, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.4076070679844266, |
| "grad_norm": 1.432215690612793, |
| "learning_rate": 1.2787066430162355e-05, |
| "loss": 0.6648625373840332, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.4106019766397124, |
| "grad_norm": 1.418803334236145, |
| "learning_rate": 1.2753581897626419e-05, |
| "loss": 0.6332767486572266, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.4135968852949985, |
| "grad_norm": 1.5143288373947144, |
| "learning_rate": 1.2720063925123367e-05, |
| "loss": 0.7153759002685547, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.4165917939502846, |
| "grad_norm": 1.3891148567199707, |
| "learning_rate": 1.2686512919701167e-05, |
| "loss": 0.63809814453125, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.4195867026055704, |
| "grad_norm": 1.3475419282913208, |
| "learning_rate": 1.2652929288808933e-05, |
| "loss": 0.6704463958740234, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.4225816112608565, |
| "grad_norm": 1.290427327156067, |
| "learning_rate": 1.2619313440291995e-05, |
| "loss": 0.6674720764160156, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.4255765199161425, |
| "grad_norm": 1.653327465057373, |
| "learning_rate": 1.2585665782386938e-05, |
| "loss": 0.6292222499847412, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 1.650072455406189, |
| "learning_rate": 1.2551986723716642e-05, |
| "loss": 0.6493176460266114, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.4315663372267147, |
| "grad_norm": 1.5939024686813354, |
| "learning_rate": 1.2518276673285332e-05, |
| "loss": 0.7008792877197265, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.4345612458820005, |
| "grad_norm": 1.2825090885162354, |
| "learning_rate": 1.2484536040473593e-05, |
| "loss": 0.6760101318359375, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.4375561545372866, |
| "grad_norm": 1.2787253856658936, |
| "learning_rate": 1.245076523503341e-05, |
| "loss": 0.6585366249084472, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.4405510631925726, |
| "grad_norm": 1.4136161804199219, |
| "learning_rate": 1.2416964667083193e-05, |
| "loss": 0.6975108623504639, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.4435459718478587, |
| "grad_norm": 1.698377013206482, |
| "learning_rate": 1.238313474710279e-05, |
| "loss": 0.6626195430755615, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.4465408805031448, |
| "grad_norm": 1.4335144758224487, |
| "learning_rate": 1.2349275885928504e-05, |
| "loss": 0.6314863204956055, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.4495357891584306, |
| "grad_norm": 1.373105764389038, |
| "learning_rate": 1.2315388494748109e-05, |
| "loss": 0.6849304676055908, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.4525306978137167, |
| "grad_norm": 1.415645718574524, |
| "learning_rate": 1.2281472985095848e-05, |
| "loss": 0.6753826141357422, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.4555256064690028, |
| "grad_norm": 1.2998967170715332, |
| "learning_rate": 1.2247529768847439e-05, |
| "loss": 0.6722857475280761, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.4585205151242886, |
| "grad_norm": 1.4371323585510254, |
| "learning_rate": 1.2213559258215084e-05, |
| "loss": 0.646511459350586, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.4615154237795747, |
| "grad_norm": 1.6259355545043945, |
| "learning_rate": 1.2179561865742437e-05, |
| "loss": 0.6791155815124512, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.4645103324348607, |
| "grad_norm": 1.5392216444015503, |
| "learning_rate": 1.214553800429962e-05, |
| "loss": 0.6901945114135742, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.4675052410901468, |
| "grad_norm": 1.5144057273864746, |
| "learning_rate": 1.2111488087078195e-05, |
| "loss": 0.6945788860321045, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.4705001497454329, |
| "grad_norm": 1.3844542503356934, |
| "learning_rate": 1.2077412527586152e-05, |
| "loss": 0.7096900939941406, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.4734950584007187, |
| "grad_norm": 1.5048848390579224, |
| "learning_rate": 1.2043311739642882e-05, |
| "loss": 0.6696764945983886, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.4764899670560048, |
| "grad_norm": 1.4695067405700684, |
| "learning_rate": 1.2009186137374158e-05, |
| "loss": 0.670767879486084, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.4794848757112908, |
| "grad_norm": 1.2492382526397705, |
| "learning_rate": 1.19750361352071e-05, |
| "loss": 0.6456597328186036, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.482479784366577, |
| "grad_norm": 1.5186883211135864, |
| "learning_rate": 1.1940862147865145e-05, |
| "loss": 0.6502896308898926, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.485474693021863, |
| "grad_norm": 1.5641230344772339, |
| "learning_rate": 1.1906664590363008e-05, |
| "loss": 0.6687553405761719, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.4884696016771488, |
| "grad_norm": 1.5437209606170654, |
| "learning_rate": 1.1872443878001652e-05, |
| "loss": 0.6531869411468506, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.4914645103324349, |
| "grad_norm": 1.342319369316101, |
| "learning_rate": 1.1838200426363227e-05, |
| "loss": 0.6570711135864258, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.494459418987721, |
| "grad_norm": 1.4218707084655762, |
| "learning_rate": 1.1803934651306037e-05, |
| "loss": 0.6730245590209961, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.4974543276430068, |
| "grad_norm": 1.4281178712844849, |
| "learning_rate": 1.1769646968959485e-05, |
| "loss": 0.7147689819335937, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.5004492362982929, |
| "grad_norm": 1.3982033729553223, |
| "learning_rate": 1.1735337795719018e-05, |
| "loss": 0.6860141754150391, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.503444144953579, |
| "grad_norm": 1.3819836378097534, |
| "learning_rate": 1.1701007548241077e-05, |
| "loss": 0.6726783752441406, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.5064390536088648, |
| "grad_norm": 1.347074270248413, |
| "learning_rate": 1.1666656643438029e-05, |
| "loss": 0.6863351821899414, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.509433962264151, |
| "grad_norm": 1.556456208229065, |
| "learning_rate": 1.1632285498473104e-05, |
| "loss": 0.6616711616516113, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.512428870919437, |
| "grad_norm": 1.3579554557800293, |
| "learning_rate": 1.1597894530755339e-05, |
| "loss": 0.6517277717590332, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.515423779574723, |
| "grad_norm": 1.2416077852249146, |
| "learning_rate": 1.1563484157934495e-05, |
| "loss": 0.6717746734619141, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.518418688230009, |
| "grad_norm": 1.5442560911178589, |
| "learning_rate": 1.1529054797895995e-05, |
| "loss": 0.634144401550293, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.5214135968852949, |
| "grad_norm": 1.5588805675506592, |
| "learning_rate": 1.1494606868755847e-05, |
| "loss": 0.6501172065734864, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.5244085055405812, |
| "grad_norm": 1.4256302118301392, |
| "learning_rate": 1.1460140788855563e-05, |
| "loss": 0.6600000858306885, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.527403414195867, |
| "grad_norm": 1.5612784624099731, |
| "learning_rate": 1.1425656976757083e-05, |
| "loss": 0.6572963237762451, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.530398322851153, |
| "grad_norm": 1.3370282649993896, |
| "learning_rate": 1.1391155851237687e-05, |
| "loss": 0.6684782028198242, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.5333932315064391, |
| "grad_norm": 1.5403549671173096, |
| "learning_rate": 1.1356637831284918e-05, |
| "loss": 0.6727892875671386, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.536388140161725, |
| "grad_norm": 1.370840072631836, |
| "learning_rate": 1.1322103336091479e-05, |
| "loss": 0.6451261520385743, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.539383048817011, |
| "grad_norm": 1.3316929340362549, |
| "learning_rate": 1.128755278505016e-05, |
| "loss": 0.6273011207580567, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.5423779574722971, |
| "grad_norm": 1.4078290462493896, |
| "learning_rate": 1.1252986597748726e-05, |
| "loss": 0.6177189826965332, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.545372866127583, |
| "grad_norm": 1.5209804773330688, |
| "learning_rate": 1.1218405193964846e-05, |
| "loss": 0.6376583576202393, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.5483677747828692, |
| "grad_norm": 1.4470043182373047, |
| "learning_rate": 1.1183808993660966e-05, |
| "loss": 0.6674811363220214, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.551362683438155, |
| "grad_norm": 1.353264570236206, |
| "learning_rate": 1.114919841697923e-05, |
| "loss": 0.6187152862548828, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.5543575920934412, |
| "grad_norm": 1.4463789463043213, |
| "learning_rate": 1.111457388423637e-05, |
| "loss": 0.6588546752929687, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.5573525007487272, |
| "grad_norm": 1.3931549787521362, |
| "learning_rate": 1.1079935815918608e-05, |
| "loss": 0.6881397247314454, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.560347409404013, |
| "grad_norm": 1.535813331604004, |
| "learning_rate": 1.1045284632676535e-05, |
| "loss": 0.6458590507507325, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.5633423180592994, |
| "grad_norm": 1.4285786151885986, |
| "learning_rate": 1.1010620755320018e-05, |
| "loss": 0.6613713264465332, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.5663372267145852, |
| "grad_norm": 1.361226201057434, |
| "learning_rate": 1.0975944604813083e-05, |
| "loss": 0.6590459823608399, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.5693321353698713, |
| "grad_norm": 1.4913057088851929, |
| "learning_rate": 1.0941256602268799e-05, |
| "loss": 0.6750634193420411, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.5723270440251573, |
| "grad_norm": 1.4666550159454346, |
| "learning_rate": 1.0906557168944174e-05, |
| "loss": 0.6445255279541016, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.5753219526804432, |
| "grad_norm": 1.287156105041504, |
| "learning_rate": 1.0871846726235031e-05, |
| "loss": 0.6598057270050048, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.5783168613357292, |
| "grad_norm": 1.3992464542388916, |
| "learning_rate": 1.0837125695670892e-05, |
| "loss": 0.6409515380859375, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.5813117699910153, |
| "grad_norm": 1.4923102855682373, |
| "learning_rate": 1.0802394498909859e-05, |
| "loss": 0.629506254196167, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.5843066786463011, |
| "grad_norm": 1.6271188259124756, |
| "learning_rate": 1.0767653557733494e-05, |
| "loss": 0.6570216655731201, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.5873015873015874, |
| "grad_norm": 1.6705671548843384, |
| "learning_rate": 1.0732903294041702e-05, |
| "loss": 0.6791990280151368, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.5902964959568733, |
| "grad_norm": 1.477299451828003, |
| "learning_rate": 1.0698144129847598e-05, |
| "loss": 0.6323776721954346, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.5932914046121593, |
| "grad_norm": 1.4466052055358887, |
| "learning_rate": 1.0663376487272386e-05, |
| "loss": 0.635925006866455, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.5962863132674454, |
| "grad_norm": 1.3402115106582642, |
| "learning_rate": 1.0628600788540232e-05, |
| "loss": 0.6522153377532959, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.5992812219227313, |
| "grad_norm": 1.5251848697662354, |
| "learning_rate": 1.059381745597314e-05, |
| "loss": 0.6985126495361328, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.6022761305780173, |
| "grad_norm": 1.3335204124450684, |
| "learning_rate": 1.0559026911985817e-05, |
| "loss": 0.6422924041748047, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.6052710392333034, |
| "grad_norm": 1.3140618801116943, |
| "learning_rate": 1.0524229579080553e-05, |
| "loss": 0.6553333759307861, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.6082659478885895, |
| "grad_norm": 1.5221872329711914, |
| "learning_rate": 1.0489425879842079e-05, |
| "loss": 0.6545061588287353, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.6112608565438755, |
| "grad_norm": 1.257545828819275, |
| "learning_rate": 1.0454616236932437e-05, |
| "loss": 0.6448293209075928, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.6142557651991614, |
| "grad_norm": 1.5430147647857666, |
| "learning_rate": 1.0419801073085856e-05, |
| "loss": 0.6655144691467285, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.6172506738544474, |
| "grad_norm": 1.528615117073059, |
| "learning_rate": 1.0384980811103614e-05, |
| "loss": 0.6634177207946778, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.6202455825097335, |
| "grad_norm": 1.294121503829956, |
| "learning_rate": 1.035015587384889e-05, |
| "loss": 0.6833911895751953, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.6232404911650193, |
| "grad_norm": 1.3020493984222412, |
| "learning_rate": 1.0315326684241655e-05, |
| "loss": 0.6578948020935058, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.6262353998203056, |
| "grad_norm": 1.599776268005371, |
| "learning_rate": 1.028049366525351e-05, |
| "loss": 0.6559863567352295, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.6292303084755915, |
| "grad_norm": 1.5496463775634766, |
| "learning_rate": 1.0245657239902565e-05, |
| "loss": 0.6398555755615234, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.6322252171308775, |
| "grad_norm": 1.4625442028045654, |
| "learning_rate": 1.0210817831248299e-05, |
| "loss": 0.653658676147461, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.6352201257861636, |
| "grad_norm": 1.425125241279602, |
| "learning_rate": 1.0175975862386416e-05, |
| "loss": 0.652650260925293, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.6382150344414494, |
| "grad_norm": 1.4808628559112549, |
| "learning_rate": 1.0141131756443715e-05, |
| "loss": 0.6396486282348632, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.6412099430967355, |
| "grad_norm": 1.3888201713562012, |
| "learning_rate": 1.0106285936572953e-05, |
| "loss": 0.6401126861572266, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.6442048517520216, |
| "grad_norm": 1.3387247323989868, |
| "learning_rate": 1.0071438825947689e-05, |
| "loss": 0.623372745513916, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.6471997604073074, |
| "grad_norm": 1.3711042404174805, |
| "learning_rate": 1.0036590847757166e-05, |
| "loss": 0.653053617477417, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.6501946690625937, |
| "grad_norm": 1.4318101406097412, |
| "learning_rate": 1.0001742425201164e-05, |
| "loss": 0.6399904727935791, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.6531895777178796, |
| "grad_norm": 1.3473331928253174, |
| "learning_rate": 9.966893981484852e-06, |
| "loss": 0.6013195037841796, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.6561844863731656, |
| "grad_norm": 1.404439091682434, |
| "learning_rate": 9.932045939813662e-06, |
| "loss": 0.6521830558776855, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.6591793950284517, |
| "grad_norm": 1.6330413818359375, |
| "learning_rate": 9.897198723388143e-06, |
| "loss": 0.6607831001281739, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.6621743036837375, |
| "grad_norm": 1.4696617126464844, |
| "learning_rate": 9.86235275539882e-06, |
| "loss": 0.666562557220459, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.6651692123390238, |
| "grad_norm": 1.3956501483917236, |
| "learning_rate": 9.827508459021056e-06, |
| "loss": 0.6431893348693848, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.6681641209943097, |
| "grad_norm": 1.3600293397903442, |
| "learning_rate": 9.792666257409917e-06, |
| "loss": 0.6427026271820069, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.6711590296495957, |
| "grad_norm": 1.393192172050476, |
| "learning_rate": 9.75782657369503e-06, |
| "loss": 0.6541417598724365, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.6741539383048818, |
| "grad_norm": 1.4253222942352295, |
| "learning_rate": 9.722989830975439e-06, |
| "loss": 0.6254150867462158, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.6771488469601676, |
| "grad_norm": 1.3802683353424072, |
| "learning_rate": 9.688156452314475e-06, |
| "loss": 0.6401287078857422, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.6801437556154537, |
| "grad_norm": 1.3087486028671265, |
| "learning_rate": 9.653326860734617e-06, |
| "loss": 0.6138454437255859, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.6831386642707398, |
| "grad_norm": 1.7111173868179321, |
| "learning_rate": 9.618501479212355e-06, |
| "loss": 0.6142902851104737, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.6861335729260256, |
| "grad_norm": 1.510345458984375, |
| "learning_rate": 9.58368073067304e-06, |
| "loss": 0.6585430145263672, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.689128481581312, |
| "grad_norm": 1.454209804534912, |
| "learning_rate": 9.548865037985776e-06, |
| "loss": 0.6655298233032226, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.6921233902365977, |
| "grad_norm": 1.5720434188842773, |
| "learning_rate": 9.514054823958254e-06, |
| "loss": 0.6410290718078613, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.6951182988918838, |
| "grad_norm": 1.4291045665740967, |
| "learning_rate": 9.47925051133164e-06, |
| "loss": 0.6853228569030761, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.6981132075471699, |
| "grad_norm": 1.4933050870895386, |
| "learning_rate": 9.444452522775424e-06, |
| "loss": 0.6341513633728028, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.7011081162024557, |
| "grad_norm": 1.4111416339874268, |
| "learning_rate": 9.409661280882306e-06, |
| "loss": 0.6149447441101075, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.7041030248577418, |
| "grad_norm": 1.4768991470336914, |
| "learning_rate": 9.374877208163042e-06, |
| "loss": 0.6374067306518555, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.7070979335130279, |
| "grad_norm": 1.5744390487670898, |
| "learning_rate": 9.340100727041334e-06, |
| "loss": 0.6614315986633301, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.710092842168314, |
| "grad_norm": 1.5393071174621582, |
| "learning_rate": 9.305332259848685e-06, |
| "loss": 0.6411947250366211, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.7130877508236, |
| "grad_norm": 1.2625375986099243, |
| "learning_rate": 9.270572228819277e-06, |
| "loss": 0.6521016120910644, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.7160826594788858, |
| "grad_norm": 1.4381405115127563, |
| "learning_rate": 9.235821056084841e-06, |
| "loss": 0.6407829761505127, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.719077568134172, |
| "grad_norm": 1.542965292930603, |
| "learning_rate": 9.20107916366953e-06, |
| "loss": 0.6585879325866699, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.722072476789458, |
| "grad_norm": 1.7274558544158936, |
| "learning_rate": 9.166346973484802e-06, |
| "loss": 0.678370475769043, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.7250673854447438, |
| "grad_norm": 1.3161629438400269, |
| "learning_rate": 9.131624907324281e-06, |
| "loss": 0.6508775234222413, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.72806229410003, |
| "grad_norm": 1.4791046380996704, |
| "learning_rate": 9.096913386858648e-06, |
| "loss": 0.6735451221466064, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.731057202755316, |
| "grad_norm": 1.4756172895431519, |
| "learning_rate": 9.062212833630513e-06, |
| "loss": 0.6588196754455566, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.734052111410602, |
| "grad_norm": 1.4064671993255615, |
| "learning_rate": 9.0275236690493e-06, |
| "loss": 0.6659040451049805, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.737047020065888, |
| "grad_norm": 1.5382051467895508, |
| "learning_rate": 8.992846314386125e-06, |
| "loss": 0.6591670036315918, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.740041928721174, |
| "grad_norm": 1.3774088621139526, |
| "learning_rate": 8.958181190768686e-06, |
| "loss": 0.6008991241455078, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.74303683737646, |
| "grad_norm": 1.5064748525619507, |
| "learning_rate": 8.923528719176141e-06, |
| "loss": 0.6617294311523437, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.746031746031746, |
| "grad_norm": 1.3881192207336426, |
| "learning_rate": 8.888889320434003e-06, |
| "loss": 0.669343090057373, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.749026654687032, |
| "grad_norm": 1.4678955078125, |
| "learning_rate": 8.854263415209022e-06, |
| "loss": 0.6319092750549317, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.7520215633423182, |
| "grad_norm": 1.4094629287719727, |
| "learning_rate": 8.81965142400408e-06, |
| "loss": 0.6414087772369385, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.755016471997604, |
| "grad_norm": 1.392703652381897, |
| "learning_rate": 8.785053767153098e-06, |
| "loss": 0.6597569942474365, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.75801138065289, |
| "grad_norm": 1.23224937915802, |
| "learning_rate": 8.7504708648159e-06, |
| "loss": 0.6733821868896485, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.7610062893081762, |
| "grad_norm": 1.5176384449005127, |
| "learning_rate": 8.715903136973141e-06, |
| "loss": 0.6722299098968506, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.764001197963462, |
| "grad_norm": 1.4998729228973389, |
| "learning_rate": 8.681351003421189e-06, |
| "loss": 0.6153835773468017, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.7669961066187483, |
| "grad_norm": 1.5615732669830322, |
| "learning_rate": 8.646814883767028e-06, |
| "loss": 0.6614401340484619, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.7699910152740341, |
| "grad_norm": 1.4429715871810913, |
| "learning_rate": 8.612295197423178e-06, |
| "loss": 0.6637703895568847, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.7729859239293202, |
| "grad_norm": 1.5154484510421753, |
| "learning_rate": 8.577792363602582e-06, |
| "loss": 0.6937406539916993, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.7759808325846063, |
| "grad_norm": 1.523045301437378, |
| "learning_rate": 8.543306801313522e-06, |
| "loss": 0.6500541210174561, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.778975741239892, |
| "grad_norm": 1.5260372161865234, |
| "learning_rate": 8.508838929354539e-06, |
| "loss": 0.6453513622283935, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.7819706498951782, |
| "grad_norm": 1.5336843729019165, |
| "learning_rate": 8.474389166309332e-06, |
| "loss": 0.6776984214782715, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.7849655585504642, |
| "grad_norm": 1.4582264423370361, |
| "learning_rate": 8.439957930541686e-06, |
| "loss": 0.6503573417663574, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.78796046720575, |
| "grad_norm": 1.3314224481582642, |
| "learning_rate": 8.405545640190387e-06, |
| "loss": 0.5925717353820801, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.7909553758610364, |
| "grad_norm": 1.3872936964035034, |
| "learning_rate": 8.371152713164146e-06, |
| "loss": 0.6113157272338867, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.7939502845163222, |
| "grad_norm": 1.4577404260635376, |
| "learning_rate": 8.33677956713652e-06, |
| "loss": 0.665937089920044, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.7969451931716083, |
| "grad_norm": 1.5423346757888794, |
| "learning_rate": 8.302426619540843e-06, |
| "loss": 0.6487864017486572, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.7999401018268943, |
| "grad_norm": 1.4728710651397705, |
| "learning_rate": 8.268094287565156e-06, |
| "loss": 0.6546504020690918, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.8029350104821802, |
| "grad_norm": 1.6181608438491821, |
| "learning_rate": 8.23378298814714e-06, |
| "loss": 0.6671038150787354, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.8059299191374663, |
| "grad_norm": 1.3112537860870361, |
| "learning_rate": 8.199493137969056e-06, |
| "loss": 0.6506411552429199, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.8089248277927523, |
| "grad_norm": 1.6379741430282593, |
| "learning_rate": 8.165225153452678e-06, |
| "loss": 0.6582574844360352, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.8119197364480384, |
| "grad_norm": 1.3852412700653076, |
| "learning_rate": 8.13097945075424e-06, |
| "loss": 0.6609588623046875, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.8149146451033245, |
| "grad_norm": 1.4980980157852173, |
| "learning_rate": 8.096756445759382e-06, |
| "loss": 0.6495426177978516, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.8179095537586103, |
| "grad_norm": 1.430310845375061, |
| "learning_rate": 8.062556554078103e-06, |
| "loss": 0.6681442260742188, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.8209044624138964, |
| "grad_norm": 1.4171323776245117, |
| "learning_rate": 8.028380191039704e-06, |
| "loss": 0.632663631439209, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.8238993710691824, |
| "grad_norm": 1.4187473058700562, |
| "learning_rate": 7.994227771687757e-06, |
| "loss": 0.6560873031616211, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.8268942797244683, |
| "grad_norm": 1.5951778888702393, |
| "learning_rate": 7.960099710775049e-06, |
| "loss": 0.6672462940216064, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.8298891883797546, |
| "grad_norm": 1.2696975469589233, |
| "learning_rate": 7.925996422758561e-06, |
| "loss": 0.6479342937469482, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.8328840970350404, |
| "grad_norm": 1.590598702430725, |
| "learning_rate": 7.891918321794428e-06, |
| "loss": 0.6272913932800293, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.8358790056903265, |
| "grad_norm": 1.3780035972595215, |
| "learning_rate": 7.857865821732906e-06, |
| "loss": 0.659095048904419, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.8388739143456125, |
| "grad_norm": 1.4691357612609863, |
| "learning_rate": 7.823839336113347e-06, |
| "loss": 0.6268105506896973, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.8418688230008984, |
| "grad_norm": 1.4001188278198242, |
| "learning_rate": 7.789839278159185e-06, |
| "loss": 0.6448341369628906, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.8448637316561844, |
| "grad_norm": 1.4039057493209839, |
| "learning_rate": 7.75586606077291e-06, |
| "loss": 0.6525530815124512, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.8478586403114705, |
| "grad_norm": 1.2402323484420776, |
| "learning_rate": 7.721920096531052e-06, |
| "loss": 0.6459396362304688, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.8508535489667564, |
| "grad_norm": 1.4664818048477173, |
| "learning_rate": 7.688001797679178e-06, |
| "loss": 0.6386150360107422, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.8538484576220426, |
| "grad_norm": 1.3676190376281738, |
| "learning_rate": 7.654111576126881e-06, |
| "loss": 0.6291984558105469, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.8568433662773285, |
| "grad_norm": 1.3386749029159546, |
| "learning_rate": 7.620249843442777e-06, |
| "loss": 0.6123722076416016, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.8598382749326146, |
| "grad_norm": 1.6316471099853516, |
| "learning_rate": 7.5864170108495135e-06, |
| "loss": 0.6253969669342041, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.8628331835879006, |
| "grad_norm": 1.3244318962097168, |
| "learning_rate": 7.552613489218763e-06, |
| "loss": 0.6519149303436279, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.8658280922431865, |
| "grad_norm": 1.618364691734314, |
| "learning_rate": 7.518839689066247e-06, |
| "loss": 0.6438776016235351, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.8688230008984728, |
| "grad_norm": 1.4920052289962769, |
| "learning_rate": 7.485096020546738e-06, |
| "loss": 0.6367332458496093, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.8718179095537586, |
| "grad_norm": 1.5802627801895142, |
| "learning_rate": 7.451382893449091e-06, |
| "loss": 0.6220839023590088, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.8748128182090447, |
| "grad_norm": 1.504412293434143, |
| "learning_rate": 7.417700717191255e-06, |
| "loss": 0.6164268493652344, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.8778077268643307, |
| "grad_norm": 1.4522355794906616, |
| "learning_rate": 7.384049900815313e-06, |
| "loss": 0.624882984161377, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.8808026355196166, |
| "grad_norm": 1.5092509984970093, |
| "learning_rate": 7.3504308529825045e-06, |
| "loss": 0.630027961730957, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.8837975441749026, |
| "grad_norm": 1.6086957454681396, |
| "learning_rate": 7.316843981968267e-06, |
| "loss": 0.6275941371917725, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.8867924528301887, |
| "grad_norm": 1.7092756032943726, |
| "learning_rate": 7.283289695657275e-06, |
| "loss": 0.6458075523376465, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.8897873614854745, |
| "grad_norm": 1.4698508977890015, |
| "learning_rate": 7.249768401538493e-06, |
| "loss": 0.5995992660522461, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.8927822701407608, |
| "grad_norm": 1.3332494497299194, |
| "learning_rate": 7.216280506700222e-06, |
| "loss": 0.5948431968688965, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.8957771787960467, |
| "grad_norm": 1.5438693761825562, |
| "learning_rate": 7.182826417825152e-06, |
| "loss": 0.6605867385864258, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.8987720874513327, |
| "grad_norm": 1.2717972993850708, |
| "learning_rate": 7.149406541185433e-06, |
| "loss": 0.6348017692565918, |
| "step": 6340 |
| }, |
| { |
| "epoch": 1.9017669961066188, |
| "grad_norm": 1.5023244619369507, |
| "learning_rate": 7.116021282637732e-06, |
| "loss": 0.6453000068664551, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.9047619047619047, |
| "grad_norm": 1.4482067823410034, |
| "learning_rate": 7.082671047618312e-06, |
| "loss": 0.6501484870910644, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.9077568134171907, |
| "grad_norm": 1.2380962371826172, |
| "learning_rate": 7.049356241138099e-06, |
| "loss": 0.6227757453918457, |
| "step": 6370 |
| }, |
| { |
| "epoch": 1.9107517220724768, |
| "grad_norm": 1.361412763595581, |
| "learning_rate": 7.016077267777775e-06, |
| "loss": 0.6514645576477051, |
| "step": 6380 |
| }, |
| { |
| "epoch": 1.9137466307277629, |
| "grad_norm": 1.6207096576690674, |
| "learning_rate": 6.982834531682853e-06, |
| "loss": 0.6655488967895508, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.916741539383049, |
| "grad_norm": 1.3296849727630615, |
| "learning_rate": 6.949628436558777e-06, |
| "loss": 0.6586191177368164, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.9197364480383348, |
| "grad_norm": 1.333168864250183, |
| "learning_rate": 6.916459385666017e-06, |
| "loss": 0.6382019996643067, |
| "step": 6410 |
| }, |
| { |
| "epoch": 1.9227313566936208, |
| "grad_norm": 1.3755735158920288, |
| "learning_rate": 6.88332778181517e-06, |
| "loss": 0.6329482078552247, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.925726265348907, |
| "grad_norm": 1.3939225673675537, |
| "learning_rate": 6.850234027362073e-06, |
| "loss": 0.6204883575439453, |
| "step": 6430 |
| }, |
| { |
| "epoch": 1.9287211740041927, |
| "grad_norm": 1.3705857992172241, |
| "learning_rate": 6.817178524202907e-06, |
| "loss": 0.6589064598083496, |
| "step": 6440 |
| }, |
| { |
| "epoch": 1.931716082659479, |
| "grad_norm": 1.3137234449386597, |
| "learning_rate": 6.784161673769332e-06, |
| "loss": 0.6426548004150391, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.9347109913147649, |
| "grad_norm": 1.5095393657684326, |
| "learning_rate": 6.751183877023595e-06, |
| "loss": 0.6177249908447265, |
| "step": 6460 |
| }, |
| { |
| "epoch": 1.937705899970051, |
| "grad_norm": 1.268416166305542, |
| "learning_rate": 6.718245534453673e-06, |
| "loss": 0.6130592823028564, |
| "step": 6470 |
| }, |
| { |
| "epoch": 1.940700808625337, |
| "grad_norm": 1.2933331727981567, |
| "learning_rate": 6.685347046068402e-06, |
| "loss": 0.6383994579315185, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.9436957172806228, |
| "grad_norm": 1.5103631019592285, |
| "learning_rate": 6.652488811392622e-06, |
| "loss": 0.6300495147705079, |
| "step": 6490 |
| }, |
| { |
| "epoch": 1.946690625935909, |
| "grad_norm": 1.4330319166183472, |
| "learning_rate": 6.6196712294623276e-06, |
| "loss": 0.631505012512207, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.949685534591195, |
| "grad_norm": 1.2874356508255005, |
| "learning_rate": 6.5868946988198165e-06, |
| "loss": 0.5962014198303223, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.9526804432464808, |
| "grad_norm": 1.4397952556610107, |
| "learning_rate": 6.554159617508856e-06, |
| "loss": 0.641713809967041, |
| "step": 6520 |
| }, |
| { |
| "epoch": 1.9556753519017671, |
| "grad_norm": 1.519711971282959, |
| "learning_rate": 6.521466383069841e-06, |
| "loss": 0.6155229568481445, |
| "step": 6530 |
| }, |
| { |
| "epoch": 1.958670260557053, |
| "grad_norm": 1.5349974632263184, |
| "learning_rate": 6.488815392534977e-06, |
| "loss": 0.6498642921447754, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.961665169212339, |
| "grad_norm": 1.6318473815917969, |
| "learning_rate": 6.456207042423445e-06, |
| "loss": 0.6179317474365235, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.964660077867625, |
| "grad_norm": 1.4348818063735962, |
| "learning_rate": 6.4236417287366006e-06, |
| "loss": 0.6067376136779785, |
| "step": 6560 |
| }, |
| { |
| "epoch": 1.967654986522911, |
| "grad_norm": 1.6722396612167358, |
| "learning_rate": 6.391119846953153e-06, |
| "loss": 0.6432971000671387, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.9706498951781972, |
| "grad_norm": 1.5022844076156616, |
| "learning_rate": 6.3586417920243695e-06, |
| "loss": 0.6031882762908936, |
| "step": 6580 |
| }, |
| { |
| "epoch": 1.973644803833483, |
| "grad_norm": 1.2844231128692627, |
| "learning_rate": 6.326207958369273e-06, |
| "loss": 0.661934232711792, |
| "step": 6590 |
| }, |
| { |
| "epoch": 1.9766397124887691, |
| "grad_norm": 1.338340163230896, |
| "learning_rate": 6.2938187398698614e-06, |
| "loss": 0.6218274116516114, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.9796346211440552, |
| "grad_norm": 1.4525477886199951, |
| "learning_rate": 6.261474529866315e-06, |
| "loss": 0.6390564441680908, |
| "step": 6610 |
| }, |
| { |
| "epoch": 1.982629529799341, |
| "grad_norm": 1.5783872604370117, |
| "learning_rate": 6.229175721152222e-06, |
| "loss": 0.6509233951568604, |
| "step": 6620 |
| }, |
| { |
| "epoch": 1.985624438454627, |
| "grad_norm": 1.5055947303771973, |
| "learning_rate": 6.1969227059698125e-06, |
| "loss": 0.5991942405700683, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.9886193471099132, |
| "grad_norm": 1.4864978790283203, |
| "learning_rate": 6.1647158760051915e-06, |
| "loss": 0.6327694892883301, |
| "step": 6640 |
| }, |
| { |
| "epoch": 1.991614255765199, |
| "grad_norm": 1.352718710899353, |
| "learning_rate": 6.132555622383581e-06, |
| "loss": 0.6443804740905762, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.9946091644204853, |
| "grad_norm": 1.567016363143921, |
| "learning_rate": 6.1004423356645744e-06, |
| "loss": 0.6319258689880372, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.9976040730757711, |
| "grad_norm": 1.4297220706939697, |
| "learning_rate": 6.06837640583739e-06, |
| "loss": 0.6520418167114258, |
| "step": 6670 |
| }, |
| { |
| "epoch": 2.000598981731057, |
| "grad_norm": 1.1015812158584595, |
| "learning_rate": 6.0363582223161345e-06, |
| "loss": 0.5972274303436279, |
| "step": 6680 |
| }, |
| { |
| "epoch": 2.0035938903863433, |
| "grad_norm": 1.2543084621429443, |
| "learning_rate": 6.0043881739350785e-06, |
| "loss": 0.5129719734191894, |
| "step": 6690 |
| }, |
| { |
| "epoch": 2.006588799041629, |
| "grad_norm": 1.2929192781448364, |
| "learning_rate": 5.972466648943929e-06, |
| "loss": 0.5194722652435303, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.0095837076969154, |
| "grad_norm": 1.4669725894927979, |
| "learning_rate": 5.940594035003119e-06, |
| "loss": 0.5150233268737793, |
| "step": 6710 |
| }, |
| { |
| "epoch": 2.0125786163522013, |
| "grad_norm": 1.4361999034881592, |
| "learning_rate": 5.9087707191790935e-06, |
| "loss": 0.5015038967132568, |
| "step": 6720 |
| }, |
| { |
| "epoch": 2.015573525007487, |
| "grad_norm": 1.3732558488845825, |
| "learning_rate": 5.876997087939614e-06, |
| "loss": 0.5028849601745605, |
| "step": 6730 |
| }, |
| { |
| "epoch": 2.0185684336627734, |
| "grad_norm": 1.3214187622070312, |
| "learning_rate": 5.845273527149067e-06, |
| "loss": 0.5087246894836426, |
| "step": 6740 |
| }, |
| { |
| "epoch": 2.0215633423180592, |
| "grad_norm": 1.564650058746338, |
| "learning_rate": 5.8136004220637746e-06, |
| "loss": 0.5178554058074951, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.0245582509733455, |
| "grad_norm": 1.4164608716964722, |
| "learning_rate": 5.7819781573273055e-06, |
| "loss": 0.5236745834350586, |
| "step": 6760 |
| }, |
| { |
| "epoch": 2.0275531596286314, |
| "grad_norm": 1.5281411409378052, |
| "learning_rate": 5.750407116965835e-06, |
| "loss": 0.5004557609558106, |
| "step": 6770 |
| }, |
| { |
| "epoch": 2.030548068283917, |
| "grad_norm": 1.6436216831207275, |
| "learning_rate": 5.718887684383441e-06, |
| "loss": 0.5178097248077392, |
| "step": 6780 |
| }, |
| { |
| "epoch": 2.0335429769392035, |
| "grad_norm": 1.497834324836731, |
| "learning_rate": 5.687420242357482e-06, |
| "loss": 0.5175156593322754, |
| "step": 6790 |
| }, |
| { |
| "epoch": 2.0365378855944893, |
| "grad_norm": 1.2958300113677979, |
| "learning_rate": 5.6560051730339226e-06, |
| "loss": 0.5054145336151123, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.039532794249775, |
| "grad_norm": 1.5535317659378052, |
| "learning_rate": 5.624642857922713e-06, |
| "loss": 0.5114497184753418, |
| "step": 6810 |
| }, |
| { |
| "epoch": 2.0425277029050615, |
| "grad_norm": 1.7876501083374023, |
| "learning_rate": 5.593333677893149e-06, |
| "loss": 0.49465479850769045, |
| "step": 6820 |
| }, |
| { |
| "epoch": 2.0455226115603473, |
| "grad_norm": 1.4220184087753296, |
| "learning_rate": 5.562078013169232e-06, |
| "loss": 0.4663191795349121, |
| "step": 6830 |
| }, |
| { |
| "epoch": 2.0485175202156336, |
| "grad_norm": 1.4079233407974243, |
| "learning_rate": 5.53087624332508e-06, |
| "loss": 0.5256112575531006, |
| "step": 6840 |
| }, |
| { |
| "epoch": 2.0515124288709194, |
| "grad_norm": 1.4762578010559082, |
| "learning_rate": 5.499728747280291e-06, |
| "loss": 0.49046692848205564, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.0545073375262053, |
| "grad_norm": 1.4836058616638184, |
| "learning_rate": 5.4686359032953595e-06, |
| "loss": 0.5131683349609375, |
| "step": 6860 |
| }, |
| { |
| "epoch": 2.0575022461814916, |
| "grad_norm": 1.5787184238433838, |
| "learning_rate": 5.4375980889670695e-06, |
| "loss": 0.5437060832977295, |
| "step": 6870 |
| }, |
| { |
| "epoch": 2.0604971548367774, |
| "grad_norm": 1.5268107652664185, |
| "learning_rate": 5.406615681223926e-06, |
| "loss": 0.4896749496459961, |
| "step": 6880 |
| }, |
| { |
| "epoch": 2.0634920634920633, |
| "grad_norm": 1.6514142751693726, |
| "learning_rate": 5.375689056321555e-06, |
| "loss": 0.5036890983581543, |
| "step": 6890 |
| }, |
| { |
| "epoch": 2.0664869721473496, |
| "grad_norm": 1.392620325088501, |
| "learning_rate": 5.3448185898381565e-06, |
| "loss": 0.5152482986450195, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.0694818808026354, |
| "grad_norm": 1.3546650409698486, |
| "learning_rate": 5.314004656669922e-06, |
| "loss": 0.48453149795532224, |
| "step": 6910 |
| }, |
| { |
| "epoch": 2.0724767894579217, |
| "grad_norm": 1.6225578784942627, |
| "learning_rate": 5.283247631026507e-06, |
| "loss": 0.5099971771240235, |
| "step": 6920 |
| }, |
| { |
| "epoch": 2.0754716981132075, |
| "grad_norm": 1.5903658866882324, |
| "learning_rate": 5.252547886426455e-06, |
| "loss": 0.5065332412719726, |
| "step": 6930 |
| }, |
| { |
| "epoch": 2.0784666067684934, |
| "grad_norm": 1.3701387643814087, |
| "learning_rate": 5.2219057956927e-06, |
| "loss": 0.5079869270324707, |
| "step": 6940 |
| }, |
| { |
| "epoch": 2.0814615154237797, |
| "grad_norm": 1.5331077575683594, |
| "learning_rate": 5.191321730947995e-06, |
| "loss": 0.5096177577972412, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.0844564240790655, |
| "grad_norm": 1.5454105138778687, |
| "learning_rate": 5.160796063610433e-06, |
| "loss": 0.5231037139892578, |
| "step": 6960 |
| }, |
| { |
| "epoch": 2.087451332734352, |
| "grad_norm": 1.5711345672607422, |
| "learning_rate": 5.130329164388909e-06, |
| "loss": 0.4980916023254395, |
| "step": 6970 |
| }, |
| { |
| "epoch": 2.0904462413896376, |
| "grad_norm": 1.5762660503387451, |
| "learning_rate": 5.099921403278631e-06, |
| "loss": 0.5110610008239747, |
| "step": 6980 |
| }, |
| { |
| "epoch": 2.0934411500449235, |
| "grad_norm": 1.6697578430175781, |
| "learning_rate": 5.069573149556628e-06, |
| "loss": 0.5102407455444335, |
| "step": 6990 |
| }, |
| { |
| "epoch": 2.0964360587002098, |
| "grad_norm": 1.7491651773452759, |
| "learning_rate": 5.039284771777258e-06, |
| "loss": 0.5234197616577149, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.0994309673554956, |
| "grad_norm": 1.546948790550232, |
| "learning_rate": 5.009056637767727e-06, |
| "loss": 0.4833499908447266, |
| "step": 7010 |
| }, |
| { |
| "epoch": 2.1024258760107815, |
| "grad_norm": 1.356787919998169, |
| "learning_rate": 4.9788891146236475e-06, |
| "loss": 0.5116095542907715, |
| "step": 7020 |
| }, |
| { |
| "epoch": 2.1054207846660677, |
| "grad_norm": 1.4842000007629395, |
| "learning_rate": 4.948782568704545e-06, |
| "loss": 0.502721643447876, |
| "step": 7030 |
| }, |
| { |
| "epoch": 2.1084156933213536, |
| "grad_norm": 1.5620037317276, |
| "learning_rate": 4.918737365629444e-06, |
| "loss": 0.508421802520752, |
| "step": 7040 |
| }, |
| { |
| "epoch": 2.11141060197664, |
| "grad_norm": 1.4998836517333984, |
| "learning_rate": 4.888753870272395e-06, |
| "loss": 0.4805330276489258, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.1144055106319257, |
| "grad_norm": 1.5066370964050293, |
| "learning_rate": 4.858832446758076e-06, |
| "loss": 0.5161166191101074, |
| "step": 7060 |
| }, |
| { |
| "epoch": 2.1174004192872116, |
| "grad_norm": 1.3664103746414185, |
| "learning_rate": 4.8289734584573376e-06, |
| "loss": 0.4755040168762207, |
| "step": 7070 |
| }, |
| { |
| "epoch": 2.120395327942498, |
| "grad_norm": 1.5773537158966064, |
| "learning_rate": 4.799177267982822e-06, |
| "loss": 0.5325294494628906, |
| "step": 7080 |
| }, |
| { |
| "epoch": 2.1233902365977837, |
| "grad_norm": 1.4924392700195312, |
| "learning_rate": 4.769444237184529e-06, |
| "loss": 0.512051773071289, |
| "step": 7090 |
| }, |
| { |
| "epoch": 2.12638514525307, |
| "grad_norm": 1.5971511602401733, |
| "learning_rate": 4.739774727145452e-06, |
| "loss": 0.4878090858459473, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.129380053908356, |
| "grad_norm": 1.288095474243164, |
| "learning_rate": 4.710169098177161e-06, |
| "loss": 0.4998618125915527, |
| "step": 7110 |
| }, |
| { |
| "epoch": 2.1323749625636417, |
| "grad_norm": 1.4811124801635742, |
| "learning_rate": 4.68062770981546e-06, |
| "loss": 0.4985170364379883, |
| "step": 7120 |
| }, |
| { |
| "epoch": 2.135369871218928, |
| "grad_norm": 1.5706632137298584, |
| "learning_rate": 4.651150920815988e-06, |
| "loss": 0.4773625373840332, |
| "step": 7130 |
| }, |
| { |
| "epoch": 2.138364779874214, |
| "grad_norm": 1.532424807548523, |
| "learning_rate": 4.62173908914989e-06, |
| "loss": 0.490186882019043, |
| "step": 7140 |
| }, |
| { |
| "epoch": 2.1413596885294996, |
| "grad_norm": 1.5899869203567505, |
| "learning_rate": 4.592392571999459e-06, |
| "loss": 0.48595681190490725, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.144354597184786, |
| "grad_norm": 1.5332787036895752, |
| "learning_rate": 4.563111725753785e-06, |
| "loss": 0.5245419502258301, |
| "step": 7160 |
| }, |
| { |
| "epoch": 2.147349505840072, |
| "grad_norm": 1.5711026191711426, |
| "learning_rate": 4.533896906004455e-06, |
| "loss": 0.47621469497680663, |
| "step": 7170 |
| }, |
| { |
| "epoch": 2.150344414495358, |
| "grad_norm": 1.784626841545105, |
| "learning_rate": 4.504748467541202e-06, |
| "loss": 0.49512577056884766, |
| "step": 7180 |
| }, |
| { |
| "epoch": 2.153339323150644, |
| "grad_norm": 1.442130208015442, |
| "learning_rate": 4.475666764347634e-06, |
| "loss": 0.4948512077331543, |
| "step": 7190 |
| }, |
| { |
| "epoch": 2.1563342318059298, |
| "grad_norm": 1.3904801607131958, |
| "learning_rate": 4.446652149596891e-06, |
| "loss": 0.5106653690338134, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.159329140461216, |
| "grad_norm": 1.4889200925827026, |
| "learning_rate": 4.4177049756474025e-06, |
| "loss": 0.4727304935455322, |
| "step": 7210 |
| }, |
| { |
| "epoch": 2.162324049116502, |
| "grad_norm": 1.5404807329177856, |
| "learning_rate": 4.388825594038565e-06, |
| "loss": 0.46900529861450196, |
| "step": 7220 |
| }, |
| { |
| "epoch": 2.165318957771788, |
| "grad_norm": 1.5615664720535278, |
| "learning_rate": 4.360014355486511e-06, |
| "loss": 0.5268836975097656, |
| "step": 7230 |
| }, |
| { |
| "epoch": 2.168313866427074, |
| "grad_norm": 1.4813424348831177, |
| "learning_rate": 4.331271609879817e-06, |
| "loss": 0.4924919605255127, |
| "step": 7240 |
| }, |
| { |
| "epoch": 2.17130877508236, |
| "grad_norm": 1.4291577339172363, |
| "learning_rate": 4.302597706275283e-06, |
| "loss": 0.49208860397338866, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.174303683737646, |
| "grad_norm": 1.6226706504821777, |
| "learning_rate": 4.273992992893667e-06, |
| "loss": 0.47493915557861327, |
| "step": 7260 |
| }, |
| { |
| "epoch": 2.177298592392932, |
| "grad_norm": 1.4558225870132446, |
| "learning_rate": 4.245457817115484e-06, |
| "loss": 0.5071091651916504, |
| "step": 7270 |
| }, |
| { |
| "epoch": 2.180293501048218, |
| "grad_norm": 1.4675588607788086, |
| "learning_rate": 4.216992525476754e-06, |
| "loss": 0.5064915180206299, |
| "step": 7280 |
| }, |
| { |
| "epoch": 2.183288409703504, |
| "grad_norm": 1.6159210205078125, |
| "learning_rate": 4.188597463664832e-06, |
| "loss": 0.5045362949371338, |
| "step": 7290 |
| }, |
| { |
| "epoch": 2.18628331835879, |
| "grad_norm": 1.5918903350830078, |
| "learning_rate": 4.160272976514171e-06, |
| "loss": 0.5072110652923584, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.1892782270140763, |
| "grad_norm": 1.513226866722107, |
| "learning_rate": 4.132019408002172e-06, |
| "loss": 0.4595001220703125, |
| "step": 7310 |
| }, |
| { |
| "epoch": 2.192273135669362, |
| "grad_norm": 1.323989987373352, |
| "learning_rate": 4.103837101244971e-06, |
| "loss": 0.5201524257659912, |
| "step": 7320 |
| }, |
| { |
| "epoch": 2.195268044324648, |
| "grad_norm": 1.5404670238494873, |
| "learning_rate": 4.075726398493303e-06, |
| "loss": 0.47367110252380373, |
| "step": 7330 |
| }, |
| { |
| "epoch": 2.1982629529799342, |
| "grad_norm": 1.8998429775238037, |
| "learning_rate": 4.0476876411283185e-06, |
| "loss": 0.4952116012573242, |
| "step": 7340 |
| }, |
| { |
| "epoch": 2.20125786163522, |
| "grad_norm": 1.5175492763519287, |
| "learning_rate": 4.019721169657466e-06, |
| "loss": 0.5057971954345704, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.2042527702905064, |
| "grad_norm": 1.5638384819030762, |
| "learning_rate": 3.991827323710326e-06, |
| "loss": 0.5098119258880616, |
| "step": 7360 |
| }, |
| { |
| "epoch": 2.207247678945792, |
| "grad_norm": 1.5194815397262573, |
| "learning_rate": 3.964006442034514e-06, |
| "loss": 0.5505454063415527, |
| "step": 7370 |
| }, |
| { |
| "epoch": 2.210242587601078, |
| "grad_norm": 1.4322032928466797, |
| "learning_rate": 3.9362588624915535e-06, |
| "loss": 0.5155088424682617, |
| "step": 7380 |
| }, |
| { |
| "epoch": 2.2132374962563643, |
| "grad_norm": 1.5685886144638062, |
| "learning_rate": 3.908584922052766e-06, |
| "loss": 0.4836409568786621, |
| "step": 7390 |
| }, |
| { |
| "epoch": 2.21623240491165, |
| "grad_norm": 1.6502667665481567, |
| "learning_rate": 3.8809849567951994e-06, |
| "loss": 0.49752092361450195, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.219227313566936, |
| "grad_norm": 1.5383046865463257, |
| "learning_rate": 3.853459301897523e-06, |
| "loss": 0.49851369857788086, |
| "step": 7410 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 1.550384521484375, |
| "learning_rate": 3.826008291635979e-06, |
| "loss": 0.48642563819885254, |
| "step": 7420 |
| }, |
| { |
| "epoch": 2.225217130877508, |
| "grad_norm": 1.7033337354660034, |
| "learning_rate": 3.7986322593803006e-06, |
| "loss": 0.48472137451171876, |
| "step": 7430 |
| }, |
| { |
| "epoch": 2.2282120395327945, |
| "grad_norm": 1.6340550184249878, |
| "learning_rate": 3.7713315375896876e-06, |
| "loss": 0.533723258972168, |
| "step": 7440 |
| }, |
| { |
| "epoch": 2.2312069481880803, |
| "grad_norm": 1.429850459098816, |
| "learning_rate": 3.744106457808746e-06, |
| "loss": 0.4909144401550293, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.234201856843366, |
| "grad_norm": 1.4978691339492798, |
| "learning_rate": 3.7169573506634824e-06, |
| "loss": 0.4724015235900879, |
| "step": 7460 |
| }, |
| { |
| "epoch": 2.2371967654986524, |
| "grad_norm": 1.7704259157180786, |
| "learning_rate": 3.6898845458572674e-06, |
| "loss": 0.5028561592102051, |
| "step": 7470 |
| }, |
| { |
| "epoch": 2.2401916741539383, |
| "grad_norm": 1.5223942995071411, |
| "learning_rate": 3.6628883721668573e-06, |
| "loss": 0.5258946895599366, |
| "step": 7480 |
| }, |
| { |
| "epoch": 2.243186582809224, |
| "grad_norm": 1.3769805431365967, |
| "learning_rate": 3.6359691574383703e-06, |
| "loss": 0.48286190032958987, |
| "step": 7490 |
| }, |
| { |
| "epoch": 2.2461814914645104, |
| "grad_norm": 1.669264316558838, |
| "learning_rate": 3.609127228583338e-06, |
| "loss": 0.4988402366638184, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.2491764001197962, |
| "grad_norm": 1.5799616575241089, |
| "learning_rate": 3.582362911574706e-06, |
| "loss": 0.48703784942626954, |
| "step": 7510 |
| }, |
| { |
| "epoch": 2.2521713087750825, |
| "grad_norm": 1.6424161195755005, |
| "learning_rate": 3.5556765314428998e-06, |
| "loss": 0.5006259918212891, |
| "step": 7520 |
| }, |
| { |
| "epoch": 2.2551662174303684, |
| "grad_norm": 1.7363072633743286, |
| "learning_rate": 3.5290684122718544e-06, |
| "loss": 0.5261609554290771, |
| "step": 7530 |
| }, |
| { |
| "epoch": 2.2581611260856542, |
| "grad_norm": 1.6819044351577759, |
| "learning_rate": 3.502538877195104e-06, |
| "loss": 0.48314647674560546, |
| "step": 7540 |
| }, |
| { |
| "epoch": 2.2611560347409405, |
| "grad_norm": 1.7765800952911377, |
| "learning_rate": 3.476088248391829e-06, |
| "loss": 0.5151649475097656, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.2641509433962264, |
| "grad_norm": 1.5972343683242798, |
| "learning_rate": 3.4497168470829732e-06, |
| "loss": 0.5179537773132324, |
| "step": 7560 |
| }, |
| { |
| "epoch": 2.267145852051512, |
| "grad_norm": 1.4898275136947632, |
| "learning_rate": 3.4234249935273157e-06, |
| "loss": 0.4999067306518555, |
| "step": 7570 |
| }, |
| { |
| "epoch": 2.2701407607067985, |
| "grad_norm": 1.584760069847107, |
| "learning_rate": 3.3972130070176057e-06, |
| "loss": 0.5141147613525391, |
| "step": 7580 |
| }, |
| { |
| "epoch": 2.2731356693620843, |
| "grad_norm": 1.454837441444397, |
| "learning_rate": 3.371081205876662e-06, |
| "loss": 0.5008669376373291, |
| "step": 7590 |
| }, |
| { |
| "epoch": 2.2761305780173706, |
| "grad_norm": 1.274971842765808, |
| "learning_rate": 3.3450299074535297e-06, |
| "loss": 0.48927507400512693, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.2791254866726565, |
| "grad_norm": 1.7367416620254517, |
| "learning_rate": 3.319059428119603e-06, |
| "loss": 0.4955023765563965, |
| "step": 7610 |
| }, |
| { |
| "epoch": 2.2821203953279423, |
| "grad_norm": 1.5526654720306396, |
| "learning_rate": 3.2931700832648063e-06, |
| "loss": 0.4893807411193848, |
| "step": 7620 |
| }, |
| { |
| "epoch": 2.2851153039832286, |
| "grad_norm": 1.5649467706680298, |
| "learning_rate": 3.267362187293751e-06, |
| "loss": 0.4851066112518311, |
| "step": 7630 |
| }, |
| { |
| "epoch": 2.2881102126385144, |
| "grad_norm": 1.5863004922866821, |
| "learning_rate": 3.2416360536219126e-06, |
| "loss": 0.4791616439819336, |
| "step": 7640 |
| }, |
| { |
| "epoch": 2.2911051212938007, |
| "grad_norm": 1.6295742988586426, |
| "learning_rate": 3.21599199467184e-06, |
| "loss": 0.47314839363098143, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.2941000299490866, |
| "grad_norm": 1.6049524545669556, |
| "learning_rate": 3.1904303218693444e-06, |
| "loss": 0.5069909572601319, |
| "step": 7660 |
| }, |
| { |
| "epoch": 2.2970949386043724, |
| "grad_norm": 1.652060627937317, |
| "learning_rate": 3.164951345639735e-06, |
| "loss": 0.48629279136657716, |
| "step": 7670 |
| }, |
| { |
| "epoch": 2.3000898472596587, |
| "grad_norm": 1.4900766611099243, |
| "learning_rate": 3.1395553754040275e-06, |
| "loss": 0.4977739334106445, |
| "step": 7680 |
| }, |
| { |
| "epoch": 2.3030847559149445, |
| "grad_norm": 1.5940581560134888, |
| "learning_rate": 3.1142427195752144e-06, |
| "loss": 0.5253914833068848, |
| "step": 7690 |
| }, |
| { |
| "epoch": 2.3060796645702304, |
| "grad_norm": 1.4190114736557007, |
| "learning_rate": 3.0890136855544872e-06, |
| "loss": 0.5198238849639892, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.3090745732255167, |
| "grad_norm": 1.5904440879821777, |
| "learning_rate": 3.0638685797275357e-06, |
| "loss": 0.5105954170227051, |
| "step": 7710 |
| }, |
| { |
| "epoch": 2.3120694818808025, |
| "grad_norm": 1.4529443979263306, |
| "learning_rate": 3.038807707460796e-06, |
| "loss": 0.4947354316711426, |
| "step": 7720 |
| }, |
| { |
| "epoch": 2.315064390536089, |
| "grad_norm": 1.8188318014144897, |
| "learning_rate": 3.0138313730977718e-06, |
| "loss": 0.5178883075714111, |
| "step": 7730 |
| }, |
| { |
| "epoch": 2.3180592991913747, |
| "grad_norm": 1.6044124364852905, |
| "learning_rate": 2.9889398799553128e-06, |
| "loss": 0.4920680522918701, |
| "step": 7740 |
| }, |
| { |
| "epoch": 2.3210542078466605, |
| "grad_norm": 1.4917323589324951, |
| "learning_rate": 2.9641335303199514e-06, |
| "loss": 0.5030588626861572, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.324049116501947, |
| "grad_norm": 1.668168544769287, |
| "learning_rate": 2.9394126254442134e-06, |
| "loss": 0.5082870960235596, |
| "step": 7760 |
| }, |
| { |
| "epoch": 2.3270440251572326, |
| "grad_norm": 1.444399118423462, |
| "learning_rate": 2.9147774655429794e-06, |
| "loss": 0.47826013565063474, |
| "step": 7770 |
| }, |
| { |
| "epoch": 2.330038933812519, |
| "grad_norm": 1.5026638507843018, |
| "learning_rate": 2.8902283497898185e-06, |
| "loss": 0.5042776107788086, |
| "step": 7780 |
| }, |
| { |
| "epoch": 2.3330338424678048, |
| "grad_norm": 1.4492841958999634, |
| "learning_rate": 2.865765576313376e-06, |
| "loss": 0.4668389320373535, |
| "step": 7790 |
| }, |
| { |
| "epoch": 2.3360287511230906, |
| "grad_norm": 1.5469386577606201, |
| "learning_rate": 2.841389442193727e-06, |
| "loss": 0.4765936851501465, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.339023659778377, |
| "grad_norm": 1.667075514793396, |
| "learning_rate": 2.817100243458801e-06, |
| "loss": 0.49718523025512695, |
| "step": 7810 |
| }, |
| { |
| "epoch": 2.3420185684336627, |
| "grad_norm": 1.7210171222686768, |
| "learning_rate": 2.792898275080752e-06, |
| "loss": 0.4858196258544922, |
| "step": 7820 |
| }, |
| { |
| "epoch": 2.3450134770889486, |
| "grad_norm": 1.4533110857009888, |
| "learning_rate": 2.7687838309724104e-06, |
| "loss": 0.5015253543853759, |
| "step": 7830 |
| }, |
| { |
| "epoch": 2.348008385744235, |
| "grad_norm": 1.409629464149475, |
| "learning_rate": 2.7447572039836812e-06, |
| "loss": 0.49271488189697266, |
| "step": 7840 |
| }, |
| { |
| "epoch": 2.3510032943995207, |
| "grad_norm": 1.7250934839248657, |
| "learning_rate": 2.7208186858980148e-06, |
| "loss": 0.5015377998352051, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.353998203054807, |
| "grad_norm": 1.521403193473816, |
| "learning_rate": 2.696968567428849e-06, |
| "loss": 0.513665771484375, |
| "step": 7860 |
| }, |
| { |
| "epoch": 2.356993111710093, |
| "grad_norm": 1.5374770164489746, |
| "learning_rate": 2.6732071382160785e-06, |
| "loss": 0.5035372734069824, |
| "step": 7870 |
| }, |
| { |
| "epoch": 2.3599880203653787, |
| "grad_norm": 1.53484308719635, |
| "learning_rate": 2.649534686822547e-06, |
| "loss": 0.47299823760986326, |
| "step": 7880 |
| }, |
| { |
| "epoch": 2.362982929020665, |
| "grad_norm": 1.3899140357971191, |
| "learning_rate": 2.6259515007305246e-06, |
| "loss": 0.47503366470336916, |
| "step": 7890 |
| }, |
| { |
| "epoch": 2.365977837675951, |
| "grad_norm": 1.7831887006759644, |
| "learning_rate": 2.6024578663382447e-06, |
| "loss": 0.5038399696350098, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.368972746331237, |
| "grad_norm": 1.4920977354049683, |
| "learning_rate": 2.579054068956395e-06, |
| "loss": 0.4970669746398926, |
| "step": 7910 |
| }, |
| { |
| "epoch": 2.371967654986523, |
| "grad_norm": 1.6569092273712158, |
| "learning_rate": 2.5557403928046774e-06, |
| "loss": 0.5047991752624512, |
| "step": 7920 |
| }, |
| { |
| "epoch": 2.374962563641809, |
| "grad_norm": 1.3544354438781738, |
| "learning_rate": 2.532517121008338e-06, |
| "loss": 0.4772444248199463, |
| "step": 7930 |
| }, |
| { |
| "epoch": 2.377957472297095, |
| "grad_norm": 1.6597788333892822, |
| "learning_rate": 2.5093845355947446e-06, |
| "loss": 0.4818833351135254, |
| "step": 7940 |
| }, |
| { |
| "epoch": 2.380952380952381, |
| "grad_norm": 1.5855315923690796, |
| "learning_rate": 2.486342917489948e-06, |
| "loss": 0.4844215393066406, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.3839472896076668, |
| "grad_norm": 1.562936782836914, |
| "learning_rate": 2.463392546515283e-06, |
| "loss": 0.5058174133300781, |
| "step": 7960 |
| }, |
| { |
| "epoch": 2.386942198262953, |
| "grad_norm": 1.5709084272384644, |
| "learning_rate": 2.4405337013839536e-06, |
| "loss": 0.5061359405517578, |
| "step": 7970 |
| }, |
| { |
| "epoch": 2.389937106918239, |
| "grad_norm": 1.5790047645568848, |
| "learning_rate": 2.4177666596976725e-06, |
| "loss": 0.4824088096618652, |
| "step": 7980 |
| }, |
| { |
| "epoch": 2.392932015573525, |
| "grad_norm": 1.5179102420806885, |
| "learning_rate": 2.3950916979432614e-06, |
| "loss": 0.47690744400024415, |
| "step": 7990 |
| }, |
| { |
| "epoch": 2.395926924228811, |
| "grad_norm": 1.661934494972229, |
| "learning_rate": 2.372509091489319e-06, |
| "loss": 0.5297951221466064, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.398921832884097, |
| "grad_norm": 1.5934616327285767, |
| "learning_rate": 2.3500191145828565e-06, |
| "loss": 0.5042305946350097, |
| "step": 8010 |
| }, |
| { |
| "epoch": 2.401916741539383, |
| "grad_norm": 1.7664084434509277, |
| "learning_rate": 2.327622040345985e-06, |
| "loss": 0.4854135513305664, |
| "step": 8020 |
| }, |
| { |
| "epoch": 2.404911650194669, |
| "grad_norm": 1.6327743530273438, |
| "learning_rate": 2.30531814077258e-06, |
| "loss": 0.5051285743713378, |
| "step": 8030 |
| }, |
| { |
| "epoch": 2.4079065588499553, |
| "grad_norm": 1.6960318088531494, |
| "learning_rate": 2.283107686724998e-06, |
| "loss": 0.48496303558349607, |
| "step": 8040 |
| }, |
| { |
| "epoch": 2.410901467505241, |
| "grad_norm": 1.7003093957901, |
| "learning_rate": 2.2609909479307667e-06, |
| "loss": 0.4897914886474609, |
| "step": 8050 |
| }, |
| { |
| "epoch": 2.413896376160527, |
| "grad_norm": 1.5663912296295166, |
| "learning_rate": 2.2389681929793326e-06, |
| "loss": 0.5030606269836426, |
| "step": 8060 |
| }, |
| { |
| "epoch": 2.4168912848158133, |
| "grad_norm": 1.4921340942382812, |
| "learning_rate": 2.217039689318772e-06, |
| "loss": 0.47916498184204104, |
| "step": 8070 |
| }, |
| { |
| "epoch": 2.419886193471099, |
| "grad_norm": 1.6380950212478638, |
| "learning_rate": 2.195205703252571e-06, |
| "loss": 0.5335843563079834, |
| "step": 8080 |
| }, |
| { |
| "epoch": 2.422881102126385, |
| "grad_norm": 1.5958120822906494, |
| "learning_rate": 2.1734664999363654e-06, |
| "loss": 0.4860078811645508, |
| "step": 8090 |
| }, |
| { |
| "epoch": 2.4258760107816713, |
| "grad_norm": 1.3979578018188477, |
| "learning_rate": 2.151822343374742e-06, |
| "loss": 0.47577743530273436, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.428870919436957, |
| "grad_norm": 1.4820301532745361, |
| "learning_rate": 2.1302734964180228e-06, |
| "loss": 0.5121123313903808, |
| "step": 8110 |
| }, |
| { |
| "epoch": 2.431865828092243, |
| "grad_norm": 1.49030339717865, |
| "learning_rate": 2.1088202207590725e-06, |
| "loss": 0.5002717018127442, |
| "step": 8120 |
| }, |
| { |
| "epoch": 2.4348607367475292, |
| "grad_norm": 1.5989124774932861, |
| "learning_rate": 2.087462776930117e-06, |
| "loss": 0.4997716903686523, |
| "step": 8130 |
| }, |
| { |
| "epoch": 2.437855645402815, |
| "grad_norm": 1.2835077047348022, |
| "learning_rate": 2.066201424299594e-06, |
| "loss": 0.5007314205169677, |
| "step": 8140 |
| }, |
| { |
| "epoch": 2.4408505540581014, |
| "grad_norm": 1.5404707193374634, |
| "learning_rate": 2.045036421068982e-06, |
| "loss": 0.5124270439147949, |
| "step": 8150 |
| }, |
| { |
| "epoch": 2.443845462713387, |
| "grad_norm": 1.587632417678833, |
| "learning_rate": 2.023968024269687e-06, |
| "loss": 0.4949374198913574, |
| "step": 8160 |
| }, |
| { |
| "epoch": 2.4468403713686735, |
| "grad_norm": 1.4403496980667114, |
| "learning_rate": 2.0029964897598974e-06, |
| "loss": 0.48112049102783205, |
| "step": 8170 |
| }, |
| { |
| "epoch": 2.4498352800239593, |
| "grad_norm": 1.6329265832901, |
| "learning_rate": 1.9821220722215064e-06, |
| "loss": 0.5166867733001709, |
| "step": 8180 |
| }, |
| { |
| "epoch": 2.452830188679245, |
| "grad_norm": 1.4010143280029297, |
| "learning_rate": 1.961345025156983e-06, |
| "loss": 0.4915262222290039, |
| "step": 8190 |
| }, |
| { |
| "epoch": 2.4558250973345315, |
| "grad_norm": 1.7108557224273682, |
| "learning_rate": 1.940665600886327e-06, |
| "loss": 0.503018569946289, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.4588200059898173, |
| "grad_norm": 1.5818506479263306, |
| "learning_rate": 1.920084050543988e-06, |
| "loss": 0.48973965644836426, |
| "step": 8210 |
| }, |
| { |
| "epoch": 2.461814914645103, |
| "grad_norm": 1.5593360662460327, |
| "learning_rate": 1.8996006240758092e-06, |
| "loss": 0.49617342948913573, |
| "step": 8220 |
| }, |
| { |
| "epoch": 2.4648098233003894, |
| "grad_norm": 1.470671534538269, |
| "learning_rate": 1.8792155702360138e-06, |
| "loss": 0.49179978370666505, |
| "step": 8230 |
| }, |
| { |
| "epoch": 2.4678047319556753, |
| "grad_norm": 1.7106302976608276, |
| "learning_rate": 1.858929136584159e-06, |
| "loss": 0.5138489723205566, |
| "step": 8240 |
| }, |
| { |
| "epoch": 2.470799640610961, |
| "grad_norm": 1.413886547088623, |
| "learning_rate": 1.8387415694821508e-06, |
| "loss": 0.47958765029907224, |
| "step": 8250 |
| }, |
| { |
| "epoch": 2.4737945492662474, |
| "grad_norm": 1.5908961296081543, |
| "learning_rate": 1.8186531140912344e-06, |
| "loss": 0.5095817089080811, |
| "step": 8260 |
| }, |
| { |
| "epoch": 2.4767894579215333, |
| "grad_norm": 1.5044485330581665, |
| "learning_rate": 1.798664014369037e-06, |
| "loss": 0.48099870681762696, |
| "step": 8270 |
| }, |
| { |
| "epoch": 2.4797843665768196, |
| "grad_norm": 1.4462517499923706, |
| "learning_rate": 1.7787745130665802e-06, |
| "loss": 0.47478313446044923, |
| "step": 8280 |
| }, |
| { |
| "epoch": 2.4827792752321054, |
| "grad_norm": 1.5918395519256592, |
| "learning_rate": 1.758984851725357e-06, |
| "loss": 0.5039488792419433, |
| "step": 8290 |
| }, |
| { |
| "epoch": 2.4857741838873912, |
| "grad_norm": 1.4895800352096558, |
| "learning_rate": 1.7392952706743793e-06, |
| "loss": 0.5190446853637696, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.4887690925426775, |
| "grad_norm": 1.590948462486267, |
| "learning_rate": 1.719706009027272e-06, |
| "loss": 0.48047447204589844, |
| "step": 8310 |
| }, |
| { |
| "epoch": 2.4917640011979634, |
| "grad_norm": 1.5965536832809448, |
| "learning_rate": 1.700217304679359e-06, |
| "loss": 0.49631290435791015, |
| "step": 8320 |
| }, |
| { |
| "epoch": 2.4947589098532497, |
| "grad_norm": 1.6916544437408447, |
| "learning_rate": 1.680829394304786e-06, |
| "loss": 0.48668642044067384, |
| "step": 8330 |
| }, |
| { |
| "epoch": 2.4977538185085355, |
| "grad_norm": 1.553376317024231, |
| "learning_rate": 1.6615425133536312e-06, |
| "loss": 0.4995077133178711, |
| "step": 8340 |
| }, |
| { |
| "epoch": 2.5007487271638213, |
| "grad_norm": 1.5679926872253418, |
| "learning_rate": 1.6423568960490632e-06, |
| "loss": 0.505252456665039, |
| "step": 8350 |
| }, |
| { |
| "epoch": 2.5037436358191076, |
| "grad_norm": 1.6989234685897827, |
| "learning_rate": 1.623272775384479e-06, |
| "loss": 0.49995737075805663, |
| "step": 8360 |
| }, |
| { |
| "epoch": 2.5067385444743935, |
| "grad_norm": 1.5071887969970703, |
| "learning_rate": 1.6042903831206914e-06, |
| "loss": 0.48708696365356446, |
| "step": 8370 |
| }, |
| { |
| "epoch": 2.5097334531296793, |
| "grad_norm": 1.587327003479004, |
| "learning_rate": 1.5854099497830967e-06, |
| "loss": 0.4981412887573242, |
| "step": 8380 |
| }, |
| { |
| "epoch": 2.5127283617849656, |
| "grad_norm": 1.3233060836791992, |
| "learning_rate": 1.5666317046588963e-06, |
| "loss": 0.46573629379272463, |
| "step": 8390 |
| }, |
| { |
| "epoch": 2.5157232704402515, |
| "grad_norm": 1.5084353685379028, |
| "learning_rate": 1.5479558757942882e-06, |
| "loss": 0.47600607872009276, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.5187181790955377, |
| "grad_norm": 1.765870213508606, |
| "learning_rate": 1.529382689991722e-06, |
| "loss": 0.5007995128631592, |
| "step": 8410 |
| }, |
| { |
| "epoch": 2.5217130877508236, |
| "grad_norm": 1.6081740856170654, |
| "learning_rate": 1.5109123728071208e-06, |
| "loss": 0.5044775009155273, |
| "step": 8420 |
| }, |
| { |
| "epoch": 2.52470799640611, |
| "grad_norm": 1.2260218858718872, |
| "learning_rate": 1.492545148547161e-06, |
| "loss": 0.4685311794281006, |
| "step": 8430 |
| }, |
| { |
| "epoch": 2.5277029050613957, |
| "grad_norm": 1.5107367038726807, |
| "learning_rate": 1.474281240266544e-06, |
| "loss": 0.5116828918457031, |
| "step": 8440 |
| }, |
| { |
| "epoch": 2.5306978137166816, |
| "grad_norm": 1.6438366174697876, |
| "learning_rate": 1.456120869765274e-06, |
| "loss": 0.49548845291137694, |
| "step": 8450 |
| }, |
| { |
| "epoch": 2.533692722371968, |
| "grad_norm": 1.506015658378601, |
| "learning_rate": 1.4380642575859838e-06, |
| "loss": 0.4876260757446289, |
| "step": 8460 |
| }, |
| { |
| "epoch": 2.5366876310272537, |
| "grad_norm": 1.6380552053451538, |
| "learning_rate": 1.4201116230112421e-06, |
| "loss": 0.5124927520751953, |
| "step": 8470 |
| }, |
| { |
| "epoch": 2.5396825396825395, |
| "grad_norm": 1.4299864768981934, |
| "learning_rate": 1.4022631840609002e-06, |
| "loss": 0.5214046955108642, |
| "step": 8480 |
| }, |
| { |
| "epoch": 2.542677448337826, |
| "grad_norm": 1.6097781658172607, |
| "learning_rate": 1.3845191574894345e-06, |
| "loss": 0.48117785453796386, |
| "step": 8490 |
| }, |
| { |
| "epoch": 2.5456723569931117, |
| "grad_norm": 1.8168413639068604, |
| "learning_rate": 1.3668797587833283e-06, |
| "loss": 0.4974005699157715, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.5486672656483975, |
| "grad_norm": 1.5539957284927368, |
| "learning_rate": 1.3493452021584341e-06, |
| "loss": 0.48285598754882814, |
| "step": 8510 |
| }, |
| { |
| "epoch": 2.551662174303684, |
| "grad_norm": 1.7035820484161377, |
| "learning_rate": 1.331915700557398e-06, |
| "loss": 0.5152078628540039, |
| "step": 8520 |
| }, |
| { |
| "epoch": 2.5546570829589696, |
| "grad_norm": 1.435754418373108, |
| "learning_rate": 1.3145914656470471e-06, |
| "loss": 0.4732780456542969, |
| "step": 8530 |
| }, |
| { |
| "epoch": 2.5576519916142555, |
| "grad_norm": 1.7325928211212158, |
| "learning_rate": 1.2973727078158438e-06, |
| "loss": 0.49252891540527344, |
| "step": 8540 |
| }, |
| { |
| "epoch": 2.560646900269542, |
| "grad_norm": 1.6093993186950684, |
| "learning_rate": 1.2802596361713081e-06, |
| "loss": 0.4925223350524902, |
| "step": 8550 |
| }, |
| { |
| "epoch": 2.563641808924828, |
| "grad_norm": 1.5473220348358154, |
| "learning_rate": 1.2632524585374983e-06, |
| "loss": 0.4907097816467285, |
| "step": 8560 |
| }, |
| { |
| "epoch": 2.566636717580114, |
| "grad_norm": 1.743654727935791, |
| "learning_rate": 1.2463513814524697e-06, |
| "loss": 0.5052000999450683, |
| "step": 8570 |
| }, |
| { |
| "epoch": 2.5696316262353998, |
| "grad_norm": 1.5247002840042114, |
| "learning_rate": 1.229556610165782e-06, |
| "loss": 0.4836299419403076, |
| "step": 8580 |
| }, |
| { |
| "epoch": 2.572626534890686, |
| "grad_norm": 1.6155225038528442, |
| "learning_rate": 1.2128683486359915e-06, |
| "loss": 0.49276161193847656, |
| "step": 8590 |
| }, |
| { |
| "epoch": 2.575621443545972, |
| "grad_norm": 1.6852004528045654, |
| "learning_rate": 1.1962867995281902e-06, |
| "loss": 0.49987125396728516, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.5786163522012577, |
| "grad_norm": 1.6533702611923218, |
| "learning_rate": 1.1798121642115278e-06, |
| "loss": 0.5120342254638672, |
| "step": 8610 |
| }, |
| { |
| "epoch": 2.581611260856544, |
| "grad_norm": 1.2716710567474365, |
| "learning_rate": 1.1634446427567825e-06, |
| "loss": 0.49103879928588867, |
| "step": 8620 |
| }, |
| { |
| "epoch": 2.58460616951183, |
| "grad_norm": 1.5219924449920654, |
| "learning_rate": 1.1471844339339167e-06, |
| "loss": 0.48928394317626955, |
| "step": 8630 |
| }, |
| { |
| "epoch": 2.5876010781671157, |
| "grad_norm": 1.5612694025039673, |
| "learning_rate": 1.1310317352096757e-06, |
| "loss": 0.5022396087646485, |
| "step": 8640 |
| }, |
| { |
| "epoch": 2.590595986822402, |
| "grad_norm": 1.7304942607879639, |
| "learning_rate": 1.1149867427451788e-06, |
| "loss": 0.47353377342224123, |
| "step": 8650 |
| }, |
| { |
| "epoch": 2.593590895477688, |
| "grad_norm": 1.6413494348526, |
| "learning_rate": 1.0990496513935467e-06, |
| "loss": 0.49196691513061525, |
| "step": 8660 |
| }, |
| { |
| "epoch": 2.5965858041329737, |
| "grad_norm": 1.4566773176193237, |
| "learning_rate": 1.08322065469753e-06, |
| "loss": 0.5039726257324219, |
| "step": 8670 |
| }, |
| { |
| "epoch": 2.59958071278826, |
| "grad_norm": 1.6829912662506104, |
| "learning_rate": 1.0674999448871547e-06, |
| "loss": 0.4889340877532959, |
| "step": 8680 |
| }, |
| { |
| "epoch": 2.602575621443546, |
| "grad_norm": 1.6049522161483765, |
| "learning_rate": 1.0518877128773986e-06, |
| "loss": 0.5027350425720215, |
| "step": 8690 |
| }, |
| { |
| "epoch": 2.605570530098832, |
| "grad_norm": 1.6159955263137817, |
| "learning_rate": 1.036384148265861e-06, |
| "loss": 0.493180513381958, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.608565438754118, |
| "grad_norm": 1.5600866079330444, |
| "learning_rate": 1.020989439330471e-06, |
| "loss": 0.48960447311401367, |
| "step": 8710 |
| }, |
| { |
| "epoch": 2.6115603474094042, |
| "grad_norm": 1.615113377571106, |
| "learning_rate": 1.0057037730271912e-06, |
| "loss": 0.4720893859863281, |
| "step": 8720 |
| }, |
| { |
| "epoch": 2.61455525606469, |
| "grad_norm": 1.373274803161621, |
| "learning_rate": 9.905273349877574e-07, |
| "loss": 0.4918712615966797, |
| "step": 8730 |
| }, |
| { |
| "epoch": 2.617550164719976, |
| "grad_norm": 1.7302544116973877, |
| "learning_rate": 9.754603095174132e-07, |
| "loss": 0.5329276084899902, |
| "step": 8740 |
| }, |
| { |
| "epoch": 2.620545073375262, |
| "grad_norm": 1.5376338958740234, |
| "learning_rate": 9.605028795926807e-07, |
| "loss": 0.4895726203918457, |
| "step": 8750 |
| }, |
| { |
| "epoch": 2.623539982030548, |
| "grad_norm": 1.4624214172363281, |
| "learning_rate": 9.456552268591312e-07, |
| "loss": 0.4909614086151123, |
| "step": 8760 |
| }, |
| { |
| "epoch": 2.626534890685834, |
| "grad_norm": 1.4393444061279297, |
| "learning_rate": 9.309175316291919e-07, |
| "loss": 0.4822981834411621, |
| "step": 8770 |
| }, |
| { |
| "epoch": 2.62952979934112, |
| "grad_norm": 1.6177499294281006, |
| "learning_rate": 9.162899728799346e-07, |
| "loss": 0.49910993576049806, |
| "step": 8780 |
| }, |
| { |
| "epoch": 2.632524707996406, |
| "grad_norm": 1.9156888723373413, |
| "learning_rate": 9.01772728250927e-07, |
| "loss": 0.4932182788848877, |
| "step": 8790 |
| }, |
| { |
| "epoch": 2.635519616651692, |
| "grad_norm": 1.5032786130905151, |
| "learning_rate": 8.873659740420549e-07, |
| "loss": 0.467896556854248, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.638514525306978, |
| "grad_norm": 1.5643095970153809, |
| "learning_rate": 8.73069885211395e-07, |
| "loss": 0.5180371284484864, |
| "step": 8810 |
| }, |
| { |
| "epoch": 2.641509433962264, |
| "grad_norm": 1.4355653524398804, |
| "learning_rate": 8.588846353730806e-07, |
| "loss": 0.49057998657226565, |
| "step": 8820 |
| }, |
| { |
| "epoch": 2.6445043426175503, |
| "grad_norm": 1.3352596759796143, |
| "learning_rate": 8.448103967952026e-07, |
| "loss": 0.5041935920715332, |
| "step": 8830 |
| }, |
| { |
| "epoch": 2.647499251272836, |
| "grad_norm": 1.458169937133789, |
| "learning_rate": 8.308473403977057e-07, |
| "loss": 0.5041525840759278, |
| "step": 8840 |
| }, |
| { |
| "epoch": 2.6504941599281224, |
| "grad_norm": 1.5872334241867065, |
| "learning_rate": 8.169956357503262e-07, |
| "loss": 0.5054915428161622, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.6534890685834083, |
| "grad_norm": 1.5991028547286987, |
| "learning_rate": 8.03255451070517e-07, |
| "loss": 0.49498138427734373, |
| "step": 8860 |
| }, |
| { |
| "epoch": 2.656483977238694, |
| "grad_norm": 1.606046199798584, |
| "learning_rate": 7.896269532214262e-07, |
| "loss": 0.4944211483001709, |
| "step": 8870 |
| }, |
| { |
| "epoch": 2.6594788858939804, |
| "grad_norm": 1.575856328010559, |
| "learning_rate": 7.761103077098431e-07, |
| "loss": 0.46777868270874023, |
| "step": 8880 |
| }, |
| { |
| "epoch": 2.6624737945492662, |
| "grad_norm": 1.7411667108535767, |
| "learning_rate": 7.627056786842169e-07, |
| "loss": 0.48057379722595217, |
| "step": 8890 |
| }, |
| { |
| "epoch": 2.665468703204552, |
| "grad_norm": 1.583531141281128, |
| "learning_rate": 7.494132289326395e-07, |
| "loss": 0.48141913414001464, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.6684636118598384, |
| "grad_norm": 1.4123398065567017, |
| "learning_rate": 7.362331198808837e-07, |
| "loss": 0.49846878051757815, |
| "step": 8910 |
| }, |
| { |
| "epoch": 2.6714585205151242, |
| "grad_norm": 1.5866191387176514, |
| "learning_rate": 7.23165511590439e-07, |
| "loss": 0.4861551284790039, |
| "step": 8920 |
| }, |
| { |
| "epoch": 2.67445342917041, |
| "grad_norm": 1.664759635925293, |
| "learning_rate": 7.102105627565603e-07, |
| "loss": 0.48258323669433595, |
| "step": 8930 |
| }, |
| { |
| "epoch": 2.6774483378256964, |
| "grad_norm": 1.4542200565338135, |
| "learning_rate": 6.973684307063533e-07, |
| "loss": 0.505281639099121, |
| "step": 8940 |
| }, |
| { |
| "epoch": 2.680443246480982, |
| "grad_norm": 1.619585394859314, |
| "learning_rate": 6.846392713968519e-07, |
| "loss": 0.474824333190918, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.6834381551362685, |
| "grad_norm": 1.551127552986145, |
| "learning_rate": 6.720232394131365e-07, |
| "loss": 0.49350528717041015, |
| "step": 8960 |
| }, |
| { |
| "epoch": 2.6864330637915543, |
| "grad_norm": 1.3840203285217285, |
| "learning_rate": 6.59520487966443e-07, |
| "loss": 0.47249841690063477, |
| "step": 8970 |
| }, |
| { |
| "epoch": 2.6894279724468406, |
| "grad_norm": 1.509779453277588, |
| "learning_rate": 6.471311688923143e-07, |
| "loss": 0.45883750915527344, |
| "step": 8980 |
| }, |
| { |
| "epoch": 2.6924228811021265, |
| "grad_norm": 1.4412727355957031, |
| "learning_rate": 6.348554326487477e-07, |
| "loss": 0.4847591400146484, |
| "step": 8990 |
| }, |
| { |
| "epoch": 2.6954177897574123, |
| "grad_norm": 1.7219630479812622, |
| "learning_rate": 6.226934283143759e-07, |
| "loss": 0.468625545501709, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.6984126984126986, |
| "grad_norm": 1.4819846153259277, |
| "learning_rate": 6.106453035866467e-07, |
| "loss": 0.4852116584777832, |
| "step": 9010 |
| }, |
| { |
| "epoch": 2.7014076070679844, |
| "grad_norm": 1.6448779106140137, |
| "learning_rate": 5.987112047800381e-07, |
| "loss": 0.5112523078918457, |
| "step": 9020 |
| }, |
| { |
| "epoch": 2.7044025157232703, |
| "grad_norm": 1.6351248025894165, |
| "learning_rate": 5.868912768242741e-07, |
| "loss": 0.48743634223937987, |
| "step": 9030 |
| }, |
| { |
| "epoch": 2.7073974243785566, |
| "grad_norm": 1.5035450458526611, |
| "learning_rate": 5.751856632625752e-07, |
| "loss": 0.4918667793273926, |
| "step": 9040 |
| }, |
| { |
| "epoch": 2.7103923330338424, |
| "grad_norm": 1.6064460277557373, |
| "learning_rate": 5.635945062499004e-07, |
| "loss": 0.4841439247131348, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.7133872416891283, |
| "grad_norm": 1.6321065425872803, |
| "learning_rate": 5.521179465512349e-07, |
| "loss": 0.4781044006347656, |
| "step": 9060 |
| }, |
| { |
| "epoch": 2.7163821503444145, |
| "grad_norm": 1.3715041875839233, |
| "learning_rate": 5.407561235398717e-07, |
| "loss": 0.4643882751464844, |
| "step": 9070 |
| }, |
| { |
| "epoch": 2.7193770589997004, |
| "grad_norm": 1.6115351915359497, |
| "learning_rate": 5.295091751957249e-07, |
| "loss": 0.4926904678344727, |
| "step": 9080 |
| }, |
| { |
| "epoch": 2.7223719676549867, |
| "grad_norm": 1.6327753067016602, |
| "learning_rate": 5.183772381036456e-07, |
| "loss": 0.4796736717224121, |
| "step": 9090 |
| }, |
| { |
| "epoch": 2.7253668763102725, |
| "grad_norm": 1.459547519683838, |
| "learning_rate": 5.073604474517757e-07, |
| "loss": 0.4696746826171875, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.728361784965559, |
| "grad_norm": 1.628287672996521, |
| "learning_rate": 4.964589370298911e-07, |
| "loss": 0.5150102138519287, |
| "step": 9110 |
| }, |
| { |
| "epoch": 2.7313566936208447, |
| "grad_norm": 1.540307641029358, |
| "learning_rate": 4.856728392277943e-07, |
| "loss": 0.4660166263580322, |
| "step": 9120 |
| }, |
| { |
| "epoch": 2.7343516022761305, |
| "grad_norm": 1.4227973222732544, |
| "learning_rate": 4.7500228503368775e-07, |
| "loss": 0.4942507266998291, |
| "step": 9130 |
| }, |
| { |
| "epoch": 2.737346510931417, |
| "grad_norm": 1.383530855178833, |
| "learning_rate": 4.644474040325986e-07, |
| "loss": 0.47557845115661623, |
| "step": 9140 |
| }, |
| { |
| "epoch": 2.7403414195867026, |
| "grad_norm": 1.4156097173690796, |
| "learning_rate": 4.5400832440480105e-07, |
| "loss": 0.48183841705322267, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.7433363282419885, |
| "grad_norm": 1.4534227848052979, |
| "learning_rate": 4.4368517292425083e-07, |
| "loss": 0.49446706771850585, |
| "step": 9160 |
| }, |
| { |
| "epoch": 2.7463312368972748, |
| "grad_norm": 1.9273598194122314, |
| "learning_rate": 4.3347807495705775e-07, |
| "loss": 0.49333763122558594, |
| "step": 9170 |
| }, |
| { |
| "epoch": 2.7493261455525606, |
| "grad_norm": 1.5973845720291138, |
| "learning_rate": 4.233871544599544e-07, |
| "loss": 0.494874095916748, |
| "step": 9180 |
| }, |
| { |
| "epoch": 2.7523210542078465, |
| "grad_norm": 1.454573154449463, |
| "learning_rate": 4.1341253397879863e-07, |
| "loss": 0.49959392547607423, |
| "step": 9190 |
| }, |
| { |
| "epoch": 2.7553159628631327, |
| "grad_norm": 1.6529881954193115, |
| "learning_rate": 4.0355433464707714e-07, |
| "loss": 0.521381425857544, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.7583108715184186, |
| "grad_norm": 1.604905605316162, |
| "learning_rate": 3.9381267618444187e-07, |
| "loss": 0.47714853286743164, |
| "step": 9210 |
| }, |
| { |
| "epoch": 2.7613057801737044, |
| "grad_norm": 1.4911741018295288, |
| "learning_rate": 3.8418767689524907e-07, |
| "loss": 0.4643728256225586, |
| "step": 9220 |
| }, |
| { |
| "epoch": 2.7643006888289907, |
| "grad_norm": 1.6951920986175537, |
| "learning_rate": 3.7467945366712833e-07, |
| "loss": 0.49946084022521975, |
| "step": 9230 |
| }, |
| { |
| "epoch": 2.767295597484277, |
| "grad_norm": 1.7115346193313599, |
| "learning_rate": 3.652881219695603e-07, |
| "loss": 0.5134757041931153, |
| "step": 9240 |
| }, |
| { |
| "epoch": 2.770290506139563, |
| "grad_norm": 1.7767481803894043, |
| "learning_rate": 3.5601379585247786e-07, |
| "loss": 0.47066478729248046, |
| "step": 9250 |
| }, |
| { |
| "epoch": 2.7732854147948487, |
| "grad_norm": 1.2878029346466064, |
| "learning_rate": 3.4685658794487153e-07, |
| "loss": 0.5043159484863281, |
| "step": 9260 |
| }, |
| { |
| "epoch": 2.776280323450135, |
| "grad_norm": 1.5294309854507446, |
| "learning_rate": 3.378166094534352e-07, |
| "loss": 0.4968732357025146, |
| "step": 9270 |
| }, |
| { |
| "epoch": 2.779275232105421, |
| "grad_norm": 1.6949282884597778, |
| "learning_rate": 3.2889397016120263e-07, |
| "loss": 0.47728781700134276, |
| "step": 9280 |
| }, |
| { |
| "epoch": 2.7822701407607067, |
| "grad_norm": 1.5146561861038208, |
| "learning_rate": 3.2008877842622853e-07, |
| "loss": 0.4853658676147461, |
| "step": 9290 |
| }, |
| { |
| "epoch": 2.785265049415993, |
| "grad_norm": 1.520474910736084, |
| "learning_rate": 3.1140114118025423e-07, |
| "loss": 0.49115581512451173, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.788259958071279, |
| "grad_norm": 1.590360164642334, |
| "learning_rate": 3.028311639274295e-07, |
| "loss": 0.5119152545928956, |
| "step": 9310 |
| }, |
| { |
| "epoch": 2.7912548667265646, |
| "grad_norm": 1.6626989841461182, |
| "learning_rate": 2.943789507430128e-07, |
| "loss": 0.481568431854248, |
| "step": 9320 |
| }, |
| { |
| "epoch": 2.794249775381851, |
| "grad_norm": 1.5911420583724976, |
| "learning_rate": 2.86044604272121e-07, |
| "loss": 0.47126045227050783, |
| "step": 9330 |
| }, |
| { |
| "epoch": 2.7972446840371368, |
| "grad_norm": 1.529649257659912, |
| "learning_rate": 2.7782822572847477e-07, |
| "loss": 0.4906682014465332, |
| "step": 9340 |
| }, |
| { |
| "epoch": 2.8002395926924226, |
| "grad_norm": 1.548509120941162, |
| "learning_rate": 2.6972991489317536e-07, |
| "loss": 0.4783353328704834, |
| "step": 9350 |
| }, |
| { |
| "epoch": 2.803234501347709, |
| "grad_norm": 1.6628776788711548, |
| "learning_rate": 2.6174977011348525e-07, |
| "loss": 0.4788211345672607, |
| "step": 9360 |
| }, |
| { |
| "epoch": 2.8062294100029948, |
| "grad_norm": 1.4077582359313965, |
| "learning_rate": 2.538878883016416e-07, |
| "loss": 0.5038501739501953, |
| "step": 9370 |
| }, |
| { |
| "epoch": 2.809224318658281, |
| "grad_norm": 1.5272634029388428, |
| "learning_rate": 2.461443649336748e-07, |
| "loss": 0.5057891845703125, |
| "step": 9380 |
| }, |
| { |
| "epoch": 2.812219227313567, |
| "grad_norm": 1.5372052192687988, |
| "learning_rate": 2.3851929404825057e-07, |
| "loss": 0.5056065559387207, |
| "step": 9390 |
| }, |
| { |
| "epoch": 2.815214135968853, |
| "grad_norm": 1.774721622467041, |
| "learning_rate": 2.3101276824552543e-07, |
| "loss": 0.5029263019561767, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.818209044624139, |
| "grad_norm": 1.470628261566162, |
| "learning_rate": 2.2362487868602956e-07, |
| "loss": 0.49965524673461914, |
| "step": 9410 |
| }, |
| { |
| "epoch": 2.821203953279425, |
| "grad_norm": 1.3716038465499878, |
| "learning_rate": 2.1635571508954677e-07, |
| "loss": 0.4652759552001953, |
| "step": 9420 |
| }, |
| { |
| "epoch": 2.824198861934711, |
| "grad_norm": 1.5807504653930664, |
| "learning_rate": 2.092053657340398e-07, |
| "loss": 0.49729557037353517, |
| "step": 9430 |
| }, |
| { |
| "epoch": 2.827193770589997, |
| "grad_norm": 1.7162376642227173, |
| "learning_rate": 2.0217391745456673e-07, |
| "loss": 0.49972996711730955, |
| "step": 9440 |
| }, |
| { |
| "epoch": 2.830188679245283, |
| "grad_norm": 1.643149971961975, |
| "learning_rate": 1.9526145564223166e-07, |
| "loss": 0.5318907737731934, |
| "step": 9450 |
| }, |
| { |
| "epoch": 2.833183587900569, |
| "grad_norm": 1.4969817399978638, |
| "learning_rate": 1.884680642431469e-07, |
| "loss": 0.4856600761413574, |
| "step": 9460 |
| }, |
| { |
| "epoch": 2.836178496555855, |
| "grad_norm": 1.4547873735427856, |
| "learning_rate": 1.8179382575741588e-07, |
| "loss": 0.49111547470092776, |
| "step": 9470 |
| }, |
| { |
| "epoch": 2.839173405211141, |
| "grad_norm": 1.4986448287963867, |
| "learning_rate": 1.7523882123812286e-07, |
| "loss": 0.4579866886138916, |
| "step": 9480 |
| }, |
| { |
| "epoch": 2.842168313866427, |
| "grad_norm": 1.484006643295288, |
| "learning_rate": 1.6880313029036033e-07, |
| "loss": 0.48090057373046874, |
| "step": 9490 |
| }, |
| { |
| "epoch": 2.845163222521713, |
| "grad_norm": 1.4930380582809448, |
| "learning_rate": 1.624868310702543e-07, |
| "loss": 0.48984603881835936, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.8481581311769992, |
| "grad_norm": 1.7047784328460693, |
| "learning_rate": 1.562900002840162e-07, |
| "loss": 0.5113039016723633, |
| "step": 9510 |
| }, |
| { |
| "epoch": 2.851153039832285, |
| "grad_norm": 1.4931029081344604, |
| "learning_rate": 1.502127131870146e-07, |
| "loss": 0.47187018394470215, |
| "step": 9520 |
| }, |
| { |
| "epoch": 2.8541479484875714, |
| "grad_norm": 1.479155421257019, |
| "learning_rate": 1.4425504358285712e-07, |
| "loss": 0.5145605087280274, |
| "step": 9530 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 1.5331019163131714, |
| "learning_rate": 1.3841706382249798e-07, |
| "loss": 0.5134733200073243, |
| "step": 9540 |
| }, |
| { |
| "epoch": 2.860137765798143, |
| "grad_norm": 1.461573839187622, |
| "learning_rate": 1.3269884480335726e-07, |
| "loss": 0.5031049728393555, |
| "step": 9550 |
| }, |
| { |
| "epoch": 2.8631326744534293, |
| "grad_norm": 1.4485571384429932, |
| "learning_rate": 1.2710045596845854e-07, |
| "loss": 0.5210261344909668, |
| "step": 9560 |
| }, |
| { |
| "epoch": 2.866127583108715, |
| "grad_norm": 1.7255455255508423, |
| "learning_rate": 1.2162196530558835e-07, |
| "loss": 0.47707977294921877, |
| "step": 9570 |
| }, |
| { |
| "epoch": 2.869122491764001, |
| "grad_norm": 1.5349946022033691, |
| "learning_rate": 1.1626343934647122e-07, |
| "loss": 0.4857036590576172, |
| "step": 9580 |
| }, |
| { |
| "epoch": 2.8721174004192873, |
| "grad_norm": 1.5755033493041992, |
| "learning_rate": 1.1102494316595602e-07, |
| "loss": 0.5038958549499511, |
| "step": 9590 |
| }, |
| { |
| "epoch": 2.875112309074573, |
| "grad_norm": 1.4248707294464111, |
| "learning_rate": 1.0590654038123315e-07, |
| "loss": 0.49042625427246095, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.878107217729859, |
| "grad_norm": 1.3369414806365967, |
| "learning_rate": 1.0090829315105632e-07, |
| "loss": 0.46802678108215334, |
| "step": 9610 |
| }, |
| { |
| "epoch": 2.8811021263851453, |
| "grad_norm": 1.5832442045211792, |
| "learning_rate": 9.603026217499201e-08, |
| "loss": 0.5051434516906739, |
| "step": 9620 |
| }, |
| { |
| "epoch": 2.884097035040431, |
| "grad_norm": 1.734362244606018, |
| "learning_rate": 9.127250669267563e-08, |
| "loss": 0.513918685913086, |
| "step": 9630 |
| }, |
| { |
| "epoch": 2.8870919436957174, |
| "grad_norm": 1.5473873615264893, |
| "learning_rate": 8.663508448310099e-08, |
| "loss": 0.49352059364318845, |
| "step": 9640 |
| }, |
| { |
| "epoch": 2.8900868523510033, |
| "grad_norm": 1.4340280294418335, |
| "learning_rate": 8.211805186391309e-08, |
| "loss": 0.5073667526245117, |
| "step": 9650 |
| }, |
| { |
| "epoch": 2.8930817610062896, |
| "grad_norm": 1.456013560295105, |
| "learning_rate": 7.772146369072309e-08, |
| "loss": 0.5166115283966064, |
| "step": 9660 |
| }, |
| { |
| "epoch": 2.8960766696615754, |
| "grad_norm": 1.5547895431518555, |
| "learning_rate": 7.344537335644664e-08, |
| "loss": 0.4813490867614746, |
| "step": 9670 |
| }, |
| { |
| "epoch": 2.8990715783168612, |
| "grad_norm": 1.4897866249084473, |
| "learning_rate": 6.928983279065326e-08, |
| "loss": 0.4835244655609131, |
| "step": 9680 |
| }, |
| { |
| "epoch": 2.9020664869721475, |
| "grad_norm": 1.8342303037643433, |
| "learning_rate": 6.525489245893357e-08, |
| "loss": 0.5087207794189453, |
| "step": 9690 |
| }, |
| { |
| "epoch": 2.9050613956274334, |
| "grad_norm": 1.3465903997421265, |
| "learning_rate": 6.134060136228969e-08, |
| "loss": 0.4804349899291992, |
| "step": 9700 |
| }, |
| { |
| "epoch": 2.908056304282719, |
| "grad_norm": 1.667733073234558, |
| "learning_rate": 5.7547007036539146e-08, |
| "loss": 0.5204250335693359, |
| "step": 9710 |
| }, |
| { |
| "epoch": 2.9110512129380055, |
| "grad_norm": 1.7703502178192139, |
| "learning_rate": 5.3874155551735255e-08, |
| "loss": 0.5016478538513184, |
| "step": 9720 |
| }, |
| { |
| "epoch": 2.9140461215932913, |
| "grad_norm": 1.7993836402893066, |
| "learning_rate": 5.0322091511615376e-08, |
| "loss": 0.49267988204956054, |
| "step": 9730 |
| }, |
| { |
| "epoch": 2.917041030248577, |
| "grad_norm": 1.4773681163787842, |
| "learning_rate": 4.689085805304472e-08, |
| "loss": 0.4893134117126465, |
| "step": 9740 |
| }, |
| { |
| "epoch": 2.9200359389038635, |
| "grad_norm": 1.754645586013794, |
| "learning_rate": 4.3580496845510025e-08, |
| "loss": 0.49738759994506837, |
| "step": 9750 |
| }, |
| { |
| "epoch": 2.9230308475591493, |
| "grad_norm": 1.4270977973937988, |
| "learning_rate": 4.039104809060002e-08, |
| "loss": 0.4880670070648193, |
| "step": 9760 |
| }, |
| { |
| "epoch": 2.9260257562144356, |
| "grad_norm": 1.5123835802078247, |
| "learning_rate": 3.732255052152245e-08, |
| "loss": 0.5063368797302246, |
| "step": 9770 |
| }, |
| { |
| "epoch": 2.9290206648697215, |
| "grad_norm": 1.5368832349777222, |
| "learning_rate": 3.437504140263337e-08, |
| "loss": 0.5073044776916504, |
| "step": 9780 |
| }, |
| { |
| "epoch": 2.9320155735250077, |
| "grad_norm": 1.5552453994750977, |
| "learning_rate": 3.154855652898636e-08, |
| "loss": 0.5064907073974609, |
| "step": 9790 |
| }, |
| { |
| "epoch": 2.9350104821802936, |
| "grad_norm": 1.4740110635757446, |
| "learning_rate": 2.884313022589513e-08, |
| "loss": 0.47931804656982424, |
| "step": 9800 |
| }, |
| { |
| "epoch": 2.9380053908355794, |
| "grad_norm": 1.6172736883163452, |
| "learning_rate": 2.6258795348516052e-08, |
| "loss": 0.5590275764465332, |
| "step": 9810 |
| }, |
| { |
| "epoch": 2.9410002994908657, |
| "grad_norm": 1.8231149911880493, |
| "learning_rate": 2.3795583281450708e-08, |
| "loss": 0.49012956619262693, |
| "step": 9820 |
| }, |
| { |
| "epoch": 2.9439952081461516, |
| "grad_norm": 1.6394718885421753, |
| "learning_rate": 2.1453523938367304e-08, |
| "loss": 0.4913629531860352, |
| "step": 9830 |
| }, |
| { |
| "epoch": 2.9469901168014374, |
| "grad_norm": 1.902896761894226, |
| "learning_rate": 1.9232645761633196e-08, |
| "loss": 0.4783756256103516, |
| "step": 9840 |
| }, |
| { |
| "epoch": 2.9499850254567237, |
| "grad_norm": 1.6252793073654175, |
| "learning_rate": 1.713297572196848e-08, |
| "loss": 0.4900232791900635, |
| "step": 9850 |
| }, |
| { |
| "epoch": 2.9529799341120095, |
| "grad_norm": 1.6165684461593628, |
| "learning_rate": 1.515453931812627e-08, |
| "loss": 0.4839812755584717, |
| "step": 9860 |
| }, |
| { |
| "epoch": 2.9559748427672954, |
| "grad_norm": 1.573724389076233, |
| "learning_rate": 1.3297360576572937e-08, |
| "loss": 0.489408016204834, |
| "step": 9870 |
| }, |
| { |
| "epoch": 2.9589697514225817, |
| "grad_norm": 1.5624017715454102, |
| "learning_rate": 1.1561462051203898e-08, |
| "loss": 0.528309440612793, |
| "step": 9880 |
| }, |
| { |
| "epoch": 2.9619646600778675, |
| "grad_norm": 1.8057657480239868, |
| "learning_rate": 9.94686482306606e-09, |
| "loss": 0.492403507232666, |
| "step": 9890 |
| }, |
| { |
| "epoch": 2.964959568733154, |
| "grad_norm": 1.463015079498291, |
| "learning_rate": 8.453588500103582e-09, |
| "loss": 0.48632245063781737, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.9679544773884396, |
| "grad_norm": 1.603826880455017, |
| "learning_rate": 7.081651216916952e-09, |
| "loss": 0.4464372158050537, |
| "step": 9910 |
| }, |
| { |
| "epoch": 2.970949386043726, |
| "grad_norm": 2.058971405029297, |
| "learning_rate": 5.831069634546494e-09, |
| "loss": 0.4887089729309082, |
| "step": 9920 |
| }, |
| { |
| "epoch": 2.973944294699012, |
| "grad_norm": 1.491803526878357, |
| "learning_rate": 4.7018589402692e-09, |
| "loss": 0.4791905879974365, |
| "step": 9930 |
| }, |
| { |
| "epoch": 2.9769392033542976, |
| "grad_norm": 1.6646156311035156, |
| "learning_rate": 3.6940328474088795e-09, |
| "loss": 0.48751044273376465, |
| "step": 9940 |
| }, |
| { |
| "epoch": 2.979934112009584, |
| "grad_norm": 1.6473972797393799, |
| "learning_rate": 2.80760359517962e-09, |
| "loss": 0.4723640441894531, |
| "step": 9950 |
| }, |
| { |
| "epoch": 2.9829290206648698, |
| "grad_norm": 1.679745078086853, |
| "learning_rate": 2.042581948528133e-09, |
| "loss": 0.4800722122192383, |
| "step": 9960 |
| }, |
| { |
| "epoch": 2.9859239293201556, |
| "grad_norm": 1.5107241868972778, |
| "learning_rate": 1.3989771980083e-09, |
| "loss": 0.49669723510742186, |
| "step": 9970 |
| }, |
| { |
| "epoch": 2.988918837975442, |
| "grad_norm": 1.6462763547897339, |
| "learning_rate": 8.767971596634895e-10, |
| "loss": 0.5274055480957032, |
| "step": 9980 |
| }, |
| { |
| "epoch": 2.9919137466307277, |
| "grad_norm": 1.6557178497314453, |
| "learning_rate": 4.760481749399581e-10, |
| "loss": 0.5069705963134765, |
| "step": 9990 |
| }, |
| { |
| "epoch": 2.9949086552860136, |
| "grad_norm": 1.4907699823379517, |
| "learning_rate": 1.967351105991444e-10, |
| "loss": 0.47740840911865234, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.9979035639413, |
| "grad_norm": 1.614367961883545, |
| "learning_rate": 3.8861358667707794e-11, |
| "loss": 0.48827524185180665, |
| "step": 10010 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 10017, |
| "total_flos": 6.540774321348936e+17, |
| "train_loss": 0.7543762712715699, |
| "train_runtime": 30332.4713, |
| "train_samples_per_second": 5.284, |
| "train_steps_per_second": 0.33 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10017, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.540774321348936e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|