GUI-Actor-2B-Qwen2-VL / trainer_state.json
qianhuiwu's picture
Upload model weights.
42bcf36 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 19614,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005098399102681758,
"grad_norm": 215.18466064925408,
"learning_rate": 8.488964346349746e-08,
"loss": 6.8111,
"step": 10
},
{
"epoch": 0.0010196798205363517,
"grad_norm": 219.88111047519627,
"learning_rate": 1.6977928692699493e-07,
"loss": 7.1816,
"step": 20
},
{
"epoch": 0.0015295197308045274,
"grad_norm": 170.13457977621968,
"learning_rate": 2.546689303904924e-07,
"loss": 6.7293,
"step": 30
},
{
"epoch": 0.0020393596410727033,
"grad_norm": 131.10367853446866,
"learning_rate": 3.3955857385398986e-07,
"loss": 6.3436,
"step": 40
},
{
"epoch": 0.002549199551340879,
"grad_norm": 108.7650629050789,
"learning_rate": 4.244482173174873e-07,
"loss": 5.2497,
"step": 50
},
{
"epoch": 0.0030590394616090547,
"grad_norm": 69.67714338597908,
"learning_rate": 5.093378607809848e-07,
"loss": 4.1472,
"step": 60
},
{
"epoch": 0.0035688793718772305,
"grad_norm": 44.60481926442496,
"learning_rate": 5.942275042444822e-07,
"loss": 2.9594,
"step": 70
},
{
"epoch": 0.004078719282145407,
"grad_norm": 28.846738070749627,
"learning_rate": 6.791171477079797e-07,
"loss": 2.2558,
"step": 80
},
{
"epoch": 0.004588559192413582,
"grad_norm": 36.28883225607615,
"learning_rate": 7.640067911714771e-07,
"loss": 1.4451,
"step": 90
},
{
"epoch": 0.005098399102681758,
"grad_norm": 36.81218950235616,
"learning_rate": 8.488964346349746e-07,
"loss": 1.1103,
"step": 100
},
{
"epoch": 0.005608239012949934,
"grad_norm": 28.829109675052642,
"learning_rate": 9.337860780984721e-07,
"loss": 1.0771,
"step": 110
},
{
"epoch": 0.0061180789232181095,
"grad_norm": 24.232000087226275,
"learning_rate": 1.0186757215619695e-06,
"loss": 1.1438,
"step": 120
},
{
"epoch": 0.006627918833486285,
"grad_norm": 36.35815387327349,
"learning_rate": 1.103565365025467e-06,
"loss": 1.077,
"step": 130
},
{
"epoch": 0.007137758743754461,
"grad_norm": 15.316337219949325,
"learning_rate": 1.1884550084889644e-06,
"loss": 1.0881,
"step": 140
},
{
"epoch": 0.007647598654022637,
"grad_norm": 22.571122277337075,
"learning_rate": 1.273344651952462e-06,
"loss": 0.9984,
"step": 150
},
{
"epoch": 0.008157438564290813,
"grad_norm": 38.62851842039302,
"learning_rate": 1.3582342954159594e-06,
"loss": 0.9902,
"step": 160
},
{
"epoch": 0.008667278474558988,
"grad_norm": 23.208290443273548,
"learning_rate": 1.4431239388794567e-06,
"loss": 1.0136,
"step": 170
},
{
"epoch": 0.009177118384827165,
"grad_norm": 18.837649784378357,
"learning_rate": 1.5280135823429543e-06,
"loss": 1.0705,
"step": 180
},
{
"epoch": 0.00968695829509534,
"grad_norm": 19.274656406628434,
"learning_rate": 1.6129032258064516e-06,
"loss": 0.9721,
"step": 190
},
{
"epoch": 0.010196798205363516,
"grad_norm": 27.512664662386538,
"learning_rate": 1.6977928692699491e-06,
"loss": 1.1001,
"step": 200
},
{
"epoch": 0.010706638115631691,
"grad_norm": 16.798198513182278,
"learning_rate": 1.7826825127334467e-06,
"loss": 0.9913,
"step": 210
},
{
"epoch": 0.011216478025899868,
"grad_norm": 20.535315077672745,
"learning_rate": 1.8675721561969442e-06,
"loss": 1.0835,
"step": 220
},
{
"epoch": 0.011726317936168042,
"grad_norm": 18.27476725664,
"learning_rate": 1.9524617996604417e-06,
"loss": 1.0584,
"step": 230
},
{
"epoch": 0.012236157846436219,
"grad_norm": 18.264102344206144,
"learning_rate": 2.037351443123939e-06,
"loss": 0.9261,
"step": 240
},
{
"epoch": 0.012745997756704396,
"grad_norm": 16.52982348890768,
"learning_rate": 2.1222410865874364e-06,
"loss": 0.9504,
"step": 250
},
{
"epoch": 0.01325583766697257,
"grad_norm": 10.199188026139336,
"learning_rate": 2.207130730050934e-06,
"loss": 1.0036,
"step": 260
},
{
"epoch": 0.013765677577240747,
"grad_norm": 18.443328298579846,
"learning_rate": 2.2920203735144314e-06,
"loss": 1.0933,
"step": 270
},
{
"epoch": 0.014275517487508922,
"grad_norm": 17.757959214914596,
"learning_rate": 2.3769100169779287e-06,
"loss": 0.9943,
"step": 280
},
{
"epoch": 0.014785357397777098,
"grad_norm": 28.072161703048337,
"learning_rate": 2.4617996604414265e-06,
"loss": 0.9534,
"step": 290
},
{
"epoch": 0.015295197308045273,
"grad_norm": 20.097642372484483,
"learning_rate": 2.546689303904924e-06,
"loss": 0.9326,
"step": 300
},
{
"epoch": 0.015805037218313448,
"grad_norm": 10.341561770081771,
"learning_rate": 2.631578947368421e-06,
"loss": 1.005,
"step": 310
},
{
"epoch": 0.016314877128581626,
"grad_norm": 10.162378026648376,
"learning_rate": 2.716468590831919e-06,
"loss": 0.9727,
"step": 320
},
{
"epoch": 0.0168247170388498,
"grad_norm": 12.176296641756432,
"learning_rate": 2.801358234295416e-06,
"loss": 1.0249,
"step": 330
},
{
"epoch": 0.017334556949117976,
"grad_norm": 12.238005154376538,
"learning_rate": 2.8862478777589135e-06,
"loss": 0.9513,
"step": 340
},
{
"epoch": 0.017844396859386154,
"grad_norm": 17.377022873874797,
"learning_rate": 2.971137521222411e-06,
"loss": 0.9618,
"step": 350
},
{
"epoch": 0.01835423676965433,
"grad_norm": 15.971105421534515,
"learning_rate": 3.0560271646859086e-06,
"loss": 0.9649,
"step": 360
},
{
"epoch": 0.018864076679922504,
"grad_norm": 8.956526742076267,
"learning_rate": 3.1409168081494063e-06,
"loss": 0.9891,
"step": 370
},
{
"epoch": 0.01937391659019068,
"grad_norm": 12.939695708500839,
"learning_rate": 3.225806451612903e-06,
"loss": 0.9415,
"step": 380
},
{
"epoch": 0.019883756500458857,
"grad_norm": 35.84310928796281,
"learning_rate": 3.310696095076401e-06,
"loss": 0.9341,
"step": 390
},
{
"epoch": 0.020393596410727032,
"grad_norm": 16.025463470269354,
"learning_rate": 3.3955857385398982e-06,
"loss": 1.0041,
"step": 400
},
{
"epoch": 0.020903436320995207,
"grad_norm": 10.755243534162783,
"learning_rate": 3.480475382003396e-06,
"loss": 0.9825,
"step": 410
},
{
"epoch": 0.021413276231263382,
"grad_norm": 12.821217264014344,
"learning_rate": 3.5653650254668933e-06,
"loss": 1.0095,
"step": 420
},
{
"epoch": 0.02192311614153156,
"grad_norm": 29.012011695616565,
"learning_rate": 3.6502546689303906e-06,
"loss": 0.9463,
"step": 430
},
{
"epoch": 0.022432956051799735,
"grad_norm": 9.173893886257291,
"learning_rate": 3.7351443123938884e-06,
"loss": 1.0037,
"step": 440
},
{
"epoch": 0.02294279596206791,
"grad_norm": 10.310359401533425,
"learning_rate": 3.820033955857386e-06,
"loss": 0.8711,
"step": 450
},
{
"epoch": 0.023452635872336085,
"grad_norm": 13.051897985681515,
"learning_rate": 3.9049235993208834e-06,
"loss": 0.9636,
"step": 460
},
{
"epoch": 0.023962475782604263,
"grad_norm": 13.140982779333044,
"learning_rate": 3.98981324278438e-06,
"loss": 0.8905,
"step": 470
},
{
"epoch": 0.024472315692872438,
"grad_norm": 8.351892707315637,
"learning_rate": 4.074702886247878e-06,
"loss": 0.8835,
"step": 480
},
{
"epoch": 0.024982155603140613,
"grad_norm": 11.150280570311768,
"learning_rate": 4.159592529711376e-06,
"loss": 0.9217,
"step": 490
},
{
"epoch": 0.02549199551340879,
"grad_norm": 11.494610320215013,
"learning_rate": 4.244482173174873e-06,
"loss": 0.884,
"step": 500
},
{
"epoch": 0.026001835423676966,
"grad_norm": 14.45497354174508,
"learning_rate": 4.3293718166383704e-06,
"loss": 0.9152,
"step": 510
},
{
"epoch": 0.02651167533394514,
"grad_norm": 10.482114761115332,
"learning_rate": 4.414261460101868e-06,
"loss": 0.9131,
"step": 520
},
{
"epoch": 0.027021515244213316,
"grad_norm": 13.407976380963598,
"learning_rate": 4.499151103565366e-06,
"loss": 0.9012,
"step": 530
},
{
"epoch": 0.027531355154481494,
"grad_norm": 8.997050503060855,
"learning_rate": 4.584040747028863e-06,
"loss": 0.9099,
"step": 540
},
{
"epoch": 0.02804119506474967,
"grad_norm": 14.624732754677236,
"learning_rate": 4.6689303904923606e-06,
"loss": 0.8908,
"step": 550
},
{
"epoch": 0.028551034975017844,
"grad_norm": 14.2317056757979,
"learning_rate": 4.7538200339558575e-06,
"loss": 0.8662,
"step": 560
},
{
"epoch": 0.02906087488528602,
"grad_norm": 11.594040833210547,
"learning_rate": 4.838709677419355e-06,
"loss": 0.8798,
"step": 570
},
{
"epoch": 0.029570714795554197,
"grad_norm": 12.083331858349247,
"learning_rate": 4.923599320882853e-06,
"loss": 0.8925,
"step": 580
},
{
"epoch": 0.03008055470582237,
"grad_norm": 14.834879746564301,
"learning_rate": 4.999999965915225e-06,
"loss": 0.9507,
"step": 590
},
{
"epoch": 0.030590394616090547,
"grad_norm": 13.567706048400137,
"learning_rate": 4.9999958757433976e-06,
"loss": 0.8153,
"step": 600
},
{
"epoch": 0.031100234526358725,
"grad_norm": 10.38915537278335,
"learning_rate": 4.999984968629425e-06,
"loss": 0.9365,
"step": 610
},
{
"epoch": 0.031610074436626896,
"grad_norm": 6.070697799113855,
"learning_rate": 4.999967244603053e-06,
"loss": 0.8939,
"step": 620
},
{
"epoch": 0.032119914346895075,
"grad_norm": 8.45904354399202,
"learning_rate": 4.999942703712609e-06,
"loss": 0.8757,
"step": 630
},
{
"epoch": 0.03262975425716325,
"grad_norm": 11.902106269954084,
"learning_rate": 4.99991134602501e-06,
"loss": 0.8616,
"step": 640
},
{
"epoch": 0.033139594167431424,
"grad_norm": 10.096727236748936,
"learning_rate": 4.999873171625763e-06,
"loss": 0.856,
"step": 650
},
{
"epoch": 0.0336494340776996,
"grad_norm": 7.27949207428146,
"learning_rate": 4.99982818061896e-06,
"loss": 0.7616,
"step": 660
},
{
"epoch": 0.03415927398796778,
"grad_norm": 6.8237635856680185,
"learning_rate": 4.999776373127283e-06,
"loss": 0.9223,
"step": 670
},
{
"epoch": 0.03466911389823595,
"grad_norm": 8.044300871702433,
"learning_rate": 4.999717749291998e-06,
"loss": 0.8135,
"step": 680
},
{
"epoch": 0.03517895380850413,
"grad_norm": 10.88283187557019,
"learning_rate": 4.999652309272962e-06,
"loss": 0.8679,
"step": 690
},
{
"epoch": 0.03568879371877231,
"grad_norm": 37.57610309835507,
"learning_rate": 4.9995800532486126e-06,
"loss": 0.8049,
"step": 700
},
{
"epoch": 0.03619863362904048,
"grad_norm": 10.774055925846897,
"learning_rate": 4.999500981415978e-06,
"loss": 0.8529,
"step": 710
},
{
"epoch": 0.03670847353930866,
"grad_norm": 9.152996851991478,
"learning_rate": 4.99941509399067e-06,
"loss": 0.86,
"step": 720
},
{
"epoch": 0.03721831344957683,
"grad_norm": 16.5543430131688,
"learning_rate": 4.999322391206884e-06,
"loss": 0.846,
"step": 730
},
{
"epoch": 0.03772815335984501,
"grad_norm": 13.830565944921807,
"learning_rate": 4.999222873317398e-06,
"loss": 0.8746,
"step": 740
},
{
"epoch": 0.03823799327011319,
"grad_norm": 7.60341761134231,
"learning_rate": 4.999116540593581e-06,
"loss": 0.8409,
"step": 750
},
{
"epoch": 0.03874783318038136,
"grad_norm": 8.539066466761206,
"learning_rate": 4.999003393325375e-06,
"loss": 0.8283,
"step": 760
},
{
"epoch": 0.039257673090649536,
"grad_norm": 9.24618160613829,
"learning_rate": 4.998883431821309e-06,
"loss": 0.921,
"step": 770
},
{
"epoch": 0.039767513000917715,
"grad_norm": 8.093853460574069,
"learning_rate": 4.998756656408491e-06,
"loss": 0.7619,
"step": 780
},
{
"epoch": 0.040277352911185886,
"grad_norm": 6.794395079197444,
"learning_rate": 4.998623067432612e-06,
"loss": 0.8814,
"step": 790
},
{
"epoch": 0.040787192821454064,
"grad_norm": 9.012058431172532,
"learning_rate": 4.9984826652579366e-06,
"loss": 0.9254,
"step": 800
},
{
"epoch": 0.041297032731722236,
"grad_norm": 12.321568909786127,
"learning_rate": 4.998335450267314e-06,
"loss": 0.7774,
"step": 810
},
{
"epoch": 0.041806872641990414,
"grad_norm": 8.950683897140426,
"learning_rate": 4.998181422862166e-06,
"loss": 0.8321,
"step": 820
},
{
"epoch": 0.04231671255225859,
"grad_norm": 6.951979933252045,
"learning_rate": 4.99802058346249e-06,
"loss": 0.8969,
"step": 830
},
{
"epoch": 0.042826552462526764,
"grad_norm": 9.865920504865308,
"learning_rate": 4.997852932506864e-06,
"loss": 0.8671,
"step": 840
},
{
"epoch": 0.04333639237279494,
"grad_norm": 7.296885757679494,
"learning_rate": 4.997678470452431e-06,
"loss": 0.8707,
"step": 850
},
{
"epoch": 0.04384623228306312,
"grad_norm": 8.665129816349026,
"learning_rate": 4.997497197774914e-06,
"loss": 0.855,
"step": 860
},
{
"epoch": 0.04435607219333129,
"grad_norm": 37.92068675254019,
"learning_rate": 4.997309114968603e-06,
"loss": 0.8382,
"step": 870
},
{
"epoch": 0.04486591210359947,
"grad_norm": 12.589230911443163,
"learning_rate": 4.9971142225463575e-06,
"loss": 0.8082,
"step": 880
},
{
"epoch": 0.04537575201386765,
"grad_norm": 6.886321997404546,
"learning_rate": 4.996912521039608e-06,
"loss": 0.8022,
"step": 890
},
{
"epoch": 0.04588559192413582,
"grad_norm": 7.170326651845555,
"learning_rate": 4.99670401099835e-06,
"loss": 0.7989,
"step": 900
},
{
"epoch": 0.046395431834404,
"grad_norm": 8.830287514854776,
"learning_rate": 4.996488692991145e-06,
"loss": 0.8226,
"step": 910
},
{
"epoch": 0.04690527174467217,
"grad_norm": 6.456877639525797,
"learning_rate": 4.996266567605117e-06,
"loss": 0.7846,
"step": 920
},
{
"epoch": 0.04741511165494035,
"grad_norm": 6.41348343065121,
"learning_rate": 4.996037635445955e-06,
"loss": 0.829,
"step": 930
},
{
"epoch": 0.047924951565208526,
"grad_norm": 17.926515402783725,
"learning_rate": 4.995801897137906e-06,
"loss": 0.8034,
"step": 940
},
{
"epoch": 0.0484347914754767,
"grad_norm": 8.840256664203807,
"learning_rate": 4.995559353323778e-06,
"loss": 0.7975,
"step": 950
},
{
"epoch": 0.048944631385744876,
"grad_norm": 14.336277963651737,
"learning_rate": 4.9953100046649324e-06,
"loss": 0.833,
"step": 960
},
{
"epoch": 0.049454471296013054,
"grad_norm": 5.4893366959746235,
"learning_rate": 4.99505385184129e-06,
"loss": 0.7806,
"step": 970
},
{
"epoch": 0.049964311206281226,
"grad_norm": 10.01287888044387,
"learning_rate": 4.994790895551325e-06,
"loss": 0.8425,
"step": 980
},
{
"epoch": 0.050474151116549404,
"grad_norm": 9.922905767799122,
"learning_rate": 4.994521136512059e-06,
"loss": 0.8063,
"step": 990
},
{
"epoch": 0.05098399102681758,
"grad_norm": 11.153065093623807,
"learning_rate": 4.994244575459068e-06,
"loss": 0.7624,
"step": 1000
},
{
"epoch": 0.051493830937085754,
"grad_norm": 8.572218592109504,
"learning_rate": 4.993961213146473e-06,
"loss": 0.7944,
"step": 1010
},
{
"epoch": 0.05200367084735393,
"grad_norm": 6.800790253609876,
"learning_rate": 4.9936710503469396e-06,
"loss": 0.851,
"step": 1020
},
{
"epoch": 0.0525135107576221,
"grad_norm": 8.97302774530518,
"learning_rate": 4.993374087851681e-06,
"loss": 0.7947,
"step": 1030
},
{
"epoch": 0.05302335066789028,
"grad_norm": 10.83742147749751,
"learning_rate": 4.993070326470446e-06,
"loss": 0.791,
"step": 1040
},
{
"epoch": 0.05353319057815846,
"grad_norm": 7.231682074880805,
"learning_rate": 4.992759767031528e-06,
"loss": 0.8604,
"step": 1050
},
{
"epoch": 0.05404303048842663,
"grad_norm": 5.478837361756769,
"learning_rate": 4.992442410381754e-06,
"loss": 0.8228,
"step": 1060
},
{
"epoch": 0.05455287039869481,
"grad_norm": 6.069701766924315,
"learning_rate": 4.992118257386485e-06,
"loss": 0.831,
"step": 1070
},
{
"epoch": 0.05506271030896299,
"grad_norm": 9.074606195280532,
"learning_rate": 4.9917873089296165e-06,
"loss": 0.8228,
"step": 1080
},
{
"epoch": 0.05557255021923116,
"grad_norm": 7.568069432304031,
"learning_rate": 4.991449565913572e-06,
"loss": 0.8161,
"step": 1090
},
{
"epoch": 0.05608239012949934,
"grad_norm": 6.337139036216719,
"learning_rate": 4.991105029259303e-06,
"loss": 0.864,
"step": 1100
},
{
"epoch": 0.056592230039767516,
"grad_norm": 4.85384079272741,
"learning_rate": 4.990753699906287e-06,
"loss": 0.7435,
"step": 1110
},
{
"epoch": 0.05710206995003569,
"grad_norm": 23.06176907013709,
"learning_rate": 4.990395578812519e-06,
"loss": 0.8387,
"step": 1120
},
{
"epoch": 0.057611909860303866,
"grad_norm": 9.257916277248716,
"learning_rate": 4.99003066695452e-06,
"loss": 0.8052,
"step": 1130
},
{
"epoch": 0.05812174977057204,
"grad_norm": 14.213956943317537,
"learning_rate": 4.989658965327326e-06,
"loss": 0.7379,
"step": 1140
},
{
"epoch": 0.058631589680840215,
"grad_norm": 5.59861062668767,
"learning_rate": 4.9892804749444815e-06,
"loss": 0.8732,
"step": 1150
},
{
"epoch": 0.059141429591108394,
"grad_norm": 5.5444133087316,
"learning_rate": 4.988895196838049e-06,
"loss": 0.7925,
"step": 1160
},
{
"epoch": 0.059651269501376565,
"grad_norm": 7.838806497184927,
"learning_rate": 4.9885031320586e-06,
"loss": 0.8226,
"step": 1170
},
{
"epoch": 0.06016110941164474,
"grad_norm": 5.627963160054227,
"learning_rate": 4.988104281675207e-06,
"loss": 0.8089,
"step": 1180
},
{
"epoch": 0.06067094932191292,
"grad_norm": 7.036269155097154,
"learning_rate": 4.987698646775448e-06,
"loss": 0.8416,
"step": 1190
},
{
"epoch": 0.06118078923218109,
"grad_norm": 4.754307667913581,
"learning_rate": 4.987286228465401e-06,
"loss": 0.7775,
"step": 1200
},
{
"epoch": 0.06169062914244927,
"grad_norm": 11.813278059906052,
"learning_rate": 4.98686702786964e-06,
"loss": 0.7668,
"step": 1210
},
{
"epoch": 0.06220046905271745,
"grad_norm": 13.451249920074707,
"learning_rate": 4.9864410461312345e-06,
"loss": 0.885,
"step": 1220
},
{
"epoch": 0.06271030896298563,
"grad_norm": 6.566369193435987,
"learning_rate": 4.986008284411743e-06,
"loss": 0.7959,
"step": 1230
},
{
"epoch": 0.06322014887325379,
"grad_norm": 5.242039350280819,
"learning_rate": 4.9855687438912125e-06,
"loss": 0.7566,
"step": 1240
},
{
"epoch": 0.06372998878352197,
"grad_norm": 9.389379702091885,
"learning_rate": 4.985122425768173e-06,
"loss": 0.8128,
"step": 1250
},
{
"epoch": 0.06423982869379015,
"grad_norm": 7.772633581033415,
"learning_rate": 4.984669331259637e-06,
"loss": 0.8504,
"step": 1260
},
{
"epoch": 0.06474966860405833,
"grad_norm": 13.024464610100363,
"learning_rate": 4.9842094616010935e-06,
"loss": 0.7655,
"step": 1270
},
{
"epoch": 0.0652595085143265,
"grad_norm": 6.043220311757897,
"learning_rate": 4.983742818046508e-06,
"loss": 0.8156,
"step": 1280
},
{
"epoch": 0.06576934842459468,
"grad_norm": 6.6317422419035905,
"learning_rate": 4.983269401868315e-06,
"loss": 0.7613,
"step": 1290
},
{
"epoch": 0.06627918833486285,
"grad_norm": 10.007390673123645,
"learning_rate": 4.982789214357415e-06,
"loss": 0.8288,
"step": 1300
},
{
"epoch": 0.06678902824513103,
"grad_norm": 6.703445173152822,
"learning_rate": 4.982302256823177e-06,
"loss": 0.7863,
"step": 1310
},
{
"epoch": 0.0672988681553992,
"grad_norm": 13.17033144646381,
"learning_rate": 4.981808530593426e-06,
"loss": 0.8067,
"step": 1320
},
{
"epoch": 0.06780870806566738,
"grad_norm": 6.868166773790616,
"learning_rate": 4.9813080370144465e-06,
"loss": 0.6958,
"step": 1330
},
{
"epoch": 0.06831854797593556,
"grad_norm": 4.450089752857003,
"learning_rate": 4.9808007774509735e-06,
"loss": 0.796,
"step": 1340
},
{
"epoch": 0.06882838788620373,
"grad_norm": 5.959360066034546,
"learning_rate": 4.980286753286196e-06,
"loss": 0.8243,
"step": 1350
},
{
"epoch": 0.0693382277964719,
"grad_norm": 11.494584805053085,
"learning_rate": 4.9797659659217415e-06,
"loss": 0.7686,
"step": 1360
},
{
"epoch": 0.06984806770674008,
"grad_norm": 8.135166100860559,
"learning_rate": 4.979238416777686e-06,
"loss": 0.8252,
"step": 1370
},
{
"epoch": 0.07035790761700826,
"grad_norm": 9.895844896154614,
"learning_rate": 4.978704107292539e-06,
"loss": 0.7843,
"step": 1380
},
{
"epoch": 0.07086774752727644,
"grad_norm": 8.070685299867323,
"learning_rate": 4.978163038923247e-06,
"loss": 0.8037,
"step": 1390
},
{
"epoch": 0.07137758743754462,
"grad_norm": 4.89215341651025,
"learning_rate": 4.977615213145186e-06,
"loss": 0.7959,
"step": 1400
},
{
"epoch": 0.07188742734781278,
"grad_norm": 22.345815306723065,
"learning_rate": 4.977060631452155e-06,
"loss": 0.7987,
"step": 1410
},
{
"epoch": 0.07239726725808096,
"grad_norm": 6.511965526778362,
"learning_rate": 4.9764992953563775e-06,
"loss": 0.7504,
"step": 1420
},
{
"epoch": 0.07290710716834914,
"grad_norm": 4.1076061621225515,
"learning_rate": 4.975931206388495e-06,
"loss": 0.8102,
"step": 1430
},
{
"epoch": 0.07341694707861732,
"grad_norm": 8.383778664151821,
"learning_rate": 4.975356366097561e-06,
"loss": 0.7873,
"step": 1440
},
{
"epoch": 0.0739267869888855,
"grad_norm": 9.476594416859264,
"learning_rate": 4.9747747760510415e-06,
"loss": 0.7719,
"step": 1450
},
{
"epoch": 0.07443662689915366,
"grad_norm": 7.228239489842561,
"learning_rate": 4.974186437834802e-06,
"loss": 0.83,
"step": 1460
},
{
"epoch": 0.07494646680942184,
"grad_norm": 13.083848695215309,
"learning_rate": 4.973591353053115e-06,
"loss": 0.8329,
"step": 1470
},
{
"epoch": 0.07545630671969002,
"grad_norm": 7.244276576725081,
"learning_rate": 4.972989523328645e-06,
"loss": 0.7355,
"step": 1480
},
{
"epoch": 0.0759661466299582,
"grad_norm": 7.565240910571869,
"learning_rate": 4.972380950302451e-06,
"loss": 0.7863,
"step": 1490
},
{
"epoch": 0.07647598654022637,
"grad_norm": 7.4420015963814965,
"learning_rate": 4.9717656356339774e-06,
"loss": 0.7542,
"step": 1500
},
{
"epoch": 0.07698582645049455,
"grad_norm": 4.376438340928698,
"learning_rate": 4.971143581001055e-06,
"loss": 0.7276,
"step": 1510
},
{
"epoch": 0.07749566636076272,
"grad_norm": 5.0799625450411785,
"learning_rate": 4.970514788099887e-06,
"loss": 0.7817,
"step": 1520
},
{
"epoch": 0.0780055062710309,
"grad_norm": 8.39817074720597,
"learning_rate": 4.969879258645058e-06,
"loss": 0.7972,
"step": 1530
},
{
"epoch": 0.07851534618129907,
"grad_norm": 8.582616357794231,
"learning_rate": 4.969236994369516e-06,
"loss": 0.8136,
"step": 1540
},
{
"epoch": 0.07902518609156725,
"grad_norm": 7.693135362484604,
"learning_rate": 4.9685879970245755e-06,
"loss": 0.7817,
"step": 1550
},
{
"epoch": 0.07953502600183543,
"grad_norm": 5.8090513056891195,
"learning_rate": 4.967932268379911e-06,
"loss": 0.7347,
"step": 1560
},
{
"epoch": 0.0800448659121036,
"grad_norm": 10.010341249214227,
"learning_rate": 4.967269810223551e-06,
"loss": 0.7534,
"step": 1570
},
{
"epoch": 0.08055470582237177,
"grad_norm": 5.981515708126521,
"learning_rate": 4.9666006243618725e-06,
"loss": 0.765,
"step": 1580
},
{
"epoch": 0.08106454573263995,
"grad_norm": 7.094363295811945,
"learning_rate": 4.9659247126196e-06,
"loss": 0.8159,
"step": 1590
},
{
"epoch": 0.08157438564290813,
"grad_norm": 20.99913292874124,
"learning_rate": 4.965242076839798e-06,
"loss": 0.7662,
"step": 1600
},
{
"epoch": 0.08208422555317631,
"grad_norm": 7.880336422502132,
"learning_rate": 4.964552718883864e-06,
"loss": 0.8259,
"step": 1610
},
{
"epoch": 0.08259406546344447,
"grad_norm": 4.875726972240783,
"learning_rate": 4.963856640631527e-06,
"loss": 0.7778,
"step": 1620
},
{
"epoch": 0.08310390537371265,
"grad_norm": 11.977363941587642,
"learning_rate": 4.963153843980839e-06,
"loss": 0.7319,
"step": 1630
},
{
"epoch": 0.08361374528398083,
"grad_norm": 9.796137081866668,
"learning_rate": 4.962444330848174e-06,
"loss": 0.7142,
"step": 1640
},
{
"epoch": 0.084123585194249,
"grad_norm": 9.15051356386946,
"learning_rate": 4.961728103168219e-06,
"loss": 0.7158,
"step": 1650
},
{
"epoch": 0.08463342510451718,
"grad_norm": 17.041557462391484,
"learning_rate": 4.961005162893971e-06,
"loss": 0.7543,
"step": 1660
},
{
"epoch": 0.08514326501478536,
"grad_norm": 8.852481776870182,
"learning_rate": 4.960275511996727e-06,
"loss": 0.7537,
"step": 1670
},
{
"epoch": 0.08565310492505353,
"grad_norm": 6.976763521138977,
"learning_rate": 4.9595391524660895e-06,
"loss": 0.7968,
"step": 1680
},
{
"epoch": 0.0861629448353217,
"grad_norm": 5.881800137973928,
"learning_rate": 4.958796086309947e-06,
"loss": 0.6895,
"step": 1690
},
{
"epoch": 0.08667278474558988,
"grad_norm": 10.026275980749878,
"learning_rate": 4.95804631555448e-06,
"loss": 0.7675,
"step": 1700
},
{
"epoch": 0.08718262465585806,
"grad_norm": 5.118657960339985,
"learning_rate": 4.95728984224415e-06,
"loss": 0.6597,
"step": 1710
},
{
"epoch": 0.08769246456612624,
"grad_norm": 13.438286994359856,
"learning_rate": 4.956526668441691e-06,
"loss": 0.7503,
"step": 1720
},
{
"epoch": 0.0882023044763944,
"grad_norm": 8.753698256161398,
"learning_rate": 4.955756796228115e-06,
"loss": 0.7627,
"step": 1730
},
{
"epoch": 0.08871214438666258,
"grad_norm": 9.610428746701263,
"learning_rate": 4.954980227702693e-06,
"loss": 0.8647,
"step": 1740
},
{
"epoch": 0.08922198429693076,
"grad_norm": 5.560524806888825,
"learning_rate": 4.954196964982958e-06,
"loss": 0.7385,
"step": 1750
},
{
"epoch": 0.08973182420719894,
"grad_norm": 7.505032897258174,
"learning_rate": 4.953407010204696e-06,
"loss": 0.7191,
"step": 1760
},
{
"epoch": 0.09024166411746712,
"grad_norm": 7.49436313947957,
"learning_rate": 4.952610365521943e-06,
"loss": 0.7665,
"step": 1770
},
{
"epoch": 0.0907515040277353,
"grad_norm": 6.706182544199552,
"learning_rate": 4.951807033106971e-06,
"loss": 0.7885,
"step": 1780
},
{
"epoch": 0.09126134393800346,
"grad_norm": 6.103625658232912,
"learning_rate": 4.950997015150295e-06,
"loss": 0.7186,
"step": 1790
},
{
"epoch": 0.09177118384827164,
"grad_norm": 7.682633426500706,
"learning_rate": 4.950180313860656e-06,
"loss": 0.8101,
"step": 1800
},
{
"epoch": 0.09228102375853982,
"grad_norm": 6.378699282840451,
"learning_rate": 4.9493569314650195e-06,
"loss": 0.7503,
"step": 1810
},
{
"epoch": 0.092790863668808,
"grad_norm": 6.663692460313854,
"learning_rate": 4.9485268702085696e-06,
"loss": 0.7873,
"step": 1820
},
{
"epoch": 0.09330070357907617,
"grad_norm": 6.9760481067593965,
"learning_rate": 4.947690132354701e-06,
"loss": 0.7683,
"step": 1830
},
{
"epoch": 0.09381054348934434,
"grad_norm": 4.7753822007692825,
"learning_rate": 4.9468467201850164e-06,
"loss": 0.6955,
"step": 1840
},
{
"epoch": 0.09432038339961252,
"grad_norm": 8.491455975033007,
"learning_rate": 4.945996635999315e-06,
"loss": 0.7547,
"step": 1850
},
{
"epoch": 0.0948302233098807,
"grad_norm": 5.484238174982753,
"learning_rate": 4.945139882115592e-06,
"loss": 0.7407,
"step": 1860
},
{
"epoch": 0.09534006322014887,
"grad_norm": 10.414743097331185,
"learning_rate": 4.9442764608700265e-06,
"loss": 0.783,
"step": 1870
},
{
"epoch": 0.09584990313041705,
"grad_norm": 3.6763403899859135,
"learning_rate": 4.943406374616979e-06,
"loss": 0.7562,
"step": 1880
},
{
"epoch": 0.09635974304068523,
"grad_norm": 16.514574124164987,
"learning_rate": 4.942529625728987e-06,
"loss": 0.7621,
"step": 1890
},
{
"epoch": 0.0968695829509534,
"grad_norm": 4.557523714954824,
"learning_rate": 4.94164621659675e-06,
"loss": 0.653,
"step": 1900
},
{
"epoch": 0.09737942286122157,
"grad_norm": 10.124320109104412,
"learning_rate": 4.940756149629134e-06,
"loss": 0.7732,
"step": 1910
},
{
"epoch": 0.09788926277148975,
"grad_norm": 7.10338704991059,
"learning_rate": 4.9398594272531555e-06,
"loss": 0.784,
"step": 1920
},
{
"epoch": 0.09839910268175793,
"grad_norm": 5.911653427620028,
"learning_rate": 4.938956051913981e-06,
"loss": 0.7515,
"step": 1930
},
{
"epoch": 0.09890894259202611,
"grad_norm": 6.858904786752444,
"learning_rate": 4.938046026074917e-06,
"loss": 0.7726,
"step": 1940
},
{
"epoch": 0.09941878250229427,
"grad_norm": 23.42653675383494,
"learning_rate": 4.9371293522174066e-06,
"loss": 0.7582,
"step": 1950
},
{
"epoch": 0.09992862241256245,
"grad_norm": 5.2867772228150125,
"learning_rate": 4.9362060328410175e-06,
"loss": 0.7133,
"step": 1960
},
{
"epoch": 0.10043846232283063,
"grad_norm": 22.95327668318175,
"learning_rate": 4.9352760704634395e-06,
"loss": 0.7742,
"step": 1970
},
{
"epoch": 0.10094830223309881,
"grad_norm": 7.604620776606244,
"learning_rate": 4.934339467620477e-06,
"loss": 0.7832,
"step": 1980
},
{
"epoch": 0.10145814214336699,
"grad_norm": 5.932239152828343,
"learning_rate": 4.933396226866042e-06,
"loss": 0.8363,
"step": 1990
},
{
"epoch": 0.10196798205363516,
"grad_norm": 17.658248083890346,
"learning_rate": 4.932446350772144e-06,
"loss": 0.8283,
"step": 2000
},
{
"epoch": 0.10247782196390333,
"grad_norm": 7.043628818863735,
"learning_rate": 4.93148984192889e-06,
"loss": 0.7165,
"step": 2010
},
{
"epoch": 0.10298766187417151,
"grad_norm": 6.11017035839943,
"learning_rate": 4.930526702944469e-06,
"loss": 0.7581,
"step": 2020
},
{
"epoch": 0.10349750178443969,
"grad_norm": 6.322573253930642,
"learning_rate": 4.92955693644515e-06,
"loss": 0.6907,
"step": 2030
},
{
"epoch": 0.10400734169470786,
"grad_norm": 7.8946250598744,
"learning_rate": 4.928580545075275e-06,
"loss": 0.7201,
"step": 2040
},
{
"epoch": 0.10451718160497604,
"grad_norm": 10.233641745490514,
"learning_rate": 4.927597531497249e-06,
"loss": 0.7304,
"step": 2050
},
{
"epoch": 0.1050270215152442,
"grad_norm": 7.505943530690484,
"learning_rate": 4.926607898391536e-06,
"loss": 0.7475,
"step": 2060
},
{
"epoch": 0.10553686142551238,
"grad_norm": 4.526947597480208,
"learning_rate": 4.925611648456649e-06,
"loss": 0.7546,
"step": 2070
},
{
"epoch": 0.10604670133578056,
"grad_norm": 4.704302007274791,
"learning_rate": 4.924608784409143e-06,
"loss": 0.705,
"step": 2080
},
{
"epoch": 0.10655654124604874,
"grad_norm": 6.799257942483482,
"learning_rate": 4.923599308983609e-06,
"loss": 0.6615,
"step": 2090
},
{
"epoch": 0.10706638115631692,
"grad_norm": 7.838655315489779,
"learning_rate": 4.9225832249326665e-06,
"loss": 0.7278,
"step": 2100
},
{
"epoch": 0.1075762210665851,
"grad_norm": 20.422823564090418,
"learning_rate": 4.921560535026954e-06,
"loss": 0.7493,
"step": 2110
},
{
"epoch": 0.10808606097685326,
"grad_norm": 5.158524533523995,
"learning_rate": 4.920531242055124e-06,
"loss": 0.7138,
"step": 2120
},
{
"epoch": 0.10859590088712144,
"grad_norm": 5.737372357034715,
"learning_rate": 4.919495348823833e-06,
"loss": 0.7343,
"step": 2130
},
{
"epoch": 0.10910574079738962,
"grad_norm": 6.7885279145387365,
"learning_rate": 4.918452858157736e-06,
"loss": 0.7818,
"step": 2140
},
{
"epoch": 0.1096155807076578,
"grad_norm": 24.320520866421703,
"learning_rate": 4.917403772899475e-06,
"loss": 0.7478,
"step": 2150
},
{
"epoch": 0.11012542061792598,
"grad_norm": 8.984483013819245,
"learning_rate": 4.916348095909677e-06,
"loss": 0.6942,
"step": 2160
},
{
"epoch": 0.11063526052819414,
"grad_norm": 6.323042084343602,
"learning_rate": 4.915285830066945e-06,
"loss": 0.82,
"step": 2170
},
{
"epoch": 0.11114510043846232,
"grad_norm": 7.541343111464914,
"learning_rate": 4.914216978267842e-06,
"loss": 0.7914,
"step": 2180
},
{
"epoch": 0.1116549403487305,
"grad_norm": 14.131358325160548,
"learning_rate": 4.9131415434268945e-06,
"loss": 0.7259,
"step": 2190
},
{
"epoch": 0.11216478025899868,
"grad_norm": 8.731190605993358,
"learning_rate": 4.912059528476579e-06,
"loss": 0.8269,
"step": 2200
},
{
"epoch": 0.11267462016926685,
"grad_norm": 8.485689942952286,
"learning_rate": 4.910970936367313e-06,
"loss": 0.7259,
"step": 2210
},
{
"epoch": 0.11318446007953503,
"grad_norm": 6.49198494931806,
"learning_rate": 4.909875770067449e-06,
"loss": 0.7211,
"step": 2220
},
{
"epoch": 0.1136942999898032,
"grad_norm": 9.663884193967293,
"learning_rate": 4.908774032563267e-06,
"loss": 0.7496,
"step": 2230
},
{
"epoch": 0.11420413990007137,
"grad_norm": 5.343015916386732,
"learning_rate": 4.9076657268589626e-06,
"loss": 0.8119,
"step": 2240
},
{
"epoch": 0.11471397981033955,
"grad_norm": 11.278186554065213,
"learning_rate": 4.906550855976644e-06,
"loss": 0.7602,
"step": 2250
},
{
"epoch": 0.11522381972060773,
"grad_norm": 10.446576961455326,
"learning_rate": 4.90542942295632e-06,
"loss": 0.7871,
"step": 2260
},
{
"epoch": 0.11573365963087591,
"grad_norm": 6.386379090111534,
"learning_rate": 4.904301430855895e-06,
"loss": 0.7258,
"step": 2270
},
{
"epoch": 0.11624349954114407,
"grad_norm": 10.380891530519655,
"learning_rate": 4.903166882751155e-06,
"loss": 0.6213,
"step": 2280
},
{
"epoch": 0.11675333945141225,
"grad_norm": 6.383115962201617,
"learning_rate": 4.902025781735765e-06,
"loss": 0.7381,
"step": 2290
},
{
"epoch": 0.11726317936168043,
"grad_norm": 7.029755940352045,
"learning_rate": 4.9008781309212585e-06,
"loss": 0.6886,
"step": 2300
},
{
"epoch": 0.11777301927194861,
"grad_norm": 15.692969999317937,
"learning_rate": 4.899723933437027e-06,
"loss": 0.6906,
"step": 2310
},
{
"epoch": 0.11828285918221679,
"grad_norm": 15.838306381587808,
"learning_rate": 4.898563192430316e-06,
"loss": 0.7165,
"step": 2320
},
{
"epoch": 0.11879269909248497,
"grad_norm": 29.985686517549937,
"learning_rate": 4.897395911066212e-06,
"loss": 0.8469,
"step": 2330
},
{
"epoch": 0.11930253900275313,
"grad_norm": 10.242594527023613,
"learning_rate": 4.896222092527636e-06,
"loss": 0.7412,
"step": 2340
},
{
"epoch": 0.11981237891302131,
"grad_norm": 6.75946263973171,
"learning_rate": 4.895041740015335e-06,
"loss": 0.7117,
"step": 2350
},
{
"epoch": 0.12032221882328949,
"grad_norm": 11.589006949505885,
"learning_rate": 4.893854856747872e-06,
"loss": 0.7867,
"step": 2360
},
{
"epoch": 0.12083205873355767,
"grad_norm": 8.349503343129639,
"learning_rate": 4.8926614459616174e-06,
"loss": 0.6924,
"step": 2370
},
{
"epoch": 0.12134189864382584,
"grad_norm": 8.819365346854005,
"learning_rate": 4.8914615109107425e-06,
"loss": 0.72,
"step": 2380
},
{
"epoch": 0.12185173855409401,
"grad_norm": 4.770793227267069,
"learning_rate": 4.890255054867207e-06,
"loss": 0.7286,
"step": 2390
},
{
"epoch": 0.12236157846436219,
"grad_norm": 13.622150791690203,
"learning_rate": 4.889042081120753e-06,
"loss": 0.7528,
"step": 2400
},
{
"epoch": 0.12287141837463036,
"grad_norm": 5.008840415546251,
"learning_rate": 4.887822592978895e-06,
"loss": 0.7024,
"step": 2410
},
{
"epoch": 0.12338125828489854,
"grad_norm": 10.357815675726904,
"learning_rate": 4.88659659376691e-06,
"loss": 0.7578,
"step": 2420
},
{
"epoch": 0.12389109819516672,
"grad_norm": 4.9766029310415645,
"learning_rate": 4.885364086827831e-06,
"loss": 0.6866,
"step": 2430
},
{
"epoch": 0.1244009381054349,
"grad_norm": 12.267670210202498,
"learning_rate": 4.884125075522434e-06,
"loss": 0.7947,
"step": 2440
},
{
"epoch": 0.12491077801570306,
"grad_norm": 4.706392994439642,
"learning_rate": 4.882879563229232e-06,
"loss": 0.6574,
"step": 2450
},
{
"epoch": 0.12542061792597126,
"grad_norm": 11.923730716703353,
"learning_rate": 4.881627553344464e-06,
"loss": 0.7269,
"step": 2460
},
{
"epoch": 0.12593045783623943,
"grad_norm": 4.61966253507641,
"learning_rate": 4.880369049282089e-06,
"loss": 0.7375,
"step": 2470
},
{
"epoch": 0.12644029774650759,
"grad_norm": 8.525447192765746,
"learning_rate": 4.87910405447377e-06,
"loss": 0.8006,
"step": 2480
},
{
"epoch": 0.12695013765677576,
"grad_norm": 7.545366514028092,
"learning_rate": 4.877832572368874e-06,
"loss": 0.6392,
"step": 2490
},
{
"epoch": 0.12745997756704394,
"grad_norm": 4.9755773421155025,
"learning_rate": 4.876554606434452e-06,
"loss": 0.7601,
"step": 2500
},
{
"epoch": 0.12796981747731212,
"grad_norm": 9.24031611206335,
"learning_rate": 4.87527016015524e-06,
"loss": 0.6617,
"step": 2510
},
{
"epoch": 0.1284796573875803,
"grad_norm": 4.580283694822131,
"learning_rate": 4.873979237033641e-06,
"loss": 0.7334,
"step": 2520
},
{
"epoch": 0.12898949729784848,
"grad_norm": 4.6316009967309935,
"learning_rate": 4.8726818405897206e-06,
"loss": 0.6627,
"step": 2530
},
{
"epoch": 0.12949933720811665,
"grad_norm": 4.278940967682888,
"learning_rate": 4.871377974361194e-06,
"loss": 0.6552,
"step": 2540
},
{
"epoch": 0.13000917711838483,
"grad_norm": 5.021516810375643,
"learning_rate": 4.870067641903421e-06,
"loss": 0.7374,
"step": 2550
},
{
"epoch": 0.130519017028653,
"grad_norm": 8.465617471643727,
"learning_rate": 4.8687508467893895e-06,
"loss": 0.6968,
"step": 2560
},
{
"epoch": 0.1310288569389212,
"grad_norm": 5.852309211002095,
"learning_rate": 4.867427592609715e-06,
"loss": 0.7839,
"step": 2570
},
{
"epoch": 0.13153869684918937,
"grad_norm": 5.8066378546185815,
"learning_rate": 4.86609788297262e-06,
"loss": 0.6999,
"step": 2580
},
{
"epoch": 0.13204853675945752,
"grad_norm": 9.992950270407439,
"learning_rate": 4.864761721503932e-06,
"loss": 0.7512,
"step": 2590
},
{
"epoch": 0.1325583766697257,
"grad_norm": 9.15276481014689,
"learning_rate": 4.863419111847072e-06,
"loss": 0.6774,
"step": 2600
},
{
"epoch": 0.13306821657999388,
"grad_norm": 10.262913953533229,
"learning_rate": 4.862070057663043e-06,
"loss": 0.6872,
"step": 2610
},
{
"epoch": 0.13357805649026205,
"grad_norm": 4.362603777338605,
"learning_rate": 4.860714562630421e-06,
"loss": 0.7258,
"step": 2620
},
{
"epoch": 0.13408789640053023,
"grad_norm": 5.703957736552648,
"learning_rate": 4.859352630445343e-06,
"loss": 0.7552,
"step": 2630
},
{
"epoch": 0.1345977363107984,
"grad_norm": 13.041968804263744,
"learning_rate": 4.857984264821503e-06,
"loss": 0.7127,
"step": 2640
},
{
"epoch": 0.1351075762210666,
"grad_norm": 13.814739101752501,
"learning_rate": 4.856609469490131e-06,
"loss": 0.6816,
"step": 2650
},
{
"epoch": 0.13561741613133477,
"grad_norm": 16.028749882673008,
"learning_rate": 4.855228248199997e-06,
"loss": 0.6352,
"step": 2660
},
{
"epoch": 0.13612725604160295,
"grad_norm": 9.850894944277773,
"learning_rate": 4.853840604717388e-06,
"loss": 0.6368,
"step": 2670
},
{
"epoch": 0.13663709595187112,
"grad_norm": 7.052708235930174,
"learning_rate": 4.8524465428261044e-06,
"loss": 0.7308,
"step": 2680
},
{
"epoch": 0.1371469358621393,
"grad_norm": 11.56688607820047,
"learning_rate": 4.8510460663274475e-06,
"loss": 0.6724,
"step": 2690
},
{
"epoch": 0.13765677577240745,
"grad_norm": 8.774888175799715,
"learning_rate": 4.849639179040212e-06,
"loss": 0.6837,
"step": 2700
},
{
"epoch": 0.13816661568267563,
"grad_norm": 4.599398383648445,
"learning_rate": 4.8482258848006705e-06,
"loss": 0.6933,
"step": 2710
},
{
"epoch": 0.1386764555929438,
"grad_norm": 10.339036808448,
"learning_rate": 4.8468061874625685e-06,
"loss": 0.6613,
"step": 2720
},
{
"epoch": 0.139186295503212,
"grad_norm": 7.209990153494447,
"learning_rate": 4.8453800908971085e-06,
"loss": 0.7649,
"step": 2730
},
{
"epoch": 0.13969613541348017,
"grad_norm": 8.572787727941169,
"learning_rate": 4.843947598992947e-06,
"loss": 0.6306,
"step": 2740
},
{
"epoch": 0.14020597532374834,
"grad_norm": 6.564730091855565,
"learning_rate": 4.842508715656172e-06,
"loss": 0.6728,
"step": 2750
},
{
"epoch": 0.14071581523401652,
"grad_norm": 11.252632796952437,
"learning_rate": 4.841063444810307e-06,
"loss": 0.7112,
"step": 2760
},
{
"epoch": 0.1412256551442847,
"grad_norm": 6.820198946958281,
"learning_rate": 4.83961179039629e-06,
"loss": 0.7419,
"step": 2770
},
{
"epoch": 0.14173549505455288,
"grad_norm": 4.2602884636294585,
"learning_rate": 4.838153756372464e-06,
"loss": 0.7157,
"step": 2780
},
{
"epoch": 0.14224533496482106,
"grad_norm": 6.671116553278363,
"learning_rate": 4.836689346714568e-06,
"loss": 0.6695,
"step": 2790
},
{
"epoch": 0.14275517487508924,
"grad_norm": 8.002664608813316,
"learning_rate": 4.835218565415728e-06,
"loss": 0.6996,
"step": 2800
},
{
"epoch": 0.1432650147853574,
"grad_norm": 5.973736804854662,
"learning_rate": 4.833741416486444e-06,
"loss": 0.7318,
"step": 2810
},
{
"epoch": 0.14377485469562556,
"grad_norm": 8.68257045485749,
"learning_rate": 4.832257903954576e-06,
"loss": 0.7366,
"step": 2820
},
{
"epoch": 0.14428469460589374,
"grad_norm": 10.526944905618214,
"learning_rate": 4.83076803186534e-06,
"loss": 0.658,
"step": 2830
},
{
"epoch": 0.14479453451616192,
"grad_norm": 4.585792847679198,
"learning_rate": 4.829271804281291e-06,
"loss": 0.7048,
"step": 2840
},
{
"epoch": 0.1453043744264301,
"grad_norm": 10.047956297782614,
"learning_rate": 4.827769225282314e-06,
"loss": 0.7355,
"step": 2850
},
{
"epoch": 0.14581421433669828,
"grad_norm": 7.835889892504949,
"learning_rate": 4.826260298965613e-06,
"loss": 0.6578,
"step": 2860
},
{
"epoch": 0.14632405424696646,
"grad_norm": 6.807222092276076,
"learning_rate": 4.824745029445702e-06,
"loss": 0.7083,
"step": 2870
},
{
"epoch": 0.14683389415723463,
"grad_norm": 4.365352847668174,
"learning_rate": 4.823223420854387e-06,
"loss": 0.6925,
"step": 2880
},
{
"epoch": 0.1473437340675028,
"grad_norm": 3.8753003474177543,
"learning_rate": 4.821695477340765e-06,
"loss": 0.6893,
"step": 2890
},
{
"epoch": 0.147853573977771,
"grad_norm": 8.213692008404406,
"learning_rate": 4.820161203071202e-06,
"loss": 0.6485,
"step": 2900
},
{
"epoch": 0.14836341388803917,
"grad_norm": 8.957873544634902,
"learning_rate": 4.818620602229329e-06,
"loss": 0.7395,
"step": 2910
},
{
"epoch": 0.14887325379830732,
"grad_norm": 4.310902982886003,
"learning_rate": 4.8170736790160275e-06,
"loss": 0.6767,
"step": 2920
},
{
"epoch": 0.1493830937085755,
"grad_norm": 5.1513296158245545,
"learning_rate": 4.815520437649419e-06,
"loss": 0.7145,
"step": 2930
},
{
"epoch": 0.14989293361884368,
"grad_norm": 4.061785353893275,
"learning_rate": 4.813960882364852e-06,
"loss": 0.6508,
"step": 2940
},
{
"epoch": 0.15040277352911186,
"grad_norm": 5.571896711942683,
"learning_rate": 4.812395017414894e-06,
"loss": 0.6501,
"step": 2950
},
{
"epoch": 0.15091261343938003,
"grad_norm": 4.973519966201374,
"learning_rate": 4.810822847069317e-06,
"loss": 0.6836,
"step": 2960
},
{
"epoch": 0.1514224533496482,
"grad_norm": 3.8590735759641133,
"learning_rate": 4.809244375615085e-06,
"loss": 0.6585,
"step": 2970
},
{
"epoch": 0.1519322932599164,
"grad_norm": 26.4507503100321,
"learning_rate": 4.807659607356343e-06,
"loss": 0.7201,
"step": 2980
},
{
"epoch": 0.15244213317018457,
"grad_norm": 6.361120242534623,
"learning_rate": 4.80606854661441e-06,
"loss": 0.7532,
"step": 2990
},
{
"epoch": 0.15295197308045275,
"grad_norm": 7.171280417983545,
"learning_rate": 4.80447119772776e-06,
"loss": 0.6422,
"step": 3000
},
{
"epoch": 0.15346181299072092,
"grad_norm": 15.581803950564717,
"learning_rate": 4.802867565052013e-06,
"loss": 0.7753,
"step": 3010
},
{
"epoch": 0.1539716529009891,
"grad_norm": 4.652350958171924,
"learning_rate": 4.8012576529599266e-06,
"loss": 0.7554,
"step": 3020
},
{
"epoch": 0.15448149281125725,
"grad_norm": 7.8002467029745075,
"learning_rate": 4.799641465841377e-06,
"loss": 0.7257,
"step": 3030
},
{
"epoch": 0.15499133272152543,
"grad_norm": 5.020330603963672,
"learning_rate": 4.798019008103354e-06,
"loss": 0.6583,
"step": 3040
},
{
"epoch": 0.1555011726317936,
"grad_norm": 25.527336664699217,
"learning_rate": 4.796390284169946e-06,
"loss": 0.6731,
"step": 3050
},
{
"epoch": 0.1560110125420618,
"grad_norm": 27.14951922353312,
"learning_rate": 4.7947552984823265e-06,
"loss": 0.6747,
"step": 3060
},
{
"epoch": 0.15652085245232997,
"grad_norm": 15.01183014995001,
"learning_rate": 4.793114055498743e-06,
"loss": 0.7711,
"step": 3070
},
{
"epoch": 0.15703069236259815,
"grad_norm": 5.266851401040007,
"learning_rate": 4.791466559694508e-06,
"loss": 0.6616,
"step": 3080
},
{
"epoch": 0.15754053227286632,
"grad_norm": 4.459749463292788,
"learning_rate": 4.789812815561981e-06,
"loss": 0.635,
"step": 3090
},
{
"epoch": 0.1580503721831345,
"grad_norm": 8.87398114879771,
"learning_rate": 4.78815282761056e-06,
"loss": 0.6847,
"step": 3100
},
{
"epoch": 0.15856021209340268,
"grad_norm": 8.384445249856304,
"learning_rate": 4.786486600366672e-06,
"loss": 0.6466,
"step": 3110
},
{
"epoch": 0.15907005200367086,
"grad_norm": 5.231146536799489,
"learning_rate": 4.784814138373751e-06,
"loss": 0.6377,
"step": 3120
},
{
"epoch": 0.159579891913939,
"grad_norm": 6.8125691550144705,
"learning_rate": 4.783135446192238e-06,
"loss": 0.7553,
"step": 3130
},
{
"epoch": 0.1600897318242072,
"grad_norm": 6.285648196710817,
"learning_rate": 4.781450528399558e-06,
"loss": 0.7185,
"step": 3140
},
{
"epoch": 0.16059957173447537,
"grad_norm": 6.623255994991425,
"learning_rate": 4.779759389590114e-06,
"loss": 0.7047,
"step": 3150
},
{
"epoch": 0.16110941164474354,
"grad_norm": 11.264105177873368,
"learning_rate": 4.778062034375271e-06,
"loss": 0.682,
"step": 3160
},
{
"epoch": 0.16161925155501172,
"grad_norm": 8.931574690777653,
"learning_rate": 4.7763584673833476e-06,
"loss": 0.6066,
"step": 3170
},
{
"epoch": 0.1621290914652799,
"grad_norm": 7.944887533725402,
"learning_rate": 4.774648693259596e-06,
"loss": 0.7101,
"step": 3180
},
{
"epoch": 0.16263893137554808,
"grad_norm": 6.845198661829901,
"learning_rate": 4.7729327166661975e-06,
"loss": 0.6817,
"step": 3190
},
{
"epoch": 0.16314877128581626,
"grad_norm": 6.763894618289275,
"learning_rate": 4.771210542282245e-06,
"loss": 0.6203,
"step": 3200
},
{
"epoch": 0.16365861119608444,
"grad_norm": 5.6556994863687295,
"learning_rate": 4.7694821748037315e-06,
"loss": 0.6707,
"step": 3210
},
{
"epoch": 0.16416845110635261,
"grad_norm": 8.736722880916833,
"learning_rate": 4.767747618943537e-06,
"loss": 0.6971,
"step": 3220
},
{
"epoch": 0.1646782910166208,
"grad_norm": 8.552198094063863,
"learning_rate": 4.766006879431417e-06,
"loss": 0.6517,
"step": 3230
},
{
"epoch": 0.16518813092688894,
"grad_norm": 5.27160834021644,
"learning_rate": 4.764259961013986e-06,
"loss": 0.617,
"step": 3240
},
{
"epoch": 0.16569797083715712,
"grad_norm": 7.549479646937416,
"learning_rate": 4.76250686845471e-06,
"loss": 0.7104,
"step": 3250
},
{
"epoch": 0.1662078107474253,
"grad_norm": 4.877994166469665,
"learning_rate": 4.760747606533888e-06,
"loss": 0.6473,
"step": 3260
},
{
"epoch": 0.16671765065769348,
"grad_norm": 4.469180917122692,
"learning_rate": 4.758982180048644e-06,
"loss": 0.7034,
"step": 3270
},
{
"epoch": 0.16722749056796166,
"grad_norm": 12.26109420407807,
"learning_rate": 4.757210593812909e-06,
"loss": 0.735,
"step": 3280
},
{
"epoch": 0.16773733047822983,
"grad_norm": 20.81788927557235,
"learning_rate": 4.7554328526574115e-06,
"loss": 0.6995,
"step": 3290
},
{
"epoch": 0.168247170388498,
"grad_norm": 10.426241109312354,
"learning_rate": 4.753648961429662e-06,
"loss": 0.6908,
"step": 3300
},
{
"epoch": 0.1687570102987662,
"grad_norm": 11.983786528863883,
"learning_rate": 4.751858924993943e-06,
"loss": 0.6396,
"step": 3310
},
{
"epoch": 0.16926685020903437,
"grad_norm": 8.761699158505628,
"learning_rate": 4.750062748231293e-06,
"loss": 0.63,
"step": 3320
},
{
"epoch": 0.16977669011930255,
"grad_norm": 6.274654989027958,
"learning_rate": 4.748260436039492e-06,
"loss": 0.7373,
"step": 3330
},
{
"epoch": 0.17028653002957073,
"grad_norm": 150.15499196552602,
"learning_rate": 4.7464519933330525e-06,
"loss": 0.6855,
"step": 3340
},
{
"epoch": 0.17079636993983888,
"grad_norm": 4.57851652515877,
"learning_rate": 4.744637425043201e-06,
"loss": 0.6602,
"step": 3350
},
{
"epoch": 0.17130620985010706,
"grad_norm": 4.9207753165585455,
"learning_rate": 4.742816736117869e-06,
"loss": 0.7346,
"step": 3360
},
{
"epoch": 0.17181604976037523,
"grad_norm": 7.641718276066049,
"learning_rate": 4.7409899315216774e-06,
"loss": 0.7553,
"step": 3370
},
{
"epoch": 0.1723258896706434,
"grad_norm": 13.595197861672144,
"learning_rate": 4.739157016235924e-06,
"loss": 0.6417,
"step": 3380
},
{
"epoch": 0.1728357295809116,
"grad_norm": 5.8252440540022885,
"learning_rate": 4.737317995258566e-06,
"loss": 0.6646,
"step": 3390
},
{
"epoch": 0.17334556949117977,
"grad_norm": 4.268513502564397,
"learning_rate": 4.735472873604212e-06,
"loss": 0.6445,
"step": 3400
},
{
"epoch": 0.17385540940144795,
"grad_norm": 7.0127504656236885,
"learning_rate": 4.733621656304106e-06,
"loss": 0.6542,
"step": 3410
},
{
"epoch": 0.17436524931171613,
"grad_norm": 4.158850612532249,
"learning_rate": 4.7317643484061125e-06,
"loss": 0.6717,
"step": 3420
},
{
"epoch": 0.1748750892219843,
"grad_norm": 12.017336843310925,
"learning_rate": 4.729900954974704e-06,
"loss": 0.6404,
"step": 3430
},
{
"epoch": 0.17538492913225248,
"grad_norm": 5.271107087734727,
"learning_rate": 4.728031481090946e-06,
"loss": 0.6871,
"step": 3440
},
{
"epoch": 0.17589476904252066,
"grad_norm": 10.111466361561238,
"learning_rate": 4.726155931852487e-06,
"loss": 0.6837,
"step": 3450
},
{
"epoch": 0.1764046089527888,
"grad_norm": 5.043545123417151,
"learning_rate": 4.724274312373539e-06,
"loss": 0.671,
"step": 3460
},
{
"epoch": 0.176914448863057,
"grad_norm": 7.856504690510926,
"learning_rate": 4.722386627784866e-06,
"loss": 0.6784,
"step": 3470
},
{
"epoch": 0.17742428877332517,
"grad_norm": 6.397278035704165,
"learning_rate": 4.720492883233772e-06,
"loss": 0.6764,
"step": 3480
},
{
"epoch": 0.17793412868359335,
"grad_norm": 5.808001185922261,
"learning_rate": 4.718593083884085e-06,
"loss": 0.7099,
"step": 3490
},
{
"epoch": 0.17844396859386152,
"grad_norm": 9.004117869597364,
"learning_rate": 4.716687234916141e-06,
"loss": 0.7707,
"step": 3500
},
{
"epoch": 0.1789538085041297,
"grad_norm": 4.629411334758138,
"learning_rate": 4.7147753415267736e-06,
"loss": 0.681,
"step": 3510
},
{
"epoch": 0.17946364841439788,
"grad_norm": 5.326181185776784,
"learning_rate": 4.7128574089292975e-06,
"loss": 0.6729,
"step": 3520
},
{
"epoch": 0.17997348832466606,
"grad_norm": 8.16522150459077,
"learning_rate": 4.710933442353498e-06,
"loss": 0.7004,
"step": 3530
},
{
"epoch": 0.18048332823493424,
"grad_norm": 6.615394186767928,
"learning_rate": 4.709003447045609e-06,
"loss": 0.7081,
"step": 3540
},
{
"epoch": 0.18099316814520242,
"grad_norm": 8.811942767619376,
"learning_rate": 4.707067428268307e-06,
"loss": 0.6395,
"step": 3550
},
{
"epoch": 0.1815030080554706,
"grad_norm": 5.925847531376736,
"learning_rate": 4.705125391300691e-06,
"loss": 0.617,
"step": 3560
},
{
"epoch": 0.18201284796573874,
"grad_norm": 3.604921855041018,
"learning_rate": 4.703177341438272e-06,
"loss": 0.6723,
"step": 3570
},
{
"epoch": 0.18252268787600692,
"grad_norm": 5.677306836426495,
"learning_rate": 4.701223283992956e-06,
"loss": 0.6538,
"step": 3580
},
{
"epoch": 0.1830325277862751,
"grad_norm": 5.012392985340839,
"learning_rate": 4.699263224293029e-06,
"loss": 0.6061,
"step": 3590
},
{
"epoch": 0.18354236769654328,
"grad_norm": 9.236505127466511,
"learning_rate": 4.697297167683147e-06,
"loss": 0.7969,
"step": 3600
},
{
"epoch": 0.18405220760681146,
"grad_norm": 14.008416902019645,
"learning_rate": 4.695325119524316e-06,
"loss": 0.6945,
"step": 3610
},
{
"epoch": 0.18456204751707964,
"grad_norm": 6.024458862666216,
"learning_rate": 4.693347085193879e-06,
"loss": 0.6403,
"step": 3620
},
{
"epoch": 0.18507188742734781,
"grad_norm": 9.406344908950013,
"learning_rate": 4.691363070085504e-06,
"loss": 0.6999,
"step": 3630
},
{
"epoch": 0.185581727337616,
"grad_norm": 8.252705261353633,
"learning_rate": 4.689373079609167e-06,
"loss": 0.6541,
"step": 3640
},
{
"epoch": 0.18609156724788417,
"grad_norm": 7.463150358542101,
"learning_rate": 4.687377119191138e-06,
"loss": 0.6606,
"step": 3650
},
{
"epoch": 0.18660140715815235,
"grad_norm": 12.232052172719072,
"learning_rate": 4.6853751942739615e-06,
"loss": 0.6683,
"step": 3660
},
{
"epoch": 0.18711124706842053,
"grad_norm": 5.120603854558654,
"learning_rate": 4.68336731031645e-06,
"loss": 0.6323,
"step": 3670
},
{
"epoch": 0.18762108697868868,
"grad_norm": 5.2441720769748414,
"learning_rate": 4.681353472793665e-06,
"loss": 0.6346,
"step": 3680
},
{
"epoch": 0.18813092688895686,
"grad_norm": 13.58549636665437,
"learning_rate": 4.6793336871969014e-06,
"loss": 0.7515,
"step": 3690
},
{
"epoch": 0.18864076679922503,
"grad_norm": 12.863786029385953,
"learning_rate": 4.677307959033672e-06,
"loss": 0.7563,
"step": 3700
},
{
"epoch": 0.1891506067094932,
"grad_norm": 12.74527164522038,
"learning_rate": 4.675276293827695e-06,
"loss": 0.6679,
"step": 3710
},
{
"epoch": 0.1896604466197614,
"grad_norm": 5.672326730728393,
"learning_rate": 4.673238697118877e-06,
"loss": 0.6657,
"step": 3720
},
{
"epoch": 0.19017028653002957,
"grad_norm": 8.669986727953395,
"learning_rate": 4.671195174463298e-06,
"loss": 0.7251,
"step": 3730
},
{
"epoch": 0.19068012644029775,
"grad_norm": 3.462771212444571,
"learning_rate": 4.669145731433199e-06,
"loss": 0.6472,
"step": 3740
},
{
"epoch": 0.19118996635056593,
"grad_norm": 8.140374911198643,
"learning_rate": 4.667090373616963e-06,
"loss": 0.636,
"step": 3750
},
{
"epoch": 0.1916998062608341,
"grad_norm": 8.375159298134957,
"learning_rate": 4.6650291066190995e-06,
"loss": 0.686,
"step": 3760
},
{
"epoch": 0.19220964617110228,
"grad_norm": 4.0302479468965275,
"learning_rate": 4.662961936060234e-06,
"loss": 0.658,
"step": 3770
},
{
"epoch": 0.19271948608137046,
"grad_norm": 30.16864678240673,
"learning_rate": 4.660888867577089e-06,
"loss": 0.6654,
"step": 3780
},
{
"epoch": 0.1932293259916386,
"grad_norm": 6.176858581461859,
"learning_rate": 4.658809906822469e-06,
"loss": 0.6375,
"step": 3790
},
{
"epoch": 0.1937391659019068,
"grad_norm": 15.313296678841132,
"learning_rate": 4.656725059465245e-06,
"loss": 0.6317,
"step": 3800
},
{
"epoch": 0.19424900581217497,
"grad_norm": 3.5343657984567307,
"learning_rate": 4.654634331190341e-06,
"loss": 0.6452,
"step": 3810
},
{
"epoch": 0.19475884572244315,
"grad_norm": 7.774662431701929,
"learning_rate": 4.652537727698713e-06,
"loss": 0.6727,
"step": 3820
},
{
"epoch": 0.19526868563271133,
"grad_norm": 5.547641934925014,
"learning_rate": 4.650435254707344e-06,
"loss": 0.7369,
"step": 3830
},
{
"epoch": 0.1957785255429795,
"grad_norm": 7.21333998728068,
"learning_rate": 4.648326917949215e-06,
"loss": 0.6797,
"step": 3840
},
{
"epoch": 0.19628836545324768,
"grad_norm": 15.50709374668024,
"learning_rate": 4.6462127231733014e-06,
"loss": 0.6391,
"step": 3850
},
{
"epoch": 0.19679820536351586,
"grad_norm": 6.905420869848083,
"learning_rate": 4.644092676144549e-06,
"loss": 0.7022,
"step": 3860
},
{
"epoch": 0.19730804527378404,
"grad_norm": 4.7299784245872685,
"learning_rate": 4.641966782643864e-06,
"loss": 0.6242,
"step": 3870
},
{
"epoch": 0.19781788518405222,
"grad_norm": 5.524370741445387,
"learning_rate": 4.639835048468091e-06,
"loss": 0.6087,
"step": 3880
},
{
"epoch": 0.1983277250943204,
"grad_norm": 5.639398324569761,
"learning_rate": 4.637697479430004e-06,
"loss": 0.6491,
"step": 3890
},
{
"epoch": 0.19883756500458855,
"grad_norm": 6.239776328645408,
"learning_rate": 4.635554081358288e-06,
"loss": 0.6736,
"step": 3900
},
{
"epoch": 0.19934740491485672,
"grad_norm": 10.43488596345882,
"learning_rate": 4.633404860097519e-06,
"loss": 0.7098,
"step": 3910
},
{
"epoch": 0.1998572448251249,
"grad_norm": 7.4619042443327785,
"learning_rate": 4.631249821508153e-06,
"loss": 0.6967,
"step": 3920
},
{
"epoch": 0.20036708473539308,
"grad_norm": 7.429655477193423,
"learning_rate": 4.6290889714665095e-06,
"loss": 0.7033,
"step": 3930
},
{
"epoch": 0.20087692464566126,
"grad_norm": 6.266633297662326,
"learning_rate": 4.626922315864756e-06,
"loss": 0.6624,
"step": 3940
},
{
"epoch": 0.20138676455592944,
"grad_norm": 14.54850609381256,
"learning_rate": 4.624749860610886e-06,
"loss": 0.5777,
"step": 3950
},
{
"epoch": 0.20189660446619762,
"grad_norm": 6.354726612196373,
"learning_rate": 4.622571611628712e-06,
"loss": 0.6659,
"step": 3960
},
{
"epoch": 0.2024064443764658,
"grad_norm": 9.01385785429811,
"learning_rate": 4.620387574857841e-06,
"loss": 0.7359,
"step": 3970
},
{
"epoch": 0.20291628428673397,
"grad_norm": 5.9455223230764975,
"learning_rate": 4.618197756253665e-06,
"loss": 0.6315,
"step": 3980
},
{
"epoch": 0.20342612419700215,
"grad_norm": 7.228697496865833,
"learning_rate": 4.61600216178734e-06,
"loss": 0.5958,
"step": 3990
},
{
"epoch": 0.20393596410727033,
"grad_norm": 5.261554083910349,
"learning_rate": 4.613800797445772e-06,
"loss": 0.6447,
"step": 4000
},
{
"epoch": 0.20444580401753848,
"grad_norm": 5.772195941211414,
"learning_rate": 4.611593669231601e-06,
"loss": 0.6585,
"step": 4010
},
{
"epoch": 0.20495564392780666,
"grad_norm": 6.096937434650072,
"learning_rate": 4.609380783163182e-06,
"loss": 0.6459,
"step": 4020
},
{
"epoch": 0.20546548383807484,
"grad_norm": 21.809160244855928,
"learning_rate": 4.6071621452745716e-06,
"loss": 0.619,
"step": 4030
},
{
"epoch": 0.20597532374834301,
"grad_norm": 4.542379378303144,
"learning_rate": 4.6049377616155116e-06,
"loss": 0.7122,
"step": 4040
},
{
"epoch": 0.2064851636586112,
"grad_norm": 6.584819766187672,
"learning_rate": 4.602707638251408e-06,
"loss": 0.7196,
"step": 4050
},
{
"epoch": 0.20699500356887937,
"grad_norm": 11.524432702136032,
"learning_rate": 4.600471781263321e-06,
"loss": 0.7068,
"step": 4060
},
{
"epoch": 0.20750484347914755,
"grad_norm": 13.258106134411522,
"learning_rate": 4.598230196747943e-06,
"loss": 0.6694,
"step": 4070
},
{
"epoch": 0.20801468338941573,
"grad_norm": 3.4620914060610293,
"learning_rate": 4.595982890817585e-06,
"loss": 0.6377,
"step": 4080
},
{
"epoch": 0.2085245232996839,
"grad_norm": 5.6652232780132925,
"learning_rate": 4.593729869600159e-06,
"loss": 0.6162,
"step": 4090
},
{
"epoch": 0.20903436320995208,
"grad_norm": 8.319639345956334,
"learning_rate": 4.591471139239161e-06,
"loss": 0.6331,
"step": 4100
},
{
"epoch": 0.20954420312022026,
"grad_norm": 5.339412633147117,
"learning_rate": 4.589206705893656e-06,
"loss": 0.7023,
"step": 4110
},
{
"epoch": 0.2100540430304884,
"grad_norm": 6.874025709868468,
"learning_rate": 4.5869365757382564e-06,
"loss": 0.6892,
"step": 4120
},
{
"epoch": 0.2105638829407566,
"grad_norm": 4.246606408830112,
"learning_rate": 4.584660754963113e-06,
"loss": 0.6833,
"step": 4130
},
{
"epoch": 0.21107372285102477,
"grad_norm": 4.124686879487375,
"learning_rate": 4.582379249773891e-06,
"loss": 0.6407,
"step": 4140
},
{
"epoch": 0.21158356276129295,
"grad_norm": 7.942648344581485,
"learning_rate": 4.580092066391755e-06,
"loss": 0.6751,
"step": 4150
},
{
"epoch": 0.21209340267156113,
"grad_norm": 4.216781470843067,
"learning_rate": 4.577799211053355e-06,
"loss": 0.6171,
"step": 4160
},
{
"epoch": 0.2126032425818293,
"grad_norm": 5.431013093447799,
"learning_rate": 4.575500690010806e-06,
"loss": 0.6673,
"step": 4170
},
{
"epoch": 0.21311308249209748,
"grad_norm": 3.795351879459813,
"learning_rate": 4.573196509531671e-06,
"loss": 0.713,
"step": 4180
},
{
"epoch": 0.21362292240236566,
"grad_norm": 4.851916163937092,
"learning_rate": 4.570886675898949e-06,
"loss": 0.6603,
"step": 4190
},
{
"epoch": 0.21413276231263384,
"grad_norm": 12.506235645752948,
"learning_rate": 4.56857119541105e-06,
"loss": 0.6435,
"step": 4200
},
{
"epoch": 0.21464260222290202,
"grad_norm": 4.37250096964298,
"learning_rate": 4.566250074381783e-06,
"loss": 0.6624,
"step": 4210
},
{
"epoch": 0.2151524421331702,
"grad_norm": 4.502842518687739,
"learning_rate": 4.5639233191403365e-06,
"loss": 0.6103,
"step": 4220
},
{
"epoch": 0.21566228204343835,
"grad_norm": 5.375401922417605,
"learning_rate": 4.561590936031265e-06,
"loss": 0.6581,
"step": 4230
},
{
"epoch": 0.21617212195370653,
"grad_norm": 5.615522541050511,
"learning_rate": 4.559252931414466e-06,
"loss": 0.6873,
"step": 4240
},
{
"epoch": 0.2166819618639747,
"grad_norm": 5.637724680426694,
"learning_rate": 4.556909311665169e-06,
"loss": 0.7198,
"step": 4250
},
{
"epoch": 0.21719180177424288,
"grad_norm": 6.792915531141634,
"learning_rate": 4.554560083173909e-06,
"loss": 0.6562,
"step": 4260
},
{
"epoch": 0.21770164168451106,
"grad_norm": 12.275271735359572,
"learning_rate": 4.552205252346522e-06,
"loss": 0.6079,
"step": 4270
},
{
"epoch": 0.21821148159477924,
"grad_norm": 3.073983043353074,
"learning_rate": 4.549844825604115e-06,
"loss": 0.6186,
"step": 4280
},
{
"epoch": 0.21872132150504742,
"grad_norm": 4.785561644921718,
"learning_rate": 4.547478809383057e-06,
"loss": 0.6722,
"step": 4290
},
{
"epoch": 0.2192311614153156,
"grad_norm": 7.253881470892052,
"learning_rate": 4.545107210134954e-06,
"loss": 0.6986,
"step": 4300
},
{
"epoch": 0.21974100132558377,
"grad_norm": 6.467754925973429,
"learning_rate": 4.542730034326641e-06,
"loss": 0.6458,
"step": 4310
},
{
"epoch": 0.22025084123585195,
"grad_norm": 5.075052675558556,
"learning_rate": 4.540347288440158e-06,
"loss": 0.6032,
"step": 4320
},
{
"epoch": 0.22076068114612013,
"grad_norm": 7.393418461726879,
"learning_rate": 4.537958978972729e-06,
"loss": 0.6279,
"step": 4330
},
{
"epoch": 0.22127052105638828,
"grad_norm": 5.078229909630058,
"learning_rate": 4.535565112436753e-06,
"loss": 0.6722,
"step": 4340
},
{
"epoch": 0.22178036096665646,
"grad_norm": 6.031044978312038,
"learning_rate": 4.5331656953597805e-06,
"loss": 0.6058,
"step": 4350
},
{
"epoch": 0.22229020087692464,
"grad_norm": 7.3002824690265165,
"learning_rate": 4.530760734284496e-06,
"loss": 0.6562,
"step": 4360
},
{
"epoch": 0.22280004078719282,
"grad_norm": 10.954637687981535,
"learning_rate": 4.528350235768706e-06,
"loss": 0.6435,
"step": 4370
},
{
"epoch": 0.223309880697461,
"grad_norm": 9.527844954768264,
"learning_rate": 4.52593420638531e-06,
"loss": 0.6365,
"step": 4380
},
{
"epoch": 0.22381972060772917,
"grad_norm": 12.572298822755124,
"learning_rate": 4.523512652722293e-06,
"loss": 0.646,
"step": 4390
},
{
"epoch": 0.22432956051799735,
"grad_norm": 8.69200891214177,
"learning_rate": 4.521085581382701e-06,
"loss": 0.6873,
"step": 4400
},
{
"epoch": 0.22483940042826553,
"grad_norm": 7.063257388870424,
"learning_rate": 4.51865299898463e-06,
"loss": 0.6567,
"step": 4410
},
{
"epoch": 0.2253492403385337,
"grad_norm": 8.50522598323742,
"learning_rate": 4.516214912161196e-06,
"loss": 0.6016,
"step": 4420
},
{
"epoch": 0.22585908024880189,
"grad_norm": 5.805612583386945,
"learning_rate": 4.513771327560533e-06,
"loss": 0.6981,
"step": 4430
},
{
"epoch": 0.22636892015907006,
"grad_norm": 3.4305683235344486,
"learning_rate": 4.511322251845758e-06,
"loss": 0.6244,
"step": 4440
},
{
"epoch": 0.22687876006933821,
"grad_norm": 7.249537201535017,
"learning_rate": 4.5088676916949685e-06,
"loss": 0.6534,
"step": 4450
},
{
"epoch": 0.2273885999796064,
"grad_norm": 4.279837196853443,
"learning_rate": 4.5064076538012105e-06,
"loss": 0.6381,
"step": 4460
},
{
"epoch": 0.22789843988987457,
"grad_norm": 21.808154066533312,
"learning_rate": 4.503942144872472e-06,
"loss": 0.6596,
"step": 4470
},
{
"epoch": 0.22840827980014275,
"grad_norm": 4.197990487421355,
"learning_rate": 4.501471171631654e-06,
"loss": 0.6803,
"step": 4480
},
{
"epoch": 0.22891811971041093,
"grad_norm": 5.961571626226339,
"learning_rate": 4.498994740816562e-06,
"loss": 0.6425,
"step": 4490
},
{
"epoch": 0.2294279596206791,
"grad_norm": 20.86279350187621,
"learning_rate": 4.496512859179882e-06,
"loss": 0.657,
"step": 4500
},
{
"epoch": 0.22993779953094728,
"grad_norm": 5.14373652719188,
"learning_rate": 4.494025533489161e-06,
"loss": 0.6662,
"step": 4510
},
{
"epoch": 0.23044763944121546,
"grad_norm": 5.0918306525583645,
"learning_rate": 4.491532770526794e-06,
"loss": 0.6147,
"step": 4520
},
{
"epoch": 0.23095747935148364,
"grad_norm": 4.6131560612494455,
"learning_rate": 4.489034577089998e-06,
"loss": 0.6752,
"step": 4530
},
{
"epoch": 0.23146731926175182,
"grad_norm": 5.010417962185847,
"learning_rate": 4.486530959990803e-06,
"loss": 0.6284,
"step": 4540
},
{
"epoch": 0.23197715917202,
"grad_norm": 7.269955227642231,
"learning_rate": 4.484021926056024e-06,
"loss": 0.6345,
"step": 4550
},
{
"epoch": 0.23248699908228815,
"grad_norm": 3.6132413760194555,
"learning_rate": 4.481507482127248e-06,
"loss": 0.6216,
"step": 4560
},
{
"epoch": 0.23299683899255633,
"grad_norm": 6.440674909052278,
"learning_rate": 4.478987635060814e-06,
"loss": 0.6236,
"step": 4570
},
{
"epoch": 0.2335066789028245,
"grad_norm": 12.282249691770065,
"learning_rate": 4.476462391727795e-06,
"loss": 0.6118,
"step": 4580
},
{
"epoch": 0.23401651881309268,
"grad_norm": 7.262980200026385,
"learning_rate": 4.473931759013976e-06,
"loss": 0.6619,
"step": 4590
},
{
"epoch": 0.23452635872336086,
"grad_norm": 7.930499586922202,
"learning_rate": 4.471395743819839e-06,
"loss": 0.6866,
"step": 4600
},
{
"epoch": 0.23503619863362904,
"grad_norm": 3.941439397600029,
"learning_rate": 4.468854353060545e-06,
"loss": 0.6482,
"step": 4610
},
{
"epoch": 0.23554603854389722,
"grad_norm": 4.980455089976807,
"learning_rate": 4.4663075936659075e-06,
"loss": 0.745,
"step": 4620
},
{
"epoch": 0.2360558784541654,
"grad_norm": 10.427334633224056,
"learning_rate": 4.463755472580386e-06,
"loss": 0.6794,
"step": 4630
},
{
"epoch": 0.23656571836443357,
"grad_norm": 5.225746399203979,
"learning_rate": 4.461197996763054e-06,
"loss": 0.5519,
"step": 4640
},
{
"epoch": 0.23707555827470175,
"grad_norm": 5.481458071446115,
"learning_rate": 4.458635173187592e-06,
"loss": 0.579,
"step": 4650
},
{
"epoch": 0.23758539818496993,
"grad_norm": 7.921100290729304,
"learning_rate": 4.456067008842257e-06,
"loss": 0.5722,
"step": 4660
},
{
"epoch": 0.23809523809523808,
"grad_norm": 4.067383472651753,
"learning_rate": 4.453493510729871e-06,
"loss": 0.689,
"step": 4670
},
{
"epoch": 0.23860507800550626,
"grad_norm": 5.760358825341278,
"learning_rate": 4.450914685867803e-06,
"loss": 0.6012,
"step": 4680
},
{
"epoch": 0.23911491791577444,
"grad_norm": 7.012099989273845,
"learning_rate": 4.448330541287943e-06,
"loss": 0.6208,
"step": 4690
},
{
"epoch": 0.23962475782604262,
"grad_norm": 9.768150229857632,
"learning_rate": 4.445741084036688e-06,
"loss": 0.7122,
"step": 4700
},
{
"epoch": 0.2401345977363108,
"grad_norm": 4.790523996499909,
"learning_rate": 4.443146321174925e-06,
"loss": 0.6698,
"step": 4710
},
{
"epoch": 0.24064443764657897,
"grad_norm": 4.462312793795511,
"learning_rate": 4.440546259778001e-06,
"loss": 0.6129,
"step": 4720
},
{
"epoch": 0.24115427755684715,
"grad_norm": 4.292603127227942,
"learning_rate": 4.437940906935717e-06,
"loss": 0.598,
"step": 4730
},
{
"epoch": 0.24166411746711533,
"grad_norm": 5.4642470424138025,
"learning_rate": 4.435330269752299e-06,
"loss": 0.6415,
"step": 4740
},
{
"epoch": 0.2421739573773835,
"grad_norm": 4.419531140610159,
"learning_rate": 4.432714355346386e-06,
"loss": 0.6435,
"step": 4750
},
{
"epoch": 0.2426837972876517,
"grad_norm": 7.200046549525596,
"learning_rate": 4.430093170851002e-06,
"loss": 0.6807,
"step": 4760
},
{
"epoch": 0.24319363719791987,
"grad_norm": 5.056999238017294,
"learning_rate": 4.427466723413547e-06,
"loss": 0.6293,
"step": 4770
},
{
"epoch": 0.24370347710818802,
"grad_norm": 4.48133378012618,
"learning_rate": 4.424835020195767e-06,
"loss": 0.6327,
"step": 4780
},
{
"epoch": 0.2442133170184562,
"grad_norm": 4.8645891122499485,
"learning_rate": 4.4221980683737405e-06,
"loss": 0.6042,
"step": 4790
},
{
"epoch": 0.24472315692872437,
"grad_norm": 11.18463566108358,
"learning_rate": 4.419555875137861e-06,
"loss": 0.6547,
"step": 4800
},
{
"epoch": 0.24523299683899255,
"grad_norm": 12.154473146198615,
"learning_rate": 4.41690844769281e-06,
"loss": 0.5869,
"step": 4810
},
{
"epoch": 0.24574283674926073,
"grad_norm": 4.324158535382592,
"learning_rate": 4.414255793257543e-06,
"loss": 0.6165,
"step": 4820
},
{
"epoch": 0.2462526766595289,
"grad_norm": 6.03471919846115,
"learning_rate": 4.411597919065271e-06,
"loss": 0.6493,
"step": 4830
},
{
"epoch": 0.24676251656979709,
"grad_norm": 4.06169086667296,
"learning_rate": 4.408934832363433e-06,
"loss": 0.6253,
"step": 4840
},
{
"epoch": 0.24727235648006526,
"grad_norm": 3.6614824441761677,
"learning_rate": 4.4062665404136865e-06,
"loss": 0.618,
"step": 4850
},
{
"epoch": 0.24778219639033344,
"grad_norm": 5.373379428885273,
"learning_rate": 4.403593050491878e-06,
"loss": 0.576,
"step": 4860
},
{
"epoch": 0.24829203630060162,
"grad_norm": 17.68117682291508,
"learning_rate": 4.400914369888031e-06,
"loss": 0.7237,
"step": 4870
},
{
"epoch": 0.2488018762108698,
"grad_norm": 3.892037751392953,
"learning_rate": 4.398230505906322e-06,
"loss": 0.5822,
"step": 4880
},
{
"epoch": 0.24931171612113795,
"grad_norm": 10.240705881927308,
"learning_rate": 4.395541465865062e-06,
"loss": 0.7296,
"step": 4890
},
{
"epoch": 0.24982155603140613,
"grad_norm": 6.2847519198064035,
"learning_rate": 4.392847257096674e-06,
"loss": 0.6233,
"step": 4900
},
{
"epoch": 0.2503313959416743,
"grad_norm": 5.386671580825061,
"learning_rate": 4.390147886947676e-06,
"loss": 0.6703,
"step": 4910
},
{
"epoch": 0.2508412358519425,
"grad_norm": 5.778523503321662,
"learning_rate": 4.387443362778661e-06,
"loss": 0.6312,
"step": 4920
},
{
"epoch": 0.25135107576221066,
"grad_norm": 4.1290430615963984,
"learning_rate": 4.384733691964276e-06,
"loss": 0.6339,
"step": 4930
},
{
"epoch": 0.25186091567247887,
"grad_norm": 4.233313985082475,
"learning_rate": 4.382018881893199e-06,
"loss": 0.5995,
"step": 4940
},
{
"epoch": 0.252370755582747,
"grad_norm": 9.726721675759366,
"learning_rate": 4.379298939968124e-06,
"loss": 0.6655,
"step": 4950
},
{
"epoch": 0.25288059549301517,
"grad_norm": 5.333780678244991,
"learning_rate": 4.376573873605738e-06,
"loss": 0.707,
"step": 4960
},
{
"epoch": 0.2533904354032834,
"grad_norm": 12.79409497605327,
"learning_rate": 4.373843690236702e-06,
"loss": 0.647,
"step": 4970
},
{
"epoch": 0.2539002753135515,
"grad_norm": 5.751493207318074,
"learning_rate": 4.371108397305629e-06,
"loss": 0.6433,
"step": 4980
},
{
"epoch": 0.25441011522381973,
"grad_norm": 5.784255393218418,
"learning_rate": 4.368368002271063e-06,
"loss": 0.6485,
"step": 4990
},
{
"epoch": 0.2549199551340879,
"grad_norm": 5.37700144524392,
"learning_rate": 4.365622512605464e-06,
"loss": 0.6825,
"step": 5000
},
{
"epoch": 0.2554297950443561,
"grad_norm": 10.994734773726519,
"learning_rate": 4.362871935795181e-06,
"loss": 0.6046,
"step": 5010
},
{
"epoch": 0.25593963495462424,
"grad_norm": 5.63606029861754,
"learning_rate": 4.360116279340436e-06,
"loss": 0.6669,
"step": 5020
},
{
"epoch": 0.25644947486489245,
"grad_norm": 10.172686227754747,
"learning_rate": 4.3573555507553026e-06,
"loss": 0.6623,
"step": 5030
},
{
"epoch": 0.2569593147751606,
"grad_norm": 4.697199356488424,
"learning_rate": 4.354589757567681e-06,
"loss": 0.7029,
"step": 5040
},
{
"epoch": 0.2574691546854288,
"grad_norm": 8.266991297087268,
"learning_rate": 4.351818907319287e-06,
"loss": 0.6695,
"step": 5050
},
{
"epoch": 0.25797899459569695,
"grad_norm": 5.449534429649487,
"learning_rate": 4.349043007565624e-06,
"loss": 0.6211,
"step": 5060
},
{
"epoch": 0.2584888345059651,
"grad_norm": 6.313723666703395,
"learning_rate": 4.346262065875962e-06,
"loss": 0.616,
"step": 5070
},
{
"epoch": 0.2589986744162333,
"grad_norm": 14.569546334056222,
"learning_rate": 4.343476089833322e-06,
"loss": 0.6764,
"step": 5080
},
{
"epoch": 0.25950851432650146,
"grad_norm": 6.514233199673362,
"learning_rate": 4.340685087034449e-06,
"loss": 0.6528,
"step": 5090
},
{
"epoch": 0.26001835423676967,
"grad_norm": 5.561839257372943,
"learning_rate": 4.337889065089802e-06,
"loss": 0.6699,
"step": 5100
},
{
"epoch": 0.2605281941470378,
"grad_norm": 4.478703593432573,
"learning_rate": 4.3350880316235176e-06,
"loss": 0.6601,
"step": 5110
},
{
"epoch": 0.261038034057306,
"grad_norm": 7.524693099329384,
"learning_rate": 4.332281994273403e-06,
"loss": 0.6119,
"step": 5120
},
{
"epoch": 0.2615478739675742,
"grad_norm": 4.135739369177627,
"learning_rate": 4.329470960690909e-06,
"loss": 0.637,
"step": 5130
},
{
"epoch": 0.2620577138778424,
"grad_norm": 14.990634974428975,
"learning_rate": 4.326654938541109e-06,
"loss": 0.6603,
"step": 5140
},
{
"epoch": 0.26256755378811053,
"grad_norm": 5.429171829425739,
"learning_rate": 4.323833935502679e-06,
"loss": 0.5659,
"step": 5150
},
{
"epoch": 0.26307739369837874,
"grad_norm": 6.768814826359542,
"learning_rate": 4.321007959267879e-06,
"loss": 0.6621,
"step": 5160
},
{
"epoch": 0.2635872336086469,
"grad_norm": 40.18656196374432,
"learning_rate": 4.3181770175425275e-06,
"loss": 0.5885,
"step": 5170
},
{
"epoch": 0.26409707351891504,
"grad_norm": 5.330611179159143,
"learning_rate": 4.315341118045983e-06,
"loss": 0.72,
"step": 5180
},
{
"epoch": 0.26460691342918324,
"grad_norm": 6.871222458050014,
"learning_rate": 4.3125002685111254e-06,
"loss": 0.6341,
"step": 5190
},
{
"epoch": 0.2651167533394514,
"grad_norm": 12.89041870738685,
"learning_rate": 4.309654476684327e-06,
"loss": 0.6403,
"step": 5200
},
{
"epoch": 0.2656265932497196,
"grad_norm": 8.871359899166356,
"learning_rate": 4.306803750325443e-06,
"loss": 0.5988,
"step": 5210
},
{
"epoch": 0.26613643315998775,
"grad_norm": 4.271969512057242,
"learning_rate": 4.30394809720778e-06,
"loss": 0.6501,
"step": 5220
},
{
"epoch": 0.26664627307025596,
"grad_norm": 5.586750400018364,
"learning_rate": 4.301087525118079e-06,
"loss": 0.6546,
"step": 5230
},
{
"epoch": 0.2671561129805241,
"grad_norm": 4.5372505083089125,
"learning_rate": 4.298222041856495e-06,
"loss": 0.6429,
"step": 5240
},
{
"epoch": 0.2676659528907923,
"grad_norm": 27.779145618695214,
"learning_rate": 4.295351655236574e-06,
"loss": 0.6193,
"step": 5250
},
{
"epoch": 0.26817579280106046,
"grad_norm": 14.688280119142508,
"learning_rate": 4.292476373085232e-06,
"loss": 0.6699,
"step": 5260
},
{
"epoch": 0.26868563271132867,
"grad_norm": 4.562206119898779,
"learning_rate": 4.289596203242739e-06,
"loss": 0.6078,
"step": 5270
},
{
"epoch": 0.2691954726215968,
"grad_norm": 30.92287637133422,
"learning_rate": 4.286711153562682e-06,
"loss": 0.651,
"step": 5280
},
{
"epoch": 0.26970531253186497,
"grad_norm": 6.666495203414483,
"learning_rate": 4.283821231911966e-06,
"loss": 0.6851,
"step": 5290
},
{
"epoch": 0.2702151524421332,
"grad_norm": 9.109830949836569,
"learning_rate": 4.280926446170772e-06,
"loss": 0.6587,
"step": 5300
},
{
"epoch": 0.27072499235240133,
"grad_norm": 3.234120315526601,
"learning_rate": 4.27802680423255e-06,
"loss": 0.6133,
"step": 5310
},
{
"epoch": 0.27123483226266953,
"grad_norm": 10.576744793710242,
"learning_rate": 4.275122314003988e-06,
"loss": 0.6223,
"step": 5320
},
{
"epoch": 0.2717446721729377,
"grad_norm": 4.464159166800522,
"learning_rate": 4.2722129834049975e-06,
"loss": 0.6651,
"step": 5330
},
{
"epoch": 0.2722545120832059,
"grad_norm": 6.165231648367287,
"learning_rate": 4.269298820368685e-06,
"loss": 0.6251,
"step": 5340
},
{
"epoch": 0.27276435199347404,
"grad_norm": 7.155986302726076,
"learning_rate": 4.2663798328413375e-06,
"loss": 0.6149,
"step": 5350
},
{
"epoch": 0.27327419190374225,
"grad_norm": 7.561081612665025,
"learning_rate": 4.263456028782396e-06,
"loss": 0.6038,
"step": 5360
},
{
"epoch": 0.2737840318140104,
"grad_norm": 5.392800891473693,
"learning_rate": 4.2605274161644324e-06,
"loss": 0.621,
"step": 5370
},
{
"epoch": 0.2742938717242786,
"grad_norm": 4.452795618894178,
"learning_rate": 4.2575940029731356e-06,
"loss": 0.6207,
"step": 5380
},
{
"epoch": 0.27480371163454675,
"grad_norm": 15.308045174026043,
"learning_rate": 4.2546557972072806e-06,
"loss": 0.6974,
"step": 5390
},
{
"epoch": 0.2753135515448149,
"grad_norm": 10.474470898481243,
"learning_rate": 4.251712806878713e-06,
"loss": 0.6183,
"step": 5400
},
{
"epoch": 0.2758233914550831,
"grad_norm": 5.027179277912336,
"learning_rate": 4.248765040012324e-06,
"loss": 0.6578,
"step": 5410
},
{
"epoch": 0.27633323136535126,
"grad_norm": 35.436926417503955,
"learning_rate": 4.2458125046460275e-06,
"loss": 0.6324,
"step": 5420
},
{
"epoch": 0.27684307127561947,
"grad_norm": 5.173289707094978,
"learning_rate": 4.242855208830744e-06,
"loss": 0.6098,
"step": 5430
},
{
"epoch": 0.2773529111858876,
"grad_norm": 4.057663120262747,
"learning_rate": 4.239893160630372e-06,
"loss": 0.642,
"step": 5440
},
{
"epoch": 0.2778627510961558,
"grad_norm": 17.42531435164107,
"learning_rate": 4.236926368121769e-06,
"loss": 0.6301,
"step": 5450
},
{
"epoch": 0.278372591006424,
"grad_norm": 4.394693733513858,
"learning_rate": 4.233954839394729e-06,
"loss": 0.6746,
"step": 5460
},
{
"epoch": 0.2788824309166922,
"grad_norm": 3.977959027112798,
"learning_rate": 4.2309785825519625e-06,
"loss": 0.5941,
"step": 5470
},
{
"epoch": 0.27939227082696033,
"grad_norm": 5.683460468119698,
"learning_rate": 4.22799760570907e-06,
"loss": 0.701,
"step": 5480
},
{
"epoch": 0.27990211073722854,
"grad_norm": 3.7760160964369396,
"learning_rate": 4.225011916994525e-06,
"loss": 0.5498,
"step": 5490
},
{
"epoch": 0.2804119506474967,
"grad_norm": 7.37545061226748,
"learning_rate": 4.222021524549646e-06,
"loss": 0.6047,
"step": 5500
},
{
"epoch": 0.28092179055776484,
"grad_norm": 3.8393438974742344,
"learning_rate": 4.21902643652858e-06,
"loss": 0.6342,
"step": 5510
},
{
"epoch": 0.28143163046803304,
"grad_norm": 3.8747277801352937,
"learning_rate": 4.216026661098278e-06,
"loss": 0.6174,
"step": 5520
},
{
"epoch": 0.2819414703783012,
"grad_norm": 6.386329134081537,
"learning_rate": 4.2130222064384704e-06,
"loss": 0.68,
"step": 5530
},
{
"epoch": 0.2824513102885694,
"grad_norm": 5.420447679317111,
"learning_rate": 4.210013080741649e-06,
"loss": 0.6835,
"step": 5540
},
{
"epoch": 0.28296115019883755,
"grad_norm": 7.173594663259141,
"learning_rate": 4.2069992922130424e-06,
"loss": 0.638,
"step": 5550
},
{
"epoch": 0.28347099010910576,
"grad_norm": 17.530283331531706,
"learning_rate": 4.20398084907059e-06,
"loss": 0.6291,
"step": 5560
},
{
"epoch": 0.2839808300193739,
"grad_norm": 8.109340502244885,
"learning_rate": 4.20095775954493e-06,
"loss": 0.6896,
"step": 5570
},
{
"epoch": 0.2844906699296421,
"grad_norm": 5.401146105324492,
"learning_rate": 4.1979300318793645e-06,
"loss": 0.6202,
"step": 5580
},
{
"epoch": 0.28500050983991027,
"grad_norm": 9.845797334403796,
"learning_rate": 4.194897674329845e-06,
"loss": 0.6617,
"step": 5590
},
{
"epoch": 0.28551034975017847,
"grad_norm": 6.98720973954987,
"learning_rate": 4.191860695164948e-06,
"loss": 0.6859,
"step": 5600
},
{
"epoch": 0.2860201896604466,
"grad_norm": 3.3852963513377605,
"learning_rate": 4.188819102665851e-06,
"loss": 0.6827,
"step": 5610
},
{
"epoch": 0.2865300295707148,
"grad_norm": 5.910829676428448,
"learning_rate": 4.185772905126313e-06,
"loss": 0.632,
"step": 5620
},
{
"epoch": 0.287039869480983,
"grad_norm": 5.3891638611882895,
"learning_rate": 4.182722110852647e-06,
"loss": 0.5784,
"step": 5630
},
{
"epoch": 0.28754970939125113,
"grad_norm": 6.399878165787999,
"learning_rate": 4.179666728163703e-06,
"loss": 0.574,
"step": 5640
},
{
"epoch": 0.28805954930151934,
"grad_norm": 7.872943692352509,
"learning_rate": 4.176606765390841e-06,
"loss": 0.6408,
"step": 5650
},
{
"epoch": 0.2885693892117875,
"grad_norm": 10.660244106341933,
"learning_rate": 4.1735422308779116e-06,
"loss": 0.702,
"step": 5660
},
{
"epoch": 0.2890792291220557,
"grad_norm": 9.453916032422011,
"learning_rate": 4.170473132981229e-06,
"loss": 0.7054,
"step": 5670
},
{
"epoch": 0.28958906903232384,
"grad_norm": 5.211481088433242,
"learning_rate": 4.167399480069552e-06,
"loss": 0.624,
"step": 5680
},
{
"epoch": 0.29009890894259205,
"grad_norm": 4.724676021619974,
"learning_rate": 4.164321280524062e-06,
"loss": 0.6543,
"step": 5690
},
{
"epoch": 0.2906087488528602,
"grad_norm": 5.899702889957497,
"learning_rate": 4.1612385427383335e-06,
"loss": 0.6213,
"step": 5700
},
{
"epoch": 0.2911185887631284,
"grad_norm": 8.356033595955743,
"learning_rate": 4.158151275118321e-06,
"loss": 0.6136,
"step": 5710
},
{
"epoch": 0.29162842867339656,
"grad_norm": 4.528472454079423,
"learning_rate": 4.155059486082326e-06,
"loss": 0.5934,
"step": 5720
},
{
"epoch": 0.2921382685836647,
"grad_norm": 3.9471977080042224,
"learning_rate": 4.151963184060982e-06,
"loss": 0.5906,
"step": 5730
},
{
"epoch": 0.2926481084939329,
"grad_norm": 7.583055298271071,
"learning_rate": 4.148862377497228e-06,
"loss": 0.5857,
"step": 5740
},
{
"epoch": 0.29315794840420106,
"grad_norm": 4.956269546892512,
"learning_rate": 4.145757074846286e-06,
"loss": 0.6188,
"step": 5750
},
{
"epoch": 0.29366778831446927,
"grad_norm": 4.545321137613146,
"learning_rate": 4.142647284575637e-06,
"loss": 0.5745,
"step": 5760
},
{
"epoch": 0.2941776282247374,
"grad_norm": 4.792940652062567,
"learning_rate": 4.1395330151649986e-06,
"loss": 0.6324,
"step": 5770
},
{
"epoch": 0.2946874681350056,
"grad_norm": 6.596024873190735,
"learning_rate": 4.136414275106302e-06,
"loss": 0.5657,
"step": 5780
},
{
"epoch": 0.2951973080452738,
"grad_norm": 5.713080839603736,
"learning_rate": 4.13329107290367e-06,
"loss": 0.6331,
"step": 5790
},
{
"epoch": 0.295707147955542,
"grad_norm": 5.432098547529835,
"learning_rate": 4.1301634170733925e-06,
"loss": 0.6064,
"step": 5800
},
{
"epoch": 0.29621698786581013,
"grad_norm": 5.590745245241055,
"learning_rate": 4.127031316143904e-06,
"loss": 0.5953,
"step": 5810
},
{
"epoch": 0.29672682777607834,
"grad_norm": 5.0642561912893695,
"learning_rate": 4.1238947786557584e-06,
"loss": 0.6138,
"step": 5820
},
{
"epoch": 0.2972366676863465,
"grad_norm": 5.397119694018976,
"learning_rate": 4.120753813161606e-06,
"loss": 0.6255,
"step": 5830
},
{
"epoch": 0.29774650759661464,
"grad_norm": 6.506357009448125,
"learning_rate": 4.117608428226174e-06,
"loss": 0.6385,
"step": 5840
},
{
"epoch": 0.29825634750688285,
"grad_norm": 6.6355006872288556,
"learning_rate": 4.1144586324262406e-06,
"loss": 0.6648,
"step": 5850
},
{
"epoch": 0.298766187417151,
"grad_norm": 6.819435229576645,
"learning_rate": 4.111304434350608e-06,
"loss": 0.6416,
"step": 5860
},
{
"epoch": 0.2992760273274192,
"grad_norm": 5.612359875216613,
"learning_rate": 4.108145842600086e-06,
"loss": 0.6635,
"step": 5870
},
{
"epoch": 0.29978586723768735,
"grad_norm": 3.838613644362246,
"learning_rate": 4.104982865787465e-06,
"loss": 0.6013,
"step": 5880
},
{
"epoch": 0.30029570714795556,
"grad_norm": 5.376602190700165,
"learning_rate": 4.101815512537488e-06,
"loss": 0.5881,
"step": 5890
},
{
"epoch": 0.3008055470582237,
"grad_norm": 6.896496839644866,
"learning_rate": 4.0986437914868374e-06,
"loss": 0.6871,
"step": 5900
},
{
"epoch": 0.3013153869684919,
"grad_norm": 8.306304088884753,
"learning_rate": 4.095467711284103e-06,
"loss": 0.6562,
"step": 5910
},
{
"epoch": 0.30182522687876007,
"grad_norm": 7.572440847065114,
"learning_rate": 4.092287280589759e-06,
"loss": 0.6525,
"step": 5920
},
{
"epoch": 0.3023350667890283,
"grad_norm": 8.692355693404446,
"learning_rate": 4.089102508076146e-06,
"loss": 0.5751,
"step": 5930
},
{
"epoch": 0.3028449066992964,
"grad_norm": 6.722049029065847,
"learning_rate": 4.085913402427442e-06,
"loss": 0.6343,
"step": 5940
},
{
"epoch": 0.3033547466095646,
"grad_norm": 7.828445788697127,
"learning_rate": 4.082719972339641e-06,
"loss": 0.5936,
"step": 5950
},
{
"epoch": 0.3038645865198328,
"grad_norm": 6.405566944324596,
"learning_rate": 4.0795222265205284e-06,
"loss": 0.6116,
"step": 5960
},
{
"epoch": 0.30437442643010093,
"grad_norm": 6.067963979872861,
"learning_rate": 4.076320173689658e-06,
"loss": 0.6546,
"step": 5970
},
{
"epoch": 0.30488426634036914,
"grad_norm": 5.604101231115741,
"learning_rate": 4.073113822578328e-06,
"loss": 0.7114,
"step": 5980
},
{
"epoch": 0.3053941062506373,
"grad_norm": 3.929822261335541,
"learning_rate": 4.069903181929557e-06,
"loss": 0.6041,
"step": 5990
},
{
"epoch": 0.3059039461609055,
"grad_norm": 8.08933198444047,
"learning_rate": 4.066688260498059e-06,
"loss": 0.6102,
"step": 6000
},
{
"epoch": 0.30641378607117364,
"grad_norm": 5.341035322956155,
"learning_rate": 4.063469067050223e-06,
"loss": 0.6173,
"step": 6010
},
{
"epoch": 0.30692362598144185,
"grad_norm": 4.8043762647528085,
"learning_rate": 4.060245610364085e-06,
"loss": 0.6416,
"step": 6020
},
{
"epoch": 0.30743346589171,
"grad_norm": 4.509900217695743,
"learning_rate": 4.057017899229307e-06,
"loss": 0.6424,
"step": 6030
},
{
"epoch": 0.3079433058019782,
"grad_norm": 7.984468045201899,
"learning_rate": 4.053785942447151e-06,
"loss": 0.6983,
"step": 6040
},
{
"epoch": 0.30845314571224636,
"grad_norm": 8.1778484515331,
"learning_rate": 4.0505497488304566e-06,
"loss": 0.6477,
"step": 6050
},
{
"epoch": 0.3089629856225145,
"grad_norm": 5.112670563038483,
"learning_rate": 4.047309327203616e-06,
"loss": 0.6241,
"step": 6060
},
{
"epoch": 0.3094728255327827,
"grad_norm": 8.347393904556576,
"learning_rate": 4.044064686402552e-06,
"loss": 0.5946,
"step": 6070
},
{
"epoch": 0.30998266544305086,
"grad_norm": 4.656857993534298,
"learning_rate": 4.040815835274689e-06,
"loss": 0.5772,
"step": 6080
},
{
"epoch": 0.31049250535331907,
"grad_norm": 4.658567236532135,
"learning_rate": 4.037562782678934e-06,
"loss": 0.5742,
"step": 6090
},
{
"epoch": 0.3110023452635872,
"grad_norm": 5.885392479470231,
"learning_rate": 4.034305537485651e-06,
"loss": 0.5661,
"step": 6100
},
{
"epoch": 0.3115121851738554,
"grad_norm": 6.812576194838161,
"learning_rate": 4.031044108576634e-06,
"loss": 0.6185,
"step": 6110
},
{
"epoch": 0.3120220250841236,
"grad_norm": 4.695832813773793,
"learning_rate": 4.027778504845088e-06,
"loss": 0.5996,
"step": 6120
},
{
"epoch": 0.3125318649943918,
"grad_norm": 6.719096539347398,
"learning_rate": 4.024508735195599e-06,
"loss": 0.5472,
"step": 6130
},
{
"epoch": 0.31304170490465993,
"grad_norm": 4.464117617072947,
"learning_rate": 4.021234808544115e-06,
"loss": 0.5882,
"step": 6140
},
{
"epoch": 0.3135515448149281,
"grad_norm": 4.949227201876802,
"learning_rate": 4.017956733817919e-06,
"loss": 0.6042,
"step": 6150
},
{
"epoch": 0.3140613847251963,
"grad_norm": 5.147805511736469,
"learning_rate": 4.014674519955602e-06,
"loss": 0.5562,
"step": 6160
},
{
"epoch": 0.31457122463546444,
"grad_norm": 5.27591052217068,
"learning_rate": 4.011388175907044e-06,
"loss": 0.6158,
"step": 6170
},
{
"epoch": 0.31508106454573265,
"grad_norm": 5.314691715702688,
"learning_rate": 4.008097710633388e-06,
"loss": 0.626,
"step": 6180
},
{
"epoch": 0.3155909044560008,
"grad_norm": 5.101799323690667,
"learning_rate": 4.004803133107011e-06,
"loss": 0.5678,
"step": 6190
},
{
"epoch": 0.316100744366269,
"grad_norm": 9.393447107360567,
"learning_rate": 4.0015044523115084e-06,
"loss": 0.6026,
"step": 6200
},
{
"epoch": 0.31661058427653715,
"grad_norm": 2.9871948397773864,
"learning_rate": 3.9982016772416595e-06,
"loss": 0.6206,
"step": 6210
},
{
"epoch": 0.31712042418680536,
"grad_norm": 5.96392264314999,
"learning_rate": 3.99489481690341e-06,
"loss": 0.6442,
"step": 6220
},
{
"epoch": 0.3176302640970735,
"grad_norm": 4.06827307212575,
"learning_rate": 3.991583880313846e-06,
"loss": 0.5968,
"step": 6230
},
{
"epoch": 0.3181401040073417,
"grad_norm": 5.97851845959425,
"learning_rate": 3.988268876501167e-06,
"loss": 0.5609,
"step": 6240
},
{
"epoch": 0.31864994391760987,
"grad_norm": 8.540876238094416,
"learning_rate": 3.984949814504664e-06,
"loss": 0.6204,
"step": 6250
},
{
"epoch": 0.319159783827878,
"grad_norm": 8.161296163654386,
"learning_rate": 3.981626703374693e-06,
"loss": 0.6187,
"step": 6260
},
{
"epoch": 0.3196696237381462,
"grad_norm": 4.6578960874648905,
"learning_rate": 3.9782995521726505e-06,
"loss": 0.5095,
"step": 6270
},
{
"epoch": 0.3201794636484144,
"grad_norm": 5.185027311642541,
"learning_rate": 3.974968369970953e-06,
"loss": 0.6377,
"step": 6280
},
{
"epoch": 0.3206893035586826,
"grad_norm": 8.45946575637304,
"learning_rate": 3.971633165853004e-06,
"loss": 0.6451,
"step": 6290
},
{
"epoch": 0.32119914346895073,
"grad_norm": 5.663225217140194,
"learning_rate": 3.968293948913175e-06,
"loss": 0.5533,
"step": 6300
},
{
"epoch": 0.32170898337921894,
"grad_norm": 7.004741286625379,
"learning_rate": 3.964950728256783e-06,
"loss": 0.6197,
"step": 6310
},
{
"epoch": 0.3222188232894871,
"grad_norm": 5.713591815943368,
"learning_rate": 3.961603513000058e-06,
"loss": 0.6442,
"step": 6320
},
{
"epoch": 0.3227286631997553,
"grad_norm": 4.911310025373795,
"learning_rate": 3.958252312270125e-06,
"loss": 0.6582,
"step": 6330
},
{
"epoch": 0.32323850311002345,
"grad_norm": 67.60264950871154,
"learning_rate": 3.954897135204975e-06,
"loss": 0.5731,
"step": 6340
},
{
"epoch": 0.32374834302029165,
"grad_norm": 9.225544023855372,
"learning_rate": 3.951537990953443e-06,
"loss": 0.6491,
"step": 6350
},
{
"epoch": 0.3242581829305598,
"grad_norm": 8.37214360676923,
"learning_rate": 3.94817488867518e-06,
"loss": 0.6637,
"step": 6360
},
{
"epoch": 0.32476802284082795,
"grad_norm": 7.165627579349391,
"learning_rate": 3.944807837540633e-06,
"loss": 0.63,
"step": 6370
},
{
"epoch": 0.32527786275109616,
"grad_norm": 7.9986534028713825,
"learning_rate": 3.94143684673101e-06,
"loss": 0.6675,
"step": 6380
},
{
"epoch": 0.3257877026613643,
"grad_norm": 7.300211677495484,
"learning_rate": 3.938061925438269e-06,
"loss": 0.6203,
"step": 6390
},
{
"epoch": 0.3262975425716325,
"grad_norm": 5.26360439797044,
"learning_rate": 3.934683082865082e-06,
"loss": 0.5485,
"step": 6400
},
{
"epoch": 0.32680738248190067,
"grad_norm": 8.674842576965311,
"learning_rate": 3.931300328224814e-06,
"loss": 0.685,
"step": 6410
},
{
"epoch": 0.32731722239216887,
"grad_norm": 3.680275478176689,
"learning_rate": 3.927913670741497e-06,
"loss": 0.5436,
"step": 6420
},
{
"epoch": 0.327827062302437,
"grad_norm": 5.151700048772699,
"learning_rate": 3.9245231196498055e-06,
"loss": 0.6186,
"step": 6430
},
{
"epoch": 0.32833690221270523,
"grad_norm": 12.50962121820052,
"learning_rate": 3.92112868419503e-06,
"loss": 0.5814,
"step": 6440
},
{
"epoch": 0.3288467421229734,
"grad_norm": 5.39886547774635,
"learning_rate": 3.917730373633056e-06,
"loss": 0.6702,
"step": 6450
},
{
"epoch": 0.3293565820332416,
"grad_norm": 9.745980327044636,
"learning_rate": 3.914328197230331e-06,
"loss": 0.65,
"step": 6460
},
{
"epoch": 0.32986642194350974,
"grad_norm": 4.328195735840979,
"learning_rate": 3.910922164263847e-06,
"loss": 0.6397,
"step": 6470
},
{
"epoch": 0.3303762618537779,
"grad_norm": 11.388860470153862,
"learning_rate": 3.907512284021113e-06,
"loss": 0.551,
"step": 6480
},
{
"epoch": 0.3308861017640461,
"grad_norm": 5.001205491020057,
"learning_rate": 3.9040985658001245e-06,
"loss": 0.6024,
"step": 6490
},
{
"epoch": 0.33139594167431424,
"grad_norm": 13.752803345684894,
"learning_rate": 3.900681018909346e-06,
"loss": 0.6269,
"step": 6500
},
{
"epoch": 0.33190578158458245,
"grad_norm": 6.563246966742989,
"learning_rate": 3.89725965266768e-06,
"loss": 0.5483,
"step": 6510
},
{
"epoch": 0.3324156214948506,
"grad_norm": 5.6009232150840385,
"learning_rate": 3.893834476404445e-06,
"loss": 0.6227,
"step": 6520
},
{
"epoch": 0.3329254614051188,
"grad_norm": 6.385115575499585,
"learning_rate": 3.890405499459346e-06,
"loss": 0.5391,
"step": 6530
},
{
"epoch": 0.33343530131538696,
"grad_norm": 10.100062751766151,
"learning_rate": 3.886972731182455e-06,
"loss": 0.6227,
"step": 6540
},
{
"epoch": 0.33394514122565516,
"grad_norm": 7.360787802711816,
"learning_rate": 3.88353618093418e-06,
"loss": 0.6554,
"step": 6550
},
{
"epoch": 0.3344549811359233,
"grad_norm": 5.356919889085,
"learning_rate": 3.880095858085242e-06,
"loss": 0.6468,
"step": 6560
},
{
"epoch": 0.3349648210461915,
"grad_norm": 5.509960574562544,
"learning_rate": 3.876651772016651e-06,
"loss": 0.7081,
"step": 6570
},
{
"epoch": 0.33547466095645967,
"grad_norm": 3.286871309961813,
"learning_rate": 3.873203932119674e-06,
"loss": 0.5847,
"step": 6580
},
{
"epoch": 0.3359845008667278,
"grad_norm": 3.8352613480782454,
"learning_rate": 3.869752347795817e-06,
"loss": 0.5517,
"step": 6590
},
{
"epoch": 0.336494340776996,
"grad_norm": 4.477157135506783,
"learning_rate": 3.866297028456797e-06,
"loss": 0.6064,
"step": 6600
},
{
"epoch": 0.3370041806872642,
"grad_norm": 4.734066056109367,
"learning_rate": 3.862837983524514e-06,
"loss": 0.6193,
"step": 6610
},
{
"epoch": 0.3375140205975324,
"grad_norm": 8.932654735360822,
"learning_rate": 3.859375222431028e-06,
"loss": 0.5583,
"step": 6620
},
{
"epoch": 0.33802386050780053,
"grad_norm": 5.980294085107751,
"learning_rate": 3.855908754618529e-06,
"loss": 0.5647,
"step": 6630
},
{
"epoch": 0.33853370041806874,
"grad_norm": 10.757717609465724,
"learning_rate": 3.852438589539318e-06,
"loss": 0.6095,
"step": 6640
},
{
"epoch": 0.3390435403283369,
"grad_norm": 7.4025432334386405,
"learning_rate": 3.848964736655778e-06,
"loss": 0.5996,
"step": 6650
},
{
"epoch": 0.3395533802386051,
"grad_norm": 4.770370050231316,
"learning_rate": 3.8454872054403436e-06,
"loss": 0.6346,
"step": 6660
},
{
"epoch": 0.34006322014887325,
"grad_norm": 4.717970663853238,
"learning_rate": 3.842006005375484e-06,
"loss": 0.6224,
"step": 6670
},
{
"epoch": 0.34057306005914145,
"grad_norm": 4.3404000579008395,
"learning_rate": 3.838521145953671e-06,
"loss": 0.5899,
"step": 6680
},
{
"epoch": 0.3410828999694096,
"grad_norm": 4.034827310652614,
"learning_rate": 3.835032636677353e-06,
"loss": 0.5911,
"step": 6690
},
{
"epoch": 0.34159273987967775,
"grad_norm": 7.368938458861282,
"learning_rate": 3.831540487058931e-06,
"loss": 0.5737,
"step": 6700
},
{
"epoch": 0.34210257978994596,
"grad_norm": 8.57724472913965,
"learning_rate": 3.828044706620735e-06,
"loss": 0.5313,
"step": 6710
},
{
"epoch": 0.3426124197002141,
"grad_norm": 6.329551601664903,
"learning_rate": 3.824545304894996e-06,
"loss": 0.57,
"step": 6720
},
{
"epoch": 0.3431222596104823,
"grad_norm": 4.795096185907367,
"learning_rate": 3.8210422914238135e-06,
"loss": 0.6148,
"step": 6730
},
{
"epoch": 0.34363209952075047,
"grad_norm": 6.213637575028185,
"learning_rate": 3.817535675759141e-06,
"loss": 0.6063,
"step": 6740
},
{
"epoch": 0.3441419394310187,
"grad_norm": 7.850124604471028,
"learning_rate": 3.814025467462753e-06,
"loss": 0.6415,
"step": 6750
},
{
"epoch": 0.3446517793412868,
"grad_norm": 7.606549162605095,
"learning_rate": 3.81051167610622e-06,
"loss": 0.6188,
"step": 6760
},
{
"epoch": 0.34516161925155503,
"grad_norm": 5.859136355753269,
"learning_rate": 3.806994311270882e-06,
"loss": 0.5968,
"step": 6770
},
{
"epoch": 0.3456714591618232,
"grad_norm": 7.357030820962491,
"learning_rate": 3.8034733825478244e-06,
"loss": 0.6151,
"step": 6780
},
{
"epoch": 0.3461812990720914,
"grad_norm": 6.118489158670542,
"learning_rate": 3.79994889953785e-06,
"loss": 0.5947,
"step": 6790
},
{
"epoch": 0.34669113898235954,
"grad_norm": 8.430159102749696,
"learning_rate": 3.796420871851454e-06,
"loss": 0.6021,
"step": 6800
},
{
"epoch": 0.3472009788926277,
"grad_norm": 15.874800204896232,
"learning_rate": 3.792889309108795e-06,
"loss": 0.5698,
"step": 6810
},
{
"epoch": 0.3477108188028959,
"grad_norm": 8.83185818771321,
"learning_rate": 3.7893542209396734e-06,
"loss": 0.6312,
"step": 6820
},
{
"epoch": 0.34822065871316404,
"grad_norm": 4.087291025953165,
"learning_rate": 3.7858156169835015e-06,
"loss": 0.5627,
"step": 6830
},
{
"epoch": 0.34873049862343225,
"grad_norm": 6.7607366003726055,
"learning_rate": 3.782273506889279e-06,
"loss": 0.5773,
"step": 6840
},
{
"epoch": 0.3492403385337004,
"grad_norm": 10.819485461238413,
"learning_rate": 3.7787279003155654e-06,
"loss": 0.5643,
"step": 6850
},
{
"epoch": 0.3497501784439686,
"grad_norm": 7.661944780552012,
"learning_rate": 3.7751788069304545e-06,
"loss": 0.6101,
"step": 6860
},
{
"epoch": 0.35026001835423676,
"grad_norm": 6.422963238448512,
"learning_rate": 3.7716262364115474e-06,
"loss": 0.6484,
"step": 6870
},
{
"epoch": 0.35076985826450496,
"grad_norm": 9.481303854150275,
"learning_rate": 3.768070198445929e-06,
"loss": 0.5689,
"step": 6880
},
{
"epoch": 0.3512796981747731,
"grad_norm": 6.181056944169703,
"learning_rate": 3.7645107027301345e-06,
"loss": 0.6259,
"step": 6890
},
{
"epoch": 0.3517895380850413,
"grad_norm": 4.107794346757061,
"learning_rate": 3.760947758970133e-06,
"loss": 0.6058,
"step": 6900
},
{
"epoch": 0.35229937799530947,
"grad_norm": 4.973326337674359,
"learning_rate": 3.757381376881292e-06,
"loss": 0.5507,
"step": 6910
},
{
"epoch": 0.3528092179055776,
"grad_norm": 10.480438607114168,
"learning_rate": 3.7538115661883566e-06,
"loss": 0.5403,
"step": 6920
},
{
"epoch": 0.3533190578158458,
"grad_norm": 4.104274396062177,
"learning_rate": 3.750238336625418e-06,
"loss": 0.5779,
"step": 6930
},
{
"epoch": 0.353828897726114,
"grad_norm": 4.529163767945204,
"learning_rate": 3.746661697935894e-06,
"loss": 0.6222,
"step": 6940
},
{
"epoch": 0.3543387376363822,
"grad_norm": 4.430559349481732,
"learning_rate": 3.743081659872495e-06,
"loss": 0.6055,
"step": 6950
},
{
"epoch": 0.35484857754665033,
"grad_norm": 6.5460741891803655,
"learning_rate": 3.7394982321972027e-06,
"loss": 0.5601,
"step": 6960
},
{
"epoch": 0.35535841745691854,
"grad_norm": 7.640248043872103,
"learning_rate": 3.735911424681241e-06,
"loss": 0.6209,
"step": 6970
},
{
"epoch": 0.3558682573671867,
"grad_norm": 8.37675772527331,
"learning_rate": 3.73232124710505e-06,
"loss": 0.6312,
"step": 6980
},
{
"epoch": 0.3563780972774549,
"grad_norm": 7.037076133600042,
"learning_rate": 3.7287277092582574e-06,
"loss": 0.6125,
"step": 6990
},
{
"epoch": 0.35688793718772305,
"grad_norm": 10.794399433912062,
"learning_rate": 3.7251308209396574e-06,
"loss": 0.5567,
"step": 7000
},
{
"epoch": 0.35739777709799125,
"grad_norm": 11.309080635594633,
"learning_rate": 3.7215305919571764e-06,
"loss": 0.5911,
"step": 7010
},
{
"epoch": 0.3579076170082594,
"grad_norm": 5.54462136401189,
"learning_rate": 3.7179270321278514e-06,
"loss": 0.5995,
"step": 7020
},
{
"epoch": 0.35841745691852755,
"grad_norm": 3.7492832827560103,
"learning_rate": 3.7143201512778036e-06,
"loss": 0.5922,
"step": 7030
},
{
"epoch": 0.35892729682879576,
"grad_norm": 6.858050111733345,
"learning_rate": 3.710709959242208e-06,
"loss": 0.5987,
"step": 7040
},
{
"epoch": 0.3594371367390639,
"grad_norm": 6.171010764226671,
"learning_rate": 3.707096465865268e-06,
"loss": 0.5419,
"step": 7050
},
{
"epoch": 0.3599469766493321,
"grad_norm": 5.329769378388623,
"learning_rate": 3.703479681000191e-06,
"loss": 0.5478,
"step": 7060
},
{
"epoch": 0.36045681655960027,
"grad_norm": 3.5454541378397204,
"learning_rate": 3.699859614509158e-06,
"loss": 0.6207,
"step": 7070
},
{
"epoch": 0.3609666564698685,
"grad_norm": 13.986305814991704,
"learning_rate": 3.6962362762633004e-06,
"loss": 0.6246,
"step": 7080
},
{
"epoch": 0.3614764963801366,
"grad_norm": 4.850896721665051,
"learning_rate": 3.6926096761426666e-06,
"loss": 0.5819,
"step": 7090
},
{
"epoch": 0.36198633629040483,
"grad_norm": 7.499418723064143,
"learning_rate": 3.6889798240362033e-06,
"loss": 0.5597,
"step": 7100
},
{
"epoch": 0.362496176200673,
"grad_norm": 14.96873551216076,
"learning_rate": 3.6853467298417243e-06,
"loss": 0.5084,
"step": 7110
},
{
"epoch": 0.3630060161109412,
"grad_norm": 5.911693184729608,
"learning_rate": 3.681710403465883e-06,
"loss": 0.5799,
"step": 7120
},
{
"epoch": 0.36351585602120934,
"grad_norm": 4.923835615599454,
"learning_rate": 3.6780708548241456e-06,
"loss": 0.6774,
"step": 7130
},
{
"epoch": 0.3640256959314775,
"grad_norm": 5.576239220291962,
"learning_rate": 3.6744280938407663e-06,
"loss": 0.5563,
"step": 7140
},
{
"epoch": 0.3645355358417457,
"grad_norm": 53.425006716624175,
"learning_rate": 3.6707821304487566e-06,
"loss": 0.5904,
"step": 7150
},
{
"epoch": 0.36504537575201385,
"grad_norm": 8.913931661640671,
"learning_rate": 3.667132974589863e-06,
"loss": 0.6302,
"step": 7160
},
{
"epoch": 0.36555521566228205,
"grad_norm": 4.475573478074097,
"learning_rate": 3.6634806362145346e-06,
"loss": 0.5533,
"step": 7170
},
{
"epoch": 0.3660650555725502,
"grad_norm": 5.442534505279004,
"learning_rate": 3.6598251252819e-06,
"loss": 0.6408,
"step": 7180
},
{
"epoch": 0.3665748954828184,
"grad_norm": 20.614809255691306,
"learning_rate": 3.6561664517597384e-06,
"loss": 0.5989,
"step": 7190
},
{
"epoch": 0.36708473539308656,
"grad_norm": 9.734907719100084,
"learning_rate": 3.652504625624452e-06,
"loss": 0.6404,
"step": 7200
},
{
"epoch": 0.36759457530335476,
"grad_norm": 6.631934468341404,
"learning_rate": 3.6488396568610407e-06,
"loss": 0.5767,
"step": 7210
},
{
"epoch": 0.3681044152136229,
"grad_norm": 7.1809330609897914,
"learning_rate": 3.645171555463073e-06,
"loss": 0.636,
"step": 7220
},
{
"epoch": 0.3686142551238911,
"grad_norm": 5.546281325797002,
"learning_rate": 3.641500331432658e-06,
"loss": 0.63,
"step": 7230
},
{
"epoch": 0.36912409503415927,
"grad_norm": 8.823366797783622,
"learning_rate": 3.6378259947804233e-06,
"loss": 0.5748,
"step": 7240
},
{
"epoch": 0.3696339349444274,
"grad_norm": 5.097288565368337,
"learning_rate": 3.6341485555254795e-06,
"loss": 0.5088,
"step": 7250
},
{
"epoch": 0.37014377485469563,
"grad_norm": 8.118113252898912,
"learning_rate": 3.6304680236954004e-06,
"loss": 0.6355,
"step": 7260
},
{
"epoch": 0.3706536147649638,
"grad_norm": 3.3464029872574375,
"learning_rate": 3.6267844093261918e-06,
"loss": 0.6184,
"step": 7270
},
{
"epoch": 0.371163454675232,
"grad_norm": 18.062546321417802,
"learning_rate": 3.6230977224622637e-06,
"loss": 0.6462,
"step": 7280
},
{
"epoch": 0.37167329458550014,
"grad_norm": 6.52233188905018,
"learning_rate": 3.619407973156406e-06,
"loss": 0.5156,
"step": 7290
},
{
"epoch": 0.37218313449576834,
"grad_norm": 8.221714618065553,
"learning_rate": 3.6157151714697573e-06,
"loss": 0.6841,
"step": 7300
},
{
"epoch": 0.3726929744060365,
"grad_norm": 27.792566756398507,
"learning_rate": 3.6120193274717815e-06,
"loss": 0.5526,
"step": 7310
},
{
"epoch": 0.3732028143163047,
"grad_norm": 4.306156811257337,
"learning_rate": 3.608320451240237e-06,
"loss": 0.5967,
"step": 7320
},
{
"epoch": 0.37371265422657285,
"grad_norm": 6.166072662851084,
"learning_rate": 3.6046185528611497e-06,
"loss": 0.6096,
"step": 7330
},
{
"epoch": 0.37422249413684106,
"grad_norm": 4.853281102665544,
"learning_rate": 3.600913642428788e-06,
"loss": 0.5802,
"step": 7340
},
{
"epoch": 0.3747323340471092,
"grad_norm": 6.741886629102916,
"learning_rate": 3.597205730045632e-06,
"loss": 0.6536,
"step": 7350
},
{
"epoch": 0.37524217395737736,
"grad_norm": 4.183273902615311,
"learning_rate": 3.5934948258223485e-06,
"loss": 0.652,
"step": 7360
},
{
"epoch": 0.37575201386764556,
"grad_norm": 9.101129386835122,
"learning_rate": 3.5897809398777607e-06,
"loss": 0.7082,
"step": 7370
},
{
"epoch": 0.3762618537779137,
"grad_norm": 4.815675180734852,
"learning_rate": 3.586064082338825e-06,
"loss": 0.6186,
"step": 7380
},
{
"epoch": 0.3767716936881819,
"grad_norm": 3.95393447830758,
"learning_rate": 3.5823442633405993e-06,
"loss": 0.559,
"step": 7390
},
{
"epoch": 0.37728153359845007,
"grad_norm": 14.067564962388795,
"learning_rate": 3.578621493026216e-06,
"loss": 0.567,
"step": 7400
},
{
"epoch": 0.3777913735087183,
"grad_norm": 4.773473342606289,
"learning_rate": 3.5748957815468556e-06,
"loss": 0.5831,
"step": 7410
},
{
"epoch": 0.3783012134189864,
"grad_norm": 5.549851227762529,
"learning_rate": 3.5711671390617188e-06,
"loss": 0.5673,
"step": 7420
},
{
"epoch": 0.37881105332925463,
"grad_norm": 3.3479933620192335,
"learning_rate": 3.567435575737999e-06,
"loss": 0.6056,
"step": 7430
},
{
"epoch": 0.3793208932395228,
"grad_norm": 14.15878245345039,
"learning_rate": 3.563701101750854e-06,
"loss": 0.5743,
"step": 7440
},
{
"epoch": 0.379830733149791,
"grad_norm": 6.419404980512699,
"learning_rate": 3.5599637272833753e-06,
"loss": 0.6721,
"step": 7450
},
{
"epoch": 0.38034057306005914,
"grad_norm": 3.475737030604581,
"learning_rate": 3.556223462526568e-06,
"loss": 0.6289,
"step": 7460
},
{
"epoch": 0.3808504129703273,
"grad_norm": 4.736659071805537,
"learning_rate": 3.5524803176793165e-06,
"loss": 0.5864,
"step": 7470
},
{
"epoch": 0.3813602528805955,
"grad_norm": 4.231077303312,
"learning_rate": 3.5487343029483577e-06,
"loss": 0.5538,
"step": 7480
},
{
"epoch": 0.38187009279086365,
"grad_norm": 4.529344365626084,
"learning_rate": 3.544985428548255e-06,
"loss": 0.6027,
"step": 7490
},
{
"epoch": 0.38237993270113185,
"grad_norm": 3.504905765153972,
"learning_rate": 3.541233704701369e-06,
"loss": 0.5744,
"step": 7500
},
{
"epoch": 0.3828897726114,
"grad_norm": 6.222110628729311,
"learning_rate": 3.5374791416378294e-06,
"loss": 0.5717,
"step": 7510
},
{
"epoch": 0.3833996125216682,
"grad_norm": 5.8783544186901215,
"learning_rate": 3.5337217495955113e-06,
"loss": 0.5574,
"step": 7520
},
{
"epoch": 0.38390945243193636,
"grad_norm": 6.577175269766371,
"learning_rate": 3.5299615388199983e-06,
"loss": 0.5388,
"step": 7530
},
{
"epoch": 0.38441929234220457,
"grad_norm": 5.968698056594268,
"learning_rate": 3.526198519564565e-06,
"loss": 0.546,
"step": 7540
},
{
"epoch": 0.3849291322524727,
"grad_norm": 7.436207395985792,
"learning_rate": 3.522432702090141e-06,
"loss": 0.5911,
"step": 7550
},
{
"epoch": 0.3854389721627409,
"grad_norm": 5.305969940463719,
"learning_rate": 3.518664096665289e-06,
"loss": 0.5795,
"step": 7560
},
{
"epoch": 0.3859488120730091,
"grad_norm": 4.764076818026139,
"learning_rate": 3.5148927135661697e-06,
"loss": 0.5664,
"step": 7570
},
{
"epoch": 0.3864586519832772,
"grad_norm": 4.456449482163297,
"learning_rate": 3.5111185630765216e-06,
"loss": 0.6528,
"step": 7580
},
{
"epoch": 0.38696849189354543,
"grad_norm": 9.887156748088438,
"learning_rate": 3.507341655487628e-06,
"loss": 0.5767,
"step": 7590
},
{
"epoch": 0.3874783318038136,
"grad_norm": 5.777103305694122,
"learning_rate": 3.5035620010982896e-06,
"loss": 0.5429,
"step": 7600
},
{
"epoch": 0.3879881717140818,
"grad_norm": 5.453330299163754,
"learning_rate": 3.4997796102147964e-06,
"loss": 0.5854,
"step": 7610
},
{
"epoch": 0.38849801162434994,
"grad_norm": 4.931929598094581,
"learning_rate": 3.495994493150903e-06,
"loss": 0.5633,
"step": 7620
},
{
"epoch": 0.38900785153461814,
"grad_norm": 5.217190682035675,
"learning_rate": 3.492206660227796e-06,
"loss": 0.6049,
"step": 7630
},
{
"epoch": 0.3895176914448863,
"grad_norm": 7.998364733143567,
"learning_rate": 3.4884161217740677e-06,
"loss": 0.4803,
"step": 7640
},
{
"epoch": 0.3900275313551545,
"grad_norm": 7.092320373283092,
"learning_rate": 3.4846228881256862e-06,
"loss": 0.5541,
"step": 7650
},
{
"epoch": 0.39053737126542265,
"grad_norm": 6.124803699299853,
"learning_rate": 3.480826969625971e-06,
"loss": 0.5922,
"step": 7660
},
{
"epoch": 0.39104721117569086,
"grad_norm": 5.153057465826043,
"learning_rate": 3.477028376625563e-06,
"loss": 0.5665,
"step": 7670
},
{
"epoch": 0.391557051085959,
"grad_norm": 4.517672802865693,
"learning_rate": 3.4732271194823936e-06,
"loss": 0.5277,
"step": 7680
},
{
"epoch": 0.39206689099622716,
"grad_norm": 4.166995164567575,
"learning_rate": 3.4694232085616596e-06,
"loss": 0.7118,
"step": 7690
},
{
"epoch": 0.39257673090649536,
"grad_norm": 5.1557568947593415,
"learning_rate": 3.465616654235795e-06,
"loss": 0.6042,
"step": 7700
},
{
"epoch": 0.3930865708167635,
"grad_norm": 6.393410866172962,
"learning_rate": 3.4618074668844424e-06,
"loss": 0.588,
"step": 7710
},
{
"epoch": 0.3935964107270317,
"grad_norm": 4.358690539680005,
"learning_rate": 3.4579956568944207e-06,
"loss": 0.6176,
"step": 7720
},
{
"epoch": 0.39410625063729987,
"grad_norm": 3.624911629797558,
"learning_rate": 3.454181234659703e-06,
"loss": 0.6397,
"step": 7730
},
{
"epoch": 0.3946160905475681,
"grad_norm": 4.344164968023655,
"learning_rate": 3.4503642105813852e-06,
"loss": 0.5771,
"step": 7740
},
{
"epoch": 0.3951259304578362,
"grad_norm": 5.636498947247398,
"learning_rate": 3.446544595067657e-06,
"loss": 0.6011,
"step": 7750
},
{
"epoch": 0.39563577036810443,
"grad_norm": 6.847194106097315,
"learning_rate": 3.442722398533775e-06,
"loss": 0.5671,
"step": 7760
},
{
"epoch": 0.3961456102783726,
"grad_norm": 9.184980996653614,
"learning_rate": 3.4388976314020334e-06,
"loss": 0.6045,
"step": 7770
},
{
"epoch": 0.3966554501886408,
"grad_norm": 4.422482401293273,
"learning_rate": 3.435070304101735e-06,
"loss": 0.5702,
"step": 7780
},
{
"epoch": 0.39716529009890894,
"grad_norm": 8.284493585068157,
"learning_rate": 3.4312404270691662e-06,
"loss": 0.5723,
"step": 7790
},
{
"epoch": 0.3976751300091771,
"grad_norm": 3.30035074762522,
"learning_rate": 3.4274080107475634e-06,
"loss": 0.5815,
"step": 7800
},
{
"epoch": 0.3981849699194453,
"grad_norm": 3.988407493682963,
"learning_rate": 3.4235730655870876e-06,
"loss": 0.6402,
"step": 7810
},
{
"epoch": 0.39869480982971345,
"grad_norm": 3.779728090113872,
"learning_rate": 3.4197356020447964e-06,
"loss": 0.5862,
"step": 7820
},
{
"epoch": 0.39920464973998165,
"grad_norm": 5.642758710878275,
"learning_rate": 3.4158956305846135e-06,
"loss": 0.5923,
"step": 7830
},
{
"epoch": 0.3997144896502498,
"grad_norm": 3.082114895002924,
"learning_rate": 3.412053161677302e-06,
"loss": 0.5496,
"step": 7840
},
{
"epoch": 0.400224329560518,
"grad_norm": 6.294460319464474,
"learning_rate": 3.408208205800434e-06,
"loss": 0.5882,
"step": 7850
},
{
"epoch": 0.40073416947078616,
"grad_norm": 9.70513659085264,
"learning_rate": 3.4043607734383627e-06,
"loss": 0.6992,
"step": 7860
},
{
"epoch": 0.40124400938105437,
"grad_norm": 5.457141505102727,
"learning_rate": 3.400510875082197e-06,
"loss": 0.6412,
"step": 7870
},
{
"epoch": 0.4017538492913225,
"grad_norm": 6.982613221228831,
"learning_rate": 3.396658521229766e-06,
"loss": 0.5833,
"step": 7880
},
{
"epoch": 0.4022636892015907,
"grad_norm": 5.057259608071754,
"learning_rate": 3.392803722385597e-06,
"loss": 0.6329,
"step": 7890
},
{
"epoch": 0.4027735291118589,
"grad_norm": 4.837813445437802,
"learning_rate": 3.388946489060884e-06,
"loss": 0.5982,
"step": 7900
},
{
"epoch": 0.403283369022127,
"grad_norm": 36.02262032028181,
"learning_rate": 3.3850868317734586e-06,
"loss": 0.5327,
"step": 7910
},
{
"epoch": 0.40379320893239523,
"grad_norm": 4.823228085584529,
"learning_rate": 3.381224761047763e-06,
"loss": 0.5623,
"step": 7920
},
{
"epoch": 0.4043030488426634,
"grad_norm": 8.36240492723203,
"learning_rate": 3.377360287414818e-06,
"loss": 0.5395,
"step": 7930
},
{
"epoch": 0.4048128887529316,
"grad_norm": 9.155441995327642,
"learning_rate": 3.3734934214121994e-06,
"loss": 0.698,
"step": 7940
},
{
"epoch": 0.40532272866319974,
"grad_norm": 11.938460867655994,
"learning_rate": 3.369624173584006e-06,
"loss": 0.5864,
"step": 7950
},
{
"epoch": 0.40583256857346794,
"grad_norm": 5.409381281008949,
"learning_rate": 3.3657525544808293e-06,
"loss": 0.6225,
"step": 7960
},
{
"epoch": 0.4063424084837361,
"grad_norm": 4.89907197073032,
"learning_rate": 3.361878574659729e-06,
"loss": 0.6242,
"step": 7970
},
{
"epoch": 0.4068522483940043,
"grad_norm": 6.797248201036603,
"learning_rate": 3.3580022446842e-06,
"loss": 0.605,
"step": 7980
},
{
"epoch": 0.40736208830427245,
"grad_norm": 3.1365064124030297,
"learning_rate": 3.3541235751241474e-06,
"loss": 0.5203,
"step": 7990
},
{
"epoch": 0.40787192821454066,
"grad_norm": 3.9884621581330264,
"learning_rate": 3.350242576555856e-06,
"loss": 0.6074,
"step": 8000
},
{
"epoch": 0.4083817681248088,
"grad_norm": 2.9254887717179336,
"learning_rate": 3.346359259561958e-06,
"loss": 0.5648,
"step": 8010
},
{
"epoch": 0.40889160803507696,
"grad_norm": 10.8121751372367,
"learning_rate": 3.3424736347314113e-06,
"loss": 0.599,
"step": 8020
},
{
"epoch": 0.40940144794534516,
"grad_norm": 8.156217448526485,
"learning_rate": 3.338585712659465e-06,
"loss": 0.5852,
"step": 8030
},
{
"epoch": 0.4099112878556133,
"grad_norm": 7.7980296103312075,
"learning_rate": 3.3346955039476324e-06,
"loss": 0.5456,
"step": 8040
},
{
"epoch": 0.4104211277658815,
"grad_norm": 4.936174392006569,
"learning_rate": 3.3308030192036623e-06,
"loss": 0.6042,
"step": 8050
},
{
"epoch": 0.41093096767614967,
"grad_norm": 7.916106361808594,
"learning_rate": 3.3269082690415094e-06,
"loss": 0.5187,
"step": 8060
},
{
"epoch": 0.4114408075864179,
"grad_norm": 8.010321022647496,
"learning_rate": 3.3230112640813063e-06,
"loss": 0.5558,
"step": 8070
},
{
"epoch": 0.41195064749668603,
"grad_norm": 8.677988264330512,
"learning_rate": 3.319112014949333e-06,
"loss": 0.5425,
"step": 8080
},
{
"epoch": 0.41246048740695423,
"grad_norm": 11.895359241550484,
"learning_rate": 3.3152105322779883e-06,
"loss": 0.6792,
"step": 8090
},
{
"epoch": 0.4129703273172224,
"grad_norm": 15.85477100579575,
"learning_rate": 3.3113068267057635e-06,
"loss": 0.6122,
"step": 8100
},
{
"epoch": 0.4134801672274906,
"grad_norm": 3.9481739068899153,
"learning_rate": 3.307400908877211e-06,
"loss": 0.5493,
"step": 8110
},
{
"epoch": 0.41399000713775874,
"grad_norm": 7.673653825673098,
"learning_rate": 3.303492789442913e-06,
"loss": 0.6135,
"step": 8120
},
{
"epoch": 0.4144998470480269,
"grad_norm": 3.835406906210918,
"learning_rate": 3.2995824790594577e-06,
"loss": 0.6283,
"step": 8130
},
{
"epoch": 0.4150096869582951,
"grad_norm": 6.589571450104878,
"learning_rate": 3.2956699883894065e-06,
"loss": 0.602,
"step": 8140
},
{
"epoch": 0.41551952686856325,
"grad_norm": 23.69316882717695,
"learning_rate": 3.291755328101266e-06,
"loss": 0.5069,
"step": 8150
},
{
"epoch": 0.41602936677883146,
"grad_norm": 7.054279717896161,
"learning_rate": 3.287838508869459e-06,
"loss": 0.6272,
"step": 8160
},
{
"epoch": 0.4165392066890996,
"grad_norm": 4.345210604566924,
"learning_rate": 3.2839195413742946e-06,
"loss": 0.5341,
"step": 8170
},
{
"epoch": 0.4170490465993678,
"grad_norm": 3.5090166274998595,
"learning_rate": 3.2799984363019403e-06,
"loss": 0.594,
"step": 8180
},
{
"epoch": 0.41755888650963596,
"grad_norm": 2.967852128891153,
"learning_rate": 3.276075204344393e-06,
"loss": 0.5998,
"step": 8190
},
{
"epoch": 0.41806872641990417,
"grad_norm": 8.402876117046452,
"learning_rate": 3.272149856199448e-06,
"loss": 0.6589,
"step": 8200
},
{
"epoch": 0.4185785663301723,
"grad_norm": 4.144641190938076,
"learning_rate": 3.2682224025706716e-06,
"loss": 0.5706,
"step": 8210
},
{
"epoch": 0.4190884062404405,
"grad_norm": 4.478507448970573,
"learning_rate": 3.2642928541673707e-06,
"loss": 0.5506,
"step": 8220
},
{
"epoch": 0.4195982461507087,
"grad_norm": 3.3506630062444813,
"learning_rate": 3.2603612217045654e-06,
"loss": 0.5969,
"step": 8230
},
{
"epoch": 0.4201080860609768,
"grad_norm": 3.398334235003455,
"learning_rate": 3.2564275159029573e-06,
"loss": 0.5549,
"step": 8240
},
{
"epoch": 0.42061792597124503,
"grad_norm": 6.514949132565751,
"learning_rate": 3.252491747488902e-06,
"loss": 0.5677,
"step": 8250
},
{
"epoch": 0.4211277658815132,
"grad_norm": 8.04138544752521,
"learning_rate": 3.2485539271943796e-06,
"loss": 0.6166,
"step": 8260
},
{
"epoch": 0.4216376057917814,
"grad_norm": 7.06300391690725,
"learning_rate": 3.244614065756965e-06,
"loss": 0.5547,
"step": 8270
},
{
"epoch": 0.42214744570204954,
"grad_norm": 5.0433275744964385,
"learning_rate": 3.240672173919798e-06,
"loss": 0.5116,
"step": 8280
},
{
"epoch": 0.42265728561231775,
"grad_norm": 3.8286442638870617,
"learning_rate": 3.236728262431558e-06,
"loss": 0.5947,
"step": 8290
},
{
"epoch": 0.4231671255225859,
"grad_norm": 13.900180282284353,
"learning_rate": 3.232782342046427e-06,
"loss": 0.6129,
"step": 8300
},
{
"epoch": 0.4236769654328541,
"grad_norm": 3.9473597000858356,
"learning_rate": 3.2288344235240685e-06,
"loss": 0.5906,
"step": 8310
},
{
"epoch": 0.42418680534312225,
"grad_norm": 5.2122881127886895,
"learning_rate": 3.2248845176295927e-06,
"loss": 0.5961,
"step": 8320
},
{
"epoch": 0.42469664525339046,
"grad_norm": 5.009172058839148,
"learning_rate": 3.2209326351335295e-06,
"loss": 0.5928,
"step": 8330
},
{
"epoch": 0.4252064851636586,
"grad_norm": 6.521057483323243,
"learning_rate": 3.2169787868117987e-06,
"loss": 0.61,
"step": 8340
},
{
"epoch": 0.42571632507392676,
"grad_norm": 5.327692437284964,
"learning_rate": 3.2130229834456787e-06,
"loss": 0.5519,
"step": 8350
},
{
"epoch": 0.42622616498419497,
"grad_norm": 5.106906180738507,
"learning_rate": 3.209065235821782e-06,
"loss": 0.6317,
"step": 8360
},
{
"epoch": 0.4267360048944631,
"grad_norm": 4.3310148354508256,
"learning_rate": 3.2051055547320203e-06,
"loss": 0.5857,
"step": 8370
},
{
"epoch": 0.4272458448047313,
"grad_norm": 3.714093407525919,
"learning_rate": 3.2011439509735785e-06,
"loss": 0.5143,
"step": 8380
},
{
"epoch": 0.4277556847149995,
"grad_norm": 7.753373666343138,
"learning_rate": 3.197180435348884e-06,
"loss": 0.6166,
"step": 8390
},
{
"epoch": 0.4282655246252677,
"grad_norm": 4.447818256533745,
"learning_rate": 3.193215018665577e-06,
"loss": 0.5273,
"step": 8400
},
{
"epoch": 0.42877536453553583,
"grad_norm": 7.4076486317423775,
"learning_rate": 3.189247711736482e-06,
"loss": 0.5517,
"step": 8410
},
{
"epoch": 0.42928520444580404,
"grad_norm": 10.992195174868568,
"learning_rate": 3.1852785253795764e-06,
"loss": 0.6263,
"step": 8420
},
{
"epoch": 0.4297950443560722,
"grad_norm": 4.770548166262247,
"learning_rate": 3.1813074704179647e-06,
"loss": 0.6634,
"step": 8430
},
{
"epoch": 0.4303048842663404,
"grad_norm": 3.609090543989375,
"learning_rate": 3.177334557679846e-06,
"loss": 0.5627,
"step": 8440
},
{
"epoch": 0.43081472417660854,
"grad_norm": 9.572348489266524,
"learning_rate": 3.173359797998483e-06,
"loss": 0.599,
"step": 8450
},
{
"epoch": 0.4313245640868767,
"grad_norm": 3.7512579905836443,
"learning_rate": 3.1693832022121783e-06,
"loss": 0.5482,
"step": 8460
},
{
"epoch": 0.4318344039971449,
"grad_norm": 5.42614717743248,
"learning_rate": 3.1654047811642372e-06,
"loss": 0.6387,
"step": 8470
},
{
"epoch": 0.43234424390741305,
"grad_norm": 3.9705217703334674,
"learning_rate": 3.161424545702947e-06,
"loss": 0.5731,
"step": 8480
},
{
"epoch": 0.43285408381768126,
"grad_norm": 5.429618852113854,
"learning_rate": 3.1574425066815357e-06,
"loss": 0.5638,
"step": 8490
},
{
"epoch": 0.4333639237279494,
"grad_norm": 8.338728006419904,
"learning_rate": 3.1534586749581554e-06,
"loss": 0.5891,
"step": 8500
},
{
"epoch": 0.4338737636382176,
"grad_norm": 9.30440694017716,
"learning_rate": 3.1494730613958436e-06,
"loss": 0.5712,
"step": 8510
},
{
"epoch": 0.43438360354848576,
"grad_norm": 4.016792940369282,
"learning_rate": 3.145485676862497e-06,
"loss": 0.5499,
"step": 8520
},
{
"epoch": 0.43489344345875397,
"grad_norm": 9.442733329946938,
"learning_rate": 3.1414965322308415e-06,
"loss": 0.6214,
"step": 8530
},
{
"epoch": 0.4354032833690221,
"grad_norm": 7.771741314596396,
"learning_rate": 3.137505638378403e-06,
"loss": 0.6009,
"step": 8540
},
{
"epoch": 0.4359131232792903,
"grad_norm": 5.075187089621499,
"learning_rate": 3.133513006187475e-06,
"loss": 0.5559,
"step": 8550
},
{
"epoch": 0.4364229631895585,
"grad_norm": 3.8460426994871577,
"learning_rate": 3.1295186465450944e-06,
"loss": 0.526,
"step": 8560
},
{
"epoch": 0.4369328030998266,
"grad_norm": 8.249346592558288,
"learning_rate": 3.125522570343004e-06,
"loss": 0.6245,
"step": 8570
},
{
"epoch": 0.43744264301009483,
"grad_norm": 9.590756026169126,
"learning_rate": 3.1215247884776324e-06,
"loss": 0.6972,
"step": 8580
},
{
"epoch": 0.437952482920363,
"grad_norm": 4.704624296546816,
"learning_rate": 3.1175253118500554e-06,
"loss": 0.5293,
"step": 8590
},
{
"epoch": 0.4384623228306312,
"grad_norm": 17.112051390648528,
"learning_rate": 3.1135241513659716e-06,
"loss": 0.5601,
"step": 8600
},
{
"epoch": 0.43897216274089934,
"grad_norm": 4.152463576270952,
"learning_rate": 3.1095213179356705e-06,
"loss": 0.5382,
"step": 8610
},
{
"epoch": 0.43948200265116755,
"grad_norm": 3.0628240596293104,
"learning_rate": 3.105516822474004e-06,
"loss": 0.6312,
"step": 8620
},
{
"epoch": 0.4399918425614357,
"grad_norm": 4.629290559282791,
"learning_rate": 3.101510675900356e-06,
"loss": 0.5886,
"step": 8630
},
{
"epoch": 0.4405016824717039,
"grad_norm": 39.41439770246066,
"learning_rate": 3.097502889138611e-06,
"loss": 0.6382,
"step": 8640
},
{
"epoch": 0.44101152238197205,
"grad_norm": 8.949954611853025,
"learning_rate": 3.0934934731171286e-06,
"loss": 0.5223,
"step": 8650
},
{
"epoch": 0.44152136229224026,
"grad_norm": 8.018772137832967,
"learning_rate": 3.089482438768709e-06,
"loss": 0.5763,
"step": 8660
},
{
"epoch": 0.4420312022025084,
"grad_norm": 3.964548362342211,
"learning_rate": 3.085469797030566e-06,
"loss": 0.5519,
"step": 8670
},
{
"epoch": 0.44254104211277656,
"grad_norm": 3.8128106015677345,
"learning_rate": 3.081455558844296e-06,
"loss": 0.5538,
"step": 8680
},
{
"epoch": 0.44305088202304477,
"grad_norm": 11.639397416099424,
"learning_rate": 3.07743973515585e-06,
"loss": 0.553,
"step": 8690
},
{
"epoch": 0.4435607219333129,
"grad_norm": 9.333974318192823,
"learning_rate": 3.0734223369154997e-06,
"loss": 0.5404,
"step": 8700
},
{
"epoch": 0.4440705618435811,
"grad_norm": 7.108447189691252,
"learning_rate": 3.069403375077813e-06,
"loss": 0.5232,
"step": 8710
},
{
"epoch": 0.4445804017538493,
"grad_norm": 4.095413839114276,
"learning_rate": 3.0653828606016183e-06,
"loss": 0.6315,
"step": 8720
},
{
"epoch": 0.4450902416641175,
"grad_norm": 5.411942121909914,
"learning_rate": 3.061360804449981e-06,
"loss": 0.5696,
"step": 8730
},
{
"epoch": 0.44560008157438563,
"grad_norm": 10.880577827647038,
"learning_rate": 3.0573372175901682e-06,
"loss": 0.5421,
"step": 8740
},
{
"epoch": 0.44610992148465384,
"grad_norm": 5.239725804560958,
"learning_rate": 3.0533121109936227e-06,
"loss": 0.5553,
"step": 8750
},
{
"epoch": 0.446619761394922,
"grad_norm": 21.377965936364102,
"learning_rate": 3.0492854956359284e-06,
"loss": 0.6209,
"step": 8760
},
{
"epoch": 0.4471296013051902,
"grad_norm": 4.442807449642586,
"learning_rate": 3.0452573824967857e-06,
"loss": 0.5645,
"step": 8770
},
{
"epoch": 0.44763944121545834,
"grad_norm": 3.0022864395420856,
"learning_rate": 3.041227782559979e-06,
"loss": 0.5508,
"step": 8780
},
{
"epoch": 0.4481492811257265,
"grad_norm": 6.91711907173244,
"learning_rate": 3.037196706813346e-06,
"loss": 0.5392,
"step": 8790
},
{
"epoch": 0.4486591210359947,
"grad_norm": 5.225513314611637,
"learning_rate": 3.033164166248748e-06,
"loss": 0.5924,
"step": 8800
},
{
"epoch": 0.44916896094626285,
"grad_norm": 5.058751147674174,
"learning_rate": 3.0291301718620426e-06,
"loss": 0.5647,
"step": 8810
},
{
"epoch": 0.44967880085653106,
"grad_norm": 4.632146639237204,
"learning_rate": 3.0250947346530495e-06,
"loss": 0.6215,
"step": 8820
},
{
"epoch": 0.4501886407667992,
"grad_norm": 6.650121965423678,
"learning_rate": 3.021057865625524e-06,
"loss": 0.5316,
"step": 8830
},
{
"epoch": 0.4506984806770674,
"grad_norm": 42.562330824923684,
"learning_rate": 3.0170195757871266e-06,
"loss": 0.6634,
"step": 8840
},
{
"epoch": 0.45120832058733557,
"grad_norm": 14.077174043183124,
"learning_rate": 3.012979876149388e-06,
"loss": 0.6021,
"step": 8850
},
{
"epoch": 0.45171816049760377,
"grad_norm": 5.249428160506823,
"learning_rate": 3.0089387777276878e-06,
"loss": 0.5904,
"step": 8860
},
{
"epoch": 0.4522280004078719,
"grad_norm": 4.21393589745194,
"learning_rate": 3.0048962915412185e-06,
"loss": 0.56,
"step": 8870
},
{
"epoch": 0.45273784031814013,
"grad_norm": 3.555231297467554,
"learning_rate": 3.000852428612954e-06,
"loss": 0.6582,
"step": 8880
},
{
"epoch": 0.4532476802284083,
"grad_norm": 5.60502876205121,
"learning_rate": 2.996807199969625e-06,
"loss": 0.5632,
"step": 8890
},
{
"epoch": 0.45375752013867643,
"grad_norm": 7.500436250158769,
"learning_rate": 2.9927606166416866e-06,
"loss": 0.5645,
"step": 8900
},
{
"epoch": 0.45426736004894464,
"grad_norm": 7.045836020242391,
"learning_rate": 2.9887126896632857e-06,
"loss": 0.5923,
"step": 8910
},
{
"epoch": 0.4547771999592128,
"grad_norm": 22.523957238427023,
"learning_rate": 2.9846634300722355e-06,
"loss": 0.5932,
"step": 8920
},
{
"epoch": 0.455287039869481,
"grad_norm": 5.050824780853054,
"learning_rate": 2.980612848909979e-06,
"loss": 0.639,
"step": 8930
},
{
"epoch": 0.45579687977974914,
"grad_norm": 7.614115860924002,
"learning_rate": 2.976560957221567e-06,
"loss": 0.5873,
"step": 8940
},
{
"epoch": 0.45630671969001735,
"grad_norm": 6.491069951479971,
"learning_rate": 2.9725077660556233e-06,
"loss": 0.566,
"step": 8950
},
{
"epoch": 0.4568165596002855,
"grad_norm": 7.142079192416597,
"learning_rate": 2.9684532864643123e-06,
"loss": 0.6099,
"step": 8960
},
{
"epoch": 0.4573263995105537,
"grad_norm": 3.714018766188148,
"learning_rate": 2.9643975295033135e-06,
"loss": 0.5759,
"step": 8970
},
{
"epoch": 0.45783623942082186,
"grad_norm": 11.088549349604403,
"learning_rate": 2.9603405062317898e-06,
"loss": 0.5974,
"step": 8980
},
{
"epoch": 0.45834607933109006,
"grad_norm": 4.604501095050998,
"learning_rate": 2.9562822277123564e-06,
"loss": 0.6014,
"step": 8990
},
{
"epoch": 0.4588559192413582,
"grad_norm": 11.114180991073091,
"learning_rate": 2.952222705011053e-06,
"loss": 0.5714,
"step": 9000
},
{
"epoch": 0.45936575915162636,
"grad_norm": 4.976800255705958,
"learning_rate": 2.9481619491973074e-06,
"loss": 0.6079,
"step": 9010
},
{
"epoch": 0.45987559906189457,
"grad_norm": 6.489602668124778,
"learning_rate": 2.944099971343915e-06,
"loss": 0.5479,
"step": 9020
},
{
"epoch": 0.4603854389721627,
"grad_norm": 5.632555986039924,
"learning_rate": 2.9400367825270015e-06,
"loss": 0.5913,
"step": 9030
},
{
"epoch": 0.4608952788824309,
"grad_norm": 5.280414607569495,
"learning_rate": 2.9359723938259927e-06,
"loss": 0.5724,
"step": 9040
},
{
"epoch": 0.4614051187926991,
"grad_norm": 5.2505014498706535,
"learning_rate": 2.931906816323589e-06,
"loss": 0.4946,
"step": 9050
},
{
"epoch": 0.4619149587029673,
"grad_norm": 3.3702823020212294,
"learning_rate": 2.9278400611057323e-06,
"loss": 0.5737,
"step": 9060
},
{
"epoch": 0.46242479861323543,
"grad_norm": 6.221817410357421,
"learning_rate": 2.9237721392615724e-06,
"loss": 0.6081,
"step": 9070
},
{
"epoch": 0.46293463852350364,
"grad_norm": 8.398183492194732,
"learning_rate": 2.919703061883446e-06,
"loss": 0.5596,
"step": 9080
},
{
"epoch": 0.4634444784337718,
"grad_norm": 4.990366377946183,
"learning_rate": 2.9156328400668336e-06,
"loss": 0.5524,
"step": 9090
},
{
"epoch": 0.46395431834404,
"grad_norm": 4.3605560509698815,
"learning_rate": 2.9115614849103434e-06,
"loss": 0.5369,
"step": 9100
},
{
"epoch": 0.46446415825430815,
"grad_norm": 11.1587751097611,
"learning_rate": 2.9074890075156696e-06,
"loss": 0.5939,
"step": 9110
},
{
"epoch": 0.4649739981645763,
"grad_norm": 7.011773782496179,
"learning_rate": 2.9034154189875674e-06,
"loss": 0.5303,
"step": 9120
},
{
"epoch": 0.4654838380748445,
"grad_norm": 4.934157146537043,
"learning_rate": 2.8993407304338224e-06,
"loss": 0.5491,
"step": 9130
},
{
"epoch": 0.46599367798511265,
"grad_norm": 4.795701900449935,
"learning_rate": 2.895264952965219e-06,
"loss": 0.5946,
"step": 9140
},
{
"epoch": 0.46650351789538086,
"grad_norm": 7.611915066468352,
"learning_rate": 2.891188097695511e-06,
"loss": 0.5646,
"step": 9150
},
{
"epoch": 0.467013357805649,
"grad_norm": 4.711620208242563,
"learning_rate": 2.8871101757413923e-06,
"loss": 0.5782,
"step": 9160
},
{
"epoch": 0.4675231977159172,
"grad_norm": 3.8928320923926907,
"learning_rate": 2.883031198222463e-06,
"loss": 0.5319,
"step": 9170
},
{
"epoch": 0.46803303762618537,
"grad_norm": 4.358072133947001,
"learning_rate": 2.8789511762612044e-06,
"loss": 0.6815,
"step": 9180
},
{
"epoch": 0.4685428775364536,
"grad_norm": 10.039468381811403,
"learning_rate": 2.8748701209829443e-06,
"loss": 0.6439,
"step": 9190
},
{
"epoch": 0.4690527174467217,
"grad_norm": 4.296954771190811,
"learning_rate": 2.870788043515827e-06,
"loss": 0.5502,
"step": 9200
},
{
"epoch": 0.46956255735698993,
"grad_norm": 3.446815441218835,
"learning_rate": 2.866704954990786e-06,
"loss": 0.5817,
"step": 9210
},
{
"epoch": 0.4700723972672581,
"grad_norm": 6.134508269465863,
"learning_rate": 2.8626208665415107e-06,
"loss": 0.5868,
"step": 9220
},
{
"epoch": 0.47058223717752623,
"grad_norm": 6.93550191570071,
"learning_rate": 2.8585357893044172e-06,
"loss": 0.5455,
"step": 9230
},
{
"epoch": 0.47109207708779444,
"grad_norm": 6.654137228385324,
"learning_rate": 2.854449734418619e-06,
"loss": 0.5442,
"step": 9240
},
{
"epoch": 0.4716019169980626,
"grad_norm": 11.560386076352874,
"learning_rate": 2.8503627130258925e-06,
"loss": 0.5603,
"step": 9250
},
{
"epoch": 0.4721117569083308,
"grad_norm": 6.0683644536936,
"learning_rate": 2.846274736270653e-06,
"loss": 0.6153,
"step": 9260
},
{
"epoch": 0.47262159681859894,
"grad_norm": 6.815459650555032,
"learning_rate": 2.8421858152999187e-06,
"loss": 0.6032,
"step": 9270
},
{
"epoch": 0.47313143672886715,
"grad_norm": 11.146270751915738,
"learning_rate": 2.838095961263283e-06,
"loss": 0.5493,
"step": 9280
},
{
"epoch": 0.4736412766391353,
"grad_norm": 11.121993285952847,
"learning_rate": 2.834005185312884e-06,
"loss": 0.5714,
"step": 9290
},
{
"epoch": 0.4741511165494035,
"grad_norm": 4.745351583868496,
"learning_rate": 2.8299134986033727e-06,
"loss": 0.5562,
"step": 9300
},
{
"epoch": 0.47466095645967166,
"grad_norm": 4.353099511973472,
"learning_rate": 2.825820912291885e-06,
"loss": 0.5912,
"step": 9310
},
{
"epoch": 0.47517079636993986,
"grad_norm": 5.126920756977334,
"learning_rate": 2.821727437538009e-06,
"loss": 0.5195,
"step": 9320
},
{
"epoch": 0.475680636280208,
"grad_norm": 6.42955652141462,
"learning_rate": 2.8176330855037538e-06,
"loss": 0.6274,
"step": 9330
},
{
"epoch": 0.47619047619047616,
"grad_norm": 5.904891242346767,
"learning_rate": 2.8135378673535224e-06,
"loss": 0.5435,
"step": 9340
},
{
"epoch": 0.47670031610074437,
"grad_norm": 27.72868464064991,
"learning_rate": 2.809441794254082e-06,
"loss": 0.5635,
"step": 9350
},
{
"epoch": 0.4772101560110125,
"grad_norm": 11.672843344614455,
"learning_rate": 2.805344877374525e-06,
"loss": 0.5822,
"step": 9360
},
{
"epoch": 0.4777199959212807,
"grad_norm": 4.954958875542397,
"learning_rate": 2.80124712788625e-06,
"loss": 0.5584,
"step": 9370
},
{
"epoch": 0.4782298358315489,
"grad_norm": 5.558534960789887,
"learning_rate": 2.797148556962923e-06,
"loss": 0.5654,
"step": 9380
},
{
"epoch": 0.4787396757418171,
"grad_norm": 5.447536352279092,
"learning_rate": 2.793049175780451e-06,
"loss": 0.5743,
"step": 9390
},
{
"epoch": 0.47924951565208523,
"grad_norm": 5.047795460818418,
"learning_rate": 2.7889489955169515e-06,
"loss": 0.5833,
"step": 9400
},
{
"epoch": 0.47975935556235344,
"grad_norm": 11.456581190958515,
"learning_rate": 2.7848480273527175e-06,
"loss": 0.611,
"step": 9410
},
{
"epoch": 0.4802691954726216,
"grad_norm": 3.890085922302097,
"learning_rate": 2.7807462824701925e-06,
"loss": 0.5747,
"step": 9420
},
{
"epoch": 0.4807790353828898,
"grad_norm": 5.196611911352359,
"learning_rate": 2.77664377205394e-06,
"loss": 0.5801,
"step": 9430
},
{
"epoch": 0.48128887529315795,
"grad_norm": 2.994490369871157,
"learning_rate": 2.7725405072906075e-06,
"loss": 0.545,
"step": 9440
},
{
"epoch": 0.4817987152034261,
"grad_norm": 10.405224439868286,
"learning_rate": 2.7684364993689006e-06,
"loss": 0.6603,
"step": 9450
},
{
"epoch": 0.4823085551136943,
"grad_norm": 14.18709433250474,
"learning_rate": 2.764331759479553e-06,
"loss": 0.5546,
"step": 9460
},
{
"epoch": 0.48281839502396245,
"grad_norm": 5.506536829621453,
"learning_rate": 2.760226298815291e-06,
"loss": 0.6388,
"step": 9470
},
{
"epoch": 0.48332823493423066,
"grad_norm": 5.81673305154053,
"learning_rate": 2.75612012857081e-06,
"loss": 0.6766,
"step": 9480
},
{
"epoch": 0.4838380748444988,
"grad_norm": 5.835158854004846,
"learning_rate": 2.7520132599427375e-06,
"loss": 0.5498,
"step": 9490
},
{
"epoch": 0.484347914754767,
"grad_norm": 8.128170539189613,
"learning_rate": 2.7479057041296057e-06,
"loss": 0.5716,
"step": 9500
},
{
"epoch": 0.48485775466503517,
"grad_norm": 7.30621922975406,
"learning_rate": 2.7437974723318226e-06,
"loss": 0.5755,
"step": 9510
},
{
"epoch": 0.4853675945753034,
"grad_norm": 5.691437075507362,
"learning_rate": 2.739688575751638e-06,
"loss": 0.5938,
"step": 9520
},
{
"epoch": 0.4858774344855715,
"grad_norm": 3.8836230376620504,
"learning_rate": 2.735579025593113e-06,
"loss": 0.5909,
"step": 9530
},
{
"epoch": 0.48638727439583973,
"grad_norm": 7.651371797879014,
"learning_rate": 2.731468833062094e-06,
"loss": 0.605,
"step": 9540
},
{
"epoch": 0.4868971143061079,
"grad_norm": 8.864828618350206,
"learning_rate": 2.7273580093661765e-06,
"loss": 0.6367,
"step": 9550
},
{
"epoch": 0.48740695421637603,
"grad_norm": 3.881264802333166,
"learning_rate": 2.723246565714678e-06,
"loss": 0.5762,
"step": 9560
},
{
"epoch": 0.48791679412664424,
"grad_norm": 6.71628341217047,
"learning_rate": 2.719134513318606e-06,
"loss": 0.6115,
"step": 9570
},
{
"epoch": 0.4884266340369124,
"grad_norm": 4.1016490825324174,
"learning_rate": 2.7150218633906284e-06,
"loss": 0.601,
"step": 9580
},
{
"epoch": 0.4889364739471806,
"grad_norm": 9.177448569264055,
"learning_rate": 2.7109086271450436e-06,
"loss": 0.5891,
"step": 9590
},
{
"epoch": 0.48944631385744874,
"grad_norm": 6.620603281256211,
"learning_rate": 2.7067948157977462e-06,
"loss": 0.4988,
"step": 9600
},
{
"epoch": 0.48995615376771695,
"grad_norm": 5.874675796856825,
"learning_rate": 2.702680440566201e-06,
"loss": 0.5392,
"step": 9610
},
{
"epoch": 0.4904659936779851,
"grad_norm": 7.756710543110514,
"learning_rate": 2.698565512669409e-06,
"loss": 0.5867,
"step": 9620
},
{
"epoch": 0.4909758335882533,
"grad_norm": 2.9198050797781216,
"learning_rate": 2.6944500433278796e-06,
"loss": 0.5954,
"step": 9630
},
{
"epoch": 0.49148567349852146,
"grad_norm": 5.860997756097276,
"learning_rate": 2.690334043763598e-06,
"loss": 0.5206,
"step": 9640
},
{
"epoch": 0.49199551340878966,
"grad_norm": 7.649175492439351,
"learning_rate": 2.6862175251999935e-06,
"loss": 0.5617,
"step": 9650
},
{
"epoch": 0.4925053533190578,
"grad_norm": 8.77651134911893,
"learning_rate": 2.6821004988619132e-06,
"loss": 0.5418,
"step": 9660
},
{
"epoch": 0.49301519322932597,
"grad_norm": 4.0924998748408195,
"learning_rate": 2.677982975975588e-06,
"loss": 0.4996,
"step": 9670
},
{
"epoch": 0.49352503313959417,
"grad_norm": 4.226414145121375,
"learning_rate": 2.6738649677686024e-06,
"loss": 0.5182,
"step": 9680
},
{
"epoch": 0.4940348730498623,
"grad_norm": 5.8282516414391194,
"learning_rate": 2.6697464854698644e-06,
"loss": 0.527,
"step": 9690
},
{
"epoch": 0.49454471296013053,
"grad_norm": 7.664715592436748,
"learning_rate": 2.6656275403095743e-06,
"loss": 0.5512,
"step": 9700
},
{
"epoch": 0.4950545528703987,
"grad_norm": 23.306412450191036,
"learning_rate": 2.6615081435191963e-06,
"loss": 0.6111,
"step": 9710
},
{
"epoch": 0.4955643927806669,
"grad_norm": 23.865657408670597,
"learning_rate": 2.657388306331423e-06,
"loss": 0.5416,
"step": 9720
},
{
"epoch": 0.49607423269093504,
"grad_norm": 4.119167363866121,
"learning_rate": 2.653268039980151e-06,
"loss": 0.5512,
"step": 9730
},
{
"epoch": 0.49658407260120324,
"grad_norm": 15.276755798114094,
"learning_rate": 2.6491473557004443e-06,
"loss": 0.6089,
"step": 9740
},
{
"epoch": 0.4970939125114714,
"grad_norm": 5.765963942324974,
"learning_rate": 2.64502626472851e-06,
"loss": 0.5374,
"step": 9750
},
{
"epoch": 0.4976037524217396,
"grad_norm": 4.253068997375066,
"learning_rate": 2.64090477830166e-06,
"loss": 0.5479,
"step": 9760
},
{
"epoch": 0.49811359233200775,
"grad_norm": 6.880603604822455,
"learning_rate": 2.636782907658288e-06,
"loss": 0.5912,
"step": 9770
},
{
"epoch": 0.4986234322422759,
"grad_norm": 4.854326436815256,
"learning_rate": 2.6326606640378334e-06,
"loss": 0.5736,
"step": 9780
},
{
"epoch": 0.4991332721525441,
"grad_norm": 10.465960609416705,
"learning_rate": 2.628538058680754e-06,
"loss": 0.5924,
"step": 9790
},
{
"epoch": 0.49964311206281226,
"grad_norm": 7.788512298031222,
"learning_rate": 2.6244151028284924e-06,
"loss": 0.583,
"step": 9800
},
{
"epoch": 0.5001529519730804,
"grad_norm": 4.570546794181796,
"learning_rate": 2.6202918077234485e-06,
"loss": 0.537,
"step": 9810
},
{
"epoch": 0.5006627918833486,
"grad_norm": 3.100288570725017,
"learning_rate": 2.6161681846089454e-06,
"loss": 0.55,
"step": 9820
},
{
"epoch": 0.5011726317936168,
"grad_norm": 6.39548680051344,
"learning_rate": 2.6120442447292027e-06,
"loss": 0.6072,
"step": 9830
},
{
"epoch": 0.501682471703885,
"grad_norm": 5.874407478871505,
"learning_rate": 2.6079199993293026e-06,
"loss": 0.5647,
"step": 9840
},
{
"epoch": 0.5021923116141531,
"grad_norm": 7.583285823586105,
"learning_rate": 2.6037954596551606e-06,
"loss": 0.5155,
"step": 9850
},
{
"epoch": 0.5027021515244213,
"grad_norm": 5.27127524513507,
"learning_rate": 2.599670636953494e-06,
"loss": 0.5934,
"step": 9860
},
{
"epoch": 0.5032119914346895,
"grad_norm": 4.777987059639289,
"learning_rate": 2.5955455424717933e-06,
"loss": 0.5417,
"step": 9870
},
{
"epoch": 0.5037218313449577,
"grad_norm": 3.980508299314216,
"learning_rate": 2.591420187458289e-06,
"loss": 0.5357,
"step": 9880
},
{
"epoch": 0.5042316712552258,
"grad_norm": 3.6273571452454156,
"learning_rate": 2.587294583161921e-06,
"loss": 0.5435,
"step": 9890
},
{
"epoch": 0.504741511165494,
"grad_norm": 4.221132624697208,
"learning_rate": 2.583168740832312e-06,
"loss": 0.5491,
"step": 9900
},
{
"epoch": 0.5052513510757622,
"grad_norm": 10.066676016575565,
"learning_rate": 2.5790426717197308e-06,
"loss": 0.6032,
"step": 9910
},
{
"epoch": 0.5057611909860303,
"grad_norm": 4.253540384615832,
"learning_rate": 2.5749163870750665e-06,
"loss": 0.5845,
"step": 9920
},
{
"epoch": 0.5062710308962985,
"grad_norm": 2.893054850314246,
"learning_rate": 2.570789898149794e-06,
"loss": 0.537,
"step": 9930
},
{
"epoch": 0.5067808708065668,
"grad_norm": 6.492221137287876,
"learning_rate": 2.5666632161959474e-06,
"loss": 0.5484,
"step": 9940
},
{
"epoch": 0.507290710716835,
"grad_norm": 4.397573523135934,
"learning_rate": 2.562536352466087e-06,
"loss": 0.538,
"step": 9950
},
{
"epoch": 0.507800550627103,
"grad_norm": 5.441347825617694,
"learning_rate": 2.558409318213265e-06,
"loss": 0.5491,
"step": 9960
},
{
"epoch": 0.5083103905373713,
"grad_norm": 4.056944171198213,
"learning_rate": 2.5542821246910038e-06,
"loss": 0.5681,
"step": 9970
},
{
"epoch": 0.5088202304476395,
"grad_norm": 3.861244100619014,
"learning_rate": 2.5501547831532568e-06,
"loss": 0.6587,
"step": 9980
},
{
"epoch": 0.5093300703579077,
"grad_norm": 6.519110978567216,
"learning_rate": 2.546027304854382e-06,
"loss": 0.5651,
"step": 9990
},
{
"epoch": 0.5098399102681758,
"grad_norm": 6.486849286756534,
"learning_rate": 2.541899701049111e-06,
"loss": 0.4912,
"step": 10000
},
{
"epoch": 0.510349750178444,
"grad_norm": 5.970343696600439,
"learning_rate": 2.5377719829925162e-06,
"loss": 0.5982,
"step": 10010
},
{
"epoch": 0.5108595900887122,
"grad_norm": 4.38099385940377,
"learning_rate": 2.5336441619399823e-06,
"loss": 0.5625,
"step": 10020
},
{
"epoch": 0.5113694299989803,
"grad_norm": 3.536380780120841,
"learning_rate": 2.5295162491471754e-06,
"loss": 0.548,
"step": 10030
},
{
"epoch": 0.5118792699092485,
"grad_norm": 3.382407647964885,
"learning_rate": 2.5253882558700103e-06,
"loss": 0.5405,
"step": 10040
},
{
"epoch": 0.5123891098195167,
"grad_norm": 9.11622952164378,
"learning_rate": 2.5212601933646225e-06,
"loss": 0.6232,
"step": 10050
},
{
"epoch": 0.5128989497297849,
"grad_norm": 6.122164041294447,
"learning_rate": 2.5171320728873355e-06,
"loss": 0.5719,
"step": 10060
},
{
"epoch": 0.513408789640053,
"grad_norm": 6.495547666912701,
"learning_rate": 2.5130039056946314e-06,
"loss": 0.5344,
"step": 10070
},
{
"epoch": 0.5139186295503212,
"grad_norm": 4.913443956238611,
"learning_rate": 2.5088757030431206e-06,
"loss": 0.5435,
"step": 10080
},
{
"epoch": 0.5144284694605894,
"grad_norm": 9.987950682928979,
"learning_rate": 2.5047474761895073e-06,
"loss": 0.5909,
"step": 10090
},
{
"epoch": 0.5149383093708576,
"grad_norm": 3.9281212370296084,
"learning_rate": 2.5006192363905653e-06,
"loss": 0.5447,
"step": 10100
},
{
"epoch": 0.5154481492811257,
"grad_norm": 6.037427254957927,
"learning_rate": 2.496490994903101e-06,
"loss": 0.542,
"step": 10110
},
{
"epoch": 0.5159579891913939,
"grad_norm": 4.247064832165126,
"learning_rate": 2.492362762983925e-06,
"loss": 0.5837,
"step": 10120
},
{
"epoch": 0.5164678291016621,
"grad_norm": 5.10408210518716,
"learning_rate": 2.488234551889826e-06,
"loss": 0.5276,
"step": 10130
},
{
"epoch": 0.5169776690119302,
"grad_norm": 6.231589076254952,
"learning_rate": 2.4841063728775307e-06,
"loss": 0.5332,
"step": 10140
},
{
"epoch": 0.5174875089221984,
"grad_norm": 4.10522838540734,
"learning_rate": 2.479978237203682e-06,
"loss": 0.5582,
"step": 10150
},
{
"epoch": 0.5179973488324666,
"grad_norm": 7.119667939256768,
"learning_rate": 2.4758501561248026e-06,
"loss": 0.5753,
"step": 10160
},
{
"epoch": 0.5185071887427348,
"grad_norm": 4.371923022928418,
"learning_rate": 2.471722140897268e-06,
"loss": 0.5264,
"step": 10170
},
{
"epoch": 0.5190170286530029,
"grad_norm": 4.381825152297158,
"learning_rate": 2.4675942027772707e-06,
"loss": 0.5744,
"step": 10180
},
{
"epoch": 0.5195268685632711,
"grad_norm": 4.302455577555786,
"learning_rate": 2.463466353020799e-06,
"loss": 0.592,
"step": 10190
},
{
"epoch": 0.5200367084735393,
"grad_norm": 5.390293346751291,
"learning_rate": 2.4593386028835934e-06,
"loss": 0.5023,
"step": 10200
},
{
"epoch": 0.5205465483838075,
"grad_norm": 10.236322814120419,
"learning_rate": 2.455210963621127e-06,
"loss": 0.591,
"step": 10210
},
{
"epoch": 0.5210563882940756,
"grad_norm": 10.486469158590065,
"learning_rate": 2.451083446488571e-06,
"loss": 0.6159,
"step": 10220
},
{
"epoch": 0.5215662282043438,
"grad_norm": 3.555299059511537,
"learning_rate": 2.446956062740761e-06,
"loss": 0.5874,
"step": 10230
},
{
"epoch": 0.522076068114612,
"grad_norm": 7.121350535168467,
"learning_rate": 2.44282882363217e-06,
"loss": 0.6058,
"step": 10240
},
{
"epoch": 0.5225859080248801,
"grad_norm": 9.768736907975871,
"learning_rate": 2.438701740416876e-06,
"loss": 0.608,
"step": 10250
},
{
"epoch": 0.5230957479351483,
"grad_norm": 5.884971628476978,
"learning_rate": 2.4345748243485347e-06,
"loss": 0.5322,
"step": 10260
},
{
"epoch": 0.5236055878454166,
"grad_norm": 4.723385492969481,
"learning_rate": 2.4304480866803417e-06,
"loss": 0.5857,
"step": 10270
},
{
"epoch": 0.5241154277556848,
"grad_norm": 4.333423115603075,
"learning_rate": 2.426321538665009e-06,
"loss": 0.6112,
"step": 10280
},
{
"epoch": 0.5246252676659529,
"grad_norm": 5.461195508744554,
"learning_rate": 2.4221951915547315e-06,
"loss": 0.5502,
"step": 10290
},
{
"epoch": 0.5251351075762211,
"grad_norm": 4.914285190435338,
"learning_rate": 2.4180690566011543e-06,
"loss": 0.5003,
"step": 10300
},
{
"epoch": 0.5256449474864893,
"grad_norm": 8.901376664241516,
"learning_rate": 2.413943145055347e-06,
"loss": 0.5469,
"step": 10310
},
{
"epoch": 0.5261547873967575,
"grad_norm": 5.179720006160652,
"learning_rate": 2.4098174681677668e-06,
"loss": 0.5429,
"step": 10320
},
{
"epoch": 0.5266646273070256,
"grad_norm": 5.9346677146659585,
"learning_rate": 2.405692037188233e-06,
"loss": 0.5618,
"step": 10330
},
{
"epoch": 0.5271744672172938,
"grad_norm": 3.3918064647292088,
"learning_rate": 2.4015668633658934e-06,
"loss": 0.5904,
"step": 10340
},
{
"epoch": 0.527684307127562,
"grad_norm": 10.706896453642253,
"learning_rate": 2.3974419579491963e-06,
"loss": 0.6084,
"step": 10350
},
{
"epoch": 0.5281941470378301,
"grad_norm": 3.880907933089968,
"learning_rate": 2.3933173321858558e-06,
"loss": 0.5492,
"step": 10360
},
{
"epoch": 0.5287039869480983,
"grad_norm": 5.482563779583249,
"learning_rate": 2.3891929973228244e-06,
"loss": 0.6077,
"step": 10370
},
{
"epoch": 0.5292138268583665,
"grad_norm": 5.878477031259435,
"learning_rate": 2.3850689646062625e-06,
"loss": 0.6259,
"step": 10380
},
{
"epoch": 0.5297236667686347,
"grad_norm": 3.472768112158214,
"learning_rate": 2.3809452452815047e-06,
"loss": 0.5619,
"step": 10390
},
{
"epoch": 0.5302335066789028,
"grad_norm": 8.906752951677479,
"learning_rate": 2.3768218505930333e-06,
"loss": 0.535,
"step": 10400
},
{
"epoch": 0.530743346589171,
"grad_norm": 9.26577684439362,
"learning_rate": 2.372698791784442e-06,
"loss": 0.6082,
"step": 10410
},
{
"epoch": 0.5312531864994392,
"grad_norm": 5.54121619673152,
"learning_rate": 2.3685760800984122e-06,
"loss": 0.538,
"step": 10420
},
{
"epoch": 0.5317630264097074,
"grad_norm": 4.627571106553583,
"learning_rate": 2.364453726776677e-06,
"loss": 0.5562,
"step": 10430
},
{
"epoch": 0.5322728663199755,
"grad_norm": 8.765739404143732,
"learning_rate": 2.3603317430599925e-06,
"loss": 0.5406,
"step": 10440
},
{
"epoch": 0.5327827062302437,
"grad_norm": 4.743984447048957,
"learning_rate": 2.3562101401881065e-06,
"loss": 0.5893,
"step": 10450
},
{
"epoch": 0.5332925461405119,
"grad_norm": 5.820171697799732,
"learning_rate": 2.3520889293997287e-06,
"loss": 0.5403,
"step": 10460
},
{
"epoch": 0.53380238605078,
"grad_norm": 7.08071549222254,
"learning_rate": 2.3479681219325025e-06,
"loss": 0.587,
"step": 10470
},
{
"epoch": 0.5343122259610482,
"grad_norm": 5.001056880602169,
"learning_rate": 2.343847729022965e-06,
"loss": 0.5896,
"step": 10480
},
{
"epoch": 0.5348220658713164,
"grad_norm": 4.8134315758242625,
"learning_rate": 2.3397277619065294e-06,
"loss": 0.5359,
"step": 10490
},
{
"epoch": 0.5353319057815846,
"grad_norm": 14.545775608240119,
"learning_rate": 2.335608231817444e-06,
"loss": 0.5701,
"step": 10500
},
{
"epoch": 0.5358417456918527,
"grad_norm": 21.826443251284513,
"learning_rate": 2.3314891499887678e-06,
"loss": 0.6265,
"step": 10510
},
{
"epoch": 0.5363515856021209,
"grad_norm": 7.425278676073777,
"learning_rate": 2.327370527652335e-06,
"loss": 0.5495,
"step": 10520
},
{
"epoch": 0.5368614255123891,
"grad_norm": 4.3193675426552804,
"learning_rate": 2.3232523760387283e-06,
"loss": 0.5474,
"step": 10530
},
{
"epoch": 0.5373712654226573,
"grad_norm": 5.714151480039064,
"learning_rate": 2.3191347063772484e-06,
"loss": 0.4856,
"step": 10540
},
{
"epoch": 0.5378811053329254,
"grad_norm": 11.00019352733884,
"learning_rate": 2.3150175298958786e-06,
"loss": 0.5881,
"step": 10550
},
{
"epoch": 0.5383909452431936,
"grad_norm": 5.664141098629008,
"learning_rate": 2.3109008578212597e-06,
"loss": 0.5599,
"step": 10560
},
{
"epoch": 0.5389007851534618,
"grad_norm": 4.064527896034296,
"learning_rate": 2.306784701378655e-06,
"loss": 0.6335,
"step": 10570
},
{
"epoch": 0.5394106250637299,
"grad_norm": 14.155884920463697,
"learning_rate": 2.302669071791925e-06,
"loss": 0.5945,
"step": 10580
},
{
"epoch": 0.5399204649739981,
"grad_norm": 6.127163135912767,
"learning_rate": 2.2985539802834907e-06,
"loss": 0.5718,
"step": 10590
},
{
"epoch": 0.5404303048842664,
"grad_norm": 4.245202176259139,
"learning_rate": 2.294439438074308e-06,
"loss": 0.5403,
"step": 10600
},
{
"epoch": 0.5409401447945346,
"grad_norm": 5.3540527690553255,
"learning_rate": 2.2903254563838308e-06,
"loss": 0.5854,
"step": 10610
},
{
"epoch": 0.5414499847048027,
"grad_norm": 4.634926708115154,
"learning_rate": 2.2862120464299913e-06,
"loss": 0.5154,
"step": 10620
},
{
"epoch": 0.5419598246150709,
"grad_norm": 7.827943685847099,
"learning_rate": 2.2820992194291577e-06,
"loss": 0.5375,
"step": 10630
},
{
"epoch": 0.5424696645253391,
"grad_norm": 4.9277639804091296,
"learning_rate": 2.277986986596109e-06,
"loss": 0.5545,
"step": 10640
},
{
"epoch": 0.5429795044356073,
"grad_norm": 7.304459395748408,
"learning_rate": 2.273875359144007e-06,
"loss": 0.5825,
"step": 10650
},
{
"epoch": 0.5434893443458754,
"grad_norm": 5.016441794583862,
"learning_rate": 2.2697643482843584e-06,
"loss": 0.5295,
"step": 10660
},
{
"epoch": 0.5439991842561436,
"grad_norm": 4.345126201608073,
"learning_rate": 2.2656539652269933e-06,
"loss": 0.5745,
"step": 10670
},
{
"epoch": 0.5445090241664118,
"grad_norm": 3.702144842617632,
"learning_rate": 2.2615442211800263e-06,
"loss": 0.5864,
"step": 10680
},
{
"epoch": 0.5450188640766799,
"grad_norm": 9.815653136997877,
"learning_rate": 2.2574351273498304e-06,
"loss": 0.6027,
"step": 10690
},
{
"epoch": 0.5455287039869481,
"grad_norm": 6.627027205767743,
"learning_rate": 2.253326694941008e-06,
"loss": 0.5411,
"step": 10700
},
{
"epoch": 0.5460385438972163,
"grad_norm": 4.384534828483341,
"learning_rate": 2.249218935156354e-06,
"loss": 0.5814,
"step": 10710
},
{
"epoch": 0.5465483838074845,
"grad_norm": 4.575249644366794,
"learning_rate": 2.2451118591968325e-06,
"loss": 0.551,
"step": 10720
},
{
"epoch": 0.5470582237177526,
"grad_norm": 3.360951380273878,
"learning_rate": 2.2410054782615396e-06,
"loss": 0.4974,
"step": 10730
},
{
"epoch": 0.5475680636280208,
"grad_norm": 7.019099679394434,
"learning_rate": 2.2368998035476817e-06,
"loss": 0.6235,
"step": 10740
},
{
"epoch": 0.548077903538289,
"grad_norm": 5.68531638189826,
"learning_rate": 2.2327948462505326e-06,
"loss": 0.5509,
"step": 10750
},
{
"epoch": 0.5485877434485572,
"grad_norm": 3.7536478981989947,
"learning_rate": 2.228690617563416e-06,
"loss": 0.5862,
"step": 10760
},
{
"epoch": 0.5490975833588253,
"grad_norm": 5.647338670633899,
"learning_rate": 2.2245871286776638e-06,
"loss": 0.5322,
"step": 10770
},
{
"epoch": 0.5496074232690935,
"grad_norm": 4.752246726979069,
"learning_rate": 2.2204843907825946e-06,
"loss": 0.5743,
"step": 10780
},
{
"epoch": 0.5501172631793617,
"grad_norm": 5.590092737478404,
"learning_rate": 2.2163824150654777e-06,
"loss": 0.6093,
"step": 10790
},
{
"epoch": 0.5506271030896298,
"grad_norm": 9.1089558709191,
"learning_rate": 2.212281212711502e-06,
"loss": 0.5374,
"step": 10800
},
{
"epoch": 0.551136942999898,
"grad_norm": 7.049632525787798,
"learning_rate": 2.208180794903753e-06,
"loss": 0.5919,
"step": 10810
},
{
"epoch": 0.5516467829101662,
"grad_norm": 4.279363456332365,
"learning_rate": 2.20408117282317e-06,
"loss": 0.5885,
"step": 10820
},
{
"epoch": 0.5521566228204344,
"grad_norm": 3.1944233373670796,
"learning_rate": 2.199982357648529e-06,
"loss": 0.5789,
"step": 10830
},
{
"epoch": 0.5526664627307025,
"grad_norm": 5.4291897517297,
"learning_rate": 2.1958843605564007e-06,
"loss": 0.5807,
"step": 10840
},
{
"epoch": 0.5531763026409707,
"grad_norm": 6.470349665252672,
"learning_rate": 2.1917871927211287e-06,
"loss": 0.536,
"step": 10850
},
{
"epoch": 0.5536861425512389,
"grad_norm": 4.34828054960982,
"learning_rate": 2.1876908653147918e-06,
"loss": 0.5524,
"step": 10860
},
{
"epoch": 0.5541959824615071,
"grad_norm": 9.626964358062196,
"learning_rate": 2.183595389507181e-06,
"loss": 0.519,
"step": 10870
},
{
"epoch": 0.5547058223717752,
"grad_norm": 3.400898759831172,
"learning_rate": 2.179500776465764e-06,
"loss": 0.5928,
"step": 10880
},
{
"epoch": 0.5552156622820434,
"grad_norm": 4.56901012290534,
"learning_rate": 2.1754070373556526e-06,
"loss": 0.612,
"step": 10890
},
{
"epoch": 0.5557255021923116,
"grad_norm": 7.484943915780186,
"learning_rate": 2.1713141833395808e-06,
"loss": 0.5161,
"step": 10900
},
{
"epoch": 0.5562353421025797,
"grad_norm": 5.353373483480901,
"learning_rate": 2.167222225577865e-06,
"loss": 0.561,
"step": 10910
},
{
"epoch": 0.556745182012848,
"grad_norm": 9.343382259221173,
"learning_rate": 2.163131175228381e-06,
"loss": 0.5467,
"step": 10920
},
{
"epoch": 0.5572550219231162,
"grad_norm": 4.91701329791304,
"learning_rate": 2.1590410434465265e-06,
"loss": 0.5567,
"step": 10930
},
{
"epoch": 0.5577648618333844,
"grad_norm": 7.592767432198721,
"learning_rate": 2.1549518413851978e-06,
"loss": 0.5182,
"step": 10940
},
{
"epoch": 0.5582747017436525,
"grad_norm": 4.6035679496622315,
"learning_rate": 2.150863580194756e-06,
"loss": 0.5559,
"step": 10950
},
{
"epoch": 0.5587845416539207,
"grad_norm": 6.7755205105330365,
"learning_rate": 2.1467762710229922e-06,
"loss": 0.6154,
"step": 10960
},
{
"epoch": 0.5592943815641889,
"grad_norm": 4.141599691037273,
"learning_rate": 2.1426899250151086e-06,
"loss": 0.5744,
"step": 10970
},
{
"epoch": 0.5598042214744571,
"grad_norm": 5.496559917092997,
"learning_rate": 2.1386045533136746e-06,
"loss": 0.5549,
"step": 10980
},
{
"epoch": 0.5603140613847252,
"grad_norm": 9.32601783703135,
"learning_rate": 2.134520167058607e-06,
"loss": 0.5604,
"step": 10990
},
{
"epoch": 0.5608239012949934,
"grad_norm": 6.267554337152859,
"learning_rate": 2.1304367773871337e-06,
"loss": 0.5857,
"step": 11000
},
{
"epoch": 0.5613337412052616,
"grad_norm": 5.252899169163052,
"learning_rate": 2.126354395433766e-06,
"loss": 0.5534,
"step": 11010
},
{
"epoch": 0.5618435811155297,
"grad_norm": 8.751971277608932,
"learning_rate": 2.122273032330265e-06,
"loss": 0.5246,
"step": 11020
},
{
"epoch": 0.5623534210257979,
"grad_norm": 3.1994062492488515,
"learning_rate": 2.1181926992056174e-06,
"loss": 0.6414,
"step": 11030
},
{
"epoch": 0.5628632609360661,
"grad_norm": 3.2534620027939556,
"learning_rate": 2.114113407186e-06,
"loss": 0.5736,
"step": 11040
},
{
"epoch": 0.5633731008463343,
"grad_norm": 5.190918228021598,
"learning_rate": 2.1100351673947477e-06,
"loss": 0.5931,
"step": 11050
},
{
"epoch": 0.5638829407566024,
"grad_norm": 6.724073808724146,
"learning_rate": 2.1059579909523315e-06,
"loss": 0.5593,
"step": 11060
},
{
"epoch": 0.5643927806668706,
"grad_norm": 3.678860606208677,
"learning_rate": 2.1018818889763182e-06,
"loss": 0.533,
"step": 11070
},
{
"epoch": 0.5649026205771388,
"grad_norm": 9.57308291531323,
"learning_rate": 2.097806872581348e-06,
"loss": 0.6132,
"step": 11080
},
{
"epoch": 0.565412460487407,
"grad_norm": 3.868519780141342,
"learning_rate": 2.0937329528790974e-06,
"loss": 0.4891,
"step": 11090
},
{
"epoch": 0.5659223003976751,
"grad_norm": 5.363780777577617,
"learning_rate": 2.0896601409782577e-06,
"loss": 0.6499,
"step": 11100
},
{
"epoch": 0.5664321403079433,
"grad_norm": 4.443975323978055,
"learning_rate": 2.0855884479844942e-06,
"loss": 0.5902,
"step": 11110
},
{
"epoch": 0.5669419802182115,
"grad_norm": 4.191030513536192,
"learning_rate": 2.081517885000424e-06,
"loss": 0.5186,
"step": 11120
},
{
"epoch": 0.5674518201284796,
"grad_norm": 3.851029169393777,
"learning_rate": 2.0774484631255836e-06,
"loss": 0.552,
"step": 11130
},
{
"epoch": 0.5679616600387478,
"grad_norm": 3.9250152313608058,
"learning_rate": 2.073380193456394e-06,
"loss": 0.586,
"step": 11140
},
{
"epoch": 0.568471499949016,
"grad_norm": 4.8944788999506965,
"learning_rate": 2.0693130870861407e-06,
"loss": 0.5198,
"step": 11150
},
{
"epoch": 0.5689813398592842,
"grad_norm": 17.84496869483186,
"learning_rate": 2.0652471551049302e-06,
"loss": 0.5571,
"step": 11160
},
{
"epoch": 0.5694911797695523,
"grad_norm": 5.358359968678426,
"learning_rate": 2.061182408599672e-06,
"loss": 0.5105,
"step": 11170
},
{
"epoch": 0.5700010196798205,
"grad_norm": 7.3072935579158935,
"learning_rate": 2.05711885865404e-06,
"loss": 0.6309,
"step": 11180
},
{
"epoch": 0.5705108595900887,
"grad_norm": 5.272764753814353,
"learning_rate": 2.0530565163484474e-06,
"loss": 0.5835,
"step": 11190
},
{
"epoch": 0.5710206995003569,
"grad_norm": 5.790512043425324,
"learning_rate": 2.048995392760013e-06,
"loss": 0.5492,
"step": 11200
},
{
"epoch": 0.571530539410625,
"grad_norm": 7.135530295492757,
"learning_rate": 2.044935498962532e-06,
"loss": 0.636,
"step": 11210
},
{
"epoch": 0.5720403793208932,
"grad_norm": 3.9062846802260895,
"learning_rate": 2.0408768460264493e-06,
"loss": 0.5561,
"step": 11220
},
{
"epoch": 0.5725502192311615,
"grad_norm": 7.73502881578725,
"learning_rate": 2.0368194450188218e-06,
"loss": 0.5896,
"step": 11230
},
{
"epoch": 0.5730600591414295,
"grad_norm": 10.422918025845112,
"learning_rate": 2.0327633070032965e-06,
"loss": 0.6042,
"step": 11240
},
{
"epoch": 0.5735698990516978,
"grad_norm": 52.45955623228605,
"learning_rate": 2.028708443040073e-06,
"loss": 0.5676,
"step": 11250
},
{
"epoch": 0.574079738961966,
"grad_norm": 4.796888490256279,
"learning_rate": 2.0246548641858814e-06,
"loss": 0.6291,
"step": 11260
},
{
"epoch": 0.5745895788722342,
"grad_norm": 5.30930569584662,
"learning_rate": 2.0206025814939427e-06,
"loss": 0.5119,
"step": 11270
},
{
"epoch": 0.5750994187825023,
"grad_norm": 4.900720389790013,
"learning_rate": 2.0165516060139463e-06,
"loss": 0.5577,
"step": 11280
},
{
"epoch": 0.5756092586927705,
"grad_norm": 4.161298191222105,
"learning_rate": 2.012501948792018e-06,
"loss": 0.5458,
"step": 11290
},
{
"epoch": 0.5761190986030387,
"grad_norm": 3.236190978560692,
"learning_rate": 2.008453620870685e-06,
"loss": 0.5864,
"step": 11300
},
{
"epoch": 0.5766289385133069,
"grad_norm": 6.154965235433483,
"learning_rate": 2.0044066332888552e-06,
"loss": 0.5126,
"step": 11310
},
{
"epoch": 0.577138778423575,
"grad_norm": 6.545756031439152,
"learning_rate": 2.0003609970817774e-06,
"loss": 0.5591,
"step": 11320
},
{
"epoch": 0.5776486183338432,
"grad_norm": 3.516722790445947,
"learning_rate": 1.9963167232810176e-06,
"loss": 0.5471,
"step": 11330
},
{
"epoch": 0.5781584582441114,
"grad_norm": 6.796196360122474,
"learning_rate": 1.992273822914425e-06,
"loss": 0.5553,
"step": 11340
},
{
"epoch": 0.5786682981543795,
"grad_norm": 5.947281464777988,
"learning_rate": 1.988232307006106e-06,
"loss": 0.5262,
"step": 11350
},
{
"epoch": 0.5791781380646477,
"grad_norm": 4.400108274800105,
"learning_rate": 1.984192186576391e-06,
"loss": 0.5634,
"step": 11360
},
{
"epoch": 0.5796879779749159,
"grad_norm": 11.142991144527091,
"learning_rate": 1.9801534726418035e-06,
"loss": 0.5781,
"step": 11370
},
{
"epoch": 0.5801978178851841,
"grad_norm": 7.149318827938102,
"learning_rate": 1.976116176215036e-06,
"loss": 0.5673,
"step": 11380
},
{
"epoch": 0.5807076577954522,
"grad_norm": 4.806649384218778,
"learning_rate": 1.972080308304911e-06,
"loss": 0.5796,
"step": 11390
},
{
"epoch": 0.5812174977057204,
"grad_norm": 3.6583334937343825,
"learning_rate": 1.968045879916359e-06,
"loss": 0.5311,
"step": 11400
},
{
"epoch": 0.5817273376159886,
"grad_norm": 4.298434612216439,
"learning_rate": 1.964012902050382e-06,
"loss": 0.5195,
"step": 11410
},
{
"epoch": 0.5822371775262568,
"grad_norm": 5.958333054975549,
"learning_rate": 1.959981385704032e-06,
"loss": 0.5675,
"step": 11420
},
{
"epoch": 0.5827470174365249,
"grad_norm": 4.231351948150938,
"learning_rate": 1.955951341870371e-06,
"loss": 0.6241,
"step": 11430
},
{
"epoch": 0.5832568573467931,
"grad_norm": 5.8880206088209555,
"learning_rate": 1.951922781538446e-06,
"loss": 0.5546,
"step": 11440
},
{
"epoch": 0.5837666972570613,
"grad_norm": 6.318099699009018,
"learning_rate": 1.947895715693263e-06,
"loss": 0.5229,
"step": 11450
},
{
"epoch": 0.5842765371673294,
"grad_norm": 6.753712899839696,
"learning_rate": 1.9438701553157485e-06,
"loss": 0.5203,
"step": 11460
},
{
"epoch": 0.5847863770775976,
"grad_norm": 4.0929932748696665,
"learning_rate": 1.9398461113827256e-06,
"loss": 0.5156,
"step": 11470
},
{
"epoch": 0.5852962169878658,
"grad_norm": 5.046458934742589,
"learning_rate": 1.9358235948668815e-06,
"loss": 0.539,
"step": 11480
},
{
"epoch": 0.585806056898134,
"grad_norm": 6.717232212966177,
"learning_rate": 1.9318026167367417e-06,
"loss": 0.5872,
"step": 11490
},
{
"epoch": 0.5863158968084021,
"grad_norm": 6.797352130078018,
"learning_rate": 1.927783187956631e-06,
"loss": 0.528,
"step": 11500
},
{
"epoch": 0.5868257367186703,
"grad_norm": 4.4083207807244555,
"learning_rate": 1.923765319486656e-06,
"loss": 0.5519,
"step": 11510
},
{
"epoch": 0.5873355766289385,
"grad_norm": 17.86733342145815,
"learning_rate": 1.9197490222826635e-06,
"loss": 0.5674,
"step": 11520
},
{
"epoch": 0.5878454165392067,
"grad_norm": 5.835577567162421,
"learning_rate": 1.915734307296218e-06,
"loss": 0.5766,
"step": 11530
},
{
"epoch": 0.5883552564494748,
"grad_norm": 4.974792442905227,
"learning_rate": 1.9117211854745717e-06,
"loss": 0.5107,
"step": 11540
},
{
"epoch": 0.588865096359743,
"grad_norm": 8.96090507193812,
"learning_rate": 1.9077096677606275e-06,
"loss": 0.5506,
"step": 11550
},
{
"epoch": 0.5893749362700113,
"grad_norm": 16.83735528640451,
"learning_rate": 1.903699765092919e-06,
"loss": 0.5879,
"step": 11560
},
{
"epoch": 0.5898847761802793,
"grad_norm": 4.927581402011277,
"learning_rate": 1.8996914884055723e-06,
"loss": 0.5516,
"step": 11570
},
{
"epoch": 0.5903946160905476,
"grad_norm": 6.298665939122396,
"learning_rate": 1.8956848486282833e-06,
"loss": 0.5509,
"step": 11580
},
{
"epoch": 0.5909044560008158,
"grad_norm": 6.190555334840309,
"learning_rate": 1.8916798566862816e-06,
"loss": 0.5289,
"step": 11590
},
{
"epoch": 0.591414295911084,
"grad_norm": 6.383236014028381,
"learning_rate": 1.8876765235003043e-06,
"loss": 0.5286,
"step": 11600
},
{
"epoch": 0.5919241358213521,
"grad_norm": 6.403813249783424,
"learning_rate": 1.883674859986567e-06,
"loss": 0.4884,
"step": 11610
},
{
"epoch": 0.5924339757316203,
"grad_norm": 5.266011099420638,
"learning_rate": 1.87967487705673e-06,
"loss": 0.5382,
"step": 11620
},
{
"epoch": 0.5929438156418885,
"grad_norm": 12.786564682455287,
"learning_rate": 1.8756765856178732e-06,
"loss": 0.5177,
"step": 11630
},
{
"epoch": 0.5934536555521567,
"grad_norm": 3.297347143611863,
"learning_rate": 1.8716799965724614e-06,
"loss": 0.5571,
"step": 11640
},
{
"epoch": 0.5939634954624248,
"grad_norm": 7.872024233619769,
"learning_rate": 1.867685120818321e-06,
"loss": 0.5575,
"step": 11650
},
{
"epoch": 0.594473335372693,
"grad_norm": 6.949646482840053,
"learning_rate": 1.8636919692486034e-06,
"loss": 0.5758,
"step": 11660
},
{
"epoch": 0.5949831752829612,
"grad_norm": 3.3496585669114847,
"learning_rate": 1.8597005527517609e-06,
"loss": 0.5285,
"step": 11670
},
{
"epoch": 0.5954930151932293,
"grad_norm": 7.231602373270994,
"learning_rate": 1.8557108822115128e-06,
"loss": 0.529,
"step": 11680
},
{
"epoch": 0.5960028551034975,
"grad_norm": 4.239113263704488,
"learning_rate": 1.8517229685068178e-06,
"loss": 0.5565,
"step": 11690
},
{
"epoch": 0.5965126950137657,
"grad_norm": 5.518795855930493,
"learning_rate": 1.8477368225118466e-06,
"loss": 0.6519,
"step": 11700
},
{
"epoch": 0.5970225349240339,
"grad_norm": 5.153882732760769,
"learning_rate": 1.8437524550959462e-06,
"loss": 0.5304,
"step": 11710
},
{
"epoch": 0.597532374834302,
"grad_norm": 5.368256886207826,
"learning_rate": 1.839769877123616e-06,
"loss": 0.5611,
"step": 11720
},
{
"epoch": 0.5980422147445702,
"grad_norm": 4.126730916547592,
"learning_rate": 1.8357890994544747e-06,
"loss": 0.5406,
"step": 11730
},
{
"epoch": 0.5985520546548384,
"grad_norm": 6.876143389007811,
"learning_rate": 1.8318101329432335e-06,
"loss": 0.5625,
"step": 11740
},
{
"epoch": 0.5990618945651066,
"grad_norm": 5.393888490327936,
"learning_rate": 1.827832988439664e-06,
"loss": 0.5545,
"step": 11750
},
{
"epoch": 0.5995717344753747,
"grad_norm": 11.078120096332158,
"learning_rate": 1.823857676788568e-06,
"loss": 0.5286,
"step": 11760
},
{
"epoch": 0.6000815743856429,
"grad_norm": 4.747475586071538,
"learning_rate": 1.8198842088297541e-06,
"loss": 0.5411,
"step": 11770
},
{
"epoch": 0.6005914142959111,
"grad_norm": 3.916120306877792,
"learning_rate": 1.8159125953979984e-06,
"loss": 0.534,
"step": 11780
},
{
"epoch": 0.6011012542061792,
"grad_norm": 2.4371681389424245,
"learning_rate": 1.8119428473230235e-06,
"loss": 0.5273,
"step": 11790
},
{
"epoch": 0.6016110941164474,
"grad_norm": 4.9710586345691725,
"learning_rate": 1.8079749754294631e-06,
"loss": 0.6184,
"step": 11800
},
{
"epoch": 0.6021209340267156,
"grad_norm": 5.243911574478128,
"learning_rate": 1.8040089905368383e-06,
"loss": 0.4609,
"step": 11810
},
{
"epoch": 0.6026307739369838,
"grad_norm": 3.697100357595567,
"learning_rate": 1.8000449034595205e-06,
"loss": 0.5856,
"step": 11820
},
{
"epoch": 0.6031406138472519,
"grad_norm": 3.306462379724759,
"learning_rate": 1.7960827250067106e-06,
"loss": 0.4624,
"step": 11830
},
{
"epoch": 0.6036504537575201,
"grad_norm": 4.3861372369161895,
"learning_rate": 1.7921224659824015e-06,
"loss": 0.5036,
"step": 11840
},
{
"epoch": 0.6041602936677883,
"grad_norm": 11.1381283616063,
"learning_rate": 1.7881641371853536e-06,
"loss": 0.5702,
"step": 11850
},
{
"epoch": 0.6046701335780565,
"grad_norm": 11.451440000420012,
"learning_rate": 1.7842077494090653e-06,
"loss": 0.541,
"step": 11860
},
{
"epoch": 0.6051799734883246,
"grad_norm": 27.201678953604713,
"learning_rate": 1.7802533134417398e-06,
"loss": 0.5649,
"step": 11870
},
{
"epoch": 0.6056898133985928,
"grad_norm": 6.787297503814909,
"learning_rate": 1.7763008400662608e-06,
"loss": 0.5071,
"step": 11880
},
{
"epoch": 0.606199653308861,
"grad_norm": 3.893800201972374,
"learning_rate": 1.7723503400601565e-06,
"loss": 0.5727,
"step": 11890
},
{
"epoch": 0.6067094932191291,
"grad_norm": 7.15765843457491,
"learning_rate": 1.7684018241955796e-06,
"loss": 0.5162,
"step": 11900
},
{
"epoch": 0.6072193331293974,
"grad_norm": 4.176542896671038,
"learning_rate": 1.7644553032392677e-06,
"loss": 0.6107,
"step": 11910
},
{
"epoch": 0.6077291730396656,
"grad_norm": 4.9221706209080285,
"learning_rate": 1.7605107879525213e-06,
"loss": 0.5224,
"step": 11920
},
{
"epoch": 0.6082390129499338,
"grad_norm": 6.756774089189636,
"learning_rate": 1.75656828909117e-06,
"loss": 0.595,
"step": 11930
},
{
"epoch": 0.6087488528602019,
"grad_norm": 5.9960279778683745,
"learning_rate": 1.7526278174055477e-06,
"loss": 0.5574,
"step": 11940
},
{
"epoch": 0.6092586927704701,
"grad_norm": 6.717606417710255,
"learning_rate": 1.7486893836404586e-06,
"loss": 0.5187,
"step": 11950
},
{
"epoch": 0.6097685326807383,
"grad_norm": 4.874332720229018,
"learning_rate": 1.7447529985351497e-06,
"loss": 0.5843,
"step": 11960
},
{
"epoch": 0.6102783725910065,
"grad_norm": 7.904763208913782,
"learning_rate": 1.740818672823284e-06,
"loss": 0.6236,
"step": 11970
},
{
"epoch": 0.6107882125012746,
"grad_norm": 5.962793605314727,
"learning_rate": 1.7368864172329053e-06,
"loss": 0.5432,
"step": 11980
},
{
"epoch": 0.6112980524115428,
"grad_norm": 4.128774021887511,
"learning_rate": 1.7329562424864176e-06,
"loss": 0.4786,
"step": 11990
},
{
"epoch": 0.611807892321811,
"grad_norm": 3.544701583811243,
"learning_rate": 1.729028159300546e-06,
"loss": 0.5346,
"step": 12000
},
{
"epoch": 0.6123177322320791,
"grad_norm": 4.840545505343333,
"learning_rate": 1.7251021783863149e-06,
"loss": 0.6048,
"step": 12010
},
{
"epoch": 0.6128275721423473,
"grad_norm": 7.077718602400651,
"learning_rate": 1.7211783104490168e-06,
"loss": 0.5303,
"step": 12020
},
{
"epoch": 0.6133374120526155,
"grad_norm": 4.654777975552529,
"learning_rate": 1.7172565661881807e-06,
"loss": 0.5346,
"step": 12030
},
{
"epoch": 0.6138472519628837,
"grad_norm": 2.727615970290839,
"learning_rate": 1.7133369562975466e-06,
"loss": 0.5221,
"step": 12040
},
{
"epoch": 0.6143570918731518,
"grad_norm": 20.005836961994195,
"learning_rate": 1.7094194914650319e-06,
"loss": 0.5183,
"step": 12050
},
{
"epoch": 0.61486693178342,
"grad_norm": 4.413912954732605,
"learning_rate": 1.7055041823727088e-06,
"loss": 0.4793,
"step": 12060
},
{
"epoch": 0.6153767716936882,
"grad_norm": 4.51420128049222,
"learning_rate": 1.7015910396967678e-06,
"loss": 0.5064,
"step": 12070
},
{
"epoch": 0.6158866116039564,
"grad_norm": 15.013480147209615,
"learning_rate": 1.6976800741074944e-06,
"loss": 0.5662,
"step": 12080
},
{
"epoch": 0.6163964515142245,
"grad_norm": 5.497354154336149,
"learning_rate": 1.6937712962692348e-06,
"loss": 0.4898,
"step": 12090
},
{
"epoch": 0.6169062914244927,
"grad_norm": 6.620777966612235,
"learning_rate": 1.6898647168403734e-06,
"loss": 0.5536,
"step": 12100
},
{
"epoch": 0.6174161313347609,
"grad_norm": 4.87993175632207,
"learning_rate": 1.6859603464732978e-06,
"loss": 0.5768,
"step": 12110
},
{
"epoch": 0.617925971245029,
"grad_norm": 5.228075443876219,
"learning_rate": 1.6820581958143712e-06,
"loss": 0.5504,
"step": 12120
},
{
"epoch": 0.6184358111552972,
"grad_norm": 5.719812149218567,
"learning_rate": 1.6781582755039071e-06,
"loss": 0.5477,
"step": 12130
},
{
"epoch": 0.6189456510655654,
"grad_norm": 6.6382363841486045,
"learning_rate": 1.6742605961761335e-06,
"loss": 0.6053,
"step": 12140
},
{
"epoch": 0.6194554909758336,
"grad_norm": 11.246442613850016,
"learning_rate": 1.6703651684591715e-06,
"loss": 0.509,
"step": 12150
},
{
"epoch": 0.6199653308861017,
"grad_norm": 4.671567718687719,
"learning_rate": 1.6664720029749999e-06,
"loss": 0.534,
"step": 12160
},
{
"epoch": 0.6204751707963699,
"grad_norm": 11.247562088368806,
"learning_rate": 1.662581110339429e-06,
"loss": 0.5609,
"step": 12170
},
{
"epoch": 0.6209850107066381,
"grad_norm": 5.417789308251715,
"learning_rate": 1.6586925011620741e-06,
"loss": 0.5708,
"step": 12180
},
{
"epoch": 0.6214948506169063,
"grad_norm": 7.427770824892025,
"learning_rate": 1.6548061860463209e-06,
"loss": 0.5697,
"step": 12190
},
{
"epoch": 0.6220046905271744,
"grad_norm": 5.948469061304988,
"learning_rate": 1.6509221755893018e-06,
"loss": 0.5605,
"step": 12200
},
{
"epoch": 0.6225145304374426,
"grad_norm": 9.126594912920444,
"learning_rate": 1.6470404803818623e-06,
"loss": 0.5697,
"step": 12210
},
{
"epoch": 0.6230243703477109,
"grad_norm": 5.788189971550228,
"learning_rate": 1.643161111008539e-06,
"loss": 0.5493,
"step": 12220
},
{
"epoch": 0.623534210257979,
"grad_norm": 4.623371512094305,
"learning_rate": 1.6392840780475225e-06,
"loss": 0.5321,
"step": 12230
},
{
"epoch": 0.6240440501682472,
"grad_norm": 5.082810607595676,
"learning_rate": 1.635409392070635e-06,
"loss": 0.5208,
"step": 12240
},
{
"epoch": 0.6245538900785154,
"grad_norm": 4.670898035508682,
"learning_rate": 1.6315370636432955e-06,
"loss": 0.5473,
"step": 12250
},
{
"epoch": 0.6250637299887836,
"grad_norm": 4.691373191839654,
"learning_rate": 1.6276671033245001e-06,
"loss": 0.5579,
"step": 12260
},
{
"epoch": 0.6255735698990517,
"grad_norm": 18.962311891891527,
"learning_rate": 1.623799521666783e-06,
"loss": 0.5563,
"step": 12270
},
{
"epoch": 0.6260834098093199,
"grad_norm": 6.744529272671036,
"learning_rate": 1.6199343292161932e-06,
"loss": 0.541,
"step": 12280
},
{
"epoch": 0.6265932497195881,
"grad_norm": 7.010215594924779,
"learning_rate": 1.616071536512267e-06,
"loss": 0.55,
"step": 12290
},
{
"epoch": 0.6271030896298562,
"grad_norm": 7.714402432937164,
"learning_rate": 1.6122111540879934e-06,
"loss": 0.5399,
"step": 12300
},
{
"epoch": 0.6276129295401244,
"grad_norm": 11.558769117556551,
"learning_rate": 1.608353192469794e-06,
"loss": 0.5092,
"step": 12310
},
{
"epoch": 0.6281227694503926,
"grad_norm": 5.472315236293344,
"learning_rate": 1.6044976621774835e-06,
"loss": 0.5645,
"step": 12320
},
{
"epoch": 0.6286326093606608,
"grad_norm": 5.091831525618372,
"learning_rate": 1.6006445737242525e-06,
"loss": 0.5571,
"step": 12330
},
{
"epoch": 0.6291424492709289,
"grad_norm": 7.371920062561914,
"learning_rate": 1.5967939376166288e-06,
"loss": 0.4884,
"step": 12340
},
{
"epoch": 0.6296522891811971,
"grad_norm": 8.880806971781821,
"learning_rate": 1.5929457643544568e-06,
"loss": 0.6268,
"step": 12350
},
{
"epoch": 0.6301621290914653,
"grad_norm": 13.793811691208258,
"learning_rate": 1.5891000644308636e-06,
"loss": 0.5725,
"step": 12360
},
{
"epoch": 0.6306719690017335,
"grad_norm": 7.305062091285268,
"learning_rate": 1.5852568483322297e-06,
"loss": 0.591,
"step": 12370
},
{
"epoch": 0.6311818089120016,
"grad_norm": 16.681756779784052,
"learning_rate": 1.5814161265381684e-06,
"loss": 0.5358,
"step": 12380
},
{
"epoch": 0.6316916488222698,
"grad_norm": 4.608431050619034,
"learning_rate": 1.5775779095214857e-06,
"loss": 0.5829,
"step": 12390
},
{
"epoch": 0.632201488732538,
"grad_norm": 4.904383309108225,
"learning_rate": 1.5737422077481621e-06,
"loss": 0.5241,
"step": 12400
},
{
"epoch": 0.6327113286428061,
"grad_norm": 16.230807709327845,
"learning_rate": 1.5699090316773153e-06,
"loss": 0.4842,
"step": 12410
},
{
"epoch": 0.6332211685530743,
"grad_norm": 7.659363053107744,
"learning_rate": 1.5660783917611804e-06,
"loss": 0.573,
"step": 12420
},
{
"epoch": 0.6337310084633425,
"grad_norm": 3.435532611433358,
"learning_rate": 1.5622502984450751e-06,
"loss": 0.529,
"step": 12430
},
{
"epoch": 0.6342408483736107,
"grad_norm": 3.673904674179521,
"learning_rate": 1.558424762167371e-06,
"loss": 0.5808,
"step": 12440
},
{
"epoch": 0.6347506882838788,
"grad_norm": 5.312355837601045,
"learning_rate": 1.554601793359471e-06,
"loss": 0.5653,
"step": 12450
},
{
"epoch": 0.635260528194147,
"grad_norm": 6.623149914811438,
"learning_rate": 1.550781402445774e-06,
"loss": 0.4652,
"step": 12460
},
{
"epoch": 0.6357703681044152,
"grad_norm": 5.9337240759175325,
"learning_rate": 1.5469635998436513e-06,
"loss": 0.4999,
"step": 12470
},
{
"epoch": 0.6362802080146834,
"grad_norm": 8.636217063262878,
"learning_rate": 1.5431483959634146e-06,
"loss": 0.5398,
"step": 12480
},
{
"epoch": 0.6367900479249515,
"grad_norm": 5.407358144161057,
"learning_rate": 1.5393358012082932e-06,
"loss": 0.6059,
"step": 12490
},
{
"epoch": 0.6372998878352197,
"grad_norm": 5.412543842045762,
"learning_rate": 1.5355258259743964e-06,
"loss": 0.583,
"step": 12500
},
{
"epoch": 0.6378097277454879,
"grad_norm": 12.869352755253521,
"learning_rate": 1.5317184806506958e-06,
"loss": 0.5093,
"step": 12510
},
{
"epoch": 0.638319567655756,
"grad_norm": 9.023668017857924,
"learning_rate": 1.5279137756189893e-06,
"loss": 0.5789,
"step": 12520
},
{
"epoch": 0.6388294075660242,
"grad_norm": 11.1484285224452,
"learning_rate": 1.5241117212538748e-06,
"loss": 0.5606,
"step": 12530
},
{
"epoch": 0.6393392474762924,
"grad_norm": 10.204286530208517,
"learning_rate": 1.5203123279227245e-06,
"loss": 0.5092,
"step": 12540
},
{
"epoch": 0.6398490873865607,
"grad_norm": 4.588531643687412,
"learning_rate": 1.5165156059856518e-06,
"loss": 0.5612,
"step": 12550
},
{
"epoch": 0.6403589272968288,
"grad_norm": 7.344025683251624,
"learning_rate": 1.5127215657954888e-06,
"loss": 0.6065,
"step": 12560
},
{
"epoch": 0.640868767207097,
"grad_norm": 6.786888474556521,
"learning_rate": 1.508930217697752e-06,
"loss": 0.4867,
"step": 12570
},
{
"epoch": 0.6413786071173652,
"grad_norm": 2.917608815001384,
"learning_rate": 1.5051415720306198e-06,
"loss": 0.5744,
"step": 12580
},
{
"epoch": 0.6418884470276334,
"grad_norm": 6.991368031486083,
"learning_rate": 1.5013556391249008e-06,
"loss": 0.5858,
"step": 12590
},
{
"epoch": 0.6423982869379015,
"grad_norm": 8.586815067663817,
"learning_rate": 1.4975724293040047e-06,
"loss": 0.6201,
"step": 12600
},
{
"epoch": 0.6429081268481697,
"grad_norm": 21.604522440881276,
"learning_rate": 1.4937919528839196e-06,
"loss": 0.5263,
"step": 12610
},
{
"epoch": 0.6434179667584379,
"grad_norm": 5.7633165069882795,
"learning_rate": 1.4900142201731766e-06,
"loss": 0.6214,
"step": 12620
},
{
"epoch": 0.643927806668706,
"grad_norm": 3.3215177368290503,
"learning_rate": 1.486239241472828e-06,
"loss": 0.5635,
"step": 12630
},
{
"epoch": 0.6444376465789742,
"grad_norm": 3.4209268213631403,
"learning_rate": 1.4824670270764135e-06,
"loss": 0.4951,
"step": 12640
},
{
"epoch": 0.6449474864892424,
"grad_norm": 7.881201877656715,
"learning_rate": 1.4786975872699388e-06,
"loss": 0.5314,
"step": 12650
},
{
"epoch": 0.6454573263995106,
"grad_norm": 14.91471390732208,
"learning_rate": 1.4749309323318406e-06,
"loss": 0.5333,
"step": 12660
},
{
"epoch": 0.6459671663097787,
"grad_norm": 11.257453345493493,
"learning_rate": 1.471167072532965e-06,
"loss": 0.4865,
"step": 12670
},
{
"epoch": 0.6464770062200469,
"grad_norm": 4.493486029718423,
"learning_rate": 1.467406018136534e-06,
"loss": 0.5599,
"step": 12680
},
{
"epoch": 0.6469868461303151,
"grad_norm": 5.626204141998258,
"learning_rate": 1.4636477793981197e-06,
"loss": 0.5358,
"step": 12690
},
{
"epoch": 0.6474966860405833,
"grad_norm": 4.584586023876025,
"learning_rate": 1.459892366565619e-06,
"loss": 0.5762,
"step": 12700
},
{
"epoch": 0.6480065259508514,
"grad_norm": 6.250609630380111,
"learning_rate": 1.45613978987922e-06,
"loss": 0.5393,
"step": 12710
},
{
"epoch": 0.6485163658611196,
"grad_norm": 6.187124929152572,
"learning_rate": 1.4523900595713808e-06,
"loss": 0.5332,
"step": 12720
},
{
"epoch": 0.6490262057713878,
"grad_norm": 5.114127113894209,
"learning_rate": 1.4486431858667943e-06,
"loss": 0.5373,
"step": 12730
},
{
"epoch": 0.6495360456816559,
"grad_norm": 4.8066581952266665,
"learning_rate": 1.4448991789823663e-06,
"loss": 0.5698,
"step": 12740
},
{
"epoch": 0.6500458855919241,
"grad_norm": 6.027746666952245,
"learning_rate": 1.441158049127185e-06,
"loss": 0.5449,
"step": 12750
},
{
"epoch": 0.6505557255021923,
"grad_norm": 10.424532550969587,
"learning_rate": 1.437419806502494e-06,
"loss": 0.4959,
"step": 12760
},
{
"epoch": 0.6510655654124605,
"grad_norm": 3.870539783400023,
"learning_rate": 1.4336844613016632e-06,
"loss": 0.517,
"step": 12770
},
{
"epoch": 0.6515754053227286,
"grad_norm": 3.6951199081428343,
"learning_rate": 1.4299520237101624e-06,
"loss": 0.5612,
"step": 12780
},
{
"epoch": 0.6520852452329968,
"grad_norm": 4.597449928950978,
"learning_rate": 1.4262225039055326e-06,
"loss": 0.5271,
"step": 12790
},
{
"epoch": 0.652595085143265,
"grad_norm": 12.281913122422118,
"learning_rate": 1.4224959120573595e-06,
"loss": 0.6379,
"step": 12800
},
{
"epoch": 0.6531049250535332,
"grad_norm": 4.6537050591912275,
"learning_rate": 1.4187722583272442e-06,
"loss": 0.5644,
"step": 12810
},
{
"epoch": 0.6536147649638013,
"grad_norm": 3.52889984457105,
"learning_rate": 1.4150515528687742e-06,
"loss": 0.5335,
"step": 12820
},
{
"epoch": 0.6541246048740695,
"grad_norm": 7.135927672545743,
"learning_rate": 1.4113338058275023e-06,
"loss": 0.5379,
"step": 12830
},
{
"epoch": 0.6546344447843377,
"grad_norm": 3.022192804234342,
"learning_rate": 1.4076190273409112e-06,
"loss": 0.5654,
"step": 12840
},
{
"epoch": 0.6551442846946058,
"grad_norm": 5.983120327543596,
"learning_rate": 1.403907227538389e-06,
"loss": 0.5455,
"step": 12850
},
{
"epoch": 0.655654124604874,
"grad_norm": 4.8791220155951285,
"learning_rate": 1.4001984165412042e-06,
"loss": 0.5214,
"step": 12860
},
{
"epoch": 0.6561639645151423,
"grad_norm": 4.739586848422346,
"learning_rate": 1.3964926044624694e-06,
"loss": 0.5536,
"step": 12870
},
{
"epoch": 0.6566738044254105,
"grad_norm": 3.9735225097277054,
"learning_rate": 1.3927898014071283e-06,
"loss": 0.5483,
"step": 12880
},
{
"epoch": 0.6571836443356786,
"grad_norm": 3.98878844665679,
"learning_rate": 1.3890900174719124e-06,
"loss": 0.5125,
"step": 12890
},
{
"epoch": 0.6576934842459468,
"grad_norm": 5.867019653833069,
"learning_rate": 1.3853932627453246e-06,
"loss": 0.4877,
"step": 12900
},
{
"epoch": 0.658203324156215,
"grad_norm": 6.111636248490676,
"learning_rate": 1.3816995473076064e-06,
"loss": 0.4964,
"step": 12910
},
{
"epoch": 0.6587131640664832,
"grad_norm": 4.919133887476734,
"learning_rate": 1.3780088812307124e-06,
"loss": 0.4471,
"step": 12920
},
{
"epoch": 0.6592230039767513,
"grad_norm": 4.197396139968978,
"learning_rate": 1.3743212745782819e-06,
"loss": 0.5337,
"step": 12930
},
{
"epoch": 0.6597328438870195,
"grad_norm": 5.946895956028823,
"learning_rate": 1.3706367374056123e-06,
"loss": 0.5977,
"step": 12940
},
{
"epoch": 0.6602426837972877,
"grad_norm": 13.003811745210815,
"learning_rate": 1.3669552797596309e-06,
"loss": 0.567,
"step": 12950
},
{
"epoch": 0.6607525237075558,
"grad_norm": 4.004722562893251,
"learning_rate": 1.3632769116788672e-06,
"loss": 0.5212,
"step": 12960
},
{
"epoch": 0.661262363617824,
"grad_norm": 5.84978913294524,
"learning_rate": 1.3596016431934278e-06,
"loss": 0.5636,
"step": 12970
},
{
"epoch": 0.6617722035280922,
"grad_norm": 4.726395100247807,
"learning_rate": 1.355929484324964e-06,
"loss": 0.56,
"step": 12980
},
{
"epoch": 0.6622820434383604,
"grad_norm": 6.083303667891072,
"learning_rate": 1.3522604450866533e-06,
"loss": 0.5806,
"step": 12990
},
{
"epoch": 0.6627918833486285,
"grad_norm": 4.976334480121951,
"learning_rate": 1.34859453548316e-06,
"loss": 0.5081,
"step": 13000
},
{
"epoch": 0.6633017232588967,
"grad_norm": 3.2620170790567418,
"learning_rate": 1.3449317655106209e-06,
"loss": 0.5024,
"step": 13010
},
{
"epoch": 0.6638115631691649,
"grad_norm": 4.383688303394006,
"learning_rate": 1.341272145156609e-06,
"loss": 0.5076,
"step": 13020
},
{
"epoch": 0.6643214030794331,
"grad_norm": 5.680026855236324,
"learning_rate": 1.3376156844001054e-06,
"loss": 0.5455,
"step": 13030
},
{
"epoch": 0.6648312429897012,
"grad_norm": 5.492757631610527,
"learning_rate": 1.3339623932114837e-06,
"loss": 0.5247,
"step": 13040
},
{
"epoch": 0.6653410828999694,
"grad_norm": 8.569767932778747,
"learning_rate": 1.3303122815524668e-06,
"loss": 0.4356,
"step": 13050
},
{
"epoch": 0.6658509228102376,
"grad_norm": 4.748192353109913,
"learning_rate": 1.3266653593761124e-06,
"loss": 0.5581,
"step": 13060
},
{
"epoch": 0.6663607627205057,
"grad_norm": 4.048596076733172,
"learning_rate": 1.3230216366267796e-06,
"loss": 0.5113,
"step": 13070
},
{
"epoch": 0.6668706026307739,
"grad_norm": 8.732362292125414,
"learning_rate": 1.3193811232401038e-06,
"loss": 0.5092,
"step": 13080
},
{
"epoch": 0.6673804425410421,
"grad_norm": 7.060647395941407,
"learning_rate": 1.3157438291429692e-06,
"loss": 0.5312,
"step": 13090
},
{
"epoch": 0.6678902824513103,
"grad_norm": 5.421755966391951,
"learning_rate": 1.3121097642534811e-06,
"loss": 0.5273,
"step": 13100
},
{
"epoch": 0.6684001223615784,
"grad_norm": 5.9238799862585205,
"learning_rate": 1.3084789384809405e-06,
"loss": 0.5405,
"step": 13110
},
{
"epoch": 0.6689099622718466,
"grad_norm": 4.110601472838295,
"learning_rate": 1.3048513617258145e-06,
"loss": 0.5739,
"step": 13120
},
{
"epoch": 0.6694198021821148,
"grad_norm": 6.199699317727319,
"learning_rate": 1.3012270438797137e-06,
"loss": 0.5018,
"step": 13130
},
{
"epoch": 0.669929642092383,
"grad_norm": 4.507735245985465,
"learning_rate": 1.2976059948253572e-06,
"loss": 0.57,
"step": 13140
},
{
"epoch": 0.6704394820026511,
"grad_norm": 6.293867493672353,
"learning_rate": 1.2939882244365577e-06,
"loss": 0.5294,
"step": 13150
},
{
"epoch": 0.6709493219129193,
"grad_norm": 10.6075989445441,
"learning_rate": 1.29037374257818e-06,
"loss": 0.5391,
"step": 13160
},
{
"epoch": 0.6714591618231875,
"grad_norm": 5.713377377391046,
"learning_rate": 1.2867625591061296e-06,
"loss": 0.587,
"step": 13170
},
{
"epoch": 0.6719690017334556,
"grad_norm": 4.1722961820365345,
"learning_rate": 1.2831546838673133e-06,
"loss": 0.5386,
"step": 13180
},
{
"epoch": 0.6724788416437238,
"grad_norm": 7.7111157513651625,
"learning_rate": 1.2795501266996157e-06,
"loss": 0.5468,
"step": 13190
},
{
"epoch": 0.672988681553992,
"grad_norm": 9.602787919368614,
"learning_rate": 1.27594889743188e-06,
"loss": 0.6124,
"step": 13200
},
{
"epoch": 0.6734985214642603,
"grad_norm": 6.380183019190915,
"learning_rate": 1.2723510058838678e-06,
"loss": 0.5047,
"step": 13210
},
{
"epoch": 0.6740083613745284,
"grad_norm": 2.710144136012811,
"learning_rate": 1.2687564618662434e-06,
"loss": 0.561,
"step": 13220
},
{
"epoch": 0.6745182012847966,
"grad_norm": 7.452950908001987,
"learning_rate": 1.2651652751805433e-06,
"loss": 0.5509,
"step": 13230
},
{
"epoch": 0.6750280411950648,
"grad_norm": 3.631417280232438,
"learning_rate": 1.2615774556191478e-06,
"loss": 0.4819,
"step": 13240
},
{
"epoch": 0.675537881105333,
"grad_norm": 10.61091930714242,
"learning_rate": 1.2579930129652562e-06,
"loss": 0.556,
"step": 13250
},
{
"epoch": 0.6760477210156011,
"grad_norm": 3.314191356866052,
"learning_rate": 1.2544119569928604e-06,
"loss": 0.5335,
"step": 13260
},
{
"epoch": 0.6765575609258693,
"grad_norm": 4.731522258922947,
"learning_rate": 1.250834297466717e-06,
"loss": 0.5158,
"step": 13270
},
{
"epoch": 0.6770674008361375,
"grad_norm": 6.064347680365402,
"learning_rate": 1.2472600441423208e-06,
"loss": 0.4946,
"step": 13280
},
{
"epoch": 0.6775772407464056,
"grad_norm": 14.532010946729013,
"learning_rate": 1.2436892067658807e-06,
"loss": 0.5467,
"step": 13290
},
{
"epoch": 0.6780870806566738,
"grad_norm": 4.50886391168367,
"learning_rate": 1.240121795074286e-06,
"loss": 0.5721,
"step": 13300
},
{
"epoch": 0.678596920566942,
"grad_norm": 10.326308885826023,
"learning_rate": 1.2365578187950927e-06,
"loss": 0.5194,
"step": 13310
},
{
"epoch": 0.6791067604772102,
"grad_norm": 4.084961974236026,
"learning_rate": 1.2329972876464808e-06,
"loss": 0.5192,
"step": 13320
},
{
"epoch": 0.6796166003874783,
"grad_norm": 4.000577035487769,
"learning_rate": 1.2294402113372433e-06,
"loss": 0.5699,
"step": 13330
},
{
"epoch": 0.6801264402977465,
"grad_norm": 3.1591278263694695,
"learning_rate": 1.2258865995667493e-06,
"loss": 0.4665,
"step": 13340
},
{
"epoch": 0.6806362802080147,
"grad_norm": 5.006640627452529,
"learning_rate": 1.2223364620249185e-06,
"loss": 0.4945,
"step": 13350
},
{
"epoch": 0.6811461201182829,
"grad_norm": 7.912333077859562,
"learning_rate": 1.2187898083922033e-06,
"loss": 0.5749,
"step": 13360
},
{
"epoch": 0.681655960028551,
"grad_norm": 5.71637174633293,
"learning_rate": 1.2152466483395504e-06,
"loss": 0.5378,
"step": 13370
},
{
"epoch": 0.6821657999388192,
"grad_norm": 5.642639425945319,
"learning_rate": 1.211706991528383e-06,
"loss": 0.5075,
"step": 13380
},
{
"epoch": 0.6826756398490874,
"grad_norm": 3.494475670208223,
"learning_rate": 1.2081708476105714e-06,
"loss": 0.5116,
"step": 13390
},
{
"epoch": 0.6831854797593555,
"grad_norm": 3.4281174575970588,
"learning_rate": 1.2046382262284071e-06,
"loss": 0.4985,
"step": 13400
},
{
"epoch": 0.6836953196696237,
"grad_norm": 5.9440661581854926,
"learning_rate": 1.2011091370145758e-06,
"loss": 0.5967,
"step": 13410
},
{
"epoch": 0.6842051595798919,
"grad_norm": 4.0954872210042295,
"learning_rate": 1.1975835895921326e-06,
"loss": 0.4953,
"step": 13420
},
{
"epoch": 0.6847149994901601,
"grad_norm": 4.429836719631265,
"learning_rate": 1.1940615935744743e-06,
"loss": 0.5524,
"step": 13430
},
{
"epoch": 0.6852248394004282,
"grad_norm": 6.956063008289005,
"learning_rate": 1.1905431585653137e-06,
"loss": 0.4984,
"step": 13440
},
{
"epoch": 0.6857346793106964,
"grad_norm": 6.877457551841281,
"learning_rate": 1.1870282941586556e-06,
"loss": 0.5232,
"step": 13450
},
{
"epoch": 0.6862445192209646,
"grad_norm": 3.1965950945102932,
"learning_rate": 1.183517009938763e-06,
"loss": 0.5288,
"step": 13460
},
{
"epoch": 0.6867543591312328,
"grad_norm": 11.665745355509321,
"learning_rate": 1.1800093154801442e-06,
"loss": 0.4782,
"step": 13470
},
{
"epoch": 0.6872641990415009,
"grad_norm": 5.109328036369074,
"learning_rate": 1.1765052203475115e-06,
"loss": 0.5358,
"step": 13480
},
{
"epoch": 0.6877740389517691,
"grad_norm": 7.600926840467256,
"learning_rate": 1.1730047340957692e-06,
"loss": 0.511,
"step": 13490
},
{
"epoch": 0.6882838788620373,
"grad_norm": 4.906184451993843,
"learning_rate": 1.1695078662699775e-06,
"loss": 0.5583,
"step": 13500
},
{
"epoch": 0.6887937187723054,
"grad_norm": 3.5218318713562735,
"learning_rate": 1.1660146264053275e-06,
"loss": 0.4649,
"step": 13510
},
{
"epoch": 0.6893035586825736,
"grad_norm": 3.5067071711997913,
"learning_rate": 1.162525024027125e-06,
"loss": 0.5805,
"step": 13520
},
{
"epoch": 0.6898133985928419,
"grad_norm": 4.699981732851516,
"learning_rate": 1.159039068650749e-06,
"loss": 0.5593,
"step": 13530
},
{
"epoch": 0.6903232385031101,
"grad_norm": 10.828556477398795,
"learning_rate": 1.1555567697816392e-06,
"loss": 0.5423,
"step": 13540
},
{
"epoch": 0.6908330784133782,
"grad_norm": 4.545678718062527,
"learning_rate": 1.1520781369152628e-06,
"loss": 0.5206,
"step": 13550
},
{
"epoch": 0.6913429183236464,
"grad_norm": 5.432979914559371,
"learning_rate": 1.1486031795370914e-06,
"loss": 0.5179,
"step": 13560
},
{
"epoch": 0.6918527582339146,
"grad_norm": 3.1618624576592826,
"learning_rate": 1.1451319071225738e-06,
"loss": 0.499,
"step": 13570
},
{
"epoch": 0.6923625981441828,
"grad_norm": 4.0550254609043,
"learning_rate": 1.141664329137111e-06,
"loss": 0.5755,
"step": 13580
},
{
"epoch": 0.6928724380544509,
"grad_norm": 4.448786177362205,
"learning_rate": 1.1382004550360298e-06,
"loss": 0.5407,
"step": 13590
},
{
"epoch": 0.6933822779647191,
"grad_norm": 4.479185711499678,
"learning_rate": 1.134740294264558e-06,
"loss": 0.4921,
"step": 13600
},
{
"epoch": 0.6938921178749873,
"grad_norm": 3.959760678064776,
"learning_rate": 1.1312838562577976e-06,
"loss": 0.5016,
"step": 13610
},
{
"epoch": 0.6944019577852554,
"grad_norm": 4.6393714352249775,
"learning_rate": 1.1278311504406974e-06,
"loss": 0.5327,
"step": 13620
},
{
"epoch": 0.6949117976955236,
"grad_norm": 5.703034593328831,
"learning_rate": 1.1243821862280343e-06,
"loss": 0.4838,
"step": 13630
},
{
"epoch": 0.6954216376057918,
"grad_norm": 3.4654265252596836,
"learning_rate": 1.1209369730243762e-06,
"loss": 0.5999,
"step": 13640
},
{
"epoch": 0.69593147751606,
"grad_norm": 18.249146813923055,
"learning_rate": 1.117495520224069e-06,
"loss": 0.5813,
"step": 13650
},
{
"epoch": 0.6964413174263281,
"grad_norm": 5.215129682145256,
"learning_rate": 1.114057837211202e-06,
"loss": 0.5504,
"step": 13660
},
{
"epoch": 0.6969511573365963,
"grad_norm": 13.35921248564992,
"learning_rate": 1.1106239333595823e-06,
"loss": 0.5916,
"step": 13670
},
{
"epoch": 0.6974609972468645,
"grad_norm": 4.227440914273525,
"learning_rate": 1.1071938180327185e-06,
"loss": 0.5046,
"step": 13680
},
{
"epoch": 0.6979708371571327,
"grad_norm": 8.417052230158902,
"learning_rate": 1.1037675005837827e-06,
"loss": 0.663,
"step": 13690
},
{
"epoch": 0.6984806770674008,
"grad_norm": 6.91541081949382,
"learning_rate": 1.1003449903555944e-06,
"loss": 0.6073,
"step": 13700
},
{
"epoch": 0.698990516977669,
"grad_norm": 10.19117588259858,
"learning_rate": 1.0969262966805903e-06,
"loss": 0.5035,
"step": 13710
},
{
"epoch": 0.6995003568879372,
"grad_norm": 5.428290411616628,
"learning_rate": 1.0935114288808005e-06,
"loss": 0.5033,
"step": 13720
},
{
"epoch": 0.7000101967982053,
"grad_norm": 4.358636805651446,
"learning_rate": 1.090100396267823e-06,
"loss": 0.5032,
"step": 13730
},
{
"epoch": 0.7005200367084735,
"grad_norm": 5.185241676179502,
"learning_rate": 1.0866932081427984e-06,
"loss": 0.5662,
"step": 13740
},
{
"epoch": 0.7010298766187417,
"grad_norm": 8.50985381816569,
"learning_rate": 1.0832898737963832e-06,
"loss": 0.5239,
"step": 13750
},
{
"epoch": 0.7015397165290099,
"grad_norm": 6.338985130588811,
"learning_rate": 1.0798904025087262e-06,
"loss": 0.5575,
"step": 13760
},
{
"epoch": 0.702049556439278,
"grad_norm": 3.4872034476308893,
"learning_rate": 1.076494803549443e-06,
"loss": 0.5081,
"step": 13770
},
{
"epoch": 0.7025593963495462,
"grad_norm": 7.103598479204582,
"learning_rate": 1.0731030861775874e-06,
"loss": 0.5587,
"step": 13780
},
{
"epoch": 0.7030692362598144,
"grad_norm": 3.773045891764863,
"learning_rate": 1.0697152596416341e-06,
"loss": 0.5464,
"step": 13790
},
{
"epoch": 0.7035790761700826,
"grad_norm": 3.8734820664187937,
"learning_rate": 1.0663313331794428e-06,
"loss": 0.4675,
"step": 13800
},
{
"epoch": 0.7040889160803507,
"grad_norm": 7.508994775236017,
"learning_rate": 1.0629513160182422e-06,
"loss": 0.5603,
"step": 13810
},
{
"epoch": 0.7045987559906189,
"grad_norm": 5.441123508318171,
"learning_rate": 1.0595752173746e-06,
"loss": 0.5353,
"step": 13820
},
{
"epoch": 0.7051085959008871,
"grad_norm": 3.429978029709435,
"learning_rate": 1.0562030464543982e-06,
"loss": 0.5399,
"step": 13830
},
{
"epoch": 0.7056184358111552,
"grad_norm": 2.787620462489004,
"learning_rate": 1.0528348124528121e-06,
"loss": 0.5183,
"step": 13840
},
{
"epoch": 0.7061282757214234,
"grad_norm": 6.9756116661554906,
"learning_rate": 1.0494705245542766e-06,
"loss": 0.499,
"step": 13850
},
{
"epoch": 0.7066381156316917,
"grad_norm": 10.846115249347585,
"learning_rate": 1.04611019193247e-06,
"loss": 0.5046,
"step": 13860
},
{
"epoch": 0.7071479555419599,
"grad_norm": 3.7129839386872403,
"learning_rate": 1.0427538237502854e-06,
"loss": 0.5372,
"step": 13870
},
{
"epoch": 0.707657795452228,
"grad_norm": 5.373422938408549,
"learning_rate": 1.0394014291598041e-06,
"loss": 0.4437,
"step": 13880
},
{
"epoch": 0.7081676353624962,
"grad_norm": 10.493283934694439,
"learning_rate": 1.036053017302274e-06,
"loss": 0.5619,
"step": 13890
},
{
"epoch": 0.7086774752727644,
"grad_norm": 5.958469552599999,
"learning_rate": 1.0327085973080814e-06,
"loss": 0.5291,
"step": 13900
},
{
"epoch": 0.7091873151830326,
"grad_norm": 6.404184672896513,
"learning_rate": 1.0293681782967288e-06,
"loss": 0.4772,
"step": 13910
},
{
"epoch": 0.7096971550933007,
"grad_norm": 3.8169356599310023,
"learning_rate": 1.0260317693768083e-06,
"loss": 0.5386,
"step": 13920
},
{
"epoch": 0.7102069950035689,
"grad_norm": 3.40165352082515,
"learning_rate": 1.0226993796459784e-06,
"loss": 0.5371,
"step": 13930
},
{
"epoch": 0.7107168349138371,
"grad_norm": 5.006368131558215,
"learning_rate": 1.0193710181909344e-06,
"loss": 0.5415,
"step": 13940
},
{
"epoch": 0.7112266748241052,
"grad_norm": 4.9091721415200835,
"learning_rate": 1.0160466940873944e-06,
"loss": 0.5094,
"step": 13950
},
{
"epoch": 0.7117365147343734,
"grad_norm": 4.738065807683627,
"learning_rate": 1.0127264164000606e-06,
"loss": 0.5101,
"step": 13960
},
{
"epoch": 0.7122463546446416,
"grad_norm": 10.240913693281849,
"learning_rate": 1.0094101941826048e-06,
"loss": 0.4673,
"step": 13970
},
{
"epoch": 0.7127561945549098,
"grad_norm": 9.614962253075381,
"learning_rate": 1.0060980364776402e-06,
"loss": 0.5639,
"step": 13980
},
{
"epoch": 0.7132660344651779,
"grad_norm": 5.324972995964977,
"learning_rate": 1.0027899523166954e-06,
"loss": 0.4865,
"step": 13990
},
{
"epoch": 0.7137758743754461,
"grad_norm": 4.260870819503009,
"learning_rate": 9.994859507201959e-07,
"loss": 0.5586,
"step": 14000
},
{
"epoch": 0.7142857142857143,
"grad_norm": 4.6217941213399145,
"learning_rate": 9.961860406974286e-07,
"loss": 0.4976,
"step": 14010
},
{
"epoch": 0.7147955541959825,
"grad_norm": 4.407019724655899,
"learning_rate": 9.928902312465275e-07,
"loss": 0.53,
"step": 14020
},
{
"epoch": 0.7153053941062506,
"grad_norm": 4.386482002578632,
"learning_rate": 9.895985313544442e-07,
"loss": 0.526,
"step": 14030
},
{
"epoch": 0.7158152340165188,
"grad_norm": 3.3708178278878704,
"learning_rate": 9.863109499969254e-07,
"loss": 0.5599,
"step": 14040
},
{
"epoch": 0.716325073926787,
"grad_norm": 5.0206374331314105,
"learning_rate": 9.830274961384856e-07,
"loss": 0.5688,
"step": 14050
},
{
"epoch": 0.7168349138370551,
"grad_norm": 3.139775686128774,
"learning_rate": 9.797481787323862e-07,
"loss": 0.5012,
"step": 14060
},
{
"epoch": 0.7173447537473233,
"grad_norm": 6.653746772176197,
"learning_rate": 9.764730067206088e-07,
"loss": 0.5294,
"step": 14070
},
{
"epoch": 0.7178545936575915,
"grad_norm": 4.357412922721259,
"learning_rate": 9.732019890338309e-07,
"loss": 0.509,
"step": 14080
},
{
"epoch": 0.7183644335678597,
"grad_norm": 3.839673228603953,
"learning_rate": 9.699351345914041e-07,
"loss": 0.5104,
"step": 14090
},
{
"epoch": 0.7188742734781278,
"grad_norm": 23.668587448697135,
"learning_rate": 9.666724523013227e-07,
"loss": 0.5501,
"step": 14100
},
{
"epoch": 0.719384113388396,
"grad_norm": 4.4844222842045625,
"learning_rate": 9.634139510602122e-07,
"loss": 0.4946,
"step": 14110
},
{
"epoch": 0.7198939532986642,
"grad_norm": 6.745755804091752,
"learning_rate": 9.6015963975329e-07,
"loss": 0.4884,
"step": 14120
},
{
"epoch": 0.7204037932089324,
"grad_norm": 4.4366799233010425,
"learning_rate": 9.569095272543524e-07,
"loss": 0.5551,
"step": 14130
},
{
"epoch": 0.7209136331192005,
"grad_norm": 9.640533687940211,
"learning_rate": 9.536636224257456e-07,
"loss": 0.5628,
"step": 14140
},
{
"epoch": 0.7214234730294687,
"grad_norm": 3.5947524451830435,
"learning_rate": 9.504219341183418e-07,
"loss": 0.5519,
"step": 14150
},
{
"epoch": 0.721933312939737,
"grad_norm": 3.4886534478829443,
"learning_rate": 9.471844711715184e-07,
"loss": 0.5381,
"step": 14160
},
{
"epoch": 0.722443152850005,
"grad_norm": 8.292489041824693,
"learning_rate": 9.439512424131267e-07,
"loss": 0.5321,
"step": 14170
},
{
"epoch": 0.7229529927602732,
"grad_norm": 9.814451519043287,
"learning_rate": 9.407222566594751e-07,
"loss": 0.6348,
"step": 14180
},
{
"epoch": 0.7234628326705415,
"grad_norm": 4.0835422241062895,
"learning_rate": 9.374975227153021e-07,
"loss": 0.6112,
"step": 14190
},
{
"epoch": 0.7239726725808097,
"grad_norm": 5.546649107764507,
"learning_rate": 9.342770493737521e-07,
"loss": 0.4992,
"step": 14200
},
{
"epoch": 0.7244825124910778,
"grad_norm": 5.6247007802896505,
"learning_rate": 9.310608454163517e-07,
"loss": 0.5302,
"step": 14210
},
{
"epoch": 0.724992352401346,
"grad_norm": 11.65330539318631,
"learning_rate": 9.278489196129865e-07,
"loss": 0.5414,
"step": 14220
},
{
"epoch": 0.7255021923116142,
"grad_norm": 7.408609765365619,
"learning_rate": 9.246412807218735e-07,
"loss": 0.4965,
"step": 14230
},
{
"epoch": 0.7260120322218824,
"grad_norm": 3.906564074730722,
"learning_rate": 9.214379374895455e-07,
"loss": 0.5405,
"step": 14240
},
{
"epoch": 0.7265218721321505,
"grad_norm": 6.03172766111003,
"learning_rate": 9.182388986508186e-07,
"loss": 0.5727,
"step": 14250
},
{
"epoch": 0.7270317120424187,
"grad_norm": 3.657948056638405,
"learning_rate": 9.150441729287699e-07,
"loss": 0.5865,
"step": 14260
},
{
"epoch": 0.7275415519526869,
"grad_norm": 3.7963633704586206,
"learning_rate": 9.118537690347215e-07,
"loss": 0.5096,
"step": 14270
},
{
"epoch": 0.728051391862955,
"grad_norm": 8.153094763806996,
"learning_rate": 9.086676956682045e-07,
"loss": 0.5628,
"step": 14280
},
{
"epoch": 0.7285612317732232,
"grad_norm": 3.917059090015746,
"learning_rate": 9.054859615169453e-07,
"loss": 0.5071,
"step": 14290
},
{
"epoch": 0.7290710716834914,
"grad_norm": 5.323547381777923,
"learning_rate": 9.023085752568369e-07,
"loss": 0.5364,
"step": 14300
},
{
"epoch": 0.7295809115937596,
"grad_norm": 7.41810590733887,
"learning_rate": 8.991355455519168e-07,
"loss": 0.5536,
"step": 14310
},
{
"epoch": 0.7300907515040277,
"grad_norm": 4.669390408900202,
"learning_rate": 8.959668810543453e-07,
"loss": 0.5436,
"step": 14320
},
{
"epoch": 0.7306005914142959,
"grad_norm": 6.065054037117158,
"learning_rate": 8.928025904043749e-07,
"loss": 0.4896,
"step": 14330
},
{
"epoch": 0.7311104313245641,
"grad_norm": 23.928736776921905,
"learning_rate": 8.896426822303358e-07,
"loss": 0.5042,
"step": 14340
},
{
"epoch": 0.7316202712348323,
"grad_norm": 10.911901643265594,
"learning_rate": 8.864871651486065e-07,
"loss": 0.4854,
"step": 14350
},
{
"epoch": 0.7321301111451004,
"grad_norm": 4.334101354501816,
"learning_rate": 8.833360477635919e-07,
"loss": 0.5111,
"step": 14360
},
{
"epoch": 0.7326399510553686,
"grad_norm": 30.626249478236296,
"learning_rate": 8.801893386677002e-07,
"loss": 0.5369,
"step": 14370
},
{
"epoch": 0.7331497909656368,
"grad_norm": 5.523049148478515,
"learning_rate": 8.7704704644132e-07,
"loss": 0.5812,
"step": 14380
},
{
"epoch": 0.7336596308759049,
"grad_norm": 6.679171577346897,
"learning_rate": 8.739091796527927e-07,
"loss": 0.5561,
"step": 14390
},
{
"epoch": 0.7341694707861731,
"grad_norm": 4.483553121355593,
"learning_rate": 8.707757468583972e-07,
"loss": 0.5139,
"step": 14400
},
{
"epoch": 0.7346793106964413,
"grad_norm": 7.254614868274957,
"learning_rate": 8.6764675660232e-07,
"loss": 0.509,
"step": 14410
},
{
"epoch": 0.7351891506067095,
"grad_norm": 3.6453576393069453,
"learning_rate": 8.645222174166309e-07,
"loss": 0.5539,
"step": 14420
},
{
"epoch": 0.7356989905169776,
"grad_norm": 13.253454465235388,
"learning_rate": 8.61402137821268e-07,
"loss": 0.5494,
"step": 14430
},
{
"epoch": 0.7362088304272458,
"grad_norm": 10.8333846264643,
"learning_rate": 8.582865263240042e-07,
"loss": 0.5256,
"step": 14440
},
{
"epoch": 0.736718670337514,
"grad_norm": 7.385484695909464,
"learning_rate": 8.551753914204311e-07,
"loss": 0.526,
"step": 14450
},
{
"epoch": 0.7372285102477822,
"grad_norm": 3.0532216386596795,
"learning_rate": 8.520687415939339e-07,
"loss": 0.5204,
"step": 14460
},
{
"epoch": 0.7377383501580503,
"grad_norm": 5.239889698069807,
"learning_rate": 8.489665853156662e-07,
"loss": 0.5562,
"step": 14470
},
{
"epoch": 0.7382481900683185,
"grad_norm": 5.66957723534589,
"learning_rate": 8.458689310445323e-07,
"loss": 0.5134,
"step": 14480
},
{
"epoch": 0.7387580299785867,
"grad_norm": 6.887725374706838,
"learning_rate": 8.427757872271561e-07,
"loss": 0.5458,
"step": 14490
},
{
"epoch": 0.7392678698888548,
"grad_norm": 4.799912330825484,
"learning_rate": 8.39687162297865e-07,
"loss": 0.573,
"step": 14500
},
{
"epoch": 0.739777709799123,
"grad_norm": 7.202305943592422,
"learning_rate": 8.36603064678664e-07,
"loss": 0.5623,
"step": 14510
},
{
"epoch": 0.7402875497093913,
"grad_norm": 5.299091219196599,
"learning_rate": 8.33523502779213e-07,
"loss": 0.6033,
"step": 14520
},
{
"epoch": 0.7407973896196595,
"grad_norm": 5.238692159044794,
"learning_rate": 8.304484849968039e-07,
"loss": 0.5429,
"step": 14530
},
{
"epoch": 0.7413072295299276,
"grad_norm": 4.684407320094921,
"learning_rate": 8.273780197163386e-07,
"loss": 0.5166,
"step": 14540
},
{
"epoch": 0.7418170694401958,
"grad_norm": 5.753622178896207,
"learning_rate": 8.243121153103023e-07,
"loss": 0.5472,
"step": 14550
},
{
"epoch": 0.742326909350464,
"grad_norm": 7.087123397792935,
"learning_rate": 8.212507801387482e-07,
"loss": 0.6117,
"step": 14560
},
{
"epoch": 0.7428367492607322,
"grad_norm": 7.13203280832583,
"learning_rate": 8.181940225492682e-07,
"loss": 0.5548,
"step": 14570
},
{
"epoch": 0.7433465891710003,
"grad_norm": 7.616309137785203,
"learning_rate": 8.151418508769693e-07,
"loss": 0.4745,
"step": 14580
},
{
"epoch": 0.7438564290812685,
"grad_norm": 3.155014298375468,
"learning_rate": 8.120942734444595e-07,
"loss": 0.5218,
"step": 14590
},
{
"epoch": 0.7443662689915367,
"grad_norm": 5.6846824528358075,
"learning_rate": 8.09051298561814e-07,
"loss": 0.5922,
"step": 14600
},
{
"epoch": 0.7448761089018048,
"grad_norm": 3.3883550853534206,
"learning_rate": 8.060129345265605e-07,
"loss": 0.5469,
"step": 14610
},
{
"epoch": 0.745385948812073,
"grad_norm": 9.248881588874386,
"learning_rate": 8.029791896236533e-07,
"loss": 0.5155,
"step": 14620
},
{
"epoch": 0.7458957887223412,
"grad_norm": 3.8012797877693187,
"learning_rate": 7.999500721254519e-07,
"loss": 0.5222,
"step": 14630
},
{
"epoch": 0.7464056286326094,
"grad_norm": 2.701782409751808,
"learning_rate": 7.96925590291697e-07,
"loss": 0.507,
"step": 14640
},
{
"epoch": 0.7469154685428775,
"grad_norm": 9.491913707808619,
"learning_rate": 7.939057523694896e-07,
"loss": 0.5454,
"step": 14650
},
{
"epoch": 0.7474253084531457,
"grad_norm": 7.107255599519812,
"learning_rate": 7.908905665932671e-07,
"loss": 0.4842,
"step": 14660
},
{
"epoch": 0.7479351483634139,
"grad_norm": 9.728994427109939,
"learning_rate": 7.87880041184782e-07,
"loss": 0.564,
"step": 14670
},
{
"epoch": 0.7484449882736821,
"grad_norm": 6.956076542503038,
"learning_rate": 7.848741843530791e-07,
"loss": 0.5287,
"step": 14680
},
{
"epoch": 0.7489548281839502,
"grad_norm": 2.6395577512159725,
"learning_rate": 7.818730042944723e-07,
"loss": 0.5707,
"step": 14690
},
{
"epoch": 0.7494646680942184,
"grad_norm": 9.747969117043482,
"learning_rate": 7.788765091925246e-07,
"loss": 0.5039,
"step": 14700
},
{
"epoch": 0.7499745080044866,
"grad_norm": 5.484830220813169,
"learning_rate": 7.758847072180203e-07,
"loss": 0.5538,
"step": 14710
},
{
"epoch": 0.7504843479147547,
"grad_norm": 15.07969923639329,
"learning_rate": 7.72897606528952e-07,
"loss": 0.5445,
"step": 14720
},
{
"epoch": 0.7509941878250229,
"grad_norm": 3.6621041989126857,
"learning_rate": 7.699152152704898e-07,
"loss": 0.5321,
"step": 14730
},
{
"epoch": 0.7515040277352911,
"grad_norm": 4.632508039691828,
"learning_rate": 7.669375415749603e-07,
"loss": 0.4959,
"step": 14740
},
{
"epoch": 0.7520138676455593,
"grad_norm": 7.217687618025717,
"learning_rate": 7.63964593561832e-07,
"loss": 0.5396,
"step": 14750
},
{
"epoch": 0.7525237075558274,
"grad_norm": 7.382874561141493,
"learning_rate": 7.609963793376815e-07,
"loss": 0.5244,
"step": 14760
},
{
"epoch": 0.7530335474660956,
"grad_norm": 6.3650612267419415,
"learning_rate": 7.580329069961809e-07,
"loss": 0.5529,
"step": 14770
},
{
"epoch": 0.7535433873763638,
"grad_norm": 4.766970477281975,
"learning_rate": 7.550741846180712e-07,
"loss": 0.5108,
"step": 14780
},
{
"epoch": 0.754053227286632,
"grad_norm": 2.930138193299308,
"learning_rate": 7.521202202711414e-07,
"loss": 0.6012,
"step": 14790
},
{
"epoch": 0.7545630671969001,
"grad_norm": 5.8796338014810985,
"learning_rate": 7.491710220102066e-07,
"loss": 0.5399,
"step": 14800
},
{
"epoch": 0.7550729071071683,
"grad_norm": 5.320155271550149,
"learning_rate": 7.462265978770858e-07,
"loss": 0.5043,
"step": 14810
},
{
"epoch": 0.7555827470174366,
"grad_norm": 5.053829893937168,
"learning_rate": 7.432869559005792e-07,
"loss": 0.5423,
"step": 14820
},
{
"epoch": 0.7560925869277046,
"grad_norm": 3.289593905278923,
"learning_rate": 7.403521040964484e-07,
"loss": 0.5576,
"step": 14830
},
{
"epoch": 0.7566024268379729,
"grad_norm": 8.24442927994742,
"learning_rate": 7.374220504673923e-07,
"loss": 0.5819,
"step": 14840
},
{
"epoch": 0.7571122667482411,
"grad_norm": 5.041764055896299,
"learning_rate": 7.344968030030264e-07,
"loss": 0.5658,
"step": 14850
},
{
"epoch": 0.7576221066585093,
"grad_norm": 4.260901484163499,
"learning_rate": 7.315763696798616e-07,
"loss": 0.5519,
"step": 14860
},
{
"epoch": 0.7581319465687774,
"grad_norm": 4.558924939736004,
"learning_rate": 7.286607584612793e-07,
"loss": 0.4863,
"step": 14870
},
{
"epoch": 0.7586417864790456,
"grad_norm": 9.764656402188647,
"learning_rate": 7.257499772975163e-07,
"loss": 0.5138,
"step": 14880
},
{
"epoch": 0.7591516263893138,
"grad_norm": 3.094465911077131,
"learning_rate": 7.228440341256346e-07,
"loss": 0.5065,
"step": 14890
},
{
"epoch": 0.759661466299582,
"grad_norm": 7.400158231997503,
"learning_rate": 7.199429368695051e-07,
"loss": 0.5073,
"step": 14900
},
{
"epoch": 0.7601713062098501,
"grad_norm": 4.365522790457924,
"learning_rate": 7.170466934397891e-07,
"loss": 0.4945,
"step": 14910
},
{
"epoch": 0.7606811461201183,
"grad_norm": 5.247066226195045,
"learning_rate": 7.14155311733906e-07,
"loss": 0.5191,
"step": 14920
},
{
"epoch": 0.7611909860303865,
"grad_norm": 4.160452666994774,
"learning_rate": 7.112687996360224e-07,
"loss": 0.5657,
"step": 14930
},
{
"epoch": 0.7617008259406546,
"grad_norm": 11.931233780230366,
"learning_rate": 7.08387165017026e-07,
"loss": 0.5901,
"step": 14940
},
{
"epoch": 0.7622106658509228,
"grad_norm": 4.554328966808751,
"learning_rate": 7.055104157345041e-07,
"loss": 0.5187,
"step": 14950
},
{
"epoch": 0.762720505761191,
"grad_norm": 5.395007320989106,
"learning_rate": 7.026385596327232e-07,
"loss": 0.5444,
"step": 14960
},
{
"epoch": 0.7632303456714592,
"grad_norm": 8.916613524388753,
"learning_rate": 6.99771604542607e-07,
"loss": 0.5292,
"step": 14970
},
{
"epoch": 0.7637401855817273,
"grad_norm": 6.870209571443328,
"learning_rate": 6.969095582817148e-07,
"loss": 0.559,
"step": 14980
},
{
"epoch": 0.7642500254919955,
"grad_norm": 6.062564072607088,
"learning_rate": 6.940524286542213e-07,
"loss": 0.5129,
"step": 14990
},
{
"epoch": 0.7647598654022637,
"grad_norm": 5.933962740007179,
"learning_rate": 6.912002234508947e-07,
"loss": 0.541,
"step": 15000
},
{
"epoch": 0.7652697053125319,
"grad_norm": 8.33148869061257,
"learning_rate": 6.88352950449074e-07,
"loss": 0.6014,
"step": 15010
},
{
"epoch": 0.7657795452228,
"grad_norm": 5.404610542025662,
"learning_rate": 6.855106174126516e-07,
"loss": 0.4735,
"step": 15020
},
{
"epoch": 0.7662893851330682,
"grad_norm": 3.0650833159751367,
"learning_rate": 6.826732320920456e-07,
"loss": 0.5844,
"step": 15030
},
{
"epoch": 0.7667992250433364,
"grad_norm": 3.958418343261916,
"learning_rate": 6.79840802224189e-07,
"loss": 0.5469,
"step": 15040
},
{
"epoch": 0.7673090649536045,
"grad_norm": 10.223439475038505,
"learning_rate": 6.770133355324957e-07,
"loss": 0.6197,
"step": 15050
},
{
"epoch": 0.7678189048638727,
"grad_norm": 4.389918821277152,
"learning_rate": 6.741908397268496e-07,
"loss": 0.5135,
"step": 15060
},
{
"epoch": 0.7683287447741409,
"grad_norm": 6.503301224974861,
"learning_rate": 6.713733225035818e-07,
"loss": 0.5351,
"step": 15070
},
{
"epoch": 0.7688385846844091,
"grad_norm": 5.21955397202132,
"learning_rate": 6.685607915454437e-07,
"loss": 0.4678,
"step": 15080
},
{
"epoch": 0.7693484245946772,
"grad_norm": 3.1999458165792323,
"learning_rate": 6.657532545215928e-07,
"loss": 0.4628,
"step": 15090
},
{
"epoch": 0.7698582645049454,
"grad_norm": 12.706069444402658,
"learning_rate": 6.629507190875686e-07,
"loss": 0.5201,
"step": 15100
},
{
"epoch": 0.7703681044152136,
"grad_norm": 14.230718310388408,
"learning_rate": 6.601531928852728e-07,
"loss": 0.5617,
"step": 15110
},
{
"epoch": 0.7708779443254818,
"grad_norm": 28.891417769100123,
"learning_rate": 6.573606835429472e-07,
"loss": 0.537,
"step": 15120
},
{
"epoch": 0.7713877842357499,
"grad_norm": 8.740323329961853,
"learning_rate": 6.545731986751546e-07,
"loss": 0.5627,
"step": 15130
},
{
"epoch": 0.7718976241460181,
"grad_norm": 8.631963455615224,
"learning_rate": 6.517907458827568e-07,
"loss": 0.5337,
"step": 15140
},
{
"epoch": 0.7724074640562864,
"grad_norm": 5.85027033587785,
"learning_rate": 6.490133327528942e-07,
"loss": 0.5373,
"step": 15150
},
{
"epoch": 0.7729173039665544,
"grad_norm": 3.871926078528546,
"learning_rate": 6.46240966858965e-07,
"loss": 0.563,
"step": 15160
},
{
"epoch": 0.7734271438768227,
"grad_norm": 3.9283942132818823,
"learning_rate": 6.434736557606047e-07,
"loss": 0.5512,
"step": 15170
},
{
"epoch": 0.7739369837870909,
"grad_norm": 4.4761739895502215,
"learning_rate": 6.407114070036665e-07,
"loss": 0.5255,
"step": 15180
},
{
"epoch": 0.7744468236973591,
"grad_norm": 4.409233102247651,
"learning_rate": 6.379542281201967e-07,
"loss": 0.4909,
"step": 15190
},
{
"epoch": 0.7749566636076272,
"grad_norm": 4.090706265843955,
"learning_rate": 6.35202126628422e-07,
"loss": 0.5158,
"step": 15200
},
{
"epoch": 0.7754665035178954,
"grad_norm": 5.188126774120149,
"learning_rate": 6.324551100327195e-07,
"loss": 0.5194,
"step": 15210
},
{
"epoch": 0.7759763434281636,
"grad_norm": 6.597898676471625,
"learning_rate": 6.297131858236025e-07,
"loss": 0.577,
"step": 15220
},
{
"epoch": 0.7764861833384318,
"grad_norm": 4.713145880980132,
"learning_rate": 6.269763614777011e-07,
"loss": 0.5226,
"step": 15230
},
{
"epoch": 0.7769960232486999,
"grad_norm": 5.773255440619456,
"learning_rate": 6.24244644457735e-07,
"loss": 0.5218,
"step": 15240
},
{
"epoch": 0.7775058631589681,
"grad_norm": 13.235778740405175,
"learning_rate": 6.215180422124997e-07,
"loss": 0.5361,
"step": 15250
},
{
"epoch": 0.7780157030692363,
"grad_norm": 15.720743466927107,
"learning_rate": 6.187965621768436e-07,
"loss": 0.5696,
"step": 15260
},
{
"epoch": 0.7785255429795044,
"grad_norm": 4.732857606537919,
"learning_rate": 6.160802117716471e-07,
"loss": 0.5383,
"step": 15270
},
{
"epoch": 0.7790353828897726,
"grad_norm": 5.176324768044832,
"learning_rate": 6.133689984038047e-07,
"loss": 0.5001,
"step": 15280
},
{
"epoch": 0.7795452228000408,
"grad_norm": 8.993520814341444,
"learning_rate": 6.106629294662025e-07,
"loss": 0.5806,
"step": 15290
},
{
"epoch": 0.780055062710309,
"grad_norm": 10.628295405434407,
"learning_rate": 6.079620123376972e-07,
"loss": 0.5014,
"step": 15300
},
{
"epoch": 0.7805649026205771,
"grad_norm": 6.325191155492919,
"learning_rate": 6.052662543831012e-07,
"loss": 0.4634,
"step": 15310
},
{
"epoch": 0.7810747425308453,
"grad_norm": 4.813892470382942,
"learning_rate": 6.02575662953156e-07,
"loss": 0.4811,
"step": 15320
},
{
"epoch": 0.7815845824411135,
"grad_norm": 5.029153575317388,
"learning_rate": 5.998902453845165e-07,
"loss": 0.5822,
"step": 15330
},
{
"epoch": 0.7820944223513817,
"grad_norm": 6.335856899692538,
"learning_rate": 5.972100089997299e-07,
"loss": 0.5127,
"step": 15340
},
{
"epoch": 0.7826042622616498,
"grad_norm": 4.196124291218907,
"learning_rate": 5.945349611072126e-07,
"loss": 0.5053,
"step": 15350
},
{
"epoch": 0.783114102171918,
"grad_norm": 5.7765556982052475,
"learning_rate": 5.918651090012384e-07,
"loss": 0.5246,
"step": 15360
},
{
"epoch": 0.7836239420821862,
"grad_norm": 9.099399618655017,
"learning_rate": 5.892004599619077e-07,
"loss": 0.5451,
"step": 15370
},
{
"epoch": 0.7841337819924543,
"grad_norm": 3.056540668163783,
"learning_rate": 5.865410212551361e-07,
"loss": 0.4797,
"step": 15380
},
{
"epoch": 0.7846436219027225,
"grad_norm": 5.1252232091206436,
"learning_rate": 5.838868001326336e-07,
"loss": 0.497,
"step": 15390
},
{
"epoch": 0.7851534618129907,
"grad_norm": 6.841451451521136,
"learning_rate": 5.812378038318788e-07,
"loss": 0.5197,
"step": 15400
},
{
"epoch": 0.7856633017232589,
"grad_norm": 7.531317670006524,
"learning_rate": 5.785940395761061e-07,
"loss": 0.5756,
"step": 15410
},
{
"epoch": 0.786173141633527,
"grad_norm": 5.370658816046862,
"learning_rate": 5.759555145742824e-07,
"loss": 0.545,
"step": 15420
},
{
"epoch": 0.7866829815437952,
"grad_norm": 3.7757242323468674,
"learning_rate": 5.733222360210885e-07,
"loss": 0.5811,
"step": 15430
},
{
"epoch": 0.7871928214540634,
"grad_norm": 10.023306932078548,
"learning_rate": 5.706942110968994e-07,
"loss": 0.5363,
"step": 15440
},
{
"epoch": 0.7877026613643316,
"grad_norm": 5.933544014596687,
"learning_rate": 5.680714469677651e-07,
"loss": 0.6137,
"step": 15450
},
{
"epoch": 0.7882125012745997,
"grad_norm": 8.465674808714084,
"learning_rate": 5.654539507853879e-07,
"loss": 0.5788,
"step": 15460
},
{
"epoch": 0.788722341184868,
"grad_norm": 4.58886139470769,
"learning_rate": 5.628417296871097e-07,
"loss": 0.5571,
"step": 15470
},
{
"epoch": 0.7892321810951362,
"grad_norm": 4.326657679689051,
"learning_rate": 5.602347907958855e-07,
"loss": 0.5479,
"step": 15480
},
{
"epoch": 0.7897420210054042,
"grad_norm": 4.677559047322052,
"learning_rate": 5.576331412202676e-07,
"loss": 0.6593,
"step": 15490
},
{
"epoch": 0.7902518609156725,
"grad_norm": 6.077257747033794,
"learning_rate": 5.550367880543866e-07,
"loss": 0.499,
"step": 15500
},
{
"epoch": 0.7907617008259407,
"grad_norm": 10.484898441781258,
"learning_rate": 5.524457383779271e-07,
"loss": 0.5202,
"step": 15510
},
{
"epoch": 0.7912715407362089,
"grad_norm": 3.742747302001426,
"learning_rate": 5.498599992561188e-07,
"loss": 0.5461,
"step": 15520
},
{
"epoch": 0.791781380646477,
"grad_norm": 4.519035549453744,
"learning_rate": 5.472795777397041e-07,
"loss": 0.5412,
"step": 15530
},
{
"epoch": 0.7922912205567452,
"grad_norm": 4.845913103524951,
"learning_rate": 5.447044808649285e-07,
"loss": 0.5359,
"step": 15540
},
{
"epoch": 0.7928010604670134,
"grad_norm": 4.093748348260354,
"learning_rate": 5.421347156535203e-07,
"loss": 0.4973,
"step": 15550
},
{
"epoch": 0.7933109003772816,
"grad_norm": 9.055252341832157,
"learning_rate": 5.39570289112665e-07,
"loss": 0.5477,
"step": 15560
},
{
"epoch": 0.7938207402875497,
"grad_norm": 5.226423238317914,
"learning_rate": 5.370112082349943e-07,
"loss": 0.5318,
"step": 15570
},
{
"epoch": 0.7943305801978179,
"grad_norm": 4.125234301364343,
"learning_rate": 5.344574799985619e-07,
"loss": 0.4249,
"step": 15580
},
{
"epoch": 0.7948404201080861,
"grad_norm": 3.593852692871862,
"learning_rate": 5.319091113668262e-07,
"loss": 0.5162,
"step": 15590
},
{
"epoch": 0.7953502600183542,
"grad_norm": 6.072628638810297,
"learning_rate": 5.293661092886315e-07,
"loss": 0.6117,
"step": 15600
},
{
"epoch": 0.7958600999286224,
"grad_norm": 6.386766033860978,
"learning_rate": 5.268284806981891e-07,
"loss": 0.5775,
"step": 15610
},
{
"epoch": 0.7963699398388906,
"grad_norm": 8.435530592292467,
"learning_rate": 5.242962325150552e-07,
"loss": 0.5503,
"step": 15620
},
{
"epoch": 0.7968797797491588,
"grad_norm": 5.414258568730564,
"learning_rate": 5.217693716441191e-07,
"loss": 0.6187,
"step": 15630
},
{
"epoch": 0.7973896196594269,
"grad_norm": 2.8227447529462806,
"learning_rate": 5.192479049755778e-07,
"loss": 0.4826,
"step": 15640
},
{
"epoch": 0.7978994595696951,
"grad_norm": 23.70181001058814,
"learning_rate": 5.167318393849178e-07,
"loss": 0.5054,
"step": 15650
},
{
"epoch": 0.7984092994799633,
"grad_norm": 15.271685415282231,
"learning_rate": 5.142211817329021e-07,
"loss": 0.6935,
"step": 15660
},
{
"epoch": 0.7989191393902315,
"grad_norm": 5.512943854490426,
"learning_rate": 5.117159388655426e-07,
"loss": 0.5309,
"step": 15670
},
{
"epoch": 0.7994289793004996,
"grad_norm": 4.434586867424822,
"learning_rate": 5.09216117614092e-07,
"loss": 0.5076,
"step": 15680
},
{
"epoch": 0.7999388192107678,
"grad_norm": 4.752780785342232,
"learning_rate": 5.067217247950138e-07,
"loss": 0.4835,
"step": 15690
},
{
"epoch": 0.800448659121036,
"grad_norm": 8.105618928522265,
"learning_rate": 5.042327672099725e-07,
"loss": 0.4811,
"step": 15700
},
{
"epoch": 0.8009584990313041,
"grad_norm": 5.46194334511076,
"learning_rate": 5.017492516458116e-07,
"loss": 0.4926,
"step": 15710
},
{
"epoch": 0.8014683389415723,
"grad_norm": 5.066361499693194,
"learning_rate": 4.992711848745349e-07,
"loss": 0.4697,
"step": 15720
},
{
"epoch": 0.8019781788518405,
"grad_norm": 5.944837272509392,
"learning_rate": 4.967985736532882e-07,
"loss": 0.5184,
"step": 15730
},
{
"epoch": 0.8024880187621087,
"grad_norm": 4.696244034550994,
"learning_rate": 4.943314247243425e-07,
"loss": 0.5405,
"step": 15740
},
{
"epoch": 0.8029978586723768,
"grad_norm": 10.887633687345506,
"learning_rate": 4.918697448150727e-07,
"loss": 0.5767,
"step": 15750
},
{
"epoch": 0.803507698582645,
"grad_norm": 3.4663557433127057,
"learning_rate": 4.894135406379421e-07,
"loss": 0.5356,
"step": 15760
},
{
"epoch": 0.8040175384929132,
"grad_norm": 7.259699123006181,
"learning_rate": 4.869628188904832e-07,
"loss": 0.5481,
"step": 15770
},
{
"epoch": 0.8045273784031814,
"grad_norm": 7.921449405349131,
"learning_rate": 4.845175862552759e-07,
"loss": 0.5833,
"step": 15780
},
{
"epoch": 0.8050372183134495,
"grad_norm": 14.699382495241307,
"learning_rate": 4.820778493999375e-07,
"loss": 0.5896,
"step": 15790
},
{
"epoch": 0.8055470582237177,
"grad_norm": 6.022198769713844,
"learning_rate": 4.796436149770969e-07,
"loss": 0.545,
"step": 15800
},
{
"epoch": 0.806056898133986,
"grad_norm": 5.577713449884468,
"learning_rate": 4.77214889624377e-07,
"loss": 0.5459,
"step": 15810
},
{
"epoch": 0.806566738044254,
"grad_norm": 3.2210760946186405,
"learning_rate": 4.7479167996438315e-07,
"loss": 0.5459,
"step": 15820
},
{
"epoch": 0.8070765779545223,
"grad_norm": 4.97876124934091,
"learning_rate": 4.723739926046761e-07,
"loss": 0.4913,
"step": 15830
},
{
"epoch": 0.8075864178647905,
"grad_norm": 4.510696187069355,
"learning_rate": 4.699618341377632e-07,
"loss": 0.4973,
"step": 15840
},
{
"epoch": 0.8080962577750587,
"grad_norm": 3.9365502019662246,
"learning_rate": 4.675552111410711e-07,
"loss": 0.5016,
"step": 15850
},
{
"epoch": 0.8086060976853268,
"grad_norm": 5.0078715399382405,
"learning_rate": 4.6515413017693524e-07,
"loss": 0.5389,
"step": 15860
},
{
"epoch": 0.809115937595595,
"grad_norm": 8.239920743289655,
"learning_rate": 4.627585977925783e-07,
"loss": 0.5725,
"step": 15870
},
{
"epoch": 0.8096257775058632,
"grad_norm": 9.066999058957496,
"learning_rate": 4.603686205200936e-07,
"loss": 0.5648,
"step": 15880
},
{
"epoch": 0.8101356174161314,
"grad_norm": 4.986664430186731,
"learning_rate": 4.579842048764263e-07,
"loss": 0.5684,
"step": 15890
},
{
"epoch": 0.8106454573263995,
"grad_norm": 4.112469874492605,
"learning_rate": 4.5560535736335673e-07,
"loss": 0.5262,
"step": 15900
},
{
"epoch": 0.8111552972366677,
"grad_norm": 8.757247383724744,
"learning_rate": 4.5323208446748175e-07,
"loss": 0.4913,
"step": 15910
},
{
"epoch": 0.8116651371469359,
"grad_norm": 7.467282043021592,
"learning_rate": 4.5086439266019797e-07,
"loss": 0.5646,
"step": 15920
},
{
"epoch": 0.812174977057204,
"grad_norm": 5.5454779199251885,
"learning_rate": 4.485022883976836e-07,
"loss": 0.5602,
"step": 15930
},
{
"epoch": 0.8126848169674722,
"grad_norm": 7.27480785400461,
"learning_rate": 4.4614577812087863e-07,
"loss": 0.5046,
"step": 15940
},
{
"epoch": 0.8131946568777404,
"grad_norm": 9.32556780978424,
"learning_rate": 4.4379486825547325e-07,
"loss": 0.5495,
"step": 15950
},
{
"epoch": 0.8137044967880086,
"grad_norm": 5.382248420000106,
"learning_rate": 4.4144956521188496e-07,
"loss": 0.5218,
"step": 15960
},
{
"epoch": 0.8142143366982767,
"grad_norm": 6.454028203276016,
"learning_rate": 4.391098753852399e-07,
"loss": 0.5273,
"step": 15970
},
{
"epoch": 0.8147241766085449,
"grad_norm": 4.063079790730802,
"learning_rate": 4.3677580515536363e-07,
"loss": 0.5034,
"step": 15980
},
{
"epoch": 0.8152340165188131,
"grad_norm": 5.187670333044299,
"learning_rate": 4.344473608867528e-07,
"loss": 0.4923,
"step": 15990
},
{
"epoch": 0.8157438564290813,
"grad_norm": 14.989153395386417,
"learning_rate": 4.321245489285683e-07,
"loss": 0.5377,
"step": 16000
},
{
"epoch": 0.8162536963393494,
"grad_norm": 5.987137910554856,
"learning_rate": 4.2980737561460845e-07,
"loss": 0.4938,
"step": 16010
},
{
"epoch": 0.8167635362496176,
"grad_norm": 14.405575944838505,
"learning_rate": 4.2749584726329866e-07,
"loss": 0.5596,
"step": 16020
},
{
"epoch": 0.8172733761598858,
"grad_norm": 4.051551711111712,
"learning_rate": 4.251899701776721e-07,
"loss": 0.5379,
"step": 16030
},
{
"epoch": 0.8177832160701539,
"grad_norm": 4.9871145490802125,
"learning_rate": 4.2288975064535053e-07,
"loss": 0.5672,
"step": 16040
},
{
"epoch": 0.8182930559804221,
"grad_norm": 4.879306760664567,
"learning_rate": 4.205951949385303e-07,
"loss": 0.5156,
"step": 16050
},
{
"epoch": 0.8188028958906903,
"grad_norm": 6.3657642605460625,
"learning_rate": 4.1830630931396303e-07,
"loss": 0.5306,
"step": 16060
},
{
"epoch": 0.8193127358009585,
"grad_norm": 3.863545196627187,
"learning_rate": 4.160231000129392e-07,
"loss": 0.539,
"step": 16070
},
{
"epoch": 0.8198225757112266,
"grad_norm": 6.145189297892257,
"learning_rate": 4.1374557326127133e-07,
"loss": 0.6047,
"step": 16080
},
{
"epoch": 0.8203324156214948,
"grad_norm": 8.554033895830825,
"learning_rate": 4.114737352692774e-07,
"loss": 0.4907,
"step": 16090
},
{
"epoch": 0.820842255531763,
"grad_norm": 12.936995455861432,
"learning_rate": 4.092075922317615e-07,
"loss": 0.5162,
"step": 16100
},
{
"epoch": 0.8213520954420312,
"grad_norm": 4.73513719170877,
"learning_rate": 4.0694715032800256e-07,
"loss": 0.5229,
"step": 16110
},
{
"epoch": 0.8218619353522993,
"grad_norm": 4.045630560351024,
"learning_rate": 4.046924157217294e-07,
"loss": 0.5055,
"step": 16120
},
{
"epoch": 0.8223717752625676,
"grad_norm": 10.623104316424055,
"learning_rate": 4.0244339456111e-07,
"loss": 0.4885,
"step": 16130
},
{
"epoch": 0.8228816151728358,
"grad_norm": 6.014456587568287,
"learning_rate": 4.0020009297873584e-07,
"loss": 0.5823,
"step": 16140
},
{
"epoch": 0.8233914550831039,
"grad_norm": 7.5181137437104395,
"learning_rate": 3.979625170915971e-07,
"loss": 0.4618,
"step": 16150
},
{
"epoch": 0.8239012949933721,
"grad_norm": 5.106004631421925,
"learning_rate": 3.957306730010763e-07,
"loss": 0.5141,
"step": 16160
},
{
"epoch": 0.8244111349036403,
"grad_norm": 2.900163368538095,
"learning_rate": 3.935045667929227e-07,
"loss": 0.5507,
"step": 16170
},
{
"epoch": 0.8249209748139085,
"grad_norm": 5.751498206078458,
"learning_rate": 3.9128420453724144e-07,
"loss": 0.527,
"step": 16180
},
{
"epoch": 0.8254308147241766,
"grad_norm": 3.3746818544786654,
"learning_rate": 3.8906959228847547e-07,
"loss": 0.5234,
"step": 16190
},
{
"epoch": 0.8259406546344448,
"grad_norm": 6.900117163253321,
"learning_rate": 3.868607360853877e-07,
"loss": 0.5441,
"step": 16200
},
{
"epoch": 0.826450494544713,
"grad_norm": 5.80204355915017,
"learning_rate": 3.846576419510462e-07,
"loss": 0.5288,
"step": 16210
},
{
"epoch": 0.8269603344549812,
"grad_norm": 4.7810298840125345,
"learning_rate": 3.8246031589280695e-07,
"loss": 0.5027,
"step": 16220
},
{
"epoch": 0.8274701743652493,
"grad_norm": 5.258018591863968,
"learning_rate": 3.802687639022981e-07,
"loss": 0.5793,
"step": 16230
},
{
"epoch": 0.8279800142755175,
"grad_norm": 4.449355989948682,
"learning_rate": 3.7808299195540214e-07,
"loss": 0.4824,
"step": 16240
},
{
"epoch": 0.8284898541857857,
"grad_norm": 7.034355785850802,
"learning_rate": 3.7590300601224203e-07,
"loss": 0.5506,
"step": 16250
},
{
"epoch": 0.8289996940960538,
"grad_norm": 5.875077627792097,
"learning_rate": 3.737288120171612e-07,
"loss": 0.5326,
"step": 16260
},
{
"epoch": 0.829509534006322,
"grad_norm": 4.506545413449493,
"learning_rate": 3.715604158987135e-07,
"loss": 0.5059,
"step": 16270
},
{
"epoch": 0.8300193739165902,
"grad_norm": 5.883332166476767,
"learning_rate": 3.693978235696391e-07,
"loss": 0.5652,
"step": 16280
},
{
"epoch": 0.8305292138268584,
"grad_norm": 4.370669756889069,
"learning_rate": 3.6724104092685507e-07,
"loss": 0.534,
"step": 16290
},
{
"epoch": 0.8310390537371265,
"grad_norm": 9.033741774140763,
"learning_rate": 3.650900738514371e-07,
"loss": 0.5316,
"step": 16300
},
{
"epoch": 0.8315488936473947,
"grad_norm": 6.4939863578699075,
"learning_rate": 3.629449282086003e-07,
"loss": 0.5008,
"step": 16310
},
{
"epoch": 0.8320587335576629,
"grad_norm": 4.516805585409594,
"learning_rate": 3.6080560984769005e-07,
"loss": 0.511,
"step": 16320
},
{
"epoch": 0.8325685734679311,
"grad_norm": 5.753588314258997,
"learning_rate": 3.5867212460215794e-07,
"loss": 0.5563,
"step": 16330
},
{
"epoch": 0.8330784133781992,
"grad_norm": 6.705773362344025,
"learning_rate": 3.565444782895522e-07,
"loss": 0.4542,
"step": 16340
},
{
"epoch": 0.8335882532884674,
"grad_norm": 5.268893546565757,
"learning_rate": 3.5442267671149946e-07,
"loss": 0.4817,
"step": 16350
},
{
"epoch": 0.8340980931987356,
"grad_norm": 6.178390458965791,
"learning_rate": 3.523067256536883e-07,
"loss": 0.5895,
"step": 16360
},
{
"epoch": 0.8346079331090037,
"grad_norm": 3.5081842493945175,
"learning_rate": 3.501966308858551e-07,
"loss": 0.4927,
"step": 16370
},
{
"epoch": 0.8351177730192719,
"grad_norm": 5.7331348035161485,
"learning_rate": 3.4809239816176646e-07,
"loss": 0.4501,
"step": 16380
},
{
"epoch": 0.8356276129295401,
"grad_norm": 37.09804608028036,
"learning_rate": 3.459940332192052e-07,
"loss": 0.579,
"step": 16390
},
{
"epoch": 0.8361374528398083,
"grad_norm": 6.024637461058086,
"learning_rate": 3.439015417799538e-07,
"loss": 0.5211,
"step": 16400
},
{
"epoch": 0.8366472927500764,
"grad_norm": 7.131386131971093,
"learning_rate": 3.418149295497791e-07,
"loss": 0.4902,
"step": 16410
},
{
"epoch": 0.8371571326603446,
"grad_norm": 4.975390581904939,
"learning_rate": 3.3973420221841526e-07,
"loss": 0.5848,
"step": 16420
},
{
"epoch": 0.8376669725706128,
"grad_norm": 3.2168326404356193,
"learning_rate": 3.3765936545955253e-07,
"loss": 0.4928,
"step": 16430
},
{
"epoch": 0.838176812480881,
"grad_norm": 8.173840087490719,
"learning_rate": 3.3559042493081563e-07,
"loss": 0.5722,
"step": 16440
},
{
"epoch": 0.8386866523911491,
"grad_norm": 10.478297726213858,
"learning_rate": 3.335273862737529e-07,
"loss": 0.5577,
"step": 16450
},
{
"epoch": 0.8391964923014174,
"grad_norm": 4.182535638613699,
"learning_rate": 3.314702551138216e-07,
"loss": 0.5497,
"step": 16460
},
{
"epoch": 0.8397063322116856,
"grad_norm": 6.060638066365898,
"learning_rate": 3.2941903706036613e-07,
"loss": 0.5193,
"step": 16470
},
{
"epoch": 0.8402161721219537,
"grad_norm": 7.350914854500174,
"learning_rate": 3.273737377066122e-07,
"loss": 0.5696,
"step": 16480
},
{
"epoch": 0.8407260120322219,
"grad_norm": 4.815524558034425,
"learning_rate": 3.2533436262964206e-07,
"loss": 0.4722,
"step": 16490
},
{
"epoch": 0.8412358519424901,
"grad_norm": 11.349707013776193,
"learning_rate": 3.2330091739038614e-07,
"loss": 0.55,
"step": 16500
},
{
"epoch": 0.8417456918527583,
"grad_norm": 3.3495609606329158,
"learning_rate": 3.212734075336049e-07,
"loss": 0.4602,
"step": 16510
},
{
"epoch": 0.8422555317630264,
"grad_norm": 3.155634761619086,
"learning_rate": 3.1925183858787485e-07,
"loss": 0.4686,
"step": 16520
},
{
"epoch": 0.8427653716732946,
"grad_norm": 5.654370814489343,
"learning_rate": 3.172362160655723e-07,
"loss": 0.5654,
"step": 16530
},
{
"epoch": 0.8432752115835628,
"grad_norm": 7.2459688577760675,
"learning_rate": 3.152265454628589e-07,
"loss": 0.4983,
"step": 16540
},
{
"epoch": 0.843785051493831,
"grad_norm": 12.838026243388592,
"learning_rate": 3.1322283225966727e-07,
"loss": 0.5211,
"step": 16550
},
{
"epoch": 0.8442948914040991,
"grad_norm": 6.724264790058811,
"learning_rate": 3.112250819196852e-07,
"loss": 0.5112,
"step": 16560
},
{
"epoch": 0.8448047313143673,
"grad_norm": 6.156426497675465,
"learning_rate": 3.092332998903416e-07,
"loss": 0.6122,
"step": 16570
},
{
"epoch": 0.8453145712246355,
"grad_norm": 3.946549895253614,
"learning_rate": 3.072474916027887e-07,
"loss": 0.5416,
"step": 16580
},
{
"epoch": 0.8458244111349036,
"grad_norm": 4.932372360532843,
"learning_rate": 3.0526766247189394e-07,
"loss": 0.5095,
"step": 16590
},
{
"epoch": 0.8463342510451718,
"grad_norm": 8.468156384447775,
"learning_rate": 3.032938178962169e-07,
"loss": 0.5267,
"step": 16600
},
{
"epoch": 0.84684409095544,
"grad_norm": 5.636385976253131,
"learning_rate": 3.0132596325800013e-07,
"loss": 0.6106,
"step": 16610
},
{
"epoch": 0.8473539308657082,
"grad_norm": 4.445978162192108,
"learning_rate": 2.9936410392315427e-07,
"loss": 0.5669,
"step": 16620
},
{
"epoch": 0.8478637707759763,
"grad_norm": 3.6755291552420033,
"learning_rate": 2.974082452412394e-07,
"loss": 0.5297,
"step": 16630
},
{
"epoch": 0.8483736106862445,
"grad_norm": 14.392696428263024,
"learning_rate": 2.9545839254545514e-07,
"loss": 0.5325,
"step": 16640
},
{
"epoch": 0.8488834505965127,
"grad_norm": 12.282062098835242,
"learning_rate": 2.9351455115262356e-07,
"loss": 0.5086,
"step": 16650
},
{
"epoch": 0.8493932905067809,
"grad_norm": 4.4895478669348154,
"learning_rate": 2.915767263631747e-07,
"loss": 0.553,
"step": 16660
},
{
"epoch": 0.849903130417049,
"grad_norm": 14.685321876752772,
"learning_rate": 2.8964492346113343e-07,
"loss": 0.5659,
"step": 16670
},
{
"epoch": 0.8504129703273172,
"grad_norm": 4.2937451100068325,
"learning_rate": 2.877191477141039e-07,
"loss": 0.4832,
"step": 16680
},
{
"epoch": 0.8509228102375854,
"grad_norm": 6.059884328555942,
"learning_rate": 2.857994043732551e-07,
"loss": 0.5519,
"step": 16690
},
{
"epoch": 0.8514326501478535,
"grad_norm": 5.57087296263021,
"learning_rate": 2.8388569867330797e-07,
"loss": 0.5334,
"step": 16700
},
{
"epoch": 0.8519424900581217,
"grad_norm": 7.572939938965859,
"learning_rate": 2.819780358325189e-07,
"loss": 0.5571,
"step": 16710
},
{
"epoch": 0.8524523299683899,
"grad_norm": 7.3686333546596146,
"learning_rate": 2.8007642105266797e-07,
"loss": 0.4949,
"step": 16720
},
{
"epoch": 0.8529621698786581,
"grad_norm": 5.485068833070104,
"learning_rate": 2.78180859519043e-07,
"loss": 0.4964,
"step": 16730
},
{
"epoch": 0.8534720097889262,
"grad_norm": 6.146247832338137,
"learning_rate": 2.762913564004244e-07,
"loss": 0.5582,
"step": 16740
},
{
"epoch": 0.8539818496991944,
"grad_norm": 4.202837258365529,
"learning_rate": 2.7440791684907625e-07,
"loss": 0.5619,
"step": 16750
},
{
"epoch": 0.8544916896094626,
"grad_norm": 6.663318250972651,
"learning_rate": 2.7253054600072436e-07,
"loss": 0.522,
"step": 16760
},
{
"epoch": 0.8550015295197309,
"grad_norm": 3.523654035395865,
"learning_rate": 2.7065924897454993e-07,
"loss": 0.5118,
"step": 16770
},
{
"epoch": 0.855511369429999,
"grad_norm": 9.047514101897685,
"learning_rate": 2.6879403087317013e-07,
"loss": 0.5087,
"step": 16780
},
{
"epoch": 0.8560212093402672,
"grad_norm": 5.565027610301657,
"learning_rate": 2.6693489678262715e-07,
"loss": 0.5665,
"step": 16790
},
{
"epoch": 0.8565310492505354,
"grad_norm": 10.04970020606709,
"learning_rate": 2.65081851772373e-07,
"loss": 0.5852,
"step": 16800
},
{
"epoch": 0.8570408891608035,
"grad_norm": 5.631790723135971,
"learning_rate": 2.6323490089525643e-07,
"loss": 0.5738,
"step": 16810
},
{
"epoch": 0.8575507290710717,
"grad_norm": 5.180855418168242,
"learning_rate": 2.613940491875086e-07,
"loss": 0.4877,
"step": 16820
},
{
"epoch": 0.8580605689813399,
"grad_norm": 6.088906502363133,
"learning_rate": 2.595593016687295e-07,
"loss": 0.5443,
"step": 16830
},
{
"epoch": 0.8585704088916081,
"grad_norm": 4.107268653796043,
"learning_rate": 2.5773066334187466e-07,
"loss": 0.5163,
"step": 16840
},
{
"epoch": 0.8590802488018762,
"grad_norm": 3.9928473873773775,
"learning_rate": 2.559081391932411e-07,
"loss": 0.4553,
"step": 16850
},
{
"epoch": 0.8595900887121444,
"grad_norm": 10.42860184414175,
"learning_rate": 2.5409173419245315e-07,
"loss": 0.5117,
"step": 16860
},
{
"epoch": 0.8600999286224126,
"grad_norm": 6.250708882455792,
"learning_rate": 2.522814532924506e-07,
"loss": 0.4819,
"step": 16870
},
{
"epoch": 0.8606097685326808,
"grad_norm": 5.636688813902365,
"learning_rate": 2.504773014294734e-07,
"loss": 0.5047,
"step": 16880
},
{
"epoch": 0.8611196084429489,
"grad_norm": 6.26781373117407,
"learning_rate": 2.4867928352305006e-07,
"loss": 0.5618,
"step": 16890
},
{
"epoch": 0.8616294483532171,
"grad_norm": 5.166991212116818,
"learning_rate": 2.4688740447598033e-07,
"loss": 0.4388,
"step": 16900
},
{
"epoch": 0.8621392882634853,
"grad_norm": 5.317514632106231,
"learning_rate": 2.451016691743288e-07,
"loss": 0.6003,
"step": 16910
},
{
"epoch": 0.8626491281737534,
"grad_norm": 7.020604141487461,
"learning_rate": 2.433220824874036e-07,
"loss": 0.5452,
"step": 16920
},
{
"epoch": 0.8631589680840216,
"grad_norm": 7.227820968357044,
"learning_rate": 2.4154864926774935e-07,
"loss": 0.5176,
"step": 16930
},
{
"epoch": 0.8636688079942898,
"grad_norm": 6.852096725624449,
"learning_rate": 2.3978137435113015e-07,
"loss": 0.5811,
"step": 16940
},
{
"epoch": 0.864178647904558,
"grad_norm": 3.6983724687122104,
"learning_rate": 2.3802026255651868e-07,
"loss": 0.5203,
"step": 16950
},
{
"epoch": 0.8646884878148261,
"grad_norm": 9.399040457964405,
"learning_rate": 2.3626531868608165e-07,
"loss": 0.5561,
"step": 16960
},
{
"epoch": 0.8651983277250943,
"grad_norm": 3.2489497167538315,
"learning_rate": 2.345165475251676e-07,
"loss": 0.5627,
"step": 16970
},
{
"epoch": 0.8657081676353625,
"grad_norm": 6.056572547330579,
"learning_rate": 2.327739538422924e-07,
"loss": 0.5468,
"step": 16980
},
{
"epoch": 0.8662180075456307,
"grad_norm": 4.870229017115183,
"learning_rate": 2.3103754238912867e-07,
"loss": 0.4947,
"step": 16990
},
{
"epoch": 0.8667278474558988,
"grad_norm": 7.903019107389556,
"learning_rate": 2.2930731790049038e-07,
"loss": 0.5893,
"step": 17000
},
{
"epoch": 0.867237687366167,
"grad_norm": 5.300668621748874,
"learning_rate": 2.2758328509432187e-07,
"loss": 0.4533,
"step": 17010
},
{
"epoch": 0.8677475272764352,
"grad_norm": 5.469657679037128,
"learning_rate": 2.2586544867168297e-07,
"loss": 0.4888,
"step": 17020
},
{
"epoch": 0.8682573671867033,
"grad_norm": 7.051821198335733,
"learning_rate": 2.241538133167387e-07,
"loss": 0.5334,
"step": 17030
},
{
"epoch": 0.8687672070969715,
"grad_norm": 4.789754602599315,
"learning_rate": 2.2244838369674394e-07,
"loss": 0.5432,
"step": 17040
},
{
"epoch": 0.8692770470072397,
"grad_norm": 7.080841294094559,
"learning_rate": 2.2074916446203326e-07,
"loss": 0.5249,
"step": 17050
},
{
"epoch": 0.8697868869175079,
"grad_norm": 24.173764285939093,
"learning_rate": 2.1905616024600423e-07,
"loss": 0.5213,
"step": 17060
},
{
"epoch": 0.870296726827776,
"grad_norm": 4.894521920646139,
"learning_rate": 2.1736937566511103e-07,
"loss": 0.5126,
"step": 17070
},
{
"epoch": 0.8708065667380442,
"grad_norm": 4.90909346394224,
"learning_rate": 2.1568881531884523e-07,
"loss": 0.515,
"step": 17080
},
{
"epoch": 0.8713164066483124,
"grad_norm": 3.7209650934848195,
"learning_rate": 2.140144837897279e-07,
"loss": 0.5577,
"step": 17090
},
{
"epoch": 0.8718262465585807,
"grad_norm": 3.737982101575765,
"learning_rate": 2.12346385643295e-07,
"loss": 0.5269,
"step": 17100
},
{
"epoch": 0.8723360864688487,
"grad_norm": 14.080844208281462,
"learning_rate": 2.1068452542808564e-07,
"loss": 0.496,
"step": 17110
},
{
"epoch": 0.872845926379117,
"grad_norm": 4.956753332572378,
"learning_rate": 2.0902890767562883e-07,
"loss": 0.5491,
"step": 17120
},
{
"epoch": 0.8733557662893852,
"grad_norm": 4.591640625650321,
"learning_rate": 2.073795369004322e-07,
"loss": 0.6068,
"step": 17130
},
{
"epoch": 0.8738656061996533,
"grad_norm": 4.099384667329855,
"learning_rate": 2.0573641759996986e-07,
"loss": 0.5315,
"step": 17140
},
{
"epoch": 0.8743754461099215,
"grad_norm": 4.950824362700582,
"learning_rate": 2.0409955425466837e-07,
"loss": 0.5279,
"step": 17150
},
{
"epoch": 0.8748852860201897,
"grad_norm": 6.414979645813058,
"learning_rate": 2.024689513278963e-07,
"loss": 0.4776,
"step": 17160
},
{
"epoch": 0.8753951259304579,
"grad_norm": 5.394474787839066,
"learning_rate": 2.008446132659511e-07,
"loss": 0.5215,
"step": 17170
},
{
"epoch": 0.875904965840726,
"grad_norm": 4.5324708115394605,
"learning_rate": 1.9922654449804817e-07,
"loss": 0.4876,
"step": 17180
},
{
"epoch": 0.8764148057509942,
"grad_norm": 5.907527034390697,
"learning_rate": 1.976147494363062e-07,
"loss": 0.4698,
"step": 17190
},
{
"epoch": 0.8769246456612624,
"grad_norm": 8.922743293877302,
"learning_rate": 1.9600923247573871e-07,
"loss": 0.5154,
"step": 17200
},
{
"epoch": 0.8774344855715306,
"grad_norm": 4.998554702230558,
"learning_rate": 1.944099979942396e-07,
"loss": 0.5034,
"step": 17210
},
{
"epoch": 0.8779443254817987,
"grad_norm": 5.668728758328859,
"learning_rate": 1.9281705035257058e-07,
"loss": 0.5499,
"step": 17220
},
{
"epoch": 0.8784541653920669,
"grad_norm": 4.671811840815958,
"learning_rate": 1.91230393894353e-07,
"loss": 0.5687,
"step": 17230
},
{
"epoch": 0.8789640053023351,
"grad_norm": 5.520575226755685,
"learning_rate": 1.89650032946051e-07,
"loss": 0.4812,
"step": 17240
},
{
"epoch": 0.8794738452126032,
"grad_norm": 11.05687779884733,
"learning_rate": 1.8807597181696362e-07,
"loss": 0.7002,
"step": 17250
},
{
"epoch": 0.8799836851228714,
"grad_norm": 16.581275564566983,
"learning_rate": 1.8650821479921145e-07,
"loss": 0.5369,
"step": 17260
},
{
"epoch": 0.8804935250331396,
"grad_norm": 6.006427943479531,
"learning_rate": 1.8494676616772512e-07,
"loss": 0.605,
"step": 17270
},
{
"epoch": 0.8810033649434078,
"grad_norm": 6.045415253926665,
"learning_rate": 1.8339163018023383e-07,
"loss": 0.4756,
"step": 17280
},
{
"epoch": 0.8815132048536759,
"grad_norm": 5.206019981041104,
"learning_rate": 1.8184281107725327e-07,
"loss": 0.5694,
"step": 17290
},
{
"epoch": 0.8820230447639441,
"grad_norm": 6.466061518762644,
"learning_rate": 1.8030031308207407e-07,
"loss": 0.4846,
"step": 17300
},
{
"epoch": 0.8825328846742123,
"grad_norm": 5.06266467785271,
"learning_rate": 1.7876414040075175e-07,
"loss": 0.4877,
"step": 17310
},
{
"epoch": 0.8830427245844805,
"grad_norm": 3.8687971588466934,
"learning_rate": 1.7723429722209278e-07,
"loss": 0.4456,
"step": 17320
},
{
"epoch": 0.8835525644947486,
"grad_norm": 6.99193564633083,
"learning_rate": 1.7571078771764533e-07,
"loss": 0.5818,
"step": 17330
},
{
"epoch": 0.8840624044050168,
"grad_norm": 8.539752753092014,
"learning_rate": 1.7419361604168682e-07,
"loss": 0.5616,
"step": 17340
},
{
"epoch": 0.884572244315285,
"grad_norm": 8.203570899756697,
"learning_rate": 1.7268278633121167e-07,
"loss": 0.536,
"step": 17350
},
{
"epoch": 0.8850820842255531,
"grad_norm": 3.185969185168957,
"learning_rate": 1.7117830270592357e-07,
"loss": 0.5055,
"step": 17360
},
{
"epoch": 0.8855919241358213,
"grad_norm": 2.931715047088655,
"learning_rate": 1.6968016926822013e-07,
"loss": 0.5626,
"step": 17370
},
{
"epoch": 0.8861017640460895,
"grad_norm": 7.24661592844988,
"learning_rate": 1.6818839010318223e-07,
"loss": 0.5572,
"step": 17380
},
{
"epoch": 0.8866116039563577,
"grad_norm": 8.24951575739579,
"learning_rate": 1.6670296927856767e-07,
"loss": 0.4993,
"step": 17390
},
{
"epoch": 0.8871214438666258,
"grad_norm": 21.389911613660374,
"learning_rate": 1.6522391084479283e-07,
"loss": 0.5698,
"step": 17400
},
{
"epoch": 0.887631283776894,
"grad_norm": 16.404827647203533,
"learning_rate": 1.637512188349269e-07,
"loss": 0.5382,
"step": 17410
},
{
"epoch": 0.8881411236871622,
"grad_norm": 4.805873407925152,
"learning_rate": 1.62284897264679e-07,
"loss": 0.4783,
"step": 17420
},
{
"epoch": 0.8886509635974305,
"grad_norm": 3.3798702825749714,
"learning_rate": 1.6082495013238775e-07,
"loss": 0.4808,
"step": 17430
},
{
"epoch": 0.8891608035076985,
"grad_norm": 12.790949004053491,
"learning_rate": 1.5937138141900982e-07,
"loss": 0.5197,
"step": 17440
},
{
"epoch": 0.8896706434179668,
"grad_norm": 9.44744368083394,
"learning_rate": 1.5792419508810858e-07,
"loss": 0.5452,
"step": 17450
},
{
"epoch": 0.890180483328235,
"grad_norm": 7.129885436749561,
"learning_rate": 1.5648339508584548e-07,
"loss": 0.5228,
"step": 17460
},
{
"epoch": 0.8906903232385031,
"grad_norm": 5.135176674116688,
"learning_rate": 1.5504898534096673e-07,
"loss": 0.5183,
"step": 17470
},
{
"epoch": 0.8912001631487713,
"grad_norm": 5.1041591374414,
"learning_rate": 1.5362096976479475e-07,
"loss": 0.5185,
"step": 17480
},
{
"epoch": 0.8917100030590395,
"grad_norm": 5.694786690994035,
"learning_rate": 1.5219935225121412e-07,
"loss": 0.4908,
"step": 17490
},
{
"epoch": 0.8922198429693077,
"grad_norm": 3.9034922717406015,
"learning_rate": 1.5078413667666715e-07,
"loss": 0.5097,
"step": 17500
},
{
"epoch": 0.8927296828795758,
"grad_norm": 8.002868699227667,
"learning_rate": 1.4937532690013523e-07,
"loss": 0.4972,
"step": 17510
},
{
"epoch": 0.893239522789844,
"grad_norm": 10.291943080794496,
"learning_rate": 1.4797292676313607e-07,
"loss": 0.6284,
"step": 17520
},
{
"epoch": 0.8937493627001122,
"grad_norm": 9.650821841936288,
"learning_rate": 1.4657694008970796e-07,
"loss": 0.5288,
"step": 17530
},
{
"epoch": 0.8942592026103804,
"grad_norm": 3.9948393204016064,
"learning_rate": 1.4518737068640044e-07,
"loss": 0.5057,
"step": 17540
},
{
"epoch": 0.8947690425206485,
"grad_norm": 6.17897672680295,
"learning_rate": 1.438042223422667e-07,
"loss": 0.5056,
"step": 17550
},
{
"epoch": 0.8952788824309167,
"grad_norm": 9.235506895995227,
"learning_rate": 1.4242749882884875e-07,
"loss": 0.5267,
"step": 17560
},
{
"epoch": 0.8957887223411849,
"grad_norm": 6.187656386174637,
"learning_rate": 1.4105720390017147e-07,
"loss": 0.5462,
"step": 17570
},
{
"epoch": 0.896298562251453,
"grad_norm": 4.243447123841643,
"learning_rate": 1.39693341292729e-07,
"loss": 0.5367,
"step": 17580
},
{
"epoch": 0.8968084021617212,
"grad_norm": 4.041289794364369,
"learning_rate": 1.3833591472547647e-07,
"loss": 0.4994,
"step": 17590
},
{
"epoch": 0.8973182420719894,
"grad_norm": 6.229288293844123,
"learning_rate": 1.369849278998195e-07,
"loss": 0.5206,
"step": 17600
},
{
"epoch": 0.8978280819822576,
"grad_norm": 6.79920156828565,
"learning_rate": 1.3564038449960405e-07,
"loss": 0.533,
"step": 17610
},
{
"epoch": 0.8983379218925257,
"grad_norm": 3.2521858657367737,
"learning_rate": 1.3430228819110548e-07,
"loss": 0.4946,
"step": 17620
},
{
"epoch": 0.8988477618027939,
"grad_norm": 4.990742845655695,
"learning_rate": 1.329706426230201e-07,
"loss": 0.4791,
"step": 17630
},
{
"epoch": 0.8993576017130621,
"grad_norm": 7.025888602922682,
"learning_rate": 1.3164545142645452e-07,
"loss": 0.5821,
"step": 17640
},
{
"epoch": 0.8998674416233303,
"grad_norm": 10.93605938712498,
"learning_rate": 1.3032671821491426e-07,
"loss": 0.479,
"step": 17650
},
{
"epoch": 0.9003772815335984,
"grad_norm": 13.273121414912028,
"learning_rate": 1.2901444658429823e-07,
"loss": 0.5172,
"step": 17660
},
{
"epoch": 0.9008871214438666,
"grad_norm": 7.254958027501088,
"learning_rate": 1.2770864011288243e-07,
"loss": 0.4899,
"step": 17670
},
{
"epoch": 0.9013969613541348,
"grad_norm": 4.878424338077347,
"learning_rate": 1.2640930236131666e-07,
"loss": 0.5012,
"step": 17680
},
{
"epoch": 0.9019068012644029,
"grad_norm": 6.437844629509608,
"learning_rate": 1.2511643687261126e-07,
"loss": 0.5102,
"step": 17690
},
{
"epoch": 0.9024166411746711,
"grad_norm": 5.239404662880723,
"learning_rate": 1.2383004717212626e-07,
"loss": 0.5383,
"step": 17700
},
{
"epoch": 0.9029264810849393,
"grad_norm": 5.393620607223004,
"learning_rate": 1.225501367675666e-07,
"loss": 0.5649,
"step": 17710
},
{
"epoch": 0.9034363209952075,
"grad_norm": 5.273731975446447,
"learning_rate": 1.212767091489675e-07,
"loss": 0.5049,
"step": 17720
},
{
"epoch": 0.9039461609054756,
"grad_norm": 4.042327687721329,
"learning_rate": 1.2000976778868744e-07,
"loss": 0.5192,
"step": 17730
},
{
"epoch": 0.9044560008157438,
"grad_norm": 4.318986918711694,
"learning_rate": 1.1874931614139857e-07,
"loss": 0.5109,
"step": 17740
},
{
"epoch": 0.904965840726012,
"grad_norm": 4.720341432462499,
"learning_rate": 1.1749535764407737e-07,
"loss": 0.5631,
"step": 17750
},
{
"epoch": 0.9054756806362803,
"grad_norm": 5.672296381527825,
"learning_rate": 1.1624789571599404e-07,
"loss": 0.5489,
"step": 17760
},
{
"epoch": 0.9059855205465484,
"grad_norm": 6.938516984102913,
"learning_rate": 1.1500693375870454e-07,
"loss": 0.5594,
"step": 17770
},
{
"epoch": 0.9064953604568166,
"grad_norm": 4.3384297659200275,
"learning_rate": 1.1377247515604095e-07,
"loss": 0.5518,
"step": 17780
},
{
"epoch": 0.9070052003670848,
"grad_norm": 3.7070064736161403,
"learning_rate": 1.125445232741021e-07,
"loss": 0.4584,
"step": 17790
},
{
"epoch": 0.9075150402773529,
"grad_norm": 10.374380190425383,
"learning_rate": 1.1132308146124454e-07,
"loss": 0.4766,
"step": 17800
},
{
"epoch": 0.9080248801876211,
"grad_norm": 4.718312236764829,
"learning_rate": 1.1010815304807188e-07,
"loss": 0.5457,
"step": 17810
},
{
"epoch": 0.9085347200978893,
"grad_norm": 3.749039857676094,
"learning_rate": 1.0889974134742937e-07,
"loss": 0.5028,
"step": 17820
},
{
"epoch": 0.9090445600081575,
"grad_norm": 3.823156797221916,
"learning_rate": 1.0769784965439096e-07,
"loss": 0.5052,
"step": 17830
},
{
"epoch": 0.9095543999184256,
"grad_norm": 3.014825307279332,
"learning_rate": 1.0650248124625256e-07,
"loss": 0.5315,
"step": 17840
},
{
"epoch": 0.9100642398286938,
"grad_norm": 5.212599447844577,
"learning_rate": 1.0531363938252326e-07,
"loss": 0.5186,
"step": 17850
},
{
"epoch": 0.910574079738962,
"grad_norm": 34.368575662060316,
"learning_rate": 1.0413132730491355e-07,
"loss": 0.5162,
"step": 17860
},
{
"epoch": 0.9110839196492302,
"grad_norm": 3.597099037504823,
"learning_rate": 1.0295554823733122e-07,
"loss": 0.5855,
"step": 17870
},
{
"epoch": 0.9115937595594983,
"grad_norm": 4.727267398755642,
"learning_rate": 1.0178630538586753e-07,
"loss": 0.4831,
"step": 17880
},
{
"epoch": 0.9121035994697665,
"grad_norm": 5.911766216360492,
"learning_rate": 1.0062360193879295e-07,
"loss": 0.5668,
"step": 17890
},
{
"epoch": 0.9126134393800347,
"grad_norm": 4.0615112555300925,
"learning_rate": 9.946744106654526e-08,
"loss": 0.4179,
"step": 17900
},
{
"epoch": 0.9131232792903028,
"grad_norm": 4.749359954012263,
"learning_rate": 9.83178259217224e-08,
"loss": 0.5185,
"step": 17910
},
{
"epoch": 0.913633119200571,
"grad_norm": 6.3639629967995965,
"learning_rate": 9.717475963907346e-08,
"loss": 0.5243,
"step": 17920
},
{
"epoch": 0.9141429591108392,
"grad_norm": 5.920025477063065,
"learning_rate": 9.60382453354905e-08,
"loss": 0.4972,
"step": 17930
},
{
"epoch": 0.9146527990211074,
"grad_norm": 6.0515248027506905,
"learning_rate": 9.490828610999924e-08,
"loss": 0.5253,
"step": 17940
},
{
"epoch": 0.9151626389313755,
"grad_norm": 3.748270274591082,
"learning_rate": 9.378488504375144e-08,
"loss": 0.4371,
"step": 17950
},
{
"epoch": 0.9156724788416437,
"grad_norm": 4.968252816190375,
"learning_rate": 9.26680452000167e-08,
"loss": 0.5414,
"step": 17960
},
{
"epoch": 0.9161823187519119,
"grad_norm": 4.873600807902032,
"learning_rate": 9.155776962417207e-08,
"loss": 0.5299,
"step": 17970
},
{
"epoch": 0.9166921586621801,
"grad_norm": 4.219217507441979,
"learning_rate": 9.045406134369716e-08,
"loss": 0.5429,
"step": 17980
},
{
"epoch": 0.9172019985724482,
"grad_norm": 5.241201213247095,
"learning_rate": 8.935692336816265e-08,
"loss": 0.511,
"step": 17990
},
{
"epoch": 0.9177118384827164,
"grad_norm": 8.919986388742297,
"learning_rate": 8.826635868922461e-08,
"loss": 0.4773,
"step": 18000
},
{
"epoch": 0.9182216783929846,
"grad_norm": 10.10570201012482,
"learning_rate": 8.718237028061377e-08,
"loss": 0.5781,
"step": 18010
},
{
"epoch": 0.9187315183032527,
"grad_norm": 4.158483913899484,
"learning_rate": 8.610496109812933e-08,
"loss": 0.5329,
"step": 18020
},
{
"epoch": 0.9192413582135209,
"grad_norm": 9.586346343848911,
"learning_rate": 8.503413407963151e-08,
"loss": 0.5469,
"step": 18030
},
{
"epoch": 0.9197511981237891,
"grad_norm": 3.8448185881102566,
"learning_rate": 8.396989214503049e-08,
"loss": 0.4836,
"step": 18040
},
{
"epoch": 0.9202610380340573,
"grad_norm": 5.640326473227691,
"learning_rate": 8.291223819628214e-08,
"loss": 0.6244,
"step": 18050
},
{
"epoch": 0.9207708779443254,
"grad_norm": 7.373226438300865,
"learning_rate": 8.186117511737674e-08,
"loss": 0.5215,
"step": 18060
},
{
"epoch": 0.9212807178545936,
"grad_norm": 3.4587339642694674,
"learning_rate": 8.08167057743342e-08,
"loss": 0.4781,
"step": 18070
},
{
"epoch": 0.9217905577648619,
"grad_norm": 2.768301830786394,
"learning_rate": 7.977883301519357e-08,
"loss": 0.5722,
"step": 18080
},
{
"epoch": 0.9223003976751301,
"grad_norm": 5.719285971725797,
"learning_rate": 7.874755967000714e-08,
"loss": 0.5325,
"step": 18090
},
{
"epoch": 0.9228102375853982,
"grad_norm": 6.042524456479693,
"learning_rate": 7.772288855083188e-08,
"loss": 0.6014,
"step": 18100
},
{
"epoch": 0.9233200774956664,
"grad_norm": 5.400178848850842,
"learning_rate": 7.670482245172194e-08,
"loss": 0.5099,
"step": 18110
},
{
"epoch": 0.9238299174059346,
"grad_norm": 2.754575388814914,
"learning_rate": 7.569336414872092e-08,
"loss": 0.4918,
"step": 18120
},
{
"epoch": 0.9243397573162027,
"grad_norm": 13.88985456058074,
"learning_rate": 7.468851639985397e-08,
"loss": 0.5413,
"step": 18130
},
{
"epoch": 0.9248495972264709,
"grad_norm": 4.388939796155767,
"learning_rate": 7.369028194512212e-08,
"loss": 0.5016,
"step": 18140
},
{
"epoch": 0.9253594371367391,
"grad_norm": 6.205604328569233,
"learning_rate": 7.269866350649135e-08,
"loss": 0.5335,
"step": 18150
},
{
"epoch": 0.9258692770470073,
"grad_norm": 6.101462279645939,
"learning_rate": 7.171366378788985e-08,
"loss": 0.5468,
"step": 18160
},
{
"epoch": 0.9263791169572754,
"grad_norm": 6.020969502565288,
"learning_rate": 7.073528547519498e-08,
"loss": 0.5997,
"step": 18170
},
{
"epoch": 0.9268889568675436,
"grad_norm": 4.618725332842686,
"learning_rate": 6.976353123623103e-08,
"loss": 0.5041,
"step": 18180
},
{
"epoch": 0.9273987967778118,
"grad_norm": 7.184348947277078,
"learning_rate": 6.87984037207598e-08,
"loss": 0.4754,
"step": 18190
},
{
"epoch": 0.92790863668808,
"grad_norm": 7.713917613131603,
"learning_rate": 6.783990556047255e-08,
"loss": 0.5175,
"step": 18200
},
{
"epoch": 0.9284184765983481,
"grad_norm": 4.792556129343706,
"learning_rate": 6.688803936898414e-08,
"loss": 0.4878,
"step": 18210
},
{
"epoch": 0.9289283165086163,
"grad_norm": 3.703695350787858,
"learning_rate": 6.594280774182588e-08,
"loss": 0.4873,
"step": 18220
},
{
"epoch": 0.9294381564188845,
"grad_norm": 6.124863696737036,
"learning_rate": 6.500421325643741e-08,
"loss": 0.5573,
"step": 18230
},
{
"epoch": 0.9299479963291526,
"grad_norm": 4.086006806022881,
"learning_rate": 6.407225847216064e-08,
"loss": 0.4596,
"step": 18240
},
{
"epoch": 0.9304578362394208,
"grad_norm": 4.730225850142239,
"learning_rate": 6.31469459302328e-08,
"loss": 0.503,
"step": 18250
},
{
"epoch": 0.930967676149689,
"grad_norm": 9.109944551466292,
"learning_rate": 6.222827815377891e-08,
"loss": 0.6018,
"step": 18260
},
{
"epoch": 0.9314775160599572,
"grad_norm": 3.4809623856378744,
"learning_rate": 6.131625764780463e-08,
"loss": 0.4811,
"step": 18270
},
{
"epoch": 0.9319873559702253,
"grad_norm": 13.953918060150174,
"learning_rate": 6.041088689919122e-08,
"loss": 0.5377,
"step": 18280
},
{
"epoch": 0.9324971958804935,
"grad_norm": 4.25953364961008,
"learning_rate": 5.951216837668611e-08,
"loss": 0.5599,
"step": 18290
},
{
"epoch": 0.9330070357907617,
"grad_norm": 6.07972176790746,
"learning_rate": 5.8620104530898445e-08,
"loss": 0.5707,
"step": 18300
},
{
"epoch": 0.9335168757010299,
"grad_norm": 5.3162707838036365,
"learning_rate": 5.7734697794291084e-08,
"loss": 0.5307,
"step": 18310
},
{
"epoch": 0.934026715611298,
"grad_norm": 5.0188908228263545,
"learning_rate": 5.6855950581175544e-08,
"loss": 0.5962,
"step": 18320
},
{
"epoch": 0.9345365555215662,
"grad_norm": 5.24867013376636,
"learning_rate": 5.598386528770205e-08,
"loss": 0.5565,
"step": 18330
},
{
"epoch": 0.9350463954318344,
"grad_norm": 22.382867821978923,
"learning_rate": 5.5118444291856756e-08,
"loss": 0.5487,
"step": 18340
},
{
"epoch": 0.9355562353421025,
"grad_norm": 7.0743618016834375,
"learning_rate": 5.4259689953454485e-08,
"loss": 0.5524,
"step": 18350
},
{
"epoch": 0.9360660752523707,
"grad_norm": 4.1306254705707355,
"learning_rate": 5.3407604614129635e-08,
"loss": 0.507,
"step": 18360
},
{
"epoch": 0.9365759151626389,
"grad_norm": 5.496527409956476,
"learning_rate": 5.2562190597333086e-08,
"loss": 0.5337,
"step": 18370
},
{
"epoch": 0.9370857550729071,
"grad_norm": 5.707646973628439,
"learning_rate": 5.172345020832359e-08,
"loss": 0.5257,
"step": 18380
},
{
"epoch": 0.9375955949831752,
"grad_norm": 5.210175801682956,
"learning_rate": 5.0891385734163077e-08,
"loss": 0.5407,
"step": 18390
},
{
"epoch": 0.9381054348934434,
"grad_norm": 7.382791437311747,
"learning_rate": 5.006599944370944e-08,
"loss": 0.5697,
"step": 18400
},
{
"epoch": 0.9386152748037117,
"grad_norm": 7.111821103951391,
"learning_rate": 4.924729358761066e-08,
"loss": 0.5179,
"step": 18410
},
{
"epoch": 0.9391251147139799,
"grad_norm": 8.048320031381866,
"learning_rate": 4.843527039829821e-08,
"loss": 0.5582,
"step": 18420
},
{
"epoch": 0.939634954624248,
"grad_norm": 3.8996485562976444,
"learning_rate": 4.762993208998229e-08,
"loss": 0.5644,
"step": 18430
},
{
"epoch": 0.9401447945345162,
"grad_norm": 5.671078940006414,
"learning_rate": 4.6831280858644354e-08,
"loss": 0.475,
"step": 18440
},
{
"epoch": 0.9406546344447844,
"grad_norm": 16.07670677743477,
"learning_rate": 4.6039318882031e-08,
"loss": 0.49,
"step": 18450
},
{
"epoch": 0.9411644743550525,
"grad_norm": 7.4934067342706525,
"learning_rate": 4.525404831965063e-08,
"loss": 0.5153,
"step": 18460
},
{
"epoch": 0.9416743142653207,
"grad_norm": 3.141994208167517,
"learning_rate": 4.44754713127632e-08,
"loss": 0.5038,
"step": 18470
},
{
"epoch": 0.9421841541755889,
"grad_norm": 32.17902273556436,
"learning_rate": 4.3703589984378516e-08,
"loss": 0.4903,
"step": 18480
},
{
"epoch": 0.9426939940858571,
"grad_norm": 3.367404953847282,
"learning_rate": 4.293840643924796e-08,
"loss": 0.4557,
"step": 18490
},
{
"epoch": 0.9432038339961252,
"grad_norm": 5.048749451134672,
"learning_rate": 4.217992276385974e-08,
"loss": 0.5364,
"step": 18500
},
{
"epoch": 0.9437136739063934,
"grad_norm": 5.0034318322981495,
"learning_rate": 4.142814102643361e-08,
"loss": 0.5513,
"step": 18510
},
{
"epoch": 0.9442235138166616,
"grad_norm": 4.278247994376298,
"learning_rate": 4.0683063276913405e-08,
"loss": 0.5449,
"step": 18520
},
{
"epoch": 0.9447333537269298,
"grad_norm": 5.421839129452638,
"learning_rate": 3.99446915469634e-08,
"loss": 0.5449,
"step": 18530
},
{
"epoch": 0.9452431936371979,
"grad_norm": 4.104415412127668,
"learning_rate": 3.921302784996167e-08,
"loss": 0.5404,
"step": 18540
},
{
"epoch": 0.9457530335474661,
"grad_norm": 4.701408530965104,
"learning_rate": 3.8488074180995374e-08,
"loss": 0.4649,
"step": 18550
},
{
"epoch": 0.9462628734577343,
"grad_norm": 9.32179627217768,
"learning_rate": 3.776983251685462e-08,
"loss": 0.523,
"step": 18560
},
{
"epoch": 0.9467727133680024,
"grad_norm": 3.6634884696605363,
"learning_rate": 3.7058304816027516e-08,
"loss": 0.4673,
"step": 18570
},
{
"epoch": 0.9472825532782706,
"grad_norm": 5.139736780159314,
"learning_rate": 3.635349301869401e-08,
"loss": 0.5031,
"step": 18580
},
{
"epoch": 0.9477923931885388,
"grad_norm": 4.596816520394499,
"learning_rate": 3.5655399046722326e-08,
"loss": 0.5159,
"step": 18590
},
{
"epoch": 0.948302233098807,
"grad_norm": 11.765445721487552,
"learning_rate": 3.496402480366229e-08,
"loss": 0.5276,
"step": 18600
},
{
"epoch": 0.9488120730090751,
"grad_norm": 6.844763570325794,
"learning_rate": 3.42793721747392e-08,
"loss": 0.5238,
"step": 18610
},
{
"epoch": 0.9493219129193433,
"grad_norm": 4.547721352100664,
"learning_rate": 3.3601443026852476e-08,
"loss": 0.5704,
"step": 18620
},
{
"epoch": 0.9498317528296115,
"grad_norm": 6.251252686216228,
"learning_rate": 3.293023920856564e-08,
"loss": 0.5405,
"step": 18630
},
{
"epoch": 0.9503415927398797,
"grad_norm": 6.015275652746063,
"learning_rate": 3.226576255010494e-08,
"loss": 0.5118,
"step": 18640
},
{
"epoch": 0.9508514326501478,
"grad_norm": 8.234395597155862,
"learning_rate": 3.160801486335324e-08,
"loss": 0.5592,
"step": 18650
},
{
"epoch": 0.951361272560416,
"grad_norm": 4.754344888661146,
"learning_rate": 3.095699794184476e-08,
"loss": 0.5524,
"step": 18660
},
{
"epoch": 0.9518711124706842,
"grad_norm": 6.68079127540894,
"learning_rate": 3.0312713560759766e-08,
"loss": 0.4849,
"step": 18670
},
{
"epoch": 0.9523809523809523,
"grad_norm": 5.451421382141734,
"learning_rate": 2.9675163476921287e-08,
"loss": 0.5537,
"step": 18680
},
{
"epoch": 0.9528907922912205,
"grad_norm": 9.786743935052339,
"learning_rate": 2.9044349428788976e-08,
"loss": 0.4827,
"step": 18690
},
{
"epoch": 0.9534006322014887,
"grad_norm": 8.232002898614457,
"learning_rate": 2.842027313645468e-08,
"loss": 0.5923,
"step": 18700
},
{
"epoch": 0.953910472111757,
"grad_norm": 6.979721702774588,
"learning_rate": 2.7802936301638273e-08,
"loss": 0.4923,
"step": 18710
},
{
"epoch": 0.954420312022025,
"grad_norm": 6.011316761886053,
"learning_rate": 2.7192340607681833e-08,
"loss": 0.5147,
"step": 18720
},
{
"epoch": 0.9549301519322932,
"grad_norm": 4.47132964768527,
"learning_rate": 2.658848771954714e-08,
"loss": 0.5763,
"step": 18730
},
{
"epoch": 0.9554399918425615,
"grad_norm": 4.885688369435783,
"learning_rate": 2.5991379283807904e-08,
"loss": 0.4976,
"step": 18740
},
{
"epoch": 0.9559498317528297,
"grad_norm": 5.2130620487001895,
"learning_rate": 2.5401016928648935e-08,
"loss": 0.5505,
"step": 18750
},
{
"epoch": 0.9564596716630978,
"grad_norm": 8.042198715976756,
"learning_rate": 2.481740226385948e-08,
"loss": 0.6171,
"step": 18760
},
{
"epoch": 0.956969511573366,
"grad_norm": 5.985678970410733,
"learning_rate": 2.4240536880828503e-08,
"loss": 0.4905,
"step": 18770
},
{
"epoch": 0.9574793514836342,
"grad_norm": 8.406610661486411,
"learning_rate": 2.3670422352542466e-08,
"loss": 0.5365,
"step": 18780
},
{
"epoch": 0.9579891913939023,
"grad_norm": 4.112693522165467,
"learning_rate": 2.3107060233578393e-08,
"loss": 0.5136,
"step": 18790
},
{
"epoch": 0.9584990313041705,
"grad_norm": 7.338631506115541,
"learning_rate": 2.25504520601022e-08,
"loss": 0.498,
"step": 18800
},
{
"epoch": 0.9590088712144387,
"grad_norm": 6.2098176838166745,
"learning_rate": 2.200059934986204e-08,
"loss": 0.491,
"step": 18810
},
{
"epoch": 0.9595187111247069,
"grad_norm": 5.07719923123484,
"learning_rate": 2.1457503602186625e-08,
"loss": 0.5815,
"step": 18820
},
{
"epoch": 0.960028551034975,
"grad_norm": 5.337217289825358,
"learning_rate": 2.0921166297978858e-08,
"loss": 0.5515,
"step": 18830
},
{
"epoch": 0.9605383909452432,
"grad_norm": 9.869232248594697,
"learning_rate": 2.0391588899713598e-08,
"loss": 0.52,
"step": 18840
},
{
"epoch": 0.9610482308555114,
"grad_norm": 3.8427534633301312,
"learning_rate": 1.9868772851432404e-08,
"loss": 0.5542,
"step": 18850
},
{
"epoch": 0.9615580707657796,
"grad_norm": 4.991658787134779,
"learning_rate": 1.935271957874074e-08,
"loss": 0.5525,
"step": 18860
},
{
"epoch": 0.9620679106760477,
"grad_norm": 4.300256783152754,
"learning_rate": 1.8843430488802438e-08,
"loss": 0.5173,
"step": 18870
},
{
"epoch": 0.9625777505863159,
"grad_norm": 5.147408487787641,
"learning_rate": 1.8340906970338023e-08,
"loss": 0.4697,
"step": 18880
},
{
"epoch": 0.9630875904965841,
"grad_norm": 11.129787417227005,
"learning_rate": 1.7845150393618894e-08,
"loss": 0.5594,
"step": 18890
},
{
"epoch": 0.9635974304068522,
"grad_norm": 11.910584867583186,
"learning_rate": 1.7356162110465092e-08,
"loss": 0.5424,
"step": 18900
},
{
"epoch": 0.9641072703171204,
"grad_norm": 24.564796616772416,
"learning_rate": 1.6873943454240593e-08,
"loss": 0.5801,
"step": 18910
},
{
"epoch": 0.9646171102273886,
"grad_norm": 11.85284427839324,
"learning_rate": 1.639849573985025e-08,
"loss": 0.4865,
"step": 18920
},
{
"epoch": 0.9651269501376568,
"grad_norm": 5.767042170342012,
"learning_rate": 1.5929820263735907e-08,
"loss": 0.5142,
"step": 18930
},
{
"epoch": 0.9656367900479249,
"grad_norm": 3.232135744615972,
"learning_rate": 1.546791830387362e-08,
"loss": 0.5221,
"step": 18940
},
{
"epoch": 0.9661466299581931,
"grad_norm": 7.960254210098102,
"learning_rate": 1.5012791119768665e-08,
"loss": 0.4861,
"step": 18950
},
{
"epoch": 0.9666564698684613,
"grad_norm": 4.380004085312518,
"learning_rate": 1.4564439952453324e-08,
"loss": 0.5369,
"step": 18960
},
{
"epoch": 0.9671663097787295,
"grad_norm": 4.3180007597053125,
"learning_rate": 1.4122866024483261e-08,
"loss": 0.5186,
"step": 18970
},
{
"epoch": 0.9676761496889976,
"grad_norm": 4.037312910139026,
"learning_rate": 1.3688070539933928e-08,
"loss": 0.4379,
"step": 18980
},
{
"epoch": 0.9681859895992658,
"grad_norm": 4.476262239890478,
"learning_rate": 1.3260054684397782e-08,
"loss": 0.5926,
"step": 18990
},
{
"epoch": 0.968695829509534,
"grad_norm": 9.271495266404623,
"learning_rate": 1.2838819624980125e-08,
"loss": 0.5238,
"step": 19000
},
{
"epoch": 0.9692056694198021,
"grad_norm": 5.490916312155755,
"learning_rate": 1.2424366510297436e-08,
"loss": 0.4774,
"step": 19010
},
{
"epoch": 0.9697155093300703,
"grad_norm": 6.857369554668582,
"learning_rate": 1.20166964704721e-08,
"loss": 0.5446,
"step": 19020
},
{
"epoch": 0.9702253492403385,
"grad_norm": 8.71544312715168,
"learning_rate": 1.1615810617131573e-08,
"loss": 0.5236,
"step": 19030
},
{
"epoch": 0.9707351891506067,
"grad_norm": 8.467862835101643,
"learning_rate": 1.1221710043403943e-08,
"loss": 0.5231,
"step": 19040
},
{
"epoch": 0.9712450290608748,
"grad_norm": 7.436576967265831,
"learning_rate": 1.0834395823915156e-08,
"loss": 0.5239,
"step": 19050
},
{
"epoch": 0.971754868971143,
"grad_norm": 12.762100749378813,
"learning_rate": 1.0453869014786232e-08,
"loss": 0.5472,
"step": 19060
},
{
"epoch": 0.9722647088814113,
"grad_norm": 4.004442125450377,
"learning_rate": 1.008013065363106e-08,
"loss": 0.555,
"step": 19070
},
{
"epoch": 0.9727745487916795,
"grad_norm": 10.557378950896895,
"learning_rate": 9.713181759552215e-09,
"loss": 0.5519,
"step": 19080
},
{
"epoch": 0.9732843887019476,
"grad_norm": 6.28040223860578,
"learning_rate": 9.353023333138755e-09,
"loss": 0.4956,
"step": 19090
},
{
"epoch": 0.9737942286122158,
"grad_norm": 5.6899027379222655,
"learning_rate": 8.999656356464547e-09,
"loss": 0.5118,
"step": 19100
},
{
"epoch": 0.974304068522484,
"grad_norm": 5.807412866914558,
"learning_rate": 8.65308179308355e-09,
"loss": 0.5229,
"step": 19110
},
{
"epoch": 0.9748139084327521,
"grad_norm": 4.11394127307105,
"learning_rate": 8.31330058802926e-09,
"loss": 0.5073,
"step": 19120
},
{
"epoch": 0.9753237483430203,
"grad_norm": 5.700434689816356,
"learning_rate": 7.980313667810268e-09,
"loss": 0.4894,
"step": 19130
},
{
"epoch": 0.9758335882532885,
"grad_norm": 4.577599228091688,
"learning_rate": 7.654121940409432e-09,
"loss": 0.6207,
"step": 19140
},
{
"epoch": 0.9763434281635567,
"grad_norm": 10.419675523544738,
"learning_rate": 7.334726295280259e-09,
"loss": 0.5352,
"step": 19150
},
{
"epoch": 0.9768532680738248,
"grad_norm": 4.439358657537711,
"learning_rate": 7.0221276033446975e-09,
"loss": 0.4932,
"step": 19160
},
{
"epoch": 0.977363107984093,
"grad_norm": 3.742211679316256,
"learning_rate": 6.716326716991184e-09,
"loss": 0.5847,
"step": 19170
},
{
"epoch": 0.9778729478943612,
"grad_norm": 3.8227065535066678,
"learning_rate": 6.417324470071873e-09,
"loss": 0.5439,
"step": 19180
},
{
"epoch": 0.9783827878046294,
"grad_norm": 4.7303947099623835,
"learning_rate": 6.125121677900414e-09,
"loss": 0.5091,
"step": 19190
},
{
"epoch": 0.9788926277148975,
"grad_norm": 8.717984325580684,
"learning_rate": 5.83971913725001e-09,
"loss": 0.5532,
"step": 19200
},
{
"epoch": 0.9794024676251657,
"grad_norm": 3.950523220126785,
"learning_rate": 5.56111762635092e-09,
"loss": 0.5678,
"step": 19210
},
{
"epoch": 0.9799123075354339,
"grad_norm": 6.2586202336071395,
"learning_rate": 5.289317904888513e-09,
"loss": 0.4832,
"step": 19220
},
{
"epoch": 0.980422147445702,
"grad_norm": 6.26610501924219,
"learning_rate": 5.024320714001329e-09,
"loss": 0.6249,
"step": 19230
},
{
"epoch": 0.9809319873559702,
"grad_norm": 4.730438194254504,
"learning_rate": 4.766126776278579e-09,
"loss": 0.5083,
"step": 19240
},
{
"epoch": 0.9814418272662384,
"grad_norm": 5.157032241241999,
"learning_rate": 4.514736795758756e-09,
"loss": 0.5722,
"step": 19250
},
{
"epoch": 0.9819516671765066,
"grad_norm": 4.9127912748996305,
"learning_rate": 4.2701514579276955e-09,
"loss": 0.5183,
"step": 19260
},
{
"epoch": 0.9824615070867747,
"grad_norm": 7.864075457181176,
"learning_rate": 4.03237142971552e-09,
"loss": 0.506,
"step": 19270
},
{
"epoch": 0.9829713469970429,
"grad_norm": 5.0400627370982924,
"learning_rate": 3.8013973594969166e-09,
"loss": 0.5261,
"step": 19280
},
{
"epoch": 0.9834811869073111,
"grad_norm": 4.769948287567263,
"learning_rate": 3.5772298770875293e-09,
"loss": 0.4839,
"step": 19290
},
{
"epoch": 0.9839910268175793,
"grad_norm": 4.202266396236591,
"learning_rate": 3.3598695937428483e-09,
"loss": 0.5264,
"step": 19300
},
{
"epoch": 0.9845008667278474,
"grad_norm": 14.024777095366243,
"learning_rate": 3.1493171021571013e-09,
"loss": 0.5577,
"step": 19310
},
{
"epoch": 0.9850107066381156,
"grad_norm": 3.753221181700882,
"learning_rate": 2.9455729764607533e-09,
"loss": 0.5397,
"step": 19320
},
{
"epoch": 0.9855205465483838,
"grad_norm": 3.2700035457841525,
"learning_rate": 2.748637772219398e-09,
"loss": 0.5002,
"step": 19330
},
{
"epoch": 0.9860303864586519,
"grad_norm": 7.1456997972092005,
"learning_rate": 2.558512026432647e-09,
"loss": 0.5349,
"step": 19340
},
{
"epoch": 0.9865402263689201,
"grad_norm": 3.811434567567053,
"learning_rate": 2.375196257531631e-09,
"loss": 0.4973,
"step": 19350
},
{
"epoch": 0.9870500662791883,
"grad_norm": 5.195560958785233,
"learning_rate": 2.1986909653781696e-09,
"loss": 0.5455,
"step": 19360
},
{
"epoch": 0.9875599061894565,
"grad_norm": 3.075752215335098,
"learning_rate": 2.0289966312639353e-09,
"loss": 0.5713,
"step": 19370
},
{
"epoch": 0.9880697460997246,
"grad_norm": 5.175670667888546,
"learning_rate": 1.866113717907958e-09,
"loss": 0.5186,
"step": 19380
},
{
"epoch": 0.9885795860099929,
"grad_norm": 8.82076603320367,
"learning_rate": 1.7100426694566241e-09,
"loss": 0.6248,
"step": 19390
},
{
"epoch": 0.9890894259202611,
"grad_norm": 10.598190890684371,
"learning_rate": 1.5607839114811785e-09,
"loss": 0.5185,
"step": 19400
},
{
"epoch": 0.9895992658305293,
"grad_norm": 99.97448677834379,
"learning_rate": 1.4183378509782797e-09,
"loss": 0.5391,
"step": 19410
},
{
"epoch": 0.9901091057407974,
"grad_norm": 3.5906574918458554,
"learning_rate": 1.2827048763663918e-09,
"loss": 0.5658,
"step": 19420
},
{
"epoch": 0.9906189456510656,
"grad_norm": 3.937560719140895,
"learning_rate": 1.1538853574874497e-09,
"loss": 0.5157,
"step": 19430
},
{
"epoch": 0.9911287855613338,
"grad_norm": 3.370981182140972,
"learning_rate": 1.0318796456040835e-09,
"loss": 0.546,
"step": 19440
},
{
"epoch": 0.9916386254716019,
"grad_norm": 3.908118760270642,
"learning_rate": 9.166880733993411e-10,
"loss": 0.5132,
"step": 19450
},
{
"epoch": 0.9921484653818701,
"grad_norm": 5.504638469324314,
"learning_rate": 8.083109549750223e-10,
"loss": 0.5299,
"step": 19460
},
{
"epoch": 0.9926583052921383,
"grad_norm": 4.939716355524505,
"learning_rate": 7.06748585852235e-10,
"loss": 0.5293,
"step": 19470
},
{
"epoch": 0.9931681452024065,
"grad_norm": 3.029382803122107,
"learning_rate": 6.12001242968896e-10,
"loss": 0.537,
"step": 19480
},
{
"epoch": 0.9936779851126746,
"grad_norm": 3.7112758915934716,
"learning_rate": 5.240691846808421e-10,
"loss": 0.4678,
"step": 19490
},
{
"epoch": 0.9941878250229428,
"grad_norm": 9.076414136062386,
"learning_rate": 4.4295265075905425e-10,
"loss": 0.5024,
"step": 19500
},
{
"epoch": 0.994697664933211,
"grad_norm": 4.660051024920895,
"learning_rate": 3.686518623910451e-10,
"loss": 0.512,
"step": 19510
},
{
"epoch": 0.9952075048434792,
"grad_norm": 3.430866952752682,
"learning_rate": 3.011670221786389e-10,
"loss": 0.5249,
"step": 19520
},
{
"epoch": 0.9957173447537473,
"grad_norm": 3.6543057669756416,
"learning_rate": 2.404983141379713e-10,
"loss": 0.5477,
"step": 19530
},
{
"epoch": 0.9962271846640155,
"grad_norm": 5.190759538622556,
"learning_rate": 1.8664590369976697e-10,
"loss": 0.5671,
"step": 19540
},
{
"epoch": 0.9967370245742837,
"grad_norm": 3.6001177060757676,
"learning_rate": 1.396099377076743e-10,
"loss": 0.5121,
"step": 19550
},
{
"epoch": 0.9972468644845518,
"grad_norm": 4.8061797540089515,
"learning_rate": 9.939054441826523e-11,
"loss": 0.4448,
"step": 19560
},
{
"epoch": 0.99775670439482,
"grad_norm": 4.397650838060559,
"learning_rate": 6.59878335013131e-11,
"loss": 0.624,
"step": 19570
},
{
"epoch": 0.9982665443050882,
"grad_norm": 4.026961541414647,
"learning_rate": 3.940189603840461e-11,
"loss": 0.5493,
"step": 19580
},
{
"epoch": 0.9987763842153564,
"grad_norm": 4.025535540667108,
"learning_rate": 1.9632804523772587e-11,
"loss": 0.5757,
"step": 19590
},
{
"epoch": 0.9992862241256245,
"grad_norm": 10.89356163552132,
"learning_rate": 6.680612863463332e-12,
"loss": 0.5871,
"step": 19600
},
{
"epoch": 0.9997960640358927,
"grad_norm": 7.500947430018431,
"learning_rate": 5.453563750590363e-13,
"loss": 0.5528,
"step": 19610
},
{
"epoch": 1.0,
"step": 19614,
"total_flos": 1.1749560677040128e+16,
"train_loss": 0.6247651589151381,
"train_runtime": 104252.569,
"train_samples_per_second": 12.041,
"train_steps_per_second": 0.188
}
],
"logging_steps": 10,
"max_steps": 19614,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1749560677040128e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}