{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 19532,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005119803399549457,
"grad_norm": 1.4939812421798706,
"learning_rate": 7.679705099324186e-05,
"loss": 8.8823,
"step": 10
},
{
"epoch": 0.0010239606799098914,
"grad_norm": 1.7099491357803345,
"learning_rate": 0.00015359410198648372,
"loss": 8.4098,
"step": 20
},
{
"epoch": 0.0015359410198648373,
"grad_norm": 1.341354489326477,
"learning_rate": 0.00023039115297972558,
"loss": 7.809,
"step": 30
},
{
"epoch": 0.0020479213598197828,
"grad_norm": 1.2555238008499146,
"learning_rate": 0.00030718820397296744,
"loss": 7.2814,
"step": 40
},
{
"epoch": 0.0025599016997747285,
"grad_norm": 1.1430288553237915,
"learning_rate": 0.0003839852549662093,
"loss": 6.8009,
"step": 50
},
{
"epoch": 0.0030718820397296746,
"grad_norm": 0.7848866581916809,
"learning_rate": 0.00046078230595945115,
"loss": 6.4164,
"step": 60
},
{
"epoch": 0.0035838623796846203,
"grad_norm": 1.1270220279693604,
"learning_rate": 0.000537579356952693,
"loss": 6.1553,
"step": 70
},
{
"epoch": 0.0040958427196395655,
"grad_norm": 0.5496548414230347,
"learning_rate": 0.0006143764079459349,
"loss": 5.9572,
"step": 80
},
{
"epoch": 0.004607823059594511,
"grad_norm": 0.8258134126663208,
"learning_rate": 0.0006911734589391768,
"loss": 5.7536,
"step": 90
},
{
"epoch": 0.005119803399549457,
"grad_norm": 0.544425368309021,
"learning_rate": 0.0007679705099324186,
"loss": 5.6043,
"step": 100
},
{
"epoch": 0.005631783739504403,
"grad_norm": 1.129820466041565,
"learning_rate": 0.0008447675609256605,
"loss": 5.3984,
"step": 110
},
{
"epoch": 0.006143764079459349,
"grad_norm": 1.6234118938446045,
"learning_rate": 0.0009215646119189023,
"loss": 5.2392,
"step": 120
},
{
"epoch": 0.006655744419414295,
"grad_norm": 0.7183708548545837,
"learning_rate": 0.0009983616629121441,
"loss": 5.107,
"step": 130
},
{
"epoch": 0.0071677247593692405,
"grad_norm": 1.0296348333358765,
"learning_rate": 0.001075158713905386,
"loss": 4.9662,
"step": 140
},
{
"epoch": 0.007679705099324186,
"grad_norm": 1.6978133916854858,
"learning_rate": 0.001151955764898628,
"loss": 4.8161,
"step": 150
},
{
"epoch": 0.008191685439279131,
"grad_norm": 0.8946409821510315,
"learning_rate": 0.0012287528158918697,
"loss": 4.7119,
"step": 160
},
{
"epoch": 0.008703665779234078,
"grad_norm": 1.0135765075683594,
"learning_rate": 0.0013055498668851117,
"loss": 4.6082,
"step": 170
},
{
"epoch": 0.009215646119189022,
"grad_norm": 0.8236331343650818,
"learning_rate": 0.0013823469178783536,
"loss": 4.496,
"step": 180
},
{
"epoch": 0.009727626459143969,
"grad_norm": 1.161008596420288,
"learning_rate": 0.0014591439688715956,
"loss": 4.4071,
"step": 190
},
{
"epoch": 0.010239606799098914,
"grad_norm": 1.3253235816955566,
"learning_rate": 0.0015,
"loss": 4.3286,
"step": 200
},
{
"epoch": 0.01075158713905386,
"grad_norm": 1.6026867628097534,
"learning_rate": 0.0015,
"loss": 4.2374,
"step": 210
},
{
"epoch": 0.011263567479008805,
"grad_norm": 1.0043503046035767,
"learning_rate": 0.0015,
"loss": 4.1526,
"step": 220
},
{
"epoch": 0.011775547818963752,
"grad_norm": 0.963283121585846,
"learning_rate": 0.0015,
"loss": 4.0709,
"step": 230
},
{
"epoch": 0.012287528158918698,
"grad_norm": 0.8025517463684082,
"learning_rate": 0.0015,
"loss": 3.9997,
"step": 240
},
{
"epoch": 0.012799508498873643,
"grad_norm": 0.7000623345375061,
"learning_rate": 0.0015,
"loss": 3.91,
"step": 250
},
{
"epoch": 0.01331148883882859,
"grad_norm": 0.8964600563049316,
"learning_rate": 0.0015,
"loss": 3.8844,
"step": 260
},
{
"epoch": 0.013823469178783535,
"grad_norm": 0.7321097254753113,
"learning_rate": 0.0015,
"loss": 3.8324,
"step": 270
},
{
"epoch": 0.014335449518738481,
"grad_norm": 0.8242825269699097,
"learning_rate": 0.0015,
"loss": 3.7653,
"step": 280
},
{
"epoch": 0.014847429858693426,
"grad_norm": 1.045832633972168,
"learning_rate": 0.0015,
"loss": 3.7115,
"step": 290
},
{
"epoch": 0.015359410198648372,
"grad_norm": 1.0511783361434937,
"learning_rate": 0.0015,
"loss": 3.6754,
"step": 300
},
{
"epoch": 0.015871390538603317,
"grad_norm": 0.79283607006073,
"learning_rate": 0.0015,
"loss": 3.615,
"step": 310
},
{
"epoch": 0.016383370878558262,
"grad_norm": 0.7592840194702148,
"learning_rate": 0.0015,
"loss": 3.5692,
"step": 320
},
{
"epoch": 0.01689535121851321,
"grad_norm": 0.6317871809005737,
"learning_rate": 0.0015,
"loss": 3.5581,
"step": 330
},
{
"epoch": 0.017407331558468155,
"grad_norm": 0.8634727597236633,
"learning_rate": 0.0015,
"loss": 3.5035,
"step": 340
},
{
"epoch": 0.0179193118984231,
"grad_norm": 0.9801504611968994,
"learning_rate": 0.0015,
"loss": 3.4543,
"step": 350
},
{
"epoch": 0.018431292238378045,
"grad_norm": 0.9941282868385315,
"learning_rate": 0.0015,
"loss": 3.4323,
"step": 360
},
{
"epoch": 0.018943272578332993,
"grad_norm": 1.1075271368026733,
"learning_rate": 0.0015,
"loss": 3.3992,
"step": 370
},
{
"epoch": 0.019455252918287938,
"grad_norm": 0.9263769388198853,
"learning_rate": 0.0015,
"loss": 3.3484,
"step": 380
},
{
"epoch": 0.019967233258242883,
"grad_norm": 0.6879151463508606,
"learning_rate": 0.0015,
"loss": 3.3255,
"step": 390
},
{
"epoch": 0.020479213598197828,
"grad_norm": 1.0170198678970337,
"learning_rate": 0.0015,
"loss": 3.2744,
"step": 400
},
{
"epoch": 0.020991193938152776,
"grad_norm": 0.9534377455711365,
"learning_rate": 0.0015,
"loss": 3.2513,
"step": 410
},
{
"epoch": 0.02150317427810772,
"grad_norm": 1.1487725973129272,
"learning_rate": 0.0015,
"loss": 3.2043,
"step": 420
},
{
"epoch": 0.022015154618062666,
"grad_norm": 0.8081286549568176,
"learning_rate": 0.0015,
"loss": 3.1891,
"step": 430
},
{
"epoch": 0.02252713495801761,
"grad_norm": 0.8324559926986694,
"learning_rate": 0.0015,
"loss": 3.1025,
"step": 440
},
{
"epoch": 0.02303911529797256,
"grad_norm": 0.9536003470420837,
"learning_rate": 0.0015,
"loss": 3.1029,
"step": 450
},
{
"epoch": 0.023551095637927504,
"grad_norm": 1.3307809829711914,
"learning_rate": 0.0015,
"loss": 3.0508,
"step": 460
},
{
"epoch": 0.02406307597788245,
"grad_norm": 1.237606167793274,
"learning_rate": 0.0015,
"loss": 3.0528,
"step": 470
},
{
"epoch": 0.024575056317837397,
"grad_norm": 0.9293427467346191,
"learning_rate": 0.0015,
"loss": 2.9933,
"step": 480
},
{
"epoch": 0.02508703665779234,
"grad_norm": 0.8388038873672485,
"learning_rate": 0.0015,
"loss": 2.9593,
"step": 490
},
{
"epoch": 0.025599016997747286,
"grad_norm": 0.7568584084510803,
"learning_rate": 0.0015,
"loss": 2.9442,
"step": 500
},
{
"epoch": 0.02611099733770223,
"grad_norm": 0.7443001866340637,
"learning_rate": 0.0015,
"loss": 2.9138,
"step": 510
},
{
"epoch": 0.02662297767765718,
"grad_norm": 0.9567376375198364,
"learning_rate": 0.0015,
"loss": 2.8952,
"step": 520
},
{
"epoch": 0.027134958017612124,
"grad_norm": 0.7521085143089294,
"learning_rate": 0.0015,
"loss": 2.8719,
"step": 530
},
{
"epoch": 0.02764693835756707,
"grad_norm": 1.0200743675231934,
"learning_rate": 0.0015,
"loss": 2.8533,
"step": 540
},
{
"epoch": 0.028158918697522014,
"grad_norm": 0.8097197413444519,
"learning_rate": 0.0015,
"loss": 2.8476,
"step": 550
},
{
"epoch": 0.028670899037476962,
"grad_norm": 0.7335869669914246,
"learning_rate": 0.0015,
"loss": 2.7611,
"step": 560
},
{
"epoch": 0.029182879377431907,
"grad_norm": 0.7385020852088928,
"learning_rate": 0.0015,
"loss": 2.7824,
"step": 570
},
{
"epoch": 0.029694859717386852,
"grad_norm": 0.8730366826057434,
"learning_rate": 0.0015,
"loss": 2.7236,
"step": 580
},
{
"epoch": 0.030206840057341797,
"grad_norm": 0.8042418360710144,
"learning_rate": 0.0015,
"loss": 2.7331,
"step": 590
},
{
"epoch": 0.030718820397296745,
"grad_norm": 0.7750236392021179,
"learning_rate": 0.0015,
"loss": 2.6946,
"step": 600
},
{
"epoch": 0.03123080073725169,
"grad_norm": 1.130753755569458,
"learning_rate": 0.0015,
"loss": 2.7127,
"step": 610
},
{
"epoch": 0.031742781077206635,
"grad_norm": 0.7699748277664185,
"learning_rate": 0.0015,
"loss": 2.665,
"step": 620
},
{
"epoch": 0.03225476141716158,
"grad_norm": 0.7676917314529419,
"learning_rate": 0.0015,
"loss": 2.6516,
"step": 630
},
{
"epoch": 0.032766741757116524,
"grad_norm": 0.9566435217857361,
"learning_rate": 0.0015,
"loss": 2.6311,
"step": 640
},
{
"epoch": 0.03327872209707147,
"grad_norm": 0.9319092631340027,
"learning_rate": 0.0015,
"loss": 2.6062,
"step": 650
},
{
"epoch": 0.03379070243702642,
"grad_norm": 0.7314916849136353,
"learning_rate": 0.0015,
"loss": 2.5822,
"step": 660
},
{
"epoch": 0.03430268277698136,
"grad_norm": 0.765346109867096,
"learning_rate": 0.0015,
"loss": 2.587,
"step": 670
},
{
"epoch": 0.03481466311693631,
"grad_norm": 0.8714979887008667,
"learning_rate": 0.0015,
"loss": 2.5479,
"step": 680
},
{
"epoch": 0.03532664345689126,
"grad_norm": 0.7182953357696533,
"learning_rate": 0.0015,
"loss": 2.5388,
"step": 690
},
{
"epoch": 0.0358386237968462,
"grad_norm": 0.71555095911026,
"learning_rate": 0.0015,
"loss": 2.5196,
"step": 700
},
{
"epoch": 0.03635060413680115,
"grad_norm": 0.6901549696922302,
"learning_rate": 0.0015,
"loss": 2.4948,
"step": 710
},
{
"epoch": 0.03686258447675609,
"grad_norm": 0.7073848247528076,
"learning_rate": 0.0015,
"loss": 2.4814,
"step": 720
},
{
"epoch": 0.03737456481671104,
"grad_norm": 0.6590971350669861,
"learning_rate": 0.0015,
"loss": 2.4799,
"step": 730
},
{
"epoch": 0.037886545156665986,
"grad_norm": 0.6124588251113892,
"learning_rate": 0.0015,
"loss": 2.4529,
"step": 740
},
{
"epoch": 0.03839852549662093,
"grad_norm": 0.7170097231864929,
"learning_rate": 0.0015,
"loss": 2.4397,
"step": 750
},
{
"epoch": 0.038910505836575876,
"grad_norm": 0.7509459853172302,
"learning_rate": 0.0015,
"loss": 2.433,
"step": 760
},
{
"epoch": 0.039422486176530824,
"grad_norm": 0.8185219168663025,
"learning_rate": 0.0015,
"loss": 2.4364,
"step": 770
},
{
"epoch": 0.039934466516485766,
"grad_norm": 0.6452121734619141,
"learning_rate": 0.0015,
"loss": 2.4375,
"step": 780
},
{
"epoch": 0.040446446856440714,
"grad_norm": 0.7798700928688049,
"learning_rate": 0.0015,
"loss": 2.4082,
"step": 790
},
{
"epoch": 0.040958427196395655,
"grad_norm": 0.905072808265686,
"learning_rate": 0.0015,
"loss": 2.3811,
"step": 800
},
{
"epoch": 0.041470407536350604,
"grad_norm": 0.7047348618507385,
"learning_rate": 0.0015,
"loss": 2.3955,
"step": 810
},
{
"epoch": 0.04198238787630555,
"grad_norm": 0.6472852230072021,
"learning_rate": 0.0015,
"loss": 2.3776,
"step": 820
},
{
"epoch": 0.04249436821626049,
"grad_norm": 0.729308545589447,
"learning_rate": 0.0015,
"loss": 2.3465,
"step": 830
},
{
"epoch": 0.04300634855621544,
"grad_norm": 0.8292624950408936,
"learning_rate": 0.0015,
"loss": 2.3578,
"step": 840
},
{
"epoch": 0.04351832889617039,
"grad_norm": 0.6298139691352844,
"learning_rate": 0.0015,
"loss": 2.3349,
"step": 850
},
{
"epoch": 0.04403030923612533,
"grad_norm": 0.647214949131012,
"learning_rate": 0.0015,
"loss": 2.299,
"step": 860
},
{
"epoch": 0.04454228957608028,
"grad_norm": 0.7034851312637329,
"learning_rate": 0.0015,
"loss": 2.2927,
"step": 870
},
{
"epoch": 0.04505426991603522,
"grad_norm": 0.6373961567878723,
"learning_rate": 0.0015,
"loss": 2.2776,
"step": 880
},
{
"epoch": 0.04556625025599017,
"grad_norm": 0.8384701609611511,
"learning_rate": 0.0015,
"loss": 2.2948,
"step": 890
},
{
"epoch": 0.04607823059594512,
"grad_norm": 0.7856025695800781,
"learning_rate": 0.0015,
"loss": 2.3034,
"step": 900
},
{
"epoch": 0.04659021093590006,
"grad_norm": 0.6041284799575806,
"learning_rate": 0.0015,
"loss": 2.2773,
"step": 910
},
{
"epoch": 0.04710219127585501,
"grad_norm": 0.8801588416099548,
"learning_rate": 0.0015,
"loss": 2.2706,
"step": 920
},
{
"epoch": 0.047614171615809955,
"grad_norm": 0.7567424178123474,
"learning_rate": 0.0015,
"loss": 2.2754,
"step": 930
},
{
"epoch": 0.0481261519557649,
"grad_norm": 0.6421610713005066,
"learning_rate": 0.0015,
"loss": 2.2514,
"step": 940
},
{
"epoch": 0.048638132295719845,
"grad_norm": 0.7311142683029175,
"learning_rate": 0.0015,
"loss": 2.2005,
"step": 950
},
{
"epoch": 0.04915011263567479,
"grad_norm": 0.7399065494537354,
"learning_rate": 0.0015,
"loss": 2.2038,
"step": 960
},
{
"epoch": 0.049662092975629735,
"grad_norm": 0.708454430103302,
"learning_rate": 0.0015,
"loss": 2.1758,
"step": 970
},
{
"epoch": 0.05017407331558468,
"grad_norm": 0.6199438571929932,
"learning_rate": 0.0015,
"loss": 2.227,
"step": 980
},
{
"epoch": 0.050686053655539624,
"grad_norm": 0.6159200668334961,
"learning_rate": 0.0015,
"loss": 2.1547,
"step": 990
},
{
"epoch": 0.05119803399549457,
"grad_norm": 0.6560512781143188,
"learning_rate": 0.0015,
"loss": 2.1787,
"step": 1000
},
{
"epoch": 0.05171001433544952,
"grad_norm": 0.6151387691497803,
"learning_rate": 0.0015,
"loss": 2.1776,
"step": 1010
},
{
"epoch": 0.05222199467540446,
"grad_norm": 0.6162774562835693,
"learning_rate": 0.0015,
"loss": 2.1604,
"step": 1020
},
{
"epoch": 0.05273397501535941,
"grad_norm": 0.6564657092094421,
"learning_rate": 0.0015,
"loss": 2.1837,
"step": 1030
},
{
"epoch": 0.05324595535531436,
"grad_norm": 0.5790508985519409,
"learning_rate": 0.0015,
"loss": 2.1561,
"step": 1040
},
{
"epoch": 0.0537579356952693,
"grad_norm": 0.6484589576721191,
"learning_rate": 0.0015,
"loss": 2.1676,
"step": 1050
},
{
"epoch": 0.05426991603522425,
"grad_norm": 0.6969457268714905,
"learning_rate": 0.0015,
"loss": 2.1462,
"step": 1060
},
{
"epoch": 0.05478189637517919,
"grad_norm": 0.7145557403564453,
"learning_rate": 0.0015,
"loss": 2.13,
"step": 1070
},
{
"epoch": 0.05529387671513414,
"grad_norm": 0.6353093981742859,
"learning_rate": 0.0015,
"loss": 2.1197,
"step": 1080
},
{
"epoch": 0.055805857055089086,
"grad_norm": 0.5896279811859131,
"learning_rate": 0.0015,
"loss": 2.1177,
"step": 1090
},
{
"epoch": 0.05631783739504403,
"grad_norm": 0.6247608661651611,
"learning_rate": 0.0015,
"loss": 2.1123,
"step": 1100
},
{
"epoch": 0.056829817734998976,
"grad_norm": 0.6024080514907837,
"learning_rate": 0.0015,
"loss": 2.0949,
"step": 1110
},
{
"epoch": 0.057341798074953924,
"grad_norm": 0.7400630116462708,
"learning_rate": 0.0015,
"loss": 2.0915,
"step": 1120
},
{
"epoch": 0.057853778414908866,
"grad_norm": 0.6276081800460815,
"learning_rate": 0.0015,
"loss": 2.0916,
"step": 1130
},
{
"epoch": 0.058365758754863814,
"grad_norm": 0.7214579582214355,
"learning_rate": 0.0015,
"loss": 2.1027,
"step": 1140
},
{
"epoch": 0.05887773909481876,
"grad_norm": 0.7833266258239746,
"learning_rate": 0.0015,
"loss": 2.0884,
"step": 1150
},
{
"epoch": 0.059389719434773704,
"grad_norm": 0.7453588247299194,
"learning_rate": 0.0015,
"loss": 2.0764,
"step": 1160
},
{
"epoch": 0.05990169977472865,
"grad_norm": 0.5965461134910583,
"learning_rate": 0.0015,
"loss": 2.0941,
"step": 1170
},
{
"epoch": 0.06041368011468359,
"grad_norm": 0.6565614938735962,
"learning_rate": 0.0015,
"loss": 2.0396,
"step": 1180
},
{
"epoch": 0.06092566045463854,
"grad_norm": 0.670816957950592,
"learning_rate": 0.0015,
"loss": 2.0629,
"step": 1190
},
{
"epoch": 0.06143764079459349,
"grad_norm": 0.6220470666885376,
"learning_rate": 0.0015,
"loss": 2.064,
"step": 1200
},
{
"epoch": 0.06194962113454843,
"grad_norm": 0.5919376015663147,
"learning_rate": 0.0015,
"loss": 2.0385,
"step": 1210
},
{
"epoch": 0.06246160147450338,
"grad_norm": 0.6242793202400208,
"learning_rate": 0.0015,
"loss": 2.0487,
"step": 1220
},
{
"epoch": 0.06297358181445832,
"grad_norm": 0.5903810262680054,
"learning_rate": 0.0015,
"loss": 2.0348,
"step": 1230
},
{
"epoch": 0.06348556215441327,
"grad_norm": 0.6573896408081055,
"learning_rate": 0.0015,
"loss": 2.0186,
"step": 1240
},
{
"epoch": 0.06399754249436822,
"grad_norm": 0.6017488241195679,
"learning_rate": 0.0015,
"loss": 2.0126,
"step": 1250
},
{
"epoch": 0.06450952283432317,
"grad_norm": 0.533431351184845,
"learning_rate": 0.0015,
"loss": 2.026,
"step": 1260
},
{
"epoch": 0.06502150317427811,
"grad_norm": 0.5461450815200806,
"learning_rate": 0.0015,
"loss": 1.9961,
"step": 1270
},
{
"epoch": 0.06553348351423305,
"grad_norm": 0.5579766035079956,
"learning_rate": 0.0015,
"loss": 2.0064,
"step": 1280
},
{
"epoch": 0.066045463854188,
"grad_norm": 0.5514289736747742,
"learning_rate": 0.0015,
"loss": 2.0079,
"step": 1290
},
{
"epoch": 0.06655744419414295,
"grad_norm": 0.5938010215759277,
"learning_rate": 0.0015,
"loss": 1.9811,
"step": 1300
},
{
"epoch": 0.0670694245340979,
"grad_norm": 0.703124463558197,
"learning_rate": 0.0015,
"loss": 1.9634,
"step": 1310
},
{
"epoch": 0.06758140487405284,
"grad_norm": 0.545432448387146,
"learning_rate": 0.0015,
"loss": 1.9927,
"step": 1320
},
{
"epoch": 0.06809338521400778,
"grad_norm": 0.5673125386238098,
"learning_rate": 0.0015,
"loss": 1.9911,
"step": 1330
},
{
"epoch": 0.06860536555396272,
"grad_norm": 0.5682245492935181,
"learning_rate": 0.0015,
"loss": 1.9733,
"step": 1340
},
{
"epoch": 0.06911734589391767,
"grad_norm": 0.5960274934768677,
"learning_rate": 0.0015,
"loss": 1.9733,
"step": 1350
},
{
"epoch": 0.06962932623387262,
"grad_norm": 0.6102215051651001,
"learning_rate": 0.0015,
"loss": 1.9559,
"step": 1360
},
{
"epoch": 0.07014130657382757,
"grad_norm": 0.5990728735923767,
"learning_rate": 0.0015,
"loss": 1.9463,
"step": 1370
},
{
"epoch": 0.07065328691378252,
"grad_norm": 0.6161502003669739,
"learning_rate": 0.0015,
"loss": 1.978,
"step": 1380
},
{
"epoch": 0.07116526725373745,
"grad_norm": 0.5682898759841919,
"learning_rate": 0.0015,
"loss": 1.9558,
"step": 1390
},
{
"epoch": 0.0716772475936924,
"grad_norm": 0.5973048210144043,
"learning_rate": 0.0015,
"loss": 1.9376,
"step": 1400
},
{
"epoch": 0.07218922793364735,
"grad_norm": 0.5553535223007202,
"learning_rate": 0.0015,
"loss": 1.9468,
"step": 1410
},
{
"epoch": 0.0727012082736023,
"grad_norm": 0.5181711912155151,
"learning_rate": 0.0015,
"loss": 1.9188,
"step": 1420
},
{
"epoch": 0.07321318861355725,
"grad_norm": 0.6532855033874512,
"learning_rate": 0.0015,
"loss": 1.9069,
"step": 1430
},
{
"epoch": 0.07372516895351218,
"grad_norm": 0.531043291091919,
"learning_rate": 0.0015,
"loss": 1.9319,
"step": 1440
},
{
"epoch": 0.07423714929346713,
"grad_norm": 0.5700235962867737,
"learning_rate": 0.0015,
"loss": 1.8891,
"step": 1450
},
{
"epoch": 0.07474912963342208,
"grad_norm": 0.523414134979248,
"learning_rate": 0.0015,
"loss": 1.9165,
"step": 1460
},
{
"epoch": 0.07526110997337702,
"grad_norm": 0.5649904608726501,
"learning_rate": 0.0015,
"loss": 1.905,
"step": 1470
},
{
"epoch": 0.07577309031333197,
"grad_norm": 0.5912672877311707,
"learning_rate": 0.0015,
"loss": 1.9162,
"step": 1480
},
{
"epoch": 0.07628507065328691,
"grad_norm": 0.5597636699676514,
"learning_rate": 0.0015,
"loss": 1.9158,
"step": 1490
},
{
"epoch": 0.07679705099324186,
"grad_norm": 0.553896963596344,
"learning_rate": 0.0015,
"loss": 1.871,
"step": 1500
},
{
"epoch": 0.0773090313331968,
"grad_norm": 0.5018342137336731,
"learning_rate": 0.0015,
"loss": 1.9119,
"step": 1510
},
{
"epoch": 0.07782101167315175,
"grad_norm": 0.5367796421051025,
"learning_rate": 0.0015,
"loss": 1.8706,
"step": 1520
},
{
"epoch": 0.0783329920131067,
"grad_norm": 0.5023203492164612,
"learning_rate": 0.0015,
"loss": 1.8808,
"step": 1530
},
{
"epoch": 0.07884497235306165,
"grad_norm": 0.5962059497833252,
"learning_rate": 0.0015,
"loss": 1.9022,
"step": 1540
},
{
"epoch": 0.07935695269301658,
"grad_norm": 0.5200186967849731,
"learning_rate": 0.0015,
"loss": 1.8728,
"step": 1550
},
{
"epoch": 0.07986893303297153,
"grad_norm": 0.5361810922622681,
"learning_rate": 0.0015,
"loss": 1.8462,
"step": 1560
},
{
"epoch": 0.08038091337292648,
"grad_norm": 0.5771626830101013,
"learning_rate": 0.0015,
"loss": 1.873,
"step": 1570
},
{
"epoch": 0.08089289371288143,
"grad_norm": 0.5451227426528931,
"learning_rate": 0.0015,
"loss": 1.8693,
"step": 1580
},
{
"epoch": 0.08140487405283638,
"grad_norm": 0.5574854016304016,
"learning_rate": 0.0015,
"loss": 1.8615,
"step": 1590
},
{
"epoch": 0.08191685439279131,
"grad_norm": 0.574317455291748,
"learning_rate": 0.0015,
"loss": 1.8424,
"step": 1600
},
{
"epoch": 0.08242883473274626,
"grad_norm": 0.545906662940979,
"learning_rate": 0.0015,
"loss": 1.8572,
"step": 1610
},
{
"epoch": 0.08294081507270121,
"grad_norm": 0.5127050280570984,
"learning_rate": 0.0015,
"loss": 1.8391,
"step": 1620
},
{
"epoch": 0.08345279541265616,
"grad_norm": 0.5646129250526428,
"learning_rate": 0.0015,
"loss": 1.8316,
"step": 1630
},
{
"epoch": 0.0839647757526111,
"grad_norm": 0.5549367666244507,
"learning_rate": 0.0015,
"loss": 1.8371,
"step": 1640
},
{
"epoch": 0.08447675609256605,
"grad_norm": 0.5479699373245239,
"learning_rate": 0.0015,
"loss": 1.8378,
"step": 1650
},
{
"epoch": 0.08498873643252099,
"grad_norm": 0.5359328985214233,
"learning_rate": 0.0015,
"loss": 1.8372,
"step": 1660
},
{
"epoch": 0.08550071677247593,
"grad_norm": 0.5599870085716248,
"learning_rate": 0.0015,
"loss": 1.8499,
"step": 1670
},
{
"epoch": 0.08601269711243088,
"grad_norm": 0.5272551774978638,
"learning_rate": 0.0015,
"loss": 1.8381,
"step": 1680
},
{
"epoch": 0.08652467745238583,
"grad_norm": 0.534377932548523,
"learning_rate": 0.0015,
"loss": 1.8124,
"step": 1690
},
{
"epoch": 0.08703665779234078,
"grad_norm": 0.6432906985282898,
"learning_rate": 0.0015,
"loss": 1.8354,
"step": 1700
},
{
"epoch": 0.08754863813229571,
"grad_norm": 0.5227901935577393,
"learning_rate": 0.0015,
"loss": 1.8091,
"step": 1710
},
{
"epoch": 0.08806061847225066,
"grad_norm": 0.48951131105422974,
"learning_rate": 0.0015,
"loss": 1.7854,
"step": 1720
},
{
"epoch": 0.08857259881220561,
"grad_norm": 0.5127034783363342,
"learning_rate": 0.0015,
"loss": 1.8208,
"step": 1730
},
{
"epoch": 0.08908457915216056,
"grad_norm": 0.5147260427474976,
"learning_rate": 0.0015,
"loss": 1.8289,
"step": 1740
},
{
"epoch": 0.08959655949211551,
"grad_norm": 0.536268413066864,
"learning_rate": 0.0015,
"loss": 1.7894,
"step": 1750
},
{
"epoch": 0.09010853983207044,
"grad_norm": 0.537369966506958,
"learning_rate": 0.0015,
"loss": 1.7985,
"step": 1760
},
{
"epoch": 0.09062052017202539,
"grad_norm": 0.5217599868774414,
"learning_rate": 0.0015,
"loss": 1.8196,
"step": 1770
},
{
"epoch": 0.09113250051198034,
"grad_norm": 0.47711503505706787,
"learning_rate": 0.0015,
"loss": 1.7931,
"step": 1780
},
{
"epoch": 0.09164448085193529,
"grad_norm": 0.5544558763504028,
"learning_rate": 0.0015,
"loss": 1.8201,
"step": 1790
},
{
"epoch": 0.09215646119189023,
"grad_norm": 0.5024393200874329,
"learning_rate": 0.0015,
"loss": 1.7974,
"step": 1800
},
{
"epoch": 0.09266844153184518,
"grad_norm": 0.5126355290412903,
"learning_rate": 0.0015,
"loss": 1.7874,
"step": 1810
},
{
"epoch": 0.09318042187180012,
"grad_norm": 0.5882781744003296,
"learning_rate": 0.0015,
"loss": 1.791,
"step": 1820
},
{
"epoch": 0.09369240221175507,
"grad_norm": 0.508765697479248,
"learning_rate": 0.0015,
"loss": 1.7819,
"step": 1830
},
{
"epoch": 0.09420438255171001,
"grad_norm": 0.5449949502944946,
"learning_rate": 0.0015,
"loss": 1.7838,
"step": 1840
},
{
"epoch": 0.09471636289166496,
"grad_norm": 0.4996667802333832,
"learning_rate": 0.0015,
"loss": 1.7618,
"step": 1850
},
{
"epoch": 0.09522834323161991,
"grad_norm": 0.5014889240264893,
"learning_rate": 0.0015,
"loss": 1.7752,
"step": 1860
},
{
"epoch": 0.09574032357157485,
"grad_norm": 0.5011769533157349,
"learning_rate": 0.0015,
"loss": 1.7768,
"step": 1870
},
{
"epoch": 0.0962523039115298,
"grad_norm": 0.49963292479515076,
"learning_rate": 0.0015,
"loss": 1.778,
"step": 1880
},
{
"epoch": 0.09676428425148474,
"grad_norm": 0.46659213304519653,
"learning_rate": 0.0015,
"loss": 1.7668,
"step": 1890
},
{
"epoch": 0.09727626459143969,
"grad_norm": 0.5140760540962219,
"learning_rate": 0.0015,
"loss": 1.7448,
"step": 1900
},
{
"epoch": 0.09778824493139464,
"grad_norm": 0.49709445238113403,
"learning_rate": 0.0015,
"loss": 1.7573,
"step": 1910
},
{
"epoch": 0.09830022527134959,
"grad_norm": 0.464329332113266,
"learning_rate": 0.0015,
"loss": 1.7435,
"step": 1920
},
{
"epoch": 0.09881220561130452,
"grad_norm": 0.4815766215324402,
"learning_rate": 0.0015,
"loss": 1.7533,
"step": 1930
},
{
"epoch": 0.09932418595125947,
"grad_norm": 0.4601441025733948,
"learning_rate": 0.0015,
"loss": 1.7339,
"step": 1940
},
{
"epoch": 0.09983616629121442,
"grad_norm": 0.46905994415283203,
"learning_rate": 0.0015,
"loss": 1.7421,
"step": 1950
},
{
"epoch": 0.10034814663116937,
"grad_norm": 0.4927903413772583,
"learning_rate": 0.0015,
"loss": 1.7259,
"step": 1960
},
{
"epoch": 0.10086012697112431,
"grad_norm": 0.4930973947048187,
"learning_rate": 0.0015,
"loss": 1.735,
"step": 1970
},
{
"epoch": 0.10137210731107925,
"grad_norm": 0.4698399305343628,
"learning_rate": 0.0015,
"loss": 1.7478,
"step": 1980
},
{
"epoch": 0.1018840876510342,
"grad_norm": 0.5083284974098206,
"learning_rate": 0.0015,
"loss": 1.7491,
"step": 1990
},
{
"epoch": 0.10239606799098915,
"grad_norm": 0.4888325035572052,
"learning_rate": 0.0015,
"loss": 1.7261,
"step": 2000
},
{
"epoch": 0.1029080483309441,
"grad_norm": 0.524994432926178,
"learning_rate": 0.0015,
"loss": 1.7221,
"step": 2010
},
{
"epoch": 0.10342002867089904,
"grad_norm": 0.49820294976234436,
"learning_rate": 0.0015,
"loss": 1.7279,
"step": 2020
},
{
"epoch": 0.10393200901085399,
"grad_norm": 0.49288976192474365,
"learning_rate": 0.0015,
"loss": 1.746,
"step": 2030
},
{
"epoch": 0.10444398935080892,
"grad_norm": 0.4776252806186676,
"learning_rate": 0.0015,
"loss": 1.7384,
"step": 2040
},
{
"epoch": 0.10495596969076387,
"grad_norm": 0.46143004298210144,
"learning_rate": 0.0015,
"loss": 1.7037,
"step": 2050
},
{
"epoch": 0.10546795003071882,
"grad_norm": 0.4855809211730957,
"learning_rate": 0.0015,
"loss": 1.7052,
"step": 2060
},
{
"epoch": 0.10597993037067377,
"grad_norm": 0.491964727640152,
"learning_rate": 0.0015,
"loss": 1.7275,
"step": 2070
},
{
"epoch": 0.10649191071062872,
"grad_norm": 0.5072810053825378,
"learning_rate": 0.0015,
"loss": 1.7262,
"step": 2080
},
{
"epoch": 0.10700389105058365,
"grad_norm": 0.5020768642425537,
"learning_rate": 0.0015,
"loss": 1.7106,
"step": 2090
},
{
"epoch": 0.1075158713905386,
"grad_norm": 0.4881630837917328,
"learning_rate": 0.0015,
"loss": 1.7411,
"step": 2100
},
{
"epoch": 0.10802785173049355,
"grad_norm": 0.5104793906211853,
"learning_rate": 0.0015,
"loss": 1.7053,
"step": 2110
},
{
"epoch": 0.1085398320704485,
"grad_norm": 0.4574519991874695,
"learning_rate": 0.0015,
"loss": 1.7219,
"step": 2120
},
{
"epoch": 0.10905181241040345,
"grad_norm": 0.4427832365036011,
"learning_rate": 0.0015,
"loss": 1.6966,
"step": 2130
},
{
"epoch": 0.10956379275035838,
"grad_norm": 0.46723929047584534,
"learning_rate": 0.0015,
"loss": 1.7106,
"step": 2140
},
{
"epoch": 0.11007577309031333,
"grad_norm": 0.4710049629211426,
"learning_rate": 0.0015,
"loss": 1.7,
"step": 2150
},
{
"epoch": 0.11058775343026828,
"grad_norm": 0.46849745512008667,
"learning_rate": 0.0015,
"loss": 1.7071,
"step": 2160
},
{
"epoch": 0.11109973377022322,
"grad_norm": 0.4712335765361786,
"learning_rate": 0.0015,
"loss": 1.685,
"step": 2170
},
{
"epoch": 0.11161171411017817,
"grad_norm": 0.45318537950515747,
"learning_rate": 0.0015,
"loss": 1.6996,
"step": 2180
},
{
"epoch": 0.11212369445013312,
"grad_norm": 0.4772440791130066,
"learning_rate": 0.0015,
"loss": 1.705,
"step": 2190
},
{
"epoch": 0.11263567479008806,
"grad_norm": 0.4854085147380829,
"learning_rate": 0.0015,
"loss": 1.691,
"step": 2200
},
{
"epoch": 0.113147655130043,
"grad_norm": 0.4931398928165436,
"learning_rate": 0.0015,
"loss": 1.6979,
"step": 2210
},
{
"epoch": 0.11365963546999795,
"grad_norm": 0.4212550222873688,
"learning_rate": 0.0015,
"loss": 1.6792,
"step": 2220
},
{
"epoch": 0.1141716158099529,
"grad_norm": 0.4916476905345917,
"learning_rate": 0.0015,
"loss": 1.682,
"step": 2230
},
{
"epoch": 0.11468359614990785,
"grad_norm": 0.44974076747894287,
"learning_rate": 0.0015,
"loss": 1.6734,
"step": 2240
},
{
"epoch": 0.11519557648986278,
"grad_norm": 0.4464137554168701,
"learning_rate": 0.0015,
"loss": 1.7032,
"step": 2250
},
{
"epoch": 0.11570755682981773,
"grad_norm": 0.4473714530467987,
"learning_rate": 0.0015,
"loss": 1.6868,
"step": 2260
},
{
"epoch": 0.11621953716977268,
"grad_norm": 0.4802720844745636,
"learning_rate": 0.0015,
"loss": 1.6805,
"step": 2270
},
{
"epoch": 0.11673151750972763,
"grad_norm": 0.45060625672340393,
"learning_rate": 0.0015,
"loss": 1.6716,
"step": 2280
},
{
"epoch": 0.11724349784968258,
"grad_norm": 0.47407498955726624,
"learning_rate": 0.0015,
"loss": 1.6569,
"step": 2290
},
{
"epoch": 0.11775547818963752,
"grad_norm": 0.45615556836128235,
"learning_rate": 0.0015,
"loss": 1.6682,
"step": 2300
},
{
"epoch": 0.11826745852959246,
"grad_norm": 0.4670998156070709,
"learning_rate": 0.0015,
"loss": 1.6785,
"step": 2310
},
{
"epoch": 0.11877943886954741,
"grad_norm": 0.45432570576667786,
"learning_rate": 0.0015,
"loss": 1.674,
"step": 2320
},
{
"epoch": 0.11929141920950236,
"grad_norm": 0.44804081320762634,
"learning_rate": 0.0015,
"loss": 1.6619,
"step": 2330
},
{
"epoch": 0.1198033995494573,
"grad_norm": 0.4523905813694,
"learning_rate": 0.0015,
"loss": 1.6652,
"step": 2340
},
{
"epoch": 0.12031537988941225,
"grad_norm": 0.4514728784561157,
"learning_rate": 0.0015,
"loss": 1.6652,
"step": 2350
},
{
"epoch": 0.12082736022936719,
"grad_norm": 0.41209134459495544,
"learning_rate": 0.0015,
"loss": 1.658,
"step": 2360
},
{
"epoch": 0.12133934056932213,
"grad_norm": 0.4219752252101898,
"learning_rate": 0.0015,
"loss": 1.6379,
"step": 2370
},
{
"epoch": 0.12185132090927708,
"grad_norm": 0.47252357006073,
"learning_rate": 0.0015,
"loss": 1.6636,
"step": 2380
},
{
"epoch": 0.12236330124923203,
"grad_norm": 0.4292849004268646,
"learning_rate": 0.0015,
"loss": 1.6528,
"step": 2390
},
{
"epoch": 0.12287528158918698,
"grad_norm": 0.4734489917755127,
"learning_rate": 0.0015,
"loss": 1.6297,
"step": 2400
},
{
"epoch": 0.12338726192914191,
"grad_norm": 0.48543623089790344,
"learning_rate": 0.0015,
"loss": 1.6404,
"step": 2410
},
{
"epoch": 0.12389924226909686,
"grad_norm": 0.4184911549091339,
"learning_rate": 0.0015,
"loss": 1.6315,
"step": 2420
},
{
"epoch": 0.12441122260905181,
"grad_norm": 0.42600351572036743,
"learning_rate": 0.0015,
"loss": 1.6502,
"step": 2430
},
{
"epoch": 0.12492320294900676,
"grad_norm": 0.4201619029045105,
"learning_rate": 0.0015,
"loss": 1.6372,
"step": 2440
},
{
"epoch": 0.1254351832889617,
"grad_norm": 0.4165250360965729,
"learning_rate": 0.0015,
"loss": 1.6334,
"step": 2450
},
{
"epoch": 0.12594716362891664,
"grad_norm": 0.4470268487930298,
"learning_rate": 0.0015,
"loss": 1.6359,
"step": 2460
},
{
"epoch": 0.1264591439688716,
"grad_norm": 0.4310542941093445,
"learning_rate": 0.0015,
"loss": 1.6439,
"step": 2470
},
{
"epoch": 0.12697112430882654,
"grad_norm": 0.4297926425933838,
"learning_rate": 0.0015,
"loss": 1.6222,
"step": 2480
},
{
"epoch": 0.1274831046487815,
"grad_norm": 0.45335137844085693,
"learning_rate": 0.0015,
"loss": 1.6559,
"step": 2490
},
{
"epoch": 0.12799508498873644,
"grad_norm": 0.4176558256149292,
"learning_rate": 0.0015,
"loss": 1.6561,
"step": 2500
},
{
"epoch": 0.12850706532869138,
"grad_norm": 0.4358290433883667,
"learning_rate": 0.0015,
"loss": 1.6241,
"step": 2510
},
{
"epoch": 0.12901904566864633,
"grad_norm": 0.44109201431274414,
"learning_rate": 0.0015,
"loss": 1.6022,
"step": 2520
},
{
"epoch": 0.12953102600860128,
"grad_norm": 0.44387978315353394,
"learning_rate": 0.0015,
"loss": 1.6335,
"step": 2530
},
{
"epoch": 0.13004300634855623,
"grad_norm": 0.434861421585083,
"learning_rate": 0.0015,
"loss": 1.6377,
"step": 2540
},
{
"epoch": 0.13055498668851115,
"grad_norm": 0.419826865196228,
"learning_rate": 0.0015,
"loss": 1.6238,
"step": 2550
},
{
"epoch": 0.1310669670284661,
"grad_norm": 0.471110463142395,
"learning_rate": 0.0015,
"loss": 1.6383,
"step": 2560
},
{
"epoch": 0.13157894736842105,
"grad_norm": 0.44935643672943115,
"learning_rate": 0.0015,
"loss": 1.6006,
"step": 2570
},
{
"epoch": 0.132090927708376,
"grad_norm": 0.4497852027416229,
"learning_rate": 0.0015,
"loss": 1.6115,
"step": 2580
},
{
"epoch": 0.13260290804833094,
"grad_norm": 0.45850351452827454,
"learning_rate": 0.0015,
"loss": 1.6194,
"step": 2590
},
{
"epoch": 0.1331148883882859,
"grad_norm": 0.40869665145874023,
"learning_rate": 0.0015,
"loss": 1.6159,
"step": 2600
},
{
"epoch": 0.13362686872824084,
"grad_norm": 0.4347962737083435,
"learning_rate": 0.0015,
"loss": 1.6254,
"step": 2610
},
{
"epoch": 0.1341388490681958,
"grad_norm": 0.4899897873401642,
"learning_rate": 0.0015,
"loss": 1.6296,
"step": 2620
},
{
"epoch": 0.13465082940815074,
"grad_norm": 0.44309839606285095,
"learning_rate": 0.0015,
"loss": 1.6179,
"step": 2630
},
{
"epoch": 0.13516280974810568,
"grad_norm": 0.3890606164932251,
"learning_rate": 0.0015,
"loss": 1.6044,
"step": 2640
},
{
"epoch": 0.13567479008806063,
"grad_norm": 0.42358025908470154,
"learning_rate": 0.0015,
"loss": 1.619,
"step": 2650
},
{
"epoch": 0.13618677042801555,
"grad_norm": 0.42111581563949585,
"learning_rate": 0.0015,
"loss": 1.6127,
"step": 2660
},
{
"epoch": 0.1366987507679705,
"grad_norm": 0.4441932141780853,
"learning_rate": 0.0015,
"loss": 1.6224,
"step": 2670
},
{
"epoch": 0.13721073110792545,
"grad_norm": 0.4351959228515625,
"learning_rate": 0.0015,
"loss": 1.5957,
"step": 2680
},
{
"epoch": 0.1377227114478804,
"grad_norm": 0.43544304370880127,
"learning_rate": 0.0015,
"loss": 1.5925,
"step": 2690
},
{
"epoch": 0.13823469178783535,
"grad_norm": 0.4298728406429291,
"learning_rate": 0.0015,
"loss": 1.5893,
"step": 2700
},
{
"epoch": 0.1387466721277903,
"grad_norm": 0.4463229477405548,
"learning_rate": 0.0015,
"loss": 1.5881,
"step": 2710
},
{
"epoch": 0.13925865246774524,
"grad_norm": 0.43847158551216125,
"learning_rate": 0.0015,
"loss": 1.5982,
"step": 2720
},
{
"epoch": 0.1397706328077002,
"grad_norm": 0.44918614625930786,
"learning_rate": 0.0015,
"loss": 1.6095,
"step": 2730
},
{
"epoch": 0.14028261314765514,
"grad_norm": 0.45398586988449097,
"learning_rate": 0.0015,
"loss": 1.5985,
"step": 2740
},
{
"epoch": 0.1407945934876101,
"grad_norm": 0.41213494539260864,
"learning_rate": 0.0015,
"loss": 1.6153,
"step": 2750
},
{
"epoch": 0.14130657382756504,
"grad_norm": 0.41266897320747375,
"learning_rate": 0.0015,
"loss": 1.5919,
"step": 2760
},
{
"epoch": 0.14181855416751996,
"grad_norm": 0.42942896485328674,
"learning_rate": 0.0015,
"loss": 1.5793,
"step": 2770
},
{
"epoch": 0.1423305345074749,
"grad_norm": 0.4180223047733307,
"learning_rate": 0.0015,
"loss": 1.5938,
"step": 2780
},
{
"epoch": 0.14284251484742985,
"grad_norm": 0.4204559922218323,
"learning_rate": 0.0015,
"loss": 1.5927,
"step": 2790
},
{
"epoch": 0.1433544951873848,
"grad_norm": 0.43727442622184753,
"learning_rate": 0.0015,
"loss": 1.6018,
"step": 2800
},
{
"epoch": 0.14386647552733975,
"grad_norm": 0.4330785870552063,
"learning_rate": 0.0015,
"loss": 1.6004,
"step": 2810
},
{
"epoch": 0.1443784558672947,
"grad_norm": 0.415101021528244,
"learning_rate": 0.0015,
"loss": 1.5708,
"step": 2820
},
{
"epoch": 0.14489043620724965,
"grad_norm": 0.41477903723716736,
"learning_rate": 0.0015,
"loss": 1.5747,
"step": 2830
},
{
"epoch": 0.1454024165472046,
"grad_norm": 0.4343889653682709,
"learning_rate": 0.0015,
"loss": 1.5958,
"step": 2840
},
{
"epoch": 0.14591439688715954,
"grad_norm": 0.4018150866031647,
"learning_rate": 0.0015,
"loss": 1.5589,
"step": 2850
},
{
"epoch": 0.1464263772271145,
"grad_norm": 0.4799724817276001,
"learning_rate": 0.0015,
"loss": 1.5745,
"step": 2860
},
{
"epoch": 0.1469383575670694,
"grad_norm": 0.42355528473854065,
"learning_rate": 0.0015,
"loss": 1.5928,
"step": 2870
},
{
"epoch": 0.14745033790702436,
"grad_norm": 0.40638747811317444,
"learning_rate": 0.0015,
"loss": 1.5623,
"step": 2880
},
{
"epoch": 0.1479623182469793,
"grad_norm": 0.39846664667129517,
"learning_rate": 0.0015,
"loss": 1.577,
"step": 2890
},
{
"epoch": 0.14847429858693426,
"grad_norm": 0.4010321795940399,
"learning_rate": 0.0015,
"loss": 1.5821,
"step": 2900
},
{
"epoch": 0.1489862789268892,
"grad_norm": 0.42778313159942627,
"learning_rate": 0.0015,
"loss": 1.5623,
"step": 2910
},
{
"epoch": 0.14949825926684415,
"grad_norm": 0.39266425371170044,
"learning_rate": 0.0015,
"loss": 1.5821,
"step": 2920
},
{
"epoch": 0.1500102396067991,
"grad_norm": 0.40784794092178345,
"learning_rate": 0.0015,
"loss": 1.5664,
"step": 2930
},
{
"epoch": 0.15052221994675405,
"grad_norm": 0.43437501788139343,
"learning_rate": 0.0015,
"loss": 1.5658,
"step": 2940
},
{
"epoch": 0.151034200286709,
"grad_norm": 0.4373057186603546,
"learning_rate": 0.0015,
"loss": 1.5591,
"step": 2950
},
{
"epoch": 0.15154618062666395,
"grad_norm": 0.40370023250579834,
"learning_rate": 0.0015,
"loss": 1.555,
"step": 2960
},
{
"epoch": 0.1520581609666189,
"grad_norm": 0.4626748263835907,
"learning_rate": 0.0015,
"loss": 1.5808,
"step": 2970
},
{
"epoch": 0.15257014130657381,
"grad_norm": 0.4095107614994049,
"learning_rate": 0.0015,
"loss": 1.5705,
"step": 2980
},
{
"epoch": 0.15308212164652876,
"grad_norm": 0.4343841075897217,
"learning_rate": 0.0015,
"loss": 1.5738,
"step": 2990
},
{
"epoch": 0.1535941019864837,
"grad_norm": 0.42325645685195923,
"learning_rate": 0.0015,
"loss": 1.567,
"step": 3000
},
{
"epoch": 0.15410608232643866,
"grad_norm": 0.39237692952156067,
"learning_rate": 0.0015,
"loss": 1.5748,
"step": 3010
},
{
"epoch": 0.1546180626663936,
"grad_norm": 0.39682793617248535,
"learning_rate": 0.0015,
"loss": 1.5711,
"step": 3020
},
{
"epoch": 0.15513004300634856,
"grad_norm": 0.4060477614402771,
"learning_rate": 0.0015,
"loss": 1.5623,
"step": 3030
},
{
"epoch": 0.1556420233463035,
"grad_norm": 0.4088119864463806,
"learning_rate": 0.0015,
"loss": 1.5532,
"step": 3040
},
{
"epoch": 0.15615400368625845,
"grad_norm": 0.39976736903190613,
"learning_rate": 0.0015,
"loss": 1.5436,
"step": 3050
},
{
"epoch": 0.1566659840262134,
"grad_norm": 0.42855167388916016,
"learning_rate": 0.0015,
"loss": 1.5577,
"step": 3060
},
{
"epoch": 0.15717796436616835,
"grad_norm": 0.4451335072517395,
"learning_rate": 0.0015,
"loss": 1.5375,
"step": 3070
},
{
"epoch": 0.1576899447061233,
"grad_norm": 0.3867264688014984,
"learning_rate": 0.0015,
"loss": 1.5418,
"step": 3080
},
{
"epoch": 0.15820192504607822,
"grad_norm": 0.4165036976337433,
"learning_rate": 0.0015,
"loss": 1.564,
"step": 3090
},
{
"epoch": 0.15871390538603317,
"grad_norm": 0.3978787958621979,
"learning_rate": 0.0015,
"loss": 1.5408,
"step": 3100
},
{
"epoch": 0.15922588572598811,
"grad_norm": 0.37848272919654846,
"learning_rate": 0.0015,
"loss": 1.5477,
"step": 3110
},
{
"epoch": 0.15973786606594306,
"grad_norm": 0.4218755066394806,
"learning_rate": 0.0015,
"loss": 1.5533,
"step": 3120
},
{
"epoch": 0.160249846405898,
"grad_norm": 0.38090386986732483,
"learning_rate": 0.0015,
"loss": 1.5453,
"step": 3130
},
{
"epoch": 0.16076182674585296,
"grad_norm": 0.39693617820739746,
"learning_rate": 0.0015,
"loss": 1.5633,
"step": 3140
},
{
"epoch": 0.1612738070858079,
"grad_norm": 0.3855767250061035,
"learning_rate": 0.0015,
"loss": 1.5381,
"step": 3150
},
{
"epoch": 0.16178578742576286,
"grad_norm": 0.3672980070114136,
"learning_rate": 0.0015,
"loss": 1.5458,
"step": 3160
},
{
"epoch": 0.1622977677657178,
"grad_norm": 0.3810063302516937,
"learning_rate": 0.0015,
"loss": 1.559,
"step": 3170
},
{
"epoch": 0.16280974810567275,
"grad_norm": 0.4658653140068054,
"learning_rate": 0.0015,
"loss": 1.5274,
"step": 3180
},
{
"epoch": 0.1633217284456277,
"grad_norm": 0.40785935521125793,
"learning_rate": 0.0015,
"loss": 1.5279,
"step": 3190
},
{
"epoch": 0.16383370878558262,
"grad_norm": 0.40147677063941956,
"learning_rate": 0.0015,
"loss": 1.542,
"step": 3200
},
{
"epoch": 0.16434568912553757,
"grad_norm": 0.39116302132606506,
"learning_rate": 0.0015,
"loss": 1.5148,
"step": 3210
},
{
"epoch": 0.16485766946549252,
"grad_norm": 0.3875216245651245,
"learning_rate": 0.0015,
"loss": 1.5289,
"step": 3220
},
{
"epoch": 0.16536964980544747,
"grad_norm": 0.4106022119522095,
"learning_rate": 0.0015,
"loss": 1.5358,
"step": 3230
},
{
"epoch": 0.16588163014540241,
"grad_norm": 0.393637090921402,
"learning_rate": 0.0015,
"loss": 1.5334,
"step": 3240
},
{
"epoch": 0.16639361048535736,
"grad_norm": 0.3800962269306183,
"learning_rate": 0.0015,
"loss": 1.5364,
"step": 3250
},
{
"epoch": 0.1669055908253123,
"grad_norm": 0.3848235011100769,
"learning_rate": 0.0015,
"loss": 1.5411,
"step": 3260
},
{
"epoch": 0.16741757116526726,
"grad_norm": 0.38832154870033264,
"learning_rate": 0.0015,
"loss": 1.5373,
"step": 3270
},
{
"epoch": 0.1679295515052222,
"grad_norm": 0.43623119592666626,
"learning_rate": 0.0015,
"loss": 1.5558,
"step": 3280
},
{
"epoch": 0.16844153184517716,
"grad_norm": 0.3507107198238373,
"learning_rate": 0.0015,
"loss": 1.5365,
"step": 3290
},
{
"epoch": 0.1689535121851321,
"grad_norm": 0.38700392842292786,
"learning_rate": 0.0015,
"loss": 1.5383,
"step": 3300
},
{
"epoch": 0.16946549252508702,
"grad_norm": 0.38841623067855835,
"learning_rate": 0.0015,
"loss": 1.5399,
"step": 3310
},
{
"epoch": 0.16997747286504197,
"grad_norm": 0.39128798246383667,
"learning_rate": 0.0015,
"loss": 1.5271,
"step": 3320
},
{
"epoch": 0.17048945320499692,
"grad_norm": 0.38994646072387695,
"learning_rate": 0.0015,
"loss": 1.5317,
"step": 3330
},
{
"epoch": 0.17100143354495187,
"grad_norm": 0.37731438875198364,
"learning_rate": 0.0015,
"loss": 1.5251,
"step": 3340
},
{
"epoch": 0.17151341388490682,
"grad_norm": 0.4156712293624878,
"learning_rate": 0.0015,
"loss": 1.5221,
"step": 3350
},
{
"epoch": 0.17202539422486177,
"grad_norm": 0.38232874870300293,
"learning_rate": 0.0015,
"loss": 1.5196,
"step": 3360
},
{
"epoch": 0.17253737456481671,
"grad_norm": 0.3940838575363159,
"learning_rate": 0.0015,
"loss": 1.5213,
"step": 3370
},
{
"epoch": 0.17304935490477166,
"grad_norm": 0.4050334393978119,
"learning_rate": 0.0015,
"loss": 1.5159,
"step": 3380
},
{
"epoch": 0.1735613352447266,
"grad_norm": 0.3736588954925537,
"learning_rate": 0.0015,
"loss": 1.5157,
"step": 3390
},
{
"epoch": 0.17407331558468156,
"grad_norm": 0.40355414152145386,
"learning_rate": 0.0015,
"loss": 1.5446,
"step": 3400
},
{
"epoch": 0.1745852959246365,
"grad_norm": 0.37198445200920105,
"learning_rate": 0.0015,
"loss": 1.5322,
"step": 3410
},
{
"epoch": 0.17509727626459143,
"grad_norm": 0.35825085639953613,
"learning_rate": 0.0015,
"loss": 1.5136,
"step": 3420
},
{
"epoch": 0.17560925660454638,
"grad_norm": 0.4174591302871704,
"learning_rate": 0.0015,
"loss": 1.5092,
"step": 3430
},
{
"epoch": 0.17612123694450132,
"grad_norm": 0.38272011280059814,
"learning_rate": 0.0015,
"loss": 1.515,
"step": 3440
},
{
"epoch": 0.17663321728445627,
"grad_norm": 0.4088602364063263,
"learning_rate": 0.0015,
"loss": 1.5089,
"step": 3450
},
{
"epoch": 0.17714519762441122,
"grad_norm": 0.37706780433654785,
"learning_rate": 0.0015,
"loss": 1.513,
"step": 3460
},
{
"epoch": 0.17765717796436617,
"grad_norm": 0.3772091865539551,
"learning_rate": 0.0015,
"loss": 1.5096,
"step": 3470
},
{
"epoch": 0.17816915830432112,
"grad_norm": 0.3540133535861969,
"learning_rate": 0.0015,
"loss": 1.5099,
"step": 3480
},
{
"epoch": 0.17868113864427607,
"grad_norm": 0.36549830436706543,
"learning_rate": 0.0015,
"loss": 1.511,
"step": 3490
},
{
"epoch": 0.17919311898423101,
"grad_norm": 0.39273905754089355,
"learning_rate": 0.0015,
"loss": 1.5005,
"step": 3500
},
{
"epoch": 0.17970509932418596,
"grad_norm": 0.35500046610832214,
"learning_rate": 0.0015,
"loss": 1.4962,
"step": 3510
},
{
"epoch": 0.18021707966414088,
"grad_norm": 0.39818084239959717,
"learning_rate": 0.0015,
"loss": 1.4951,
"step": 3520
},
{
"epoch": 0.18072906000409583,
"grad_norm": 0.3649390637874603,
"learning_rate": 0.0015,
"loss": 1.5038,
"step": 3530
},
{
"epoch": 0.18124104034405078,
"grad_norm": 0.376000314950943,
"learning_rate": 0.0015,
"loss": 1.4945,
"step": 3540
},
{
"epoch": 0.18175302068400573,
"grad_norm": 0.3638756573200226,
"learning_rate": 0.0015,
"loss": 1.5012,
"step": 3550
},
{
"epoch": 0.18226500102396068,
"grad_norm": 0.3695107400417328,
"learning_rate": 0.0015,
"loss": 1.5261,
"step": 3560
},
{
"epoch": 0.18277698136391562,
"grad_norm": 0.424125999212265,
"learning_rate": 0.0015,
"loss": 1.5245,
"step": 3570
},
{
"epoch": 0.18328896170387057,
"grad_norm": 0.3683246374130249,
"learning_rate": 0.0015,
"loss": 1.507,
"step": 3580
},
{
"epoch": 0.18380094204382552,
"grad_norm": 0.3763924241065979,
"learning_rate": 0.0015,
"loss": 1.4671,
"step": 3590
},
{
"epoch": 0.18431292238378047,
"grad_norm": 0.3692323565483093,
"learning_rate": 0.0015,
"loss": 1.5182,
"step": 3600
},
{
"epoch": 0.18482490272373542,
"grad_norm": 0.37030673027038574,
"learning_rate": 0.0015,
"loss": 1.5037,
"step": 3610
},
{
"epoch": 0.18533688306369037,
"grad_norm": 0.3666503429412842,
"learning_rate": 0.0015,
"loss": 1.499,
"step": 3620
},
{
"epoch": 0.1858488634036453,
"grad_norm": 0.3609069287776947,
"learning_rate": 0.0015,
"loss": 1.5052,
"step": 3630
},
{
"epoch": 0.18636084374360024,
"grad_norm": 0.3748449683189392,
"learning_rate": 0.0015,
"loss": 1.4596,
"step": 3640
},
{
"epoch": 0.18687282408355518,
"grad_norm": 0.4080664813518524,
"learning_rate": 0.0015,
"loss": 1.5051,
"step": 3650
},
{
"epoch": 0.18738480442351013,
"grad_norm": 0.3743340075016022,
"learning_rate": 0.0015,
"loss": 1.4658,
"step": 3660
},
{
"epoch": 0.18789678476346508,
"grad_norm": 0.36924538016319275,
"learning_rate": 0.0015,
"loss": 1.474,
"step": 3670
},
{
"epoch": 0.18840876510342003,
"grad_norm": 0.3834936022758484,
"learning_rate": 0.0015,
"loss": 1.4952,
"step": 3680
},
{
"epoch": 0.18892074544337498,
"grad_norm": 0.3493509590625763,
"learning_rate": 0.0015,
"loss": 1.4765,
"step": 3690
},
{
"epoch": 0.18943272578332992,
"grad_norm": 0.3550162613391876,
"learning_rate": 0.0015,
"loss": 1.4928,
"step": 3700
},
{
"epoch": 0.18994470612328487,
"grad_norm": 0.3747323155403137,
"learning_rate": 0.0015,
"loss": 1.4872,
"step": 3710
},
{
"epoch": 0.19045668646323982,
"grad_norm": 0.3649948835372925,
"learning_rate": 0.0015,
"loss": 1.5015,
"step": 3720
},
{
"epoch": 0.19096866680319477,
"grad_norm": 0.37357765436172485,
"learning_rate": 0.0015,
"loss": 1.4828,
"step": 3730
},
{
"epoch": 0.1914806471431497,
"grad_norm": 0.36136525869369507,
"learning_rate": 0.0015,
"loss": 1.5063,
"step": 3740
},
{
"epoch": 0.19199262748310464,
"grad_norm": 0.35555464029312134,
"learning_rate": 0.0015,
"loss": 1.4797,
"step": 3750
},
{
"epoch": 0.1925046078230596,
"grad_norm": 0.3460323214530945,
"learning_rate": 0.0015,
"loss": 1.4913,
"step": 3760
},
{
"epoch": 0.19301658816301454,
"grad_norm": 0.35079696774482727,
"learning_rate": 0.0015,
"loss": 1.4714,
"step": 3770
},
{
"epoch": 0.19352856850296948,
"grad_norm": 0.3562418818473816,
"learning_rate": 0.0015,
"loss": 1.4816,
"step": 3780
},
{
"epoch": 0.19404054884292443,
"grad_norm": 0.3714292049407959,
"learning_rate": 0.0015,
"loss": 1.496,
"step": 3790
},
{
"epoch": 0.19455252918287938,
"grad_norm": 0.37646958231925964,
"learning_rate": 0.0015,
"loss": 1.4814,
"step": 3800
},
{
"epoch": 0.19506450952283433,
"grad_norm": 0.37127116322517395,
"learning_rate": 0.0015,
"loss": 1.4902,
"step": 3810
},
{
"epoch": 0.19557648986278928,
"grad_norm": 0.3644818961620331,
"learning_rate": 0.0015,
"loss": 1.4811,
"step": 3820
},
{
"epoch": 0.19608847020274423,
"grad_norm": 0.38677945733070374,
"learning_rate": 0.0015,
"loss": 1.5001,
"step": 3830
},
{
"epoch": 0.19660045054269917,
"grad_norm": 0.379823237657547,
"learning_rate": 0.0015,
"loss": 1.4665,
"step": 3840
},
{
"epoch": 0.1971124308826541,
"grad_norm": 0.37844884395599365,
"learning_rate": 0.0015,
"loss": 1.4783,
"step": 3850
},
{
"epoch": 0.19762441122260904,
"grad_norm": 0.36030471324920654,
"learning_rate": 0.0015,
"loss": 1.4883,
"step": 3860
},
{
"epoch": 0.198136391562564,
"grad_norm": 0.3515039384365082,
"learning_rate": 0.0015,
"loss": 1.4614,
"step": 3870
},
{
"epoch": 0.19864837190251894,
"grad_norm": 0.3469856381416321,
"learning_rate": 0.0015,
"loss": 1.4669,
"step": 3880
},
{
"epoch": 0.1991603522424739,
"grad_norm": 0.3526422381401062,
"learning_rate": 0.0015,
"loss": 1.4568,
"step": 3890
},
{
"epoch": 0.19967233258242884,
"grad_norm": 0.34970229864120483,
"learning_rate": 0.0015,
"loss": 1.4467,
"step": 3900
},
{
"epoch": 0.20018431292238378,
"grad_norm": 0.35208991169929504,
"learning_rate": 0.0015,
"loss": 1.5057,
"step": 3910
},
{
"epoch": 0.20069629326233873,
"grad_norm": 0.35446539521217346,
"learning_rate": 0.0015,
"loss": 1.4677,
"step": 3920
},
{
"epoch": 0.20120827360229368,
"grad_norm": 0.32680749893188477,
"learning_rate": 0.0015,
"loss": 1.4577,
"step": 3930
},
{
"epoch": 0.20172025394224863,
"grad_norm": 0.3479768931865692,
"learning_rate": 0.0015,
"loss": 1.4679,
"step": 3940
},
{
"epoch": 0.20223223428220358,
"grad_norm": 0.3349073529243469,
"learning_rate": 0.0015,
"loss": 1.4497,
"step": 3950
},
{
"epoch": 0.2027442146221585,
"grad_norm": 0.35016781091690063,
"learning_rate": 0.0015,
"loss": 1.449,
"step": 3960
},
{
"epoch": 0.20325619496211345,
"grad_norm": 0.349086195230484,
"learning_rate": 0.0015,
"loss": 1.4751,
"step": 3970
},
{
"epoch": 0.2037681753020684,
"grad_norm": 0.36575040221214294,
"learning_rate": 0.0015,
"loss": 1.4653,
"step": 3980
},
{
"epoch": 0.20428015564202334,
"grad_norm": 0.34002363681793213,
"learning_rate": 0.0015,
"loss": 1.4826,
"step": 3990
},
{
"epoch": 0.2047921359819783,
"grad_norm": 0.36541834473609924,
"learning_rate": 0.0015,
"loss": 1.4485,
"step": 4000
},
{
"epoch": 0.20530411632193324,
"grad_norm": 0.3874847888946533,
"learning_rate": 0.0015,
"loss": 1.478,
"step": 4010
},
{
"epoch": 0.2058160966618882,
"grad_norm": 0.36418798565864563,
"learning_rate": 0.0015,
"loss": 1.4629,
"step": 4020
},
{
"epoch": 0.20632807700184314,
"grad_norm": 0.34188389778137207,
"learning_rate": 0.0015,
"loss": 1.4784,
"step": 4030
},
{
"epoch": 0.20684005734179808,
"grad_norm": 0.35976287722587585,
"learning_rate": 0.0015,
"loss": 1.458,
"step": 4040
},
{
"epoch": 0.20735203768175303,
"grad_norm": 0.37284791469573975,
"learning_rate": 0.0015,
"loss": 1.471,
"step": 4050
},
{
"epoch": 0.20786401802170798,
"grad_norm": 0.3462198078632355,
"learning_rate": 0.0015,
"loss": 1.4748,
"step": 4060
},
{
"epoch": 0.2083759983616629,
"grad_norm": 0.3988822102546692,
"learning_rate": 0.0015,
"loss": 1.4576,
"step": 4070
},
{
"epoch": 0.20888797870161785,
"grad_norm": 0.361892431974411,
"learning_rate": 0.0015,
"loss": 1.4516,
"step": 4080
},
{
"epoch": 0.2093999590415728,
"grad_norm": 0.3648587763309479,
"learning_rate": 0.0015,
"loss": 1.4537,
"step": 4090
},
{
"epoch": 0.20991193938152775,
"grad_norm": 0.35592299699783325,
"learning_rate": 0.0015,
"loss": 1.4346,
"step": 4100
},
{
"epoch": 0.2104239197214827,
"grad_norm": 0.3457651138305664,
"learning_rate": 0.0015,
"loss": 1.4455,
"step": 4110
},
{
"epoch": 0.21093590006143764,
"grad_norm": 0.3580280542373657,
"learning_rate": 0.0015,
"loss": 1.452,
"step": 4120
},
{
"epoch": 0.2114478804013926,
"grad_norm": 0.3704809844493866,
"learning_rate": 0.0015,
"loss": 1.4655,
"step": 4130
},
{
"epoch": 0.21195986074134754,
"grad_norm": 0.37433552742004395,
"learning_rate": 0.0015,
"loss": 1.4526,
"step": 4140
},
{
"epoch": 0.2124718410813025,
"grad_norm": 0.35324522852897644,
"learning_rate": 0.0015,
"loss": 1.4651,
"step": 4150
},
{
"epoch": 0.21298382142125744,
"grad_norm": 0.34257858991622925,
"learning_rate": 0.0015,
"loss": 1.4454,
"step": 4160
},
{
"epoch": 0.21349580176121236,
"grad_norm": 0.34159529209136963,
"learning_rate": 0.0015,
"loss": 1.4561,
"step": 4170
},
{
"epoch": 0.2140077821011673,
"grad_norm": 0.3691791296005249,
"learning_rate": 0.0015,
"loss": 1.4496,
"step": 4180
},
{
"epoch": 0.21451976244112225,
"grad_norm": 0.3290902078151703,
"learning_rate": 0.0015,
"loss": 1.4477,
"step": 4190
},
{
"epoch": 0.2150317427810772,
"grad_norm": 0.35127583146095276,
"learning_rate": 0.0015,
"loss": 1.4389,
"step": 4200
},
{
"epoch": 0.21554372312103215,
"grad_norm": 0.3416004776954651,
"learning_rate": 0.0015,
"loss": 1.4569,
"step": 4210
},
{
"epoch": 0.2160557034609871,
"grad_norm": 0.33589133620262146,
"learning_rate": 0.0015,
"loss": 1.4536,
"step": 4220
},
{
"epoch": 0.21656768380094205,
"grad_norm": 0.3249707818031311,
"learning_rate": 0.0015,
"loss": 1.4421,
"step": 4230
},
{
"epoch": 0.217079664140897,
"grad_norm": 0.3269306719303131,
"learning_rate": 0.0015,
"loss": 1.4644,
"step": 4240
},
{
"epoch": 0.21759164448085194,
"grad_norm": 0.34012100100517273,
"learning_rate": 0.0015,
"loss": 1.4419,
"step": 4250
},
{
"epoch": 0.2181036248208069,
"grad_norm": 0.3248611390590668,
"learning_rate": 0.0015,
"loss": 1.4321,
"step": 4260
},
{
"epoch": 0.21861560516076184,
"grad_norm": 0.33508434891700745,
"learning_rate": 0.0015,
"loss": 1.4547,
"step": 4270
},
{
"epoch": 0.21912758550071676,
"grad_norm": 0.3807787597179413,
"learning_rate": 0.0015,
"loss": 1.441,
"step": 4280
},
{
"epoch": 0.2196395658406717,
"grad_norm": 0.34403491020202637,
"learning_rate": 0.0015,
"loss": 1.4309,
"step": 4290
},
{
"epoch": 0.22015154618062666,
"grad_norm": 0.339507520198822,
"learning_rate": 0.0015,
"loss": 1.4408,
"step": 4300
},
{
"epoch": 0.2206635265205816,
"grad_norm": 0.34783267974853516,
"learning_rate": 0.0015,
"loss": 1.4362,
"step": 4310
},
{
"epoch": 0.22117550686053655,
"grad_norm": 0.3477760851383209,
"learning_rate": 0.0015,
"loss": 1.4743,
"step": 4320
},
{
"epoch": 0.2216874872004915,
"grad_norm": 0.33150288462638855,
"learning_rate": 0.0015,
"loss": 1.4338,
"step": 4330
},
{
"epoch": 0.22219946754044645,
"grad_norm": 0.3353327810764313,
"learning_rate": 0.0015,
"loss": 1.4389,
"step": 4340
},
{
"epoch": 0.2227114478804014,
"grad_norm": 0.35436680912971497,
"learning_rate": 0.0015,
"loss": 1.4221,
"step": 4350
},
{
"epoch": 0.22322342822035635,
"grad_norm": 0.35052821040153503,
"learning_rate": 0.0015,
"loss": 1.4463,
"step": 4360
},
{
"epoch": 0.2237354085603113,
"grad_norm": 0.3383365273475647,
"learning_rate": 0.0015,
"loss": 1.4438,
"step": 4370
},
{
"epoch": 0.22424738890026624,
"grad_norm": 0.33028966188430786,
"learning_rate": 0.0015,
"loss": 1.4365,
"step": 4380
},
{
"epoch": 0.22475936924022116,
"grad_norm": 0.3439690172672272,
"learning_rate": 0.0015,
"loss": 1.434,
"step": 4390
},
{
"epoch": 0.2252713495801761,
"grad_norm": 0.3257237374782562,
"learning_rate": 0.0015,
"loss": 1.4268,
"step": 4400
},
{
"epoch": 0.22578332992013106,
"grad_norm": 0.34487271308898926,
"learning_rate": 0.0015,
"loss": 1.419,
"step": 4410
},
{
"epoch": 0.226295310260086,
"grad_norm": 0.3513702154159546,
"learning_rate": 0.0015,
"loss": 1.416,
"step": 4420
},
{
"epoch": 0.22680729060004096,
"grad_norm": 0.32178881764411926,
"learning_rate": 0.0015,
"loss": 1.4267,
"step": 4430
},
{
"epoch": 0.2273192709399959,
"grad_norm": 0.32011663913726807,
"learning_rate": 0.0015,
"loss": 1.4269,
"step": 4440
},
{
"epoch": 0.22783125127995085,
"grad_norm": 0.3356774151325226,
"learning_rate": 0.0015,
"loss": 1.4253,
"step": 4450
},
{
"epoch": 0.2283432316199058,
"grad_norm": 0.33938485383987427,
"learning_rate": 0.0015,
"loss": 1.4137,
"step": 4460
},
{
"epoch": 0.22885521195986075,
"grad_norm": 0.3313305675983429,
"learning_rate": 0.0015,
"loss": 1.4178,
"step": 4470
},
{
"epoch": 0.2293671922998157,
"grad_norm": 0.31967252492904663,
"learning_rate": 0.0015,
"loss": 1.4421,
"step": 4480
},
{
"epoch": 0.22987917263977065,
"grad_norm": 0.3485276401042938,
"learning_rate": 0.0015,
"loss": 1.4202,
"step": 4490
},
{
"epoch": 0.23039115297972557,
"grad_norm": 0.3465486764907837,
"learning_rate": 0.0015,
"loss": 1.4364,
"step": 4500
},
{
"epoch": 0.23090313331968051,
"grad_norm": 0.3443972170352936,
"learning_rate": 0.0015,
"loss": 1.4326,
"step": 4510
},
{
"epoch": 0.23141511365963546,
"grad_norm": 0.33160969614982605,
"learning_rate": 0.0015,
"loss": 1.4147,
"step": 4520
},
{
"epoch": 0.2319270939995904,
"grad_norm": 0.3427571952342987,
"learning_rate": 0.0015,
"loss": 1.4316,
"step": 4530
},
{
"epoch": 0.23243907433954536,
"grad_norm": 0.3282462954521179,
"learning_rate": 0.0015,
"loss": 1.3933,
"step": 4540
},
{
"epoch": 0.2329510546795003,
"grad_norm": 0.3840288519859314,
"learning_rate": 0.0015,
"loss": 1.4206,
"step": 4550
},
{
"epoch": 0.23346303501945526,
"grad_norm": 0.34188082814216614,
"learning_rate": 0.0015,
"loss": 1.4286,
"step": 4560
},
{
"epoch": 0.2339750153594102,
"grad_norm": 0.32480111718177795,
"learning_rate": 0.0015,
"loss": 1.4191,
"step": 4570
},
{
"epoch": 0.23448699569936515,
"grad_norm": 0.3416594862937927,
"learning_rate": 0.0015,
"loss": 1.432,
"step": 4580
},
{
"epoch": 0.2349989760393201,
"grad_norm": 0.32898756861686707,
"learning_rate": 0.0015,
"loss": 1.414,
"step": 4590
},
{
"epoch": 0.23551095637927505,
"grad_norm": 0.3290642499923706,
"learning_rate": 0.0015,
"loss": 1.4272,
"step": 4600
},
{
"epoch": 0.23602293671922997,
"grad_norm": 0.333150178194046,
"learning_rate": 0.0015,
"loss": 1.4254,
"step": 4610
},
{
"epoch": 0.23653491705918492,
"grad_norm": 0.30599096417427063,
"learning_rate": 0.0015,
"loss": 1.4255,
"step": 4620
},
{
"epoch": 0.23704689739913987,
"grad_norm": 0.34288567304611206,
"learning_rate": 0.0015,
"loss": 1.4027,
"step": 4630
},
{
"epoch": 0.23755887773909481,
"grad_norm": 0.36715662479400635,
"learning_rate": 0.0015,
"loss": 1.4155,
"step": 4640
},
{
"epoch": 0.23807085807904976,
"grad_norm": 0.32257118821144104,
"learning_rate": 0.0015,
"loss": 1.4178,
"step": 4650
},
{
"epoch": 0.2385828384190047,
"grad_norm": 0.3298852741718292,
"learning_rate": 0.0015,
"loss": 1.4149,
"step": 4660
},
{
"epoch": 0.23909481875895966,
"grad_norm": 0.32268422842025757,
"learning_rate": 0.0015,
"loss": 1.4384,
"step": 4670
},
{
"epoch": 0.2396067990989146,
"grad_norm": 0.33715546131134033,
"learning_rate": 0.0015,
"loss": 1.4014,
"step": 4680
},
{
"epoch": 0.24011877943886956,
"grad_norm": 0.3131064772605896,
"learning_rate": 0.0015,
"loss": 1.4163,
"step": 4690
},
{
"epoch": 0.2406307597788245,
"grad_norm": 0.3470405042171478,
"learning_rate": 0.0015,
"loss": 1.4186,
"step": 4700
},
{
"epoch": 0.24114274011877943,
"grad_norm": 0.35475459694862366,
"learning_rate": 0.0015,
"loss": 1.417,
"step": 4710
},
{
"epoch": 0.24165472045873437,
"grad_norm": 0.3337201178073883,
"learning_rate": 0.0015,
"loss": 1.4271,
"step": 4720
},
{
"epoch": 0.24216670079868932,
"grad_norm": 0.3554363548755646,
"learning_rate": 0.0015,
"loss": 1.4182,
"step": 4730
},
{
"epoch": 0.24267868113864427,
"grad_norm": 0.32346460223197937,
"learning_rate": 0.0015,
"loss": 1.421,
"step": 4740
},
{
"epoch": 0.24319066147859922,
"grad_norm": 0.3117121756076813,
"learning_rate": 0.0015,
"loss": 1.4278,
"step": 4750
},
{
"epoch": 0.24370264181855417,
"grad_norm": 0.3506932556629181,
"learning_rate": 0.0015,
"loss": 1.3881,
"step": 4760
},
{
"epoch": 0.24421462215850911,
"grad_norm": 0.3424610495567322,
"learning_rate": 0.0015,
"loss": 1.4236,
"step": 4770
},
{
"epoch": 0.24472660249846406,
"grad_norm": 0.3284012973308563,
"learning_rate": 0.0015,
"loss": 1.4147,
"step": 4780
},
{
"epoch": 0.245238582838419,
"grad_norm": 0.3341637849807739,
"learning_rate": 0.0015,
"loss": 1.4109,
"step": 4790
},
{
"epoch": 0.24575056317837396,
"grad_norm": 0.32382500171661377,
"learning_rate": 0.0015,
"loss": 1.4063,
"step": 4800
},
{
"epoch": 0.2462625435183289,
"grad_norm": 0.3269002437591553,
"learning_rate": 0.0015,
"loss": 1.42,
"step": 4810
},
{
"epoch": 0.24677452385828383,
"grad_norm": 0.33705347776412964,
"learning_rate": 0.0015,
"loss": 1.4108,
"step": 4820
},
{
"epoch": 0.24728650419823878,
"grad_norm": 0.32141435146331787,
"learning_rate": 0.0015,
"loss": 1.4012,
"step": 4830
},
{
"epoch": 0.24779848453819373,
"grad_norm": 0.32620713114738464,
"learning_rate": 0.0015,
"loss": 1.3946,
"step": 4840
},
{
"epoch": 0.24831046487814867,
"grad_norm": 0.3150465488433838,
"learning_rate": 0.0015,
"loss": 1.4239,
"step": 4850
},
{
"epoch": 0.24882244521810362,
"grad_norm": 0.3141099214553833,
"learning_rate": 0.0015,
"loss": 1.4248,
"step": 4860
},
{
"epoch": 0.24933442555805857,
"grad_norm": 0.31802797317504883,
"learning_rate": 0.0015,
"loss": 1.3965,
"step": 4870
},
{
"epoch": 0.24984640589801352,
"grad_norm": 0.31748947501182556,
"learning_rate": 0.0015,
"loss": 1.4222,
"step": 4880
},
{
"epoch": 0.25035838623796847,
"grad_norm": 0.30938032269477844,
"learning_rate": 0.0015,
"loss": 1.4001,
"step": 4890
},
{
"epoch": 0.2508703665779234,
"grad_norm": 0.3129180371761322,
"learning_rate": 0.0015,
"loss": 1.3958,
"step": 4900
},
{
"epoch": 0.25138234691787836,
"grad_norm": 0.31602999567985535,
"learning_rate": 0.0015,
"loss": 1.4114,
"step": 4910
},
{
"epoch": 0.2518943272578333,
"grad_norm": 0.3049462139606476,
"learning_rate": 0.0015,
"loss": 1.3868,
"step": 4920
},
{
"epoch": 0.25240630759778826,
"grad_norm": 0.3103995621204376,
"learning_rate": 0.0015,
"loss": 1.401,
"step": 4930
},
{
"epoch": 0.2529182879377432,
"grad_norm": 0.30271056294441223,
"learning_rate": 0.0015,
"loss": 1.4046,
"step": 4940
},
{
"epoch": 0.25343026827769816,
"grad_norm": 0.32372725009918213,
"learning_rate": 0.0015,
"loss": 1.3719,
"step": 4950
},
{
"epoch": 0.2539422486176531,
"grad_norm": 0.3129730224609375,
"learning_rate": 0.0015,
"loss": 1.3797,
"step": 4960
},
{
"epoch": 0.25445422895760805,
"grad_norm": 0.3240148425102234,
"learning_rate": 0.0015,
"loss": 1.4134,
"step": 4970
},
{
"epoch": 0.254966209297563,
"grad_norm": 0.30317404866218567,
"learning_rate": 0.0015,
"loss": 1.3894,
"step": 4980
},
{
"epoch": 0.2554781896375179,
"grad_norm": 0.33288583159446716,
"learning_rate": 0.0015,
"loss": 1.4132,
"step": 4990
},
{
"epoch": 0.25599016997747287,
"grad_norm": 0.3233846127986908,
"learning_rate": 0.0015,
"loss": 1.3762,
"step": 5000
},
{
"epoch": 0.2565021503174278,
"grad_norm": 0.30729755759239197,
"learning_rate": 0.0015,
"loss": 1.3975,
"step": 5010
},
{
"epoch": 0.25701413065738277,
"grad_norm": 0.3006018400192261,
"learning_rate": 0.0015,
"loss": 1.4047,
"step": 5020
},
{
"epoch": 0.2575261109973377,
"grad_norm": 0.3207467794418335,
"learning_rate": 0.0015,
"loss": 1.4084,
"step": 5030
},
{
"epoch": 0.25803809133729266,
"grad_norm": 0.3039129674434662,
"learning_rate": 0.0015,
"loss": 1.4209,
"step": 5040
},
{
"epoch": 0.2585500716772476,
"grad_norm": 0.29750290513038635,
"learning_rate": 0.0015,
"loss": 1.4156,
"step": 5050
},
{
"epoch": 0.25906205201720256,
"grad_norm": 0.314507395029068,
"learning_rate": 0.0015,
"loss": 1.3685,
"step": 5060
},
{
"epoch": 0.2595740323571575,
"grad_norm": 0.3176608681678772,
"learning_rate": 0.0015,
"loss": 1.3701,
"step": 5070
},
{
"epoch": 0.26008601269711246,
"grad_norm": 0.3273438513278961,
"learning_rate": 0.0015,
"loss": 1.3841,
"step": 5080
},
{
"epoch": 0.2605979930370674,
"grad_norm": 0.3173183798789978,
"learning_rate": 0.0015,
"loss": 1.3732,
"step": 5090
},
{
"epoch": 0.2611099733770223,
"grad_norm": 0.33317986130714417,
"learning_rate": 0.0015,
"loss": 1.3815,
"step": 5100
},
{
"epoch": 0.2616219537169773,
"grad_norm": 0.3045515716075897,
"learning_rate": 0.0015,
"loss": 1.4042,
"step": 5110
},
{
"epoch": 0.2621339340569322,
"grad_norm": 0.3056975305080414,
"learning_rate": 0.0015,
"loss": 1.4156,
"step": 5120
},
{
"epoch": 0.26264591439688717,
"grad_norm": 0.3231489956378937,
"learning_rate": 0.0015,
"loss": 1.4076,
"step": 5130
},
{
"epoch": 0.2631578947368421,
"grad_norm": 0.3215503990650177,
"learning_rate": 0.0015,
"loss": 1.3712,
"step": 5140
},
{
"epoch": 0.26366987507679707,
"grad_norm": 0.30379393696784973,
"learning_rate": 0.0015,
"loss": 1.3648,
"step": 5150
},
{
"epoch": 0.264181855416752,
"grad_norm": 0.2987072765827179,
"learning_rate": 0.0015,
"loss": 1.3859,
"step": 5160
},
{
"epoch": 0.26469383575670696,
"grad_norm": 0.3293174207210541,
"learning_rate": 0.0015,
"loss": 1.3974,
"step": 5170
},
{
"epoch": 0.2652058160966619,
"grad_norm": 0.34920957684516907,
"learning_rate": 0.0015,
"loss": 1.3868,
"step": 5180
},
{
"epoch": 0.26571779643661686,
"grad_norm": 0.3054308295249939,
"learning_rate": 0.0015,
"loss": 1.3838,
"step": 5190
},
{
"epoch": 0.2662297767765718,
"grad_norm": 0.3131832182407379,
"learning_rate": 0.0015,
"loss": 1.377,
"step": 5200
},
{
"epoch": 0.2667417571165267,
"grad_norm": 0.30868205428123474,
"learning_rate": 0.0015,
"loss": 1.3999,
"step": 5210
},
{
"epoch": 0.2672537374564817,
"grad_norm": 0.3193263113498688,
"learning_rate": 0.0015,
"loss": 1.3789,
"step": 5220
},
{
"epoch": 0.2677657177964366,
"grad_norm": 0.3142963945865631,
"learning_rate": 0.0015,
"loss": 1.3993,
"step": 5230
},
{
"epoch": 0.2682776981363916,
"grad_norm": 0.3012097179889679,
"learning_rate": 0.0015,
"loss": 1.3959,
"step": 5240
},
{
"epoch": 0.2687896784763465,
"grad_norm": 0.30580368638038635,
"learning_rate": 0.0015,
"loss": 1.4106,
"step": 5250
},
{
"epoch": 0.26930165881630147,
"grad_norm": 0.2862599790096283,
"learning_rate": 0.0015,
"loss": 1.3873,
"step": 5260
},
{
"epoch": 0.2698136391562564,
"grad_norm": 0.3221125602722168,
"learning_rate": 0.0015,
"loss": 1.3997,
"step": 5270
},
{
"epoch": 0.27032561949621137,
"grad_norm": 0.29167062044143677,
"learning_rate": 0.0015,
"loss": 1.3707,
"step": 5280
},
{
"epoch": 0.2708375998361663,
"grad_norm": 0.3372457027435303,
"learning_rate": 0.0015,
"loss": 1.3767,
"step": 5290
},
{
"epoch": 0.27134958017612126,
"grad_norm": 0.308940589427948,
"learning_rate": 0.0015,
"loss": 1.377,
"step": 5300
},
{
"epoch": 0.2718615605160762,
"grad_norm": 0.2946240305900574,
"learning_rate": 0.0015,
"loss": 1.3811,
"step": 5310
},
{
"epoch": 0.2723735408560311,
"grad_norm": 0.30118903517723083,
"learning_rate": 0.0015,
"loss": 1.3991,
"step": 5320
},
{
"epoch": 0.2728855211959861,
"grad_norm": 0.3128001093864441,
"learning_rate": 0.0015,
"loss": 1.3806,
"step": 5330
},
{
"epoch": 0.273397501535941,
"grad_norm": 0.3355924189090729,
"learning_rate": 0.0015,
"loss": 1.378,
"step": 5340
},
{
"epoch": 0.273909481875896,
"grad_norm": 0.29809674620628357,
"learning_rate": 0.0015,
"loss": 1.365,
"step": 5350
},
{
"epoch": 0.2744214622158509,
"grad_norm": 0.2897878885269165,
"learning_rate": 0.0015,
"loss": 1.3796,
"step": 5360
},
{
"epoch": 0.2749334425558059,
"grad_norm": 0.33131879568099976,
"learning_rate": 0.0015,
"loss": 1.3789,
"step": 5370
},
{
"epoch": 0.2754454228957608,
"grad_norm": 0.3270549476146698,
"learning_rate": 0.0015,
"loss": 1.3877,
"step": 5380
},
{
"epoch": 0.27595740323571577,
"grad_norm": 0.3001706898212433,
"learning_rate": 0.0015,
"loss": 1.376,
"step": 5390
},
{
"epoch": 0.2764693835756707,
"grad_norm": 0.3149849772453308,
"learning_rate": 0.0015,
"loss": 1.3815,
"step": 5400
},
{
"epoch": 0.27698136391562567,
"grad_norm": 0.28992435336112976,
"learning_rate": 0.0015,
"loss": 1.3731,
"step": 5410
},
{
"epoch": 0.2774933442555806,
"grad_norm": 0.295311838388443,
"learning_rate": 0.0015,
"loss": 1.3958,
"step": 5420
},
{
"epoch": 0.2780053245955355,
"grad_norm": 0.2988681495189667,
"learning_rate": 0.0015,
"loss": 1.3946,
"step": 5430
},
{
"epoch": 0.2785173049354905,
"grad_norm": 0.3085227608680725,
"learning_rate": 0.0015,
"loss": 1.3776,
"step": 5440
},
{
"epoch": 0.2790292852754454,
"grad_norm": 0.30014750361442566,
"learning_rate": 0.0015,
"loss": 1.3772,
"step": 5450
},
{
"epoch": 0.2795412656154004,
"grad_norm": 0.3058876693248749,
"learning_rate": 0.0015,
"loss": 1.3637,
"step": 5460
},
{
"epoch": 0.2800532459553553,
"grad_norm": 0.2952674925327301,
"learning_rate": 0.0015,
"loss": 1.3888,
"step": 5470
},
{
"epoch": 0.2805652262953103,
"grad_norm": 0.3016969561576843,
"learning_rate": 0.0015,
"loss": 1.3874,
"step": 5480
},
{
"epoch": 0.2810772066352652,
"grad_norm": 0.30375874042510986,
"learning_rate": 0.0015,
"loss": 1.3652,
"step": 5490
},
{
"epoch": 0.2815891869752202,
"grad_norm": 0.29380300641059875,
"learning_rate": 0.0015,
"loss": 1.3768,
"step": 5500
},
{
"epoch": 0.2821011673151751,
"grad_norm": 0.2994033992290497,
"learning_rate": 0.0015,
"loss": 1.376,
"step": 5510
},
{
"epoch": 0.28261314765513007,
"grad_norm": 0.3174065053462982,
"learning_rate": 0.0015,
"loss": 1.3873,
"step": 5520
},
{
"epoch": 0.283125127995085,
"grad_norm": 0.3069535791873932,
"learning_rate": 0.0015,
"loss": 1.3636,
"step": 5530
},
{
"epoch": 0.2836371083350399,
"grad_norm": 0.2826645076274872,
"learning_rate": 0.0015,
"loss": 1.3567,
"step": 5540
},
{
"epoch": 0.2841490886749949,
"grad_norm": 0.295926034450531,
"learning_rate": 0.0015,
"loss": 1.361,
"step": 5550
},
{
"epoch": 0.2846610690149498,
"grad_norm": 0.29257112741470337,
"learning_rate": 0.0015,
"loss": 1.3699,
"step": 5560
},
{
"epoch": 0.2851730493549048,
"grad_norm": 0.28169023990631104,
"learning_rate": 0.0015,
"loss": 1.353,
"step": 5570
},
{
"epoch": 0.2856850296948597,
"grad_norm": 0.31054553389549255,
"learning_rate": 0.0015,
"loss": 1.3955,
"step": 5580
},
{
"epoch": 0.2861970100348147,
"grad_norm": 0.28373947739601135,
"learning_rate": 0.0015,
"loss": 1.3843,
"step": 5590
},
{
"epoch": 0.2867089903747696,
"grad_norm": 0.29920247197151184,
"learning_rate": 0.0015,
"loss": 1.3588,
"step": 5600
},
{
"epoch": 0.2872209707147246,
"grad_norm": 0.2981637120246887,
"learning_rate": 0.0015,
"loss": 1.376,
"step": 5610
},
{
"epoch": 0.2877329510546795,
"grad_norm": 0.269811749458313,
"learning_rate": 0.0015,
"loss": 1.3733,
"step": 5620
},
{
"epoch": 0.2882449313946345,
"grad_norm": 0.28365617990493774,
"learning_rate": 0.0015,
"loss": 1.3376,
"step": 5630
},
{
"epoch": 0.2887569117345894,
"grad_norm": 0.2953552305698395,
"learning_rate": 0.0015,
"loss": 1.367,
"step": 5640
},
{
"epoch": 0.2892688920745443,
"grad_norm": 0.2910911440849304,
"learning_rate": 0.0015,
"loss": 1.3708,
"step": 5650
},
{
"epoch": 0.2897808724144993,
"grad_norm": 0.2998880445957184,
"learning_rate": 0.0015,
"loss": 1.3917,
"step": 5660
},
{
"epoch": 0.2902928527544542,
"grad_norm": 0.3000008165836334,
"learning_rate": 0.0015,
"loss": 1.3597,
"step": 5670
},
{
"epoch": 0.2908048330944092,
"grad_norm": 0.3019564747810364,
"learning_rate": 0.0015,
"loss": 1.3641,
"step": 5680
},
{
"epoch": 0.2913168134343641,
"grad_norm": 0.28087547421455383,
"learning_rate": 0.0015,
"loss": 1.3427,
"step": 5690
},
{
"epoch": 0.2918287937743191,
"grad_norm": 0.32179591059684753,
"learning_rate": 0.0015,
"loss": 1.3576,
"step": 5700
},
{
"epoch": 0.292340774114274,
"grad_norm": 0.30196836590766907,
"learning_rate": 0.0015,
"loss": 1.3866,
"step": 5710
},
{
"epoch": 0.292852754454229,
"grad_norm": 0.29928138852119446,
"learning_rate": 0.0015,
"loss": 1.3711,
"step": 5720
},
{
"epoch": 0.2933647347941839,
"grad_norm": 0.30917906761169434,
"learning_rate": 0.0015,
"loss": 1.3481,
"step": 5730
},
{
"epoch": 0.2938767151341388,
"grad_norm": 0.32579630613327026,
"learning_rate": 0.0015,
"loss": 1.3713,
"step": 5740
},
{
"epoch": 0.2943886954740938,
"grad_norm": 0.3042047321796417,
"learning_rate": 0.0015,
"loss": 1.3758,
"step": 5750
},
{
"epoch": 0.2949006758140487,
"grad_norm": 0.2910909354686737,
"learning_rate": 0.0015,
"loss": 1.3675,
"step": 5760
},
{
"epoch": 0.2954126561540037,
"grad_norm": 0.29718905687332153,
"learning_rate": 0.0015,
"loss": 1.3576,
"step": 5770
},
{
"epoch": 0.2959246364939586,
"grad_norm": 0.28392040729522705,
"learning_rate": 0.0015,
"loss": 1.3779,
"step": 5780
},
{
"epoch": 0.2964366168339136,
"grad_norm": 0.2852902114391327,
"learning_rate": 0.0015,
"loss": 1.3709,
"step": 5790
},
{
"epoch": 0.2969485971738685,
"grad_norm": 0.29683250188827515,
"learning_rate": 0.0015,
"loss": 1.3757,
"step": 5800
},
{
"epoch": 0.2974605775138235,
"grad_norm": 0.2882269620895386,
"learning_rate": 0.0015,
"loss": 1.3706,
"step": 5810
},
{
"epoch": 0.2979725578537784,
"grad_norm": 0.3086804449558258,
"learning_rate": 0.0015,
"loss": 1.3506,
"step": 5820
},
{
"epoch": 0.2984845381937334,
"grad_norm": 0.2780090868473053,
"learning_rate": 0.0015,
"loss": 1.3565,
"step": 5830
},
{
"epoch": 0.2989965185336883,
"grad_norm": 0.30415329337120056,
"learning_rate": 0.0015,
"loss": 1.3593,
"step": 5840
},
{
"epoch": 0.2995084988736432,
"grad_norm": 0.2865590751171112,
"learning_rate": 0.0015,
"loss": 1.3873,
"step": 5850
},
{
"epoch": 0.3000204792135982,
"grad_norm": 0.2798267900943756,
"learning_rate": 0.0015,
"loss": 1.3439,
"step": 5860
},
{
"epoch": 0.3005324595535531,
"grad_norm": 0.29937195777893066,
"learning_rate": 0.0015,
"loss": 1.3483,
"step": 5870
},
{
"epoch": 0.3010444398935081,
"grad_norm": 0.27708205580711365,
"learning_rate": 0.0015,
"loss": 1.3207,
"step": 5880
},
{
"epoch": 0.301556420233463,
"grad_norm": 0.2955605983734131,
"learning_rate": 0.0015,
"loss": 1.3524,
"step": 5890
},
{
"epoch": 0.302068400573418,
"grad_norm": 0.3226946294307709,
"learning_rate": 0.0015,
"loss": 1.3545,
"step": 5900
},
{
"epoch": 0.3025803809133729,
"grad_norm": 0.2925417721271515,
"learning_rate": 0.0015,
"loss": 1.3435,
"step": 5910
},
{
"epoch": 0.3030923612533279,
"grad_norm": 0.3087621331214905,
"learning_rate": 0.0015,
"loss": 1.3275,
"step": 5920
},
{
"epoch": 0.3036043415932828,
"grad_norm": 0.2996879518032074,
"learning_rate": 0.0015,
"loss": 1.3514,
"step": 5930
},
{
"epoch": 0.3041163219332378,
"grad_norm": 0.3085525333881378,
"learning_rate": 0.0015,
"loss": 1.3539,
"step": 5940
},
{
"epoch": 0.3046283022731927,
"grad_norm": 0.28985559940338135,
"learning_rate": 0.0015,
"loss": 1.3661,
"step": 5950
},
{
"epoch": 0.30514028261314763,
"grad_norm": 0.2889237701892853,
"learning_rate": 0.0015,
"loss": 1.3622,
"step": 5960
},
{
"epoch": 0.3056522629531026,
"grad_norm": 0.3278009593486786,
"learning_rate": 0.0015,
"loss": 1.3438,
"step": 5970
},
{
"epoch": 0.3061642432930575,
"grad_norm": 0.2967126965522766,
"learning_rate": 0.0015,
"loss": 1.3752,
"step": 5980
},
{
"epoch": 0.3066762236330125,
"grad_norm": 0.2810833752155304,
"learning_rate": 0.0015,
"loss": 1.3673,
"step": 5990
},
{
"epoch": 0.3071882039729674,
"grad_norm": 0.2842026650905609,
"learning_rate": 0.0015,
"loss": 1.3315,
"step": 6000
},
{
"epoch": 0.3077001843129224,
"grad_norm": 0.2904771864414215,
"learning_rate": 0.0015,
"loss": 1.3551,
"step": 6010
},
{
"epoch": 0.3082121646528773,
"grad_norm": 0.2798822224140167,
"learning_rate": 0.0015,
"loss": 1.374,
"step": 6020
},
{
"epoch": 0.3087241449928323,
"grad_norm": 0.2831931412220001,
"learning_rate": 0.0015,
"loss": 1.3449,
"step": 6030
},
{
"epoch": 0.3092361253327872,
"grad_norm": 0.27797648310661316,
"learning_rate": 0.0015,
"loss": 1.3427,
"step": 6040
},
{
"epoch": 0.3097481056727422,
"grad_norm": 0.2972757816314697,
"learning_rate": 0.0015,
"loss": 1.3498,
"step": 6050
},
{
"epoch": 0.3102600860126971,
"grad_norm": 0.2661411166191101,
"learning_rate": 0.0015,
"loss": 1.3391,
"step": 6060
},
{
"epoch": 0.31077206635265203,
"grad_norm": 0.2736954689025879,
"learning_rate": 0.0015,
"loss": 1.3637,
"step": 6070
},
{
"epoch": 0.311284046692607,
"grad_norm": 0.27739083766937256,
"learning_rate": 0.0015,
"loss": 1.3432,
"step": 6080
},
{
"epoch": 0.31179602703256193,
"grad_norm": 0.275734543800354,
"learning_rate": 0.0015,
"loss": 1.3523,
"step": 6090
},
{
"epoch": 0.3123080073725169,
"grad_norm": 0.29389500617980957,
"learning_rate": 0.0015,
"loss": 1.3566,
"step": 6100
},
{
"epoch": 0.3128199877124718,
"grad_norm": 0.3517824113368988,
"learning_rate": 0.0015,
"loss": 1.3401,
"step": 6110
},
{
"epoch": 0.3133319680524268,
"grad_norm": 0.2847048342227936,
"learning_rate": 0.0015,
"loss": 1.3345,
"step": 6120
},
{
"epoch": 0.3138439483923817,
"grad_norm": 0.2781658470630646,
"learning_rate": 0.0015,
"loss": 1.3165,
"step": 6130
},
{
"epoch": 0.3143559287323367,
"grad_norm": 0.27928218245506287,
"learning_rate": 0.0015,
"loss": 1.3419,
"step": 6140
},
{
"epoch": 0.3148679090722916,
"grad_norm": 0.29375484585762024,
"learning_rate": 0.0015,
"loss": 1.3424,
"step": 6150
},
{
"epoch": 0.3153798894122466,
"grad_norm": 0.2773997187614441,
"learning_rate": 0.0015,
"loss": 1.3153,
"step": 6160
},
{
"epoch": 0.3158918697522015,
"grad_norm": 0.2810317277908325,
"learning_rate": 0.0015,
"loss": 1.3633,
"step": 6170
},
{
"epoch": 0.31640385009215644,
"grad_norm": 0.2810805141925812,
"learning_rate": 0.0015,
"loss": 1.3388,
"step": 6180
},
{
"epoch": 0.3169158304321114,
"grad_norm": 0.27900010347366333,
"learning_rate": 0.0015,
"loss": 1.3494,
"step": 6190
},
{
"epoch": 0.31742781077206633,
"grad_norm": 0.2763247787952423,
"learning_rate": 0.0015,
"loss": 1.347,
"step": 6200
},
{
"epoch": 0.3179397911120213,
"grad_norm": 0.27593132853507996,
"learning_rate": 0.0015,
"loss": 1.3286,
"step": 6210
},
{
"epoch": 0.31845177145197623,
"grad_norm": 0.2928100526332855,
"learning_rate": 0.0015,
"loss": 1.3485,
"step": 6220
},
{
"epoch": 0.3189637517919312,
"grad_norm": 0.2809889316558838,
"learning_rate": 0.0015,
"loss": 1.3318,
"step": 6230
},
{
"epoch": 0.3194757321318861,
"grad_norm": 0.2984907329082489,
"learning_rate": 0.0015,
"loss": 1.3474,
"step": 6240
},
{
"epoch": 0.3199877124718411,
"grad_norm": 0.2861260771751404,
"learning_rate": 0.0015,
"loss": 1.3308,
"step": 6250
},
{
"epoch": 0.320499692811796,
"grad_norm": 0.30209678411483765,
"learning_rate": 0.0015,
"loss": 1.3438,
"step": 6260
},
{
"epoch": 0.321011673151751,
"grad_norm": 0.27839919924736023,
"learning_rate": 0.0015,
"loss": 1.3606,
"step": 6270
},
{
"epoch": 0.3215236534917059,
"grad_norm": 0.27120068669319153,
"learning_rate": 0.0015,
"loss": 1.3291,
"step": 6280
},
{
"epoch": 0.32203563383166084,
"grad_norm": 0.2891988158226013,
"learning_rate": 0.0015,
"loss": 1.3483,
"step": 6290
},
{
"epoch": 0.3225476141716158,
"grad_norm": 0.3099561929702759,
"learning_rate": 0.0015,
"loss": 1.3538,
"step": 6300
},
{
"epoch": 0.32305959451157074,
"grad_norm": 0.28136762976646423,
"learning_rate": 0.0015,
"loss": 1.344,
"step": 6310
},
{
"epoch": 0.3235715748515257,
"grad_norm": 0.27209803462028503,
"learning_rate": 0.0015,
"loss": 1.3395,
"step": 6320
},
{
"epoch": 0.32408355519148063,
"grad_norm": 0.2847345173358917,
"learning_rate": 0.0015,
"loss": 1.3278,
"step": 6330
},
{
"epoch": 0.3245955355314356,
"grad_norm": 0.29409244656562805,
"learning_rate": 0.0015,
"loss": 1.352,
"step": 6340
},
{
"epoch": 0.32510751587139053,
"grad_norm": 0.26782944798469543,
"learning_rate": 0.0015,
"loss": 1.3211,
"step": 6350
},
{
"epoch": 0.3256194962113455,
"grad_norm": 0.27680841088294983,
"learning_rate": 0.0015,
"loss": 1.3168,
"step": 6360
},
{
"epoch": 0.3261314765513004,
"grad_norm": 0.28913265466690063,
"learning_rate": 0.0015,
"loss": 1.3412,
"step": 6370
},
{
"epoch": 0.3266434568912554,
"grad_norm": 0.2598094046115875,
"learning_rate": 0.0015,
"loss": 1.3235,
"step": 6380
},
{
"epoch": 0.3271554372312103,
"grad_norm": 0.2622967064380646,
"learning_rate": 0.0015,
"loss": 1.3353,
"step": 6390
},
{
"epoch": 0.32766741757116524,
"grad_norm": 0.2802422046661377,
"learning_rate": 0.0015,
"loss": 1.3278,
"step": 6400
},
{
"epoch": 0.3281793979111202,
"grad_norm": 0.2863336503505707,
"learning_rate": 0.0015,
"loss": 1.3421,
"step": 6410
},
{
"epoch": 0.32869137825107514,
"grad_norm": 0.28782033920288086,
"learning_rate": 0.0015,
"loss": 1.3395,
"step": 6420
},
{
"epoch": 0.3292033585910301,
"grad_norm": 0.2650611698627472,
"learning_rate": 0.0015,
"loss": 1.3461,
"step": 6430
},
{
"epoch": 0.32971533893098504,
"grad_norm": 0.28210777044296265,
"learning_rate": 0.0015,
"loss": 1.3452,
"step": 6440
},
{
"epoch": 0.33022731927094,
"grad_norm": 0.29541024565696716,
"learning_rate": 0.0015,
"loss": 1.3304,
"step": 6450
},
{
"epoch": 0.33073929961089493,
"grad_norm": 0.27473190426826477,
"learning_rate": 0.0015,
"loss": 1.3277,
"step": 6460
},
{
"epoch": 0.3312512799508499,
"grad_norm": 0.2899293005466461,
"learning_rate": 0.0015,
"loss": 1.3193,
"step": 6470
},
{
"epoch": 0.33176326029080483,
"grad_norm": 0.2961236834526062,
"learning_rate": 0.0015,
"loss": 1.3252,
"step": 6480
},
{
"epoch": 0.3322752406307598,
"grad_norm": 0.2859441637992859,
"learning_rate": 0.0015,
"loss": 1.3327,
"step": 6490
},
{
"epoch": 0.3327872209707147,
"grad_norm": 0.26721256971359253,
"learning_rate": 0.0015,
"loss": 1.344,
"step": 6500
},
{
"epoch": 0.33329920131066965,
"grad_norm": 0.27258962392807007,
"learning_rate": 0.0015,
"loss": 1.3291,
"step": 6510
},
{
"epoch": 0.3338111816506246,
"grad_norm": 0.2868225872516632,
"learning_rate": 0.0015,
"loss": 1.3542,
"step": 6520
},
{
"epoch": 0.33432316199057954,
"grad_norm": 0.27058276534080505,
"learning_rate": 0.0015,
"loss": 1.3428,
"step": 6530
},
{
"epoch": 0.3348351423305345,
"grad_norm": 0.2648937404155731,
"learning_rate": 0.0015,
"loss": 1.3345,
"step": 6540
},
{
"epoch": 0.33534712267048944,
"grad_norm": 0.2588028609752655,
"learning_rate": 0.0015,
"loss": 1.3163,
"step": 6550
},
{
"epoch": 0.3358591030104444,
"grad_norm": 0.2773786783218384,
"learning_rate": 0.0015,
"loss": 1.3353,
"step": 6560
},
{
"epoch": 0.33637108335039934,
"grad_norm": 0.2635444402694702,
"learning_rate": 0.0015,
"loss": 1.3073,
"step": 6570
},
{
"epoch": 0.3368830636903543,
"grad_norm": 0.28633764386177063,
"learning_rate": 0.0015,
"loss": 1.3085,
"step": 6580
},
{
"epoch": 0.33739504403030923,
"grad_norm": 0.29486966133117676,
"learning_rate": 0.0015,
"loss": 1.3316,
"step": 6590
},
{
"epoch": 0.3379070243702642,
"grad_norm": 0.2629407048225403,
"learning_rate": 0.0015,
"loss": 1.3319,
"step": 6600
},
{
"epoch": 0.33841900471021913,
"grad_norm": 0.2779609262943268,
"learning_rate": 0.0015,
"loss": 1.3043,
"step": 6610
},
{
"epoch": 0.33893098505017405,
"grad_norm": 0.2911774218082428,
"learning_rate": 0.0015,
"loss": 1.361,
"step": 6620
},
{
"epoch": 0.339442965390129,
"grad_norm": 0.26540687680244446,
"learning_rate": 0.0015,
"loss": 1.3095,
"step": 6630
},
{
"epoch": 0.33995494573008395,
"grad_norm": 0.27710777521133423,
"learning_rate": 0.0015,
"loss": 1.3173,
"step": 6640
},
{
"epoch": 0.3404669260700389,
"grad_norm": 0.2614011764526367,
"learning_rate": 0.0015,
"loss": 1.3178,
"step": 6650
},
{
"epoch": 0.34097890640999384,
"grad_norm": 0.2797437906265259,
"learning_rate": 0.0015,
"loss": 1.3287,
"step": 6660
},
{
"epoch": 0.3414908867499488,
"grad_norm": 0.28846311569213867,
"learning_rate": 0.0015,
"loss": 1.3222,
"step": 6670
},
{
"epoch": 0.34200286708990374,
"grad_norm": 0.2507641911506653,
"learning_rate": 0.0015,
"loss": 1.3297,
"step": 6680
},
{
"epoch": 0.3425148474298587,
"grad_norm": 0.277458518743515,
"learning_rate": 0.0015,
"loss": 1.3092,
"step": 6690
},
{
"epoch": 0.34302682776981364,
"grad_norm": 0.28139162063598633,
"learning_rate": 0.0015,
"loss": 1.3509,
"step": 6700
},
{
"epoch": 0.3435388081097686,
"grad_norm": 0.26460030674934387,
"learning_rate": 0.0015,
"loss": 1.3357,
"step": 6710
},
{
"epoch": 0.34405078844972353,
"grad_norm": 0.2602977752685547,
"learning_rate": 0.0015,
"loss": 1.3375,
"step": 6720
},
{
"epoch": 0.34456276878967845,
"grad_norm": 0.3062650263309479,
"learning_rate": 0.0015,
"loss": 1.3225,
"step": 6730
},
{
"epoch": 0.34507474912963343,
"grad_norm": 0.27152612805366516,
"learning_rate": 0.0015,
"loss": 1.3326,
"step": 6740
},
{
"epoch": 0.34558672946958835,
"grad_norm": 0.2585943341255188,
"learning_rate": 0.0015,
"loss": 1.3275,
"step": 6750
},
{
"epoch": 0.3460987098095433,
"grad_norm": 0.2826108932495117,
"learning_rate": 0.0015,
"loss": 1.3143,
"step": 6760
},
{
"epoch": 0.34661069014949825,
"grad_norm": 0.2719128131866455,
"learning_rate": 0.0015,
"loss": 1.3136,
"step": 6770
},
{
"epoch": 0.3471226704894532,
"grad_norm": 0.2605542540550232,
"learning_rate": 0.0015,
"loss": 1.3207,
"step": 6780
},
{
"epoch": 0.34763465082940814,
"grad_norm": 0.26649779081344604,
"learning_rate": 0.0015,
"loss": 1.304,
"step": 6790
},
{
"epoch": 0.3481466311693631,
"grad_norm": 0.28349971771240234,
"learning_rate": 0.0015,
"loss": 1.3176,
"step": 6800
},
{
"epoch": 0.34865861150931804,
"grad_norm": 0.27145761251449585,
"learning_rate": 0.0015,
"loss": 1.3294,
"step": 6810
},
{
"epoch": 0.349170591849273,
"grad_norm": 0.26513341069221497,
"learning_rate": 0.0015,
"loss": 1.3299,
"step": 6820
},
{
"epoch": 0.34968257218922794,
"grad_norm": 0.2701232135295868,
"learning_rate": 0.0015,
"loss": 1.3028,
"step": 6830
},
{
"epoch": 0.35019455252918286,
"grad_norm": 0.27336186170578003,
"learning_rate": 0.0015,
"loss": 1.3253,
"step": 6840
},
{
"epoch": 0.35070653286913783,
"grad_norm": 0.26006847620010376,
"learning_rate": 0.0015,
"loss": 1.3097,
"step": 6850
},
{
"epoch": 0.35121851320909275,
"grad_norm": 0.2867346405982971,
"learning_rate": 0.0015,
"loss": 1.3489,
"step": 6860
},
{
"epoch": 0.35173049354904773,
"grad_norm": 0.2665490210056305,
"learning_rate": 0.0015,
"loss": 1.3029,
"step": 6870
},
{
"epoch": 0.35224247388900265,
"grad_norm": 0.26250341534614563,
"learning_rate": 0.0015,
"loss": 1.324,
"step": 6880
},
{
"epoch": 0.3527544542289576,
"grad_norm": 0.27404358983039856,
"learning_rate": 0.0015,
"loss": 1.3222,
"step": 6890
},
{
"epoch": 0.35326643456891255,
"grad_norm": 0.271932989358902,
"learning_rate": 0.0015,
"loss": 1.3068,
"step": 6900
},
{
"epoch": 0.3537784149088675,
"grad_norm": 0.25479060411453247,
"learning_rate": 0.0015,
"loss": 1.3143,
"step": 6910
},
{
"epoch": 0.35429039524882244,
"grad_norm": 0.2571351230144501,
"learning_rate": 0.0015,
"loss": 1.2886,
"step": 6920
},
{
"epoch": 0.35480237558877736,
"grad_norm": 0.2612917125225067,
"learning_rate": 0.0015,
"loss": 1.3199,
"step": 6930
},
{
"epoch": 0.35531435592873234,
"grad_norm": 0.2573522925376892,
"learning_rate": 0.0015,
"loss": 1.3143,
"step": 6940
},
{
"epoch": 0.35582633626868726,
"grad_norm": 0.2598212659358978,
"learning_rate": 0.0015,
"loss": 1.3039,
"step": 6950
},
{
"epoch": 0.35633831660864224,
"grad_norm": 0.2575034201145172,
"learning_rate": 0.0015,
"loss": 1.3095,
"step": 6960
},
{
"epoch": 0.35685029694859716,
"grad_norm": 0.2559545636177063,
"learning_rate": 0.0015,
"loss": 1.2971,
"step": 6970
},
{
"epoch": 0.35736227728855213,
"grad_norm": 0.26087066531181335,
"learning_rate": 0.0015,
"loss": 1.3023,
"step": 6980
},
{
"epoch": 0.35787425762850705,
"grad_norm": 0.2606737017631531,
"learning_rate": 0.0015,
"loss": 1.3098,
"step": 6990
},
{
"epoch": 0.35838623796846203,
"grad_norm": 0.27495986223220825,
"learning_rate": 0.0015,
"loss": 1.3249,
"step": 7000
},
{
"epoch": 0.35889821830841695,
"grad_norm": 0.25473734736442566,
"learning_rate": 0.0015,
"loss": 1.3253,
"step": 7010
},
{
"epoch": 0.3594101986483719,
"grad_norm": 0.2764824330806732,
"learning_rate": 0.0015,
"loss": 1.3101,
"step": 7020
},
{
"epoch": 0.35992217898832685,
"grad_norm": 0.27935823798179626,
"learning_rate": 0.0015,
"loss": 1.3268,
"step": 7030
},
{
"epoch": 0.36043415932828177,
"grad_norm": 0.26057881116867065,
"learning_rate": 0.0015,
"loss": 1.2999,
"step": 7040
},
{
"epoch": 0.36094613966823674,
"grad_norm": 0.27014756202697754,
"learning_rate": 0.0015,
"loss": 1.3083,
"step": 7050
},
{
"epoch": 0.36145812000819166,
"grad_norm": 0.26150983572006226,
"learning_rate": 0.0015,
"loss": 1.3059,
"step": 7060
},
{
"epoch": 0.36197010034814664,
"grad_norm": 0.2634667158126831,
"learning_rate": 0.0015,
"loss": 1.3325,
"step": 7070
},
{
"epoch": 0.36248208068810156,
"grad_norm": 0.2591879665851593,
"learning_rate": 0.0015,
"loss": 1.3004,
"step": 7080
},
{
"epoch": 0.36299406102805654,
"grad_norm": 0.27941566705703735,
"learning_rate": 0.0015,
"loss": 1.3216,
"step": 7090
},
{
"epoch": 0.36350604136801146,
"grad_norm": 0.2634701430797577,
"learning_rate": 0.0015,
"loss": 1.3043,
"step": 7100
},
{
"epoch": 0.36401802170796643,
"grad_norm": 0.2601988613605499,
"learning_rate": 0.0015,
"loss": 1.3128,
"step": 7110
},
{
"epoch": 0.36453000204792135,
"grad_norm": 0.2701079249382019,
"learning_rate": 0.0015,
"loss": 1.2908,
"step": 7120
},
{
"epoch": 0.36504198238787633,
"grad_norm": 0.2694578170776367,
"learning_rate": 0.0015,
"loss": 1.303,
"step": 7130
},
{
"epoch": 0.36555396272783125,
"grad_norm": 0.2465587705373764,
"learning_rate": 0.0015,
"loss": 1.3177,
"step": 7140
},
{
"epoch": 0.36606594306778617,
"grad_norm": 0.26136472821235657,
"learning_rate": 0.0015,
"loss": 1.3112,
"step": 7150
},
{
"epoch": 0.36657792340774115,
"grad_norm": 0.2548895478248596,
"learning_rate": 0.0015,
"loss": 1.3114,
"step": 7160
},
{
"epoch": 0.36708990374769607,
"grad_norm": 0.2586556673049927,
"learning_rate": 0.0015,
"loss": 1.3076,
"step": 7170
},
{
"epoch": 0.36760188408765104,
"grad_norm": 0.25887277722358704,
"learning_rate": 0.0015,
"loss": 1.3217,
"step": 7180
},
{
"epoch": 0.36811386442760596,
"grad_norm": 0.2628803253173828,
"learning_rate": 0.0015,
"loss": 1.3012,
"step": 7190
},
{
"epoch": 0.36862584476756094,
"grad_norm": 0.2630269527435303,
"learning_rate": 0.0015,
"loss": 1.3187,
"step": 7200
},
{
"epoch": 0.36913782510751586,
"grad_norm": 0.2589748501777649,
"learning_rate": 0.0015,
"loss": 1.2885,
"step": 7210
},
{
"epoch": 0.36964980544747084,
"grad_norm": 0.262361615896225,
"learning_rate": 0.0015,
"loss": 1.2962,
"step": 7220
},
{
"epoch": 0.37016178578742576,
"grad_norm": 0.24950037896633148,
"learning_rate": 0.0015,
"loss": 1.3026,
"step": 7230
},
{
"epoch": 0.37067376612738073,
"grad_norm": 0.2537461817264557,
"learning_rate": 0.0015,
"loss": 1.2971,
"step": 7240
},
{
"epoch": 0.37118574646733565,
"grad_norm": 0.25920331478118896,
"learning_rate": 0.0015,
"loss": 1.2951,
"step": 7250
},
{
"epoch": 0.3716977268072906,
"grad_norm": 0.2526357173919678,
"learning_rate": 0.0015,
"loss": 1.2989,
"step": 7260
},
{
"epoch": 0.37220970714724555,
"grad_norm": 0.28876397013664246,
"learning_rate": 0.0015,
"loss": 1.3063,
"step": 7270
},
{
"epoch": 0.37272168748720047,
"grad_norm": 0.27300864458084106,
"learning_rate": 0.0015,
"loss": 1.2954,
"step": 7280
},
{
"epoch": 0.37323366782715545,
"grad_norm": 0.26332223415374756,
"learning_rate": 0.0015,
"loss": 1.3329,
"step": 7290
},
{
"epoch": 0.37374564816711037,
"grad_norm": 0.26332515478134155,
"learning_rate": 0.0015,
"loss": 1.2908,
"step": 7300
},
{
"epoch": 0.37425762850706534,
"grad_norm": 0.2604503631591797,
"learning_rate": 0.0015,
"loss": 1.3002,
"step": 7310
},
{
"epoch": 0.37476960884702026,
"grad_norm": 0.25917840003967285,
"learning_rate": 0.0015,
"loss": 1.2983,
"step": 7320
},
{
"epoch": 0.37528158918697524,
"grad_norm": 0.26824817061424255,
"learning_rate": 0.0015,
"loss": 1.3183,
"step": 7330
},
{
"epoch": 0.37579356952693016,
"grad_norm": 0.2575696110725403,
"learning_rate": 0.0015,
"loss": 1.318,
"step": 7340
},
{
"epoch": 0.37630554986688514,
"grad_norm": 0.2578194737434387,
"learning_rate": 0.0015,
"loss": 1.2833,
"step": 7350
},
{
"epoch": 0.37681753020684006,
"grad_norm": 0.2768312096595764,
"learning_rate": 0.0015,
"loss": 1.2948,
"step": 7360
},
{
"epoch": 0.377329510546795,
"grad_norm": 0.2382088154554367,
"learning_rate": 0.0015,
"loss": 1.3,
"step": 7370
},
{
"epoch": 0.37784149088674995,
"grad_norm": 0.2637539803981781,
"learning_rate": 0.0015,
"loss": 1.2792,
"step": 7380
},
{
"epoch": 0.3783534712267049,
"grad_norm": 0.2832081615924835,
"learning_rate": 0.0015,
"loss": 1.3097,
"step": 7390
},
{
"epoch": 0.37886545156665985,
"grad_norm": 0.2672945261001587,
"learning_rate": 0.0015,
"loss": 1.2989,
"step": 7400
},
{
"epoch": 0.37937743190661477,
"grad_norm": 0.24696801602840424,
"learning_rate": 0.0015,
"loss": 1.3174,
"step": 7410
},
{
"epoch": 0.37988941224656975,
"grad_norm": 0.2638930082321167,
"learning_rate": 0.0015,
"loss": 1.295,
"step": 7420
},
{
"epoch": 0.38040139258652467,
"grad_norm": 0.2714937925338745,
"learning_rate": 0.0015,
"loss": 1.2917,
"step": 7430
},
{
"epoch": 0.38091337292647964,
"grad_norm": 0.2469353824853897,
"learning_rate": 0.0015,
"loss": 1.2919,
"step": 7440
},
{
"epoch": 0.38142535326643456,
"grad_norm": 0.25035470724105835,
"learning_rate": 0.0015,
"loss": 1.2896,
"step": 7450
},
{
"epoch": 0.38193733360638954,
"grad_norm": 0.26178446412086487,
"learning_rate": 0.0015,
"loss": 1.2891,
"step": 7460
},
{
"epoch": 0.38244931394634446,
"grad_norm": 0.26942870020866394,
"learning_rate": 0.0015,
"loss": 1.2723,
"step": 7470
},
{
"epoch": 0.3829612942862994,
"grad_norm": 0.26943838596343994,
"learning_rate": 0.0015,
"loss": 1.284,
"step": 7480
},
{
"epoch": 0.38347327462625436,
"grad_norm": 0.25865715742111206,
"learning_rate": 0.0015,
"loss": 1.3063,
"step": 7490
},
{
"epoch": 0.3839852549662093,
"grad_norm": 0.27455562353134155,
"learning_rate": 0.0015,
"loss": 1.2988,
"step": 7500
},
{
"epoch": 0.38449723530616425,
"grad_norm": 0.2636263370513916,
"learning_rate": 0.0015,
"loss": 1.2739,
"step": 7510
},
{
"epoch": 0.3850092156461192,
"grad_norm": 0.26559826731681824,
"learning_rate": 0.0015,
"loss": 1.2958,
"step": 7520
},
{
"epoch": 0.38552119598607415,
"grad_norm": 0.2592698335647583,
"learning_rate": 0.0015,
"loss": 1.2981,
"step": 7530
},
{
"epoch": 0.38603317632602907,
"grad_norm": 0.25872740149497986,
"learning_rate": 0.0015,
"loss": 1.3005,
"step": 7540
},
{
"epoch": 0.38654515666598405,
"grad_norm": 0.26369425654411316,
"learning_rate": 0.0015,
"loss": 1.3021,
"step": 7550
},
{
"epoch": 0.38705713700593897,
"grad_norm": 0.25757378339767456,
"learning_rate": 0.0015,
"loss": 1.302,
"step": 7560
},
{
"epoch": 0.38756911734589394,
"grad_norm": 0.27320241928100586,
"learning_rate": 0.0015,
"loss": 1.2802,
"step": 7570
},
{
"epoch": 0.38808109768584886,
"grad_norm": 0.2795805335044861,
"learning_rate": 0.0015,
"loss": 1.295,
"step": 7580
},
{
"epoch": 0.3885930780258038,
"grad_norm": 0.26023516058921814,
"learning_rate": 0.0015,
"loss": 1.2889,
"step": 7590
},
{
"epoch": 0.38910505836575876,
"grad_norm": 0.2582970857620239,
"learning_rate": 0.0015,
"loss": 1.302,
"step": 7600
},
{
"epoch": 0.3896170387057137,
"grad_norm": 0.2473934441804886,
"learning_rate": 0.0015,
"loss": 1.3023,
"step": 7610
},
{
"epoch": 0.39012901904566866,
"grad_norm": 0.2547856271266937,
"learning_rate": 0.0015,
"loss": 1.29,
"step": 7620
},
{
"epoch": 0.3906409993856236,
"grad_norm": 0.26764586567878723,
"learning_rate": 0.0015,
"loss": 1.2905,
"step": 7630
},
{
"epoch": 0.39115297972557855,
"grad_norm": 0.2481442391872406,
"learning_rate": 0.0015,
"loss": 1.3164,
"step": 7640
},
{
"epoch": 0.3916649600655335,
"grad_norm": 0.25532233715057373,
"learning_rate": 0.0015,
"loss": 1.2958,
"step": 7650
},
{
"epoch": 0.39217694040548845,
"grad_norm": 0.24001578986644745,
"learning_rate": 0.0015,
"loss": 1.2827,
"step": 7660
},
{
"epoch": 0.39268892074544337,
"grad_norm": 0.2489776611328125,
"learning_rate": 0.0015,
"loss": 1.2742,
"step": 7670
},
{
"epoch": 0.39320090108539835,
"grad_norm": 0.23535743355751038,
"learning_rate": 0.0015,
"loss": 1.2855,
"step": 7680
},
{
"epoch": 0.39371288142535327,
"grad_norm": 0.25811052322387695,
"learning_rate": 0.0015,
"loss": 1.2971,
"step": 7690
},
{
"epoch": 0.3942248617653082,
"grad_norm": 0.24241647124290466,
"learning_rate": 0.0015,
"loss": 1.2968,
"step": 7700
},
{
"epoch": 0.39473684210526316,
"grad_norm": 0.25648635625839233,
"learning_rate": 0.0015,
"loss": 1.2916,
"step": 7710
},
{
"epoch": 0.3952488224452181,
"grad_norm": 0.2703993618488312,
"learning_rate": 0.0015,
"loss": 1.2909,
"step": 7720
},
{
"epoch": 0.39576080278517306,
"grad_norm": 0.2558510899543762,
"learning_rate": 0.0015,
"loss": 1.2913,
"step": 7730
},
{
"epoch": 0.396272783125128,
"grad_norm": 0.2394089698791504,
"learning_rate": 0.0015,
"loss": 1.2968,
"step": 7740
},
{
"epoch": 0.39678476346508296,
"grad_norm": 0.2338177114725113,
"learning_rate": 0.0015,
"loss": 1.2894,
"step": 7750
},
{
"epoch": 0.3972967438050379,
"grad_norm": 0.25422418117523193,
"learning_rate": 0.0015,
"loss": 1.2958,
"step": 7760
},
{
"epoch": 0.39780872414499285,
"grad_norm": 0.2437313348054886,
"learning_rate": 0.0015,
"loss": 1.2878,
"step": 7770
},
{
"epoch": 0.3983207044849478,
"grad_norm": 0.26623979210853577,
"learning_rate": 0.0015,
"loss": 1.2915,
"step": 7780
},
{
"epoch": 0.39883268482490275,
"grad_norm": 0.24698524177074432,
"learning_rate": 0.0015,
"loss": 1.2949,
"step": 7790
},
{
"epoch": 0.39934466516485767,
"grad_norm": 0.23496921360492706,
"learning_rate": 0.0015,
"loss": 1.3069,
"step": 7800
},
{
"epoch": 0.3998566455048126,
"grad_norm": 0.2393864393234253,
"learning_rate": 0.0015,
"loss": 1.2913,
"step": 7810
},
{
"epoch": 0.40036862584476757,
"grad_norm": 0.24716414511203766,
"learning_rate": 0.0015,
"loss": 1.2829,
"step": 7820
},
{
"epoch": 0.4008806061847225,
"grad_norm": 0.24985013902187347,
"learning_rate": 0.0015,
"loss": 1.2773,
"step": 7830
},
{
"epoch": 0.40139258652467746,
"grad_norm": 0.24895814061164856,
"learning_rate": 0.0015,
"loss": 1.2889,
"step": 7840
},
{
"epoch": 0.4019045668646324,
"grad_norm": 0.2497827261686325,
"learning_rate": 0.0015,
"loss": 1.2747,
"step": 7850
},
{
"epoch": 0.40241654720458736,
"grad_norm": 0.23879243433475494,
"learning_rate": 0.0015,
"loss": 1.3071,
"step": 7860
},
{
"epoch": 0.4029285275445423,
"grad_norm": 0.24402157962322235,
"learning_rate": 0.0015,
"loss": 1.2924,
"step": 7870
},
{
"epoch": 0.40344050788449726,
"grad_norm": 0.24736930429935455,
"learning_rate": 0.0015,
"loss": 1.2643,
"step": 7880
},
{
"epoch": 0.4039524882244522,
"grad_norm": 0.2525321841239929,
"learning_rate": 0.0015,
"loss": 1.3014,
"step": 7890
},
{
"epoch": 0.40446446856440715,
"grad_norm": 0.2575211226940155,
"learning_rate": 0.0015,
"loss": 1.2625,
"step": 7900
},
{
"epoch": 0.4049764489043621,
"grad_norm": 0.24405083060264587,
"learning_rate": 0.0015,
"loss": 1.2834,
"step": 7910
},
{
"epoch": 0.405488429244317,
"grad_norm": 0.28250402212142944,
"learning_rate": 0.0015,
"loss": 1.2814,
"step": 7920
},
{
"epoch": 0.40600040958427197,
"grad_norm": 0.2795003056526184,
"learning_rate": 0.0015,
"loss": 1.3154,
"step": 7930
},
{
"epoch": 0.4065123899242269,
"grad_norm": 0.24883300065994263,
"learning_rate": 0.0015,
"loss": 1.2887,
"step": 7940
},
{
"epoch": 0.40702437026418187,
"grad_norm": 0.2502342164516449,
"learning_rate": 0.0015,
"loss": 1.3033,
"step": 7950
},
{
"epoch": 0.4075363506041368,
"grad_norm": 0.24973638355731964,
"learning_rate": 0.0015,
"loss": 1.2947,
"step": 7960
},
{
"epoch": 0.40804833094409176,
"grad_norm": 0.24371185898780823,
"learning_rate": 0.0015,
"loss": 1.2908,
"step": 7970
},
{
"epoch": 0.4085603112840467,
"grad_norm": 0.24570930004119873,
"learning_rate": 0.0015,
"loss": 1.2879,
"step": 7980
},
{
"epoch": 0.40907229162400166,
"grad_norm": 0.23717066645622253,
"learning_rate": 0.0015,
"loss": 1.2928,
"step": 7990
},
{
"epoch": 0.4095842719639566,
"grad_norm": 0.24726137518882751,
"learning_rate": 0.0015,
"loss": 1.2915,
"step": 8000
},
{
"epoch": 0.41009625230391156,
"grad_norm": 0.2352866679430008,
"learning_rate": 0.0015,
"loss": 1.2817,
"step": 8010
},
{
"epoch": 0.4106082326438665,
"grad_norm": 0.251365065574646,
"learning_rate": 0.0015,
"loss": 1.2979,
"step": 8020
},
{
"epoch": 0.4111202129838214,
"grad_norm": 0.22410385310649872,
"learning_rate": 0.0015,
"loss": 1.2749,
"step": 8030
},
{
"epoch": 0.4116321933237764,
"grad_norm": 0.25029605627059937,
"learning_rate": 0.0015,
"loss": 1.2862,
"step": 8040
},
{
"epoch": 0.4121441736637313,
"grad_norm": 0.25629550218582153,
"learning_rate": 0.0015,
"loss": 1.2749,
"step": 8050
},
{
"epoch": 0.41265615400368627,
"grad_norm": 0.23836827278137207,
"learning_rate": 0.0015,
"loss": 1.28,
"step": 8060
},
{
"epoch": 0.4131681343436412,
"grad_norm": 0.23752672970294952,
"learning_rate": 0.0015,
"loss": 1.2916,
"step": 8070
},
{
"epoch": 0.41368011468359617,
"grad_norm": 0.26047077775001526,
"learning_rate": 0.0015,
"loss": 1.2718,
"step": 8080
},
{
"epoch": 0.4141920950235511,
"grad_norm": 0.24297983944416046,
"learning_rate": 0.0015,
"loss": 1.2961,
"step": 8090
},
{
"epoch": 0.41470407536350606,
"grad_norm": 0.24528458714485168,
"learning_rate": 0.0015,
"loss": 1.2591,
"step": 8100
},
{
"epoch": 0.415216055703461,
"grad_norm": 0.24459367990493774,
"learning_rate": 0.0015,
"loss": 1.2754,
"step": 8110
},
{
"epoch": 0.41572803604341596,
"grad_norm": 0.24630287289619446,
"learning_rate": 0.0015,
"loss": 1.2864,
"step": 8120
},
{
"epoch": 0.4162400163833709,
"grad_norm": 0.2514908015727997,
"learning_rate": 0.0015,
"loss": 1.2847,
"step": 8130
},
{
"epoch": 0.4167519967233258,
"grad_norm": 0.227911576628685,
"learning_rate": 0.0015,
"loss": 1.2798,
"step": 8140
},
{
"epoch": 0.4172639770632808,
"grad_norm": 0.2512179911136627,
"learning_rate": 0.0015,
"loss": 1.2817,
"step": 8150
},
{
"epoch": 0.4177759574032357,
"grad_norm": 0.24971604347229004,
"learning_rate": 0.0015,
"loss": 1.2856,
"step": 8160
},
{
"epoch": 0.4182879377431907,
"grad_norm": 0.24980546534061432,
"learning_rate": 0.0015,
"loss": 1.2932,
"step": 8170
},
{
"epoch": 0.4187999180831456,
"grad_norm": 0.2510388493537903,
"learning_rate": 0.0015,
"loss": 1.2849,
"step": 8180
},
{
"epoch": 0.41931189842310057,
"grad_norm": 0.23916485905647278,
"learning_rate": 0.0015,
"loss": 1.2787,
"step": 8190
},
{
"epoch": 0.4198238787630555,
"grad_norm": 0.2525003254413605,
"learning_rate": 0.0015,
"loss": 1.2856,
"step": 8200
},
{
"epoch": 0.42033585910301047,
"grad_norm": 0.25865113735198975,
"learning_rate": 0.0015,
"loss": 1.2473,
"step": 8210
},
{
"epoch": 0.4208478394429654,
"grad_norm": 0.24689891934394836,
"learning_rate": 0.0015,
"loss": 1.2663,
"step": 8220
},
{
"epoch": 0.4213598197829203,
"grad_norm": 0.2257513701915741,
"learning_rate": 0.0015,
"loss": 1.2576,
"step": 8230
},
{
"epoch": 0.4218718001228753,
"grad_norm": 0.2339119166135788,
"learning_rate": 0.0015,
"loss": 1.3053,
"step": 8240
},
{
"epoch": 0.4223837804628302,
"grad_norm": 0.2590661942958832,
"learning_rate": 0.0015,
"loss": 1.2698,
"step": 8250
},
{
"epoch": 0.4228957608027852,
"grad_norm": 0.2483995407819748,
"learning_rate": 0.0015,
"loss": 1.2728,
"step": 8260
},
{
"epoch": 0.4234077411427401,
"grad_norm": 0.23534591495990753,
"learning_rate": 0.0015,
"loss": 1.2867,
"step": 8270
},
{
"epoch": 0.4239197214826951,
"grad_norm": 0.22678501904010773,
"learning_rate": 0.0015,
"loss": 1.2775,
"step": 8280
},
{
"epoch": 0.42443170182265,
"grad_norm": 0.2298179715871811,
"learning_rate": 0.0015,
"loss": 1.2866,
"step": 8290
},
{
"epoch": 0.424943682162605,
"grad_norm": 0.2495158165693283,
"learning_rate": 0.0015,
"loss": 1.2762,
"step": 8300
},
{
"epoch": 0.4254556625025599,
"grad_norm": 0.22808024287223816,
"learning_rate": 0.0015,
"loss": 1.269,
"step": 8310
},
{
"epoch": 0.42596764284251487,
"grad_norm": 0.24249188601970673,
"learning_rate": 0.0015,
"loss": 1.2881,
"step": 8320
},
{
"epoch": 0.4264796231824698,
"grad_norm": 0.2539406418800354,
"learning_rate": 0.0015,
"loss": 1.2618,
"step": 8330
},
{
"epoch": 0.4269916035224247,
"grad_norm": 0.2367791384458542,
"learning_rate": 0.0015,
"loss": 1.2762,
"step": 8340
},
{
"epoch": 0.4275035838623797,
"grad_norm": 0.2301592379808426,
"learning_rate": 0.0015,
"loss": 1.2724,
"step": 8350
},
{
"epoch": 0.4280155642023346,
"grad_norm": 0.24136430025100708,
"learning_rate": 0.0015,
"loss": 1.2629,
"step": 8360
},
{
"epoch": 0.4285275445422896,
"grad_norm": 0.23719066381454468,
"learning_rate": 0.0015,
"loss": 1.2624,
"step": 8370
},
{
"epoch": 0.4290395248822445,
"grad_norm": 0.2514694631099701,
"learning_rate": 0.0015,
"loss": 1.2686,
"step": 8380
},
{
"epoch": 0.4295515052221995,
"grad_norm": 0.24186182022094727,
"learning_rate": 0.0015,
"loss": 1.2823,
"step": 8390
},
{
"epoch": 0.4300634855621544,
"grad_norm": 0.23494115471839905,
"learning_rate": 0.0015,
"loss": 1.2534,
"step": 8400
},
{
"epoch": 0.4305754659021094,
"grad_norm": 0.2518327534198761,
"learning_rate": 0.0015,
"loss": 1.2913,
"step": 8410
},
{
"epoch": 0.4310874462420643,
"grad_norm": 0.23622803390026093,
"learning_rate": 0.0015,
"loss": 1.2652,
"step": 8420
},
{
"epoch": 0.4315994265820193,
"grad_norm": 0.22990188002586365,
"learning_rate": 0.0015,
"loss": 1.277,
"step": 8430
},
{
"epoch": 0.4321114069219742,
"grad_norm": 0.23679761588573456,
"learning_rate": 0.0015,
"loss": 1.2839,
"step": 8440
},
{
"epoch": 0.4326233872619291,
"grad_norm": 0.25512683391571045,
"learning_rate": 0.0015,
"loss": 1.2818,
"step": 8450
},
{
"epoch": 0.4331353676018841,
"grad_norm": 0.24284730851650238,
"learning_rate": 0.0015,
"loss": 1.2882,
"step": 8460
},
{
"epoch": 0.433647347941839,
"grad_norm": 0.24152646958827972,
"learning_rate": 0.0015,
"loss": 1.2727,
"step": 8470
},
{
"epoch": 0.434159328281794,
"grad_norm": 0.24133774638175964,
"learning_rate": 0.0015,
"loss": 1.2743,
"step": 8480
},
{
"epoch": 0.4346713086217489,
"grad_norm": 0.23270800709724426,
"learning_rate": 0.0015,
"loss": 1.2651,
"step": 8490
},
{
"epoch": 0.4351832889617039,
"grad_norm": 0.2446971833705902,
"learning_rate": 0.0015,
"loss": 1.268,
"step": 8500
},
{
"epoch": 0.4356952693016588,
"grad_norm": 0.23358875513076782,
"learning_rate": 0.0015,
"loss": 1.2774,
"step": 8510
},
{
"epoch": 0.4362072496416138,
"grad_norm": 0.22265927493572235,
"learning_rate": 0.0015,
"loss": 1.2602,
"step": 8520
},
{
"epoch": 0.4367192299815687,
"grad_norm": 0.22781646251678467,
"learning_rate": 0.0015,
"loss": 1.2724,
"step": 8530
},
{
"epoch": 0.4372312103215237,
"grad_norm": 0.23868761956691742,
"learning_rate": 0.0015,
"loss": 1.2581,
"step": 8540
},
{
"epoch": 0.4377431906614786,
"grad_norm": 0.2235594540834427,
"learning_rate": 0.0015,
"loss": 1.2741,
"step": 8550
},
{
"epoch": 0.4382551710014335,
"grad_norm": 0.2419920712709427,
"learning_rate": 0.0015,
"loss": 1.2765,
"step": 8560
},
{
"epoch": 0.4387671513413885,
"grad_norm": 0.27400338649749756,
"learning_rate": 0.0015,
"loss": 1.2635,
"step": 8570
},
{
"epoch": 0.4392791316813434,
"grad_norm": 0.23386618494987488,
"learning_rate": 0.0015,
"loss": 1.2806,
"step": 8580
},
{
"epoch": 0.4397911120212984,
"grad_norm": 0.24642907083034515,
"learning_rate": 0.0015,
"loss": 1.2739,
"step": 8590
},
{
"epoch": 0.4403030923612533,
"grad_norm": 0.2347201406955719,
"learning_rate": 0.0015,
"loss": 1.2581,
"step": 8600
},
{
"epoch": 0.4408150727012083,
"grad_norm": 0.22591201961040497,
"learning_rate": 0.0015,
"loss": 1.2882,
"step": 8610
},
{
"epoch": 0.4413270530411632,
"grad_norm": 0.2508542537689209,
"learning_rate": 0.0015,
"loss": 1.2699,
"step": 8620
},
{
"epoch": 0.4418390333811182,
"grad_norm": 0.2366652637720108,
"learning_rate": 0.0015,
"loss": 1.2522,
"step": 8630
},
{
"epoch": 0.4423510137210731,
"grad_norm": 0.22938509285449982,
"learning_rate": 0.0015,
"loss": 1.2676,
"step": 8640
},
{
"epoch": 0.4428629940610281,
"grad_norm": 0.22820281982421875,
"learning_rate": 0.0015,
"loss": 1.2712,
"step": 8650
},
{
"epoch": 0.443374974400983,
"grad_norm": 0.22258944809436798,
"learning_rate": 0.0015,
"loss": 1.2721,
"step": 8660
},
{
"epoch": 0.4438869547409379,
"grad_norm": 0.23942533135414124,
"learning_rate": 0.0015,
"loss": 1.2659,
"step": 8670
},
{
"epoch": 0.4443989350808929,
"grad_norm": 0.23312713205814362,
"learning_rate": 0.0015,
"loss": 1.2755,
"step": 8680
},
{
"epoch": 0.4449109154208478,
"grad_norm": 0.2283553183078766,
"learning_rate": 0.0015,
"loss": 1.2537,
"step": 8690
},
{
"epoch": 0.4454228957608028,
"grad_norm": 0.23631595075130463,
"learning_rate": 0.0015,
"loss": 1.2487,
"step": 8700
},
{
"epoch": 0.4459348761007577,
"grad_norm": 0.2447190135717392,
"learning_rate": 0.0015,
"loss": 1.2529,
"step": 8710
},
{
"epoch": 0.4464468564407127,
"grad_norm": 0.24584966897964478,
"learning_rate": 0.0015,
"loss": 1.2738,
"step": 8720
},
{
"epoch": 0.4469588367806676,
"grad_norm": 0.2374550849199295,
"learning_rate": 0.0015,
"loss": 1.2791,
"step": 8730
},
{
"epoch": 0.4474708171206226,
"grad_norm": 0.240436390042305,
"learning_rate": 0.0015,
"loss": 1.2518,
"step": 8740
},
{
"epoch": 0.4479827974605775,
"grad_norm": 0.23341523110866547,
"learning_rate": 0.0015,
"loss": 1.2688,
"step": 8750
},
{
"epoch": 0.4484947778005325,
"grad_norm": 0.24230003356933594,
"learning_rate": 0.0015,
"loss": 1.2379,
"step": 8760
},
{
"epoch": 0.4490067581404874,
"grad_norm": 0.2401583343744278,
"learning_rate": 0.0015,
"loss": 1.2699,
"step": 8770
},
{
"epoch": 0.4495187384804423,
"grad_norm": 0.22647708654403687,
"learning_rate": 0.0015,
"loss": 1.2656,
"step": 8780
},
{
"epoch": 0.4500307188203973,
"grad_norm": 0.24045558273792267,
"learning_rate": 0.0015,
"loss": 1.2531,
"step": 8790
},
{
"epoch": 0.4505426991603522,
"grad_norm": 0.2597295045852661,
"learning_rate": 0.0015,
"loss": 1.2568,
"step": 8800
},
{
"epoch": 0.4510546795003072,
"grad_norm": 0.22485364973545074,
"learning_rate": 0.0015,
"loss": 1.2478,
"step": 8810
},
{
"epoch": 0.4515666598402621,
"grad_norm": 0.23133698105812073,
"learning_rate": 0.0015,
"loss": 1.2688,
"step": 8820
},
{
"epoch": 0.4520786401802171,
"grad_norm": 0.22866465151309967,
"learning_rate": 0.0015,
"loss": 1.2516,
"step": 8830
},
{
"epoch": 0.452590620520172,
"grad_norm": 0.2258300632238388,
"learning_rate": 0.0015,
"loss": 1.2571,
"step": 8840
},
{
"epoch": 0.453102600860127,
"grad_norm": 0.23454922437667847,
"learning_rate": 0.0015,
"loss": 1.2413,
"step": 8850
},
{
"epoch": 0.4536145812000819,
"grad_norm": 0.22673968970775604,
"learning_rate": 0.0015,
"loss": 1.2504,
"step": 8860
},
{
"epoch": 0.4541265615400369,
"grad_norm": 0.24363909661769867,
"learning_rate": 0.0015,
"loss": 1.2511,
"step": 8870
},
{
"epoch": 0.4546385418799918,
"grad_norm": 0.25056564807891846,
"learning_rate": 0.0015,
"loss": 1.2423,
"step": 8880
},
{
"epoch": 0.45515052221994673,
"grad_norm": 0.2318125218153,
"learning_rate": 0.0015,
"loss": 1.2753,
"step": 8890
},
{
"epoch": 0.4556625025599017,
"grad_norm": 0.22525230050086975,
"learning_rate": 0.0015,
"loss": 1.2389,
"step": 8900
},
{
"epoch": 0.4561744828998566,
"grad_norm": 0.23389683663845062,
"learning_rate": 0.0015,
"loss": 1.2457,
"step": 8910
},
{
"epoch": 0.4566864632398116,
"grad_norm": 0.23282834887504578,
"learning_rate": 0.0015,
"loss": 1.2628,
"step": 8920
},
{
"epoch": 0.4571984435797665,
"grad_norm": 0.24000655114650726,
"learning_rate": 0.0015,
"loss": 1.2637,
"step": 8930
},
{
"epoch": 0.4577104239197215,
"grad_norm": 0.22707650065422058,
"learning_rate": 0.0015,
"loss": 1.2651,
"step": 8940
},
{
"epoch": 0.4582224042596764,
"grad_norm": 0.24544113874435425,
"learning_rate": 0.0015,
"loss": 1.2597,
"step": 8950
},
{
"epoch": 0.4587343845996314,
"grad_norm": 0.2471536099910736,
"learning_rate": 0.0015,
"loss": 1.2583,
"step": 8960
},
{
"epoch": 0.4592463649395863,
"grad_norm": 0.2399998903274536,
"learning_rate": 0.0015,
"loss": 1.2587,
"step": 8970
},
{
"epoch": 0.4597583452795413,
"grad_norm": 0.239053875207901,
"learning_rate": 0.0015,
"loss": 1.2604,
"step": 8980
},
{
"epoch": 0.4602703256194962,
"grad_norm": 0.23578478395938873,
"learning_rate": 0.0015,
"loss": 1.251,
"step": 8990
},
{
"epoch": 0.46078230595945113,
"grad_norm": 0.22768492996692657,
"learning_rate": 0.0015,
"loss": 1.2584,
"step": 9000
},
{
"epoch": 0.4612942862994061,
"grad_norm": 0.2407897710800171,
"learning_rate": 0.0015,
"loss": 1.2551,
"step": 9010
},
{
"epoch": 0.46180626663936103,
"grad_norm": 0.24113765358924866,
"learning_rate": 0.0015,
"loss": 1.2686,
"step": 9020
},
{
"epoch": 0.462318246979316,
"grad_norm": 0.23086939752101898,
"learning_rate": 0.0015,
"loss": 1.2521,
"step": 9030
},
{
"epoch": 0.4628302273192709,
"grad_norm": 0.2428579032421112,
"learning_rate": 0.0015,
"loss": 1.2539,
"step": 9040
},
{
"epoch": 0.4633422076592259,
"grad_norm": 0.23166462779045105,
"learning_rate": 0.0015,
"loss": 1.2452,
"step": 9050
},
{
"epoch": 0.4638541879991808,
"grad_norm": 0.23648124933242798,
"learning_rate": 0.0015,
"loss": 1.2522,
"step": 9060
},
{
"epoch": 0.4643661683391358,
"grad_norm": 0.23984448611736298,
"learning_rate": 0.0015,
"loss": 1.2556,
"step": 9070
},
{
"epoch": 0.4648781486790907,
"grad_norm": 0.22623547911643982,
"learning_rate": 0.0015,
"loss": 1.2496,
"step": 9080
},
{
"epoch": 0.4653901290190457,
"grad_norm": 0.23154547810554504,
"learning_rate": 0.0015,
"loss": 1.2688,
"step": 9090
},
{
"epoch": 0.4659021093590006,
"grad_norm": 0.24457304179668427,
"learning_rate": 0.0015,
"loss": 1.2457,
"step": 9100
},
{
"epoch": 0.46641408969895554,
"grad_norm": 0.22743169963359833,
"learning_rate": 0.0015,
"loss": 1.2533,
"step": 9110
},
{
"epoch": 0.4669260700389105,
"grad_norm": 0.23356840014457703,
"learning_rate": 0.0015,
"loss": 1.2529,
"step": 9120
},
{
"epoch": 0.46743805037886543,
"grad_norm": 0.23355025053024292,
"learning_rate": 0.0015,
"loss": 1.2595,
"step": 9130
},
{
"epoch": 0.4679500307188204,
"grad_norm": 0.21895302832126617,
"learning_rate": 0.0015,
"loss": 1.2613,
"step": 9140
},
{
"epoch": 0.46846201105877533,
"grad_norm": 0.23437921702861786,
"learning_rate": 0.0015,
"loss": 1.2631,
"step": 9150
},
{
"epoch": 0.4689739913987303,
"grad_norm": 0.22628231346607208,
"learning_rate": 0.0015,
"loss": 1.2634,
"step": 9160
},
{
"epoch": 0.4694859717386852,
"grad_norm": 0.2286689728498459,
"learning_rate": 0.0015,
"loss": 1.2412,
"step": 9170
},
{
"epoch": 0.4699979520786402,
"grad_norm": 0.21830707788467407,
"learning_rate": 0.0015,
"loss": 1.2714,
"step": 9180
},
{
"epoch": 0.4705099324185951,
"grad_norm": 0.2502080500125885,
"learning_rate": 0.0015,
"loss": 1.2419,
"step": 9190
},
{
"epoch": 0.4710219127585501,
"grad_norm": 0.21958868205547333,
"learning_rate": 0.0015,
"loss": 1.2406,
"step": 9200
},
{
"epoch": 0.471533893098505,
"grad_norm": 0.22988547384738922,
"learning_rate": 0.0015,
"loss": 1.2802,
"step": 9210
},
{
"epoch": 0.47204587343845994,
"grad_norm": 0.22131182253360748,
"learning_rate": 0.0015,
"loss": 1.2496,
"step": 9220
},
{
"epoch": 0.4725578537784149,
"grad_norm": 0.24254952371120453,
"learning_rate": 0.0015,
"loss": 1.2702,
"step": 9230
},
{
"epoch": 0.47306983411836984,
"grad_norm": 0.22780196368694305,
"learning_rate": 0.0015,
"loss": 1.2452,
"step": 9240
},
{
"epoch": 0.4735818144583248,
"grad_norm": 0.22993087768554688,
"learning_rate": 0.0015,
"loss": 1.2475,
"step": 9250
},
{
"epoch": 0.47409379479827973,
"grad_norm": 0.21792259812355042,
"learning_rate": 0.0015,
"loss": 1.2532,
"step": 9260
},
{
"epoch": 0.4746057751382347,
"grad_norm": 0.22392146289348602,
"learning_rate": 0.0015,
"loss": 1.2451,
"step": 9270
},
{
"epoch": 0.47511775547818963,
"grad_norm": 0.24879144132137299,
"learning_rate": 0.0015,
"loss": 1.2492,
"step": 9280
},
{
"epoch": 0.4756297358181446,
"grad_norm": 0.21757066249847412,
"learning_rate": 0.0015,
"loss": 1.2508,
"step": 9290
},
{
"epoch": 0.4761417161580995,
"grad_norm": 0.23313356935977936,
"learning_rate": 0.0015,
"loss": 1.2532,
"step": 9300
},
{
"epoch": 0.4766536964980545,
"grad_norm": 0.25208523869514465,
"learning_rate": 0.0015,
"loss": 1.2286,
"step": 9310
},
{
"epoch": 0.4771656768380094,
"grad_norm": 0.2262171059846878,
"learning_rate": 0.0015,
"loss": 1.2398,
"step": 9320
},
{
"epoch": 0.47767765717796434,
"grad_norm": 0.2252594530582428,
"learning_rate": 0.0015,
"loss": 1.2525,
"step": 9330
},
{
"epoch": 0.4781896375179193,
"grad_norm": 0.2281142771244049,
"learning_rate": 0.0015,
"loss": 1.2453,
"step": 9340
},
{
"epoch": 0.47870161785787424,
"grad_norm": 0.22341011464595795,
"learning_rate": 0.0015,
"loss": 1.2628,
"step": 9350
},
{
"epoch": 0.4792135981978292,
"grad_norm": 0.22117526829242706,
"learning_rate": 0.0015,
"loss": 1.2597,
"step": 9360
},
{
"epoch": 0.47972557853778414,
"grad_norm": 0.2359929233789444,
"learning_rate": 0.0015,
"loss": 1.2504,
"step": 9370
},
{
"epoch": 0.4802375588777391,
"grad_norm": 0.2348971962928772,
"learning_rate": 0.0015,
"loss": 1.2352,
"step": 9380
},
{
"epoch": 0.48074953921769403,
"grad_norm": 0.23461927473545074,
"learning_rate": 0.0015,
"loss": 1.2383,
"step": 9390
},
{
"epoch": 0.481261519557649,
"grad_norm": 0.2463158220052719,
"learning_rate": 0.0015,
"loss": 1.2329,
"step": 9400
},
{
"epoch": 0.48177349989760393,
"grad_norm": 0.240493506193161,
"learning_rate": 0.0015,
"loss": 1.2614,
"step": 9410
},
{
"epoch": 0.48228548023755885,
"grad_norm": 0.22357292473316193,
"learning_rate": 0.0015,
"loss": 1.2553,
"step": 9420
},
{
"epoch": 0.4827974605775138,
"grad_norm": 0.2223501205444336,
"learning_rate": 0.0015,
"loss": 1.245,
"step": 9430
},
{
"epoch": 0.48330944091746875,
"grad_norm": 0.2278713434934616,
"learning_rate": 0.0015,
"loss": 1.2544,
"step": 9440
},
{
"epoch": 0.4838214212574237,
"grad_norm": 0.23052051663398743,
"learning_rate": 0.0015,
"loss": 1.2614,
"step": 9450
},
{
"epoch": 0.48433340159737864,
"grad_norm": 0.22685429453849792,
"learning_rate": 0.0015,
"loss": 1.2613,
"step": 9460
},
{
"epoch": 0.4848453819373336,
"grad_norm": 0.22306014597415924,
"learning_rate": 0.0015,
"loss": 1.2289,
"step": 9470
},
{
"epoch": 0.48535736227728854,
"grad_norm": 0.22385765612125397,
"learning_rate": 0.0015,
"loss": 1.2452,
"step": 9480
},
{
"epoch": 0.4858693426172435,
"grad_norm": 0.22245322167873383,
"learning_rate": 0.0015,
"loss": 1.2541,
"step": 9490
},
{
"epoch": 0.48638132295719844,
"grad_norm": 0.2279806137084961,
"learning_rate": 0.0015,
"loss": 1.2557,
"step": 9500
},
{
"epoch": 0.4868933032971534,
"grad_norm": 0.2449760138988495,
"learning_rate": 0.0015,
"loss": 1.2358,
"step": 9510
},
{
"epoch": 0.48740528363710833,
"grad_norm": 0.22621648013591766,
"learning_rate": 0.0015,
"loss": 1.2466,
"step": 9520
},
{
"epoch": 0.48791726397706325,
"grad_norm": 0.22223225235939026,
"learning_rate": 0.0015,
"loss": 1.2522,
"step": 9530
},
{
"epoch": 0.48842924431701823,
"grad_norm": 0.23512163758277893,
"learning_rate": 0.0015,
"loss": 1.2542,
"step": 9540
},
{
"epoch": 0.48894122465697315,
"grad_norm": 0.21729685366153717,
"learning_rate": 0.0015,
"loss": 1.224,
"step": 9550
},
{
"epoch": 0.4894532049969281,
"grad_norm": 0.22177568078041077,
"learning_rate": 0.0015,
"loss": 1.2624,
"step": 9560
},
{
"epoch": 0.48996518533688305,
"grad_norm": 0.22674211859703064,
"learning_rate": 0.0015,
"loss": 1.2191,
"step": 9570
},
{
"epoch": 0.490477165676838,
"grad_norm": 0.25243934988975525,
"learning_rate": 0.0015,
"loss": 1.2327,
"step": 9580
},
{
"epoch": 0.49098914601679294,
"grad_norm": 0.22206014394760132,
"learning_rate": 0.0015,
"loss": 1.2369,
"step": 9590
},
{
"epoch": 0.4915011263567479,
"grad_norm": 0.21915268898010254,
"learning_rate": 0.0015,
"loss": 1.2475,
"step": 9600
},
{
"epoch": 0.49201310669670284,
"grad_norm": 0.219084694981575,
"learning_rate": 0.0015,
"loss": 1.2469,
"step": 9610
},
{
"epoch": 0.4925250870366578,
"grad_norm": 0.21210044622421265,
"learning_rate": 0.0015,
"loss": 1.2385,
"step": 9620
},
{
"epoch": 0.49303706737661274,
"grad_norm": 0.22252093255519867,
"learning_rate": 0.0015,
"loss": 1.2652,
"step": 9630
},
{
"epoch": 0.49354904771656766,
"grad_norm": 0.2407660186290741,
"learning_rate": 0.0015,
"loss": 1.2436,
"step": 9640
},
{
"epoch": 0.49406102805652263,
"grad_norm": 0.22691743075847626,
"learning_rate": 0.0015,
"loss": 1.2254,
"step": 9650
},
{
"epoch": 0.49457300839647755,
"grad_norm": 0.23666201531887054,
"learning_rate": 0.0015,
"loss": 1.2297,
"step": 9660
},
{
"epoch": 0.49508498873643253,
"grad_norm": 0.21549946069717407,
"learning_rate": 0.0015,
"loss": 1.238,
"step": 9670
},
{
"epoch": 0.49559696907638745,
"grad_norm": 0.22083760797977448,
"learning_rate": 0.0015,
"loss": 1.2531,
"step": 9680
},
{
"epoch": 0.4961089494163424,
"grad_norm": 0.23391181230545044,
"learning_rate": 0.0015,
"loss": 1.1973,
"step": 9690
},
{
"epoch": 0.49662092975629735,
"grad_norm": 0.21990463137626648,
"learning_rate": 0.0015,
"loss": 1.2357,
"step": 9700
},
{
"epoch": 0.4971329100962523,
"grad_norm": 0.22842243313789368,
"learning_rate": 0.0015,
"loss": 1.2566,
"step": 9710
},
{
"epoch": 0.49764489043620724,
"grad_norm": 0.2154964953660965,
"learning_rate": 0.0015,
"loss": 1.2489,
"step": 9720
},
{
"epoch": 0.4981568707761622,
"grad_norm": 0.23381535708904266,
"learning_rate": 0.0015,
"loss": 1.2379,
"step": 9730
},
{
"epoch": 0.49866885111611714,
"grad_norm": 0.23405200242996216,
"learning_rate": 0.0015,
"loss": 1.251,
"step": 9740
},
{
"epoch": 0.49918083145607206,
"grad_norm": 0.24905334413051605,
"learning_rate": 0.0015,
"loss": 1.2247,
"step": 9750
},
{
"epoch": 0.49969281179602704,
"grad_norm": 0.22687901556491852,
"learning_rate": 0.0015,
"loss": 1.2362,
"step": 9760
},
{
"epoch": 0.500204792135982,
"grad_norm": 0.21950958669185638,
"learning_rate": 0.0015,
"loss": 1.2304,
"step": 9770
},
{
"epoch": 0.5007167724759369,
"grad_norm": 0.24343635141849518,
"learning_rate": 0.0015,
"loss": 1.2313,
"step": 9780
},
{
"epoch": 0.5012287528158919,
"grad_norm": 0.2238016575574875,
"learning_rate": 0.0015,
"loss": 1.2504,
"step": 9790
},
{
"epoch": 0.5017407331558468,
"grad_norm": 0.22162608802318573,
"learning_rate": 0.0015,
"loss": 1.2242,
"step": 9800
},
{
"epoch": 0.5022527134958018,
"grad_norm": 0.2090781331062317,
"learning_rate": 0.0015,
"loss": 1.2214,
"step": 9810
},
{
"epoch": 0.5027646938357567,
"grad_norm": 0.23861265182495117,
"learning_rate": 0.0015,
"loss": 1.2554,
"step": 9820
},
{
"epoch": 0.5032766741757116,
"grad_norm": 0.24569468200206757,
"learning_rate": 0.0015,
"loss": 1.2525,
"step": 9830
},
{
"epoch": 0.5037886545156666,
"grad_norm": 0.22713309526443481,
"learning_rate": 0.0015,
"loss": 1.2513,
"step": 9840
},
{
"epoch": 0.5043006348556216,
"grad_norm": 0.22980822622776031,
"learning_rate": 0.0015,
"loss": 1.2493,
"step": 9850
},
{
"epoch": 0.5048126151955765,
"grad_norm": 0.23609554767608643,
"learning_rate": 0.0015,
"loss": 1.2366,
"step": 9860
},
{
"epoch": 0.5053245955355314,
"grad_norm": 0.2115827053785324,
"learning_rate": 0.0015,
"loss": 1.2558,
"step": 9870
},
{
"epoch": 0.5058365758754864,
"grad_norm": 0.20506598055362701,
"learning_rate": 0.0015,
"loss": 1.2421,
"step": 9880
},
{
"epoch": 0.5063485562154413,
"grad_norm": 0.21842671930789948,
"learning_rate": 0.0015,
"loss": 1.2328,
"step": 9890
},
{
"epoch": 0.5068605365553963,
"grad_norm": 0.2390349954366684,
"learning_rate": 0.0015,
"loss": 1.2494,
"step": 9900
},
{
"epoch": 0.5073725168953512,
"grad_norm": 0.21842844784259796,
"learning_rate": 0.0015,
"loss": 1.243,
"step": 9910
},
{
"epoch": 0.5078844972353062,
"grad_norm": 0.21210695803165436,
"learning_rate": 0.0015,
"loss": 1.2438,
"step": 9920
},
{
"epoch": 0.5083964775752611,
"grad_norm": 0.21826642751693726,
"learning_rate": 0.0015,
"loss": 1.2402,
"step": 9930
},
{
"epoch": 0.5089084579152161,
"grad_norm": 0.21249307692050934,
"learning_rate": 0.0015,
"loss": 1.2168,
"step": 9940
},
{
"epoch": 0.509420438255171,
"grad_norm": 0.22593854367733002,
"learning_rate": 0.0015,
"loss": 1.222,
"step": 9950
},
{
"epoch": 0.509932418595126,
"grad_norm": 0.22972868382930756,
"learning_rate": 0.0015,
"loss": 1.2577,
"step": 9960
},
{
"epoch": 0.5104443989350809,
"grad_norm": 0.21808108687400818,
"learning_rate": 0.0015,
"loss": 1.2301,
"step": 9970
},
{
"epoch": 0.5109563792750358,
"grad_norm": 0.21525093913078308,
"learning_rate": 0.0015,
"loss": 1.2412,
"step": 9980
},
{
"epoch": 0.5114683596149908,
"grad_norm": 0.22222475707530975,
"learning_rate": 0.0015,
"loss": 1.237,
"step": 9990
},
{
"epoch": 0.5119803399549457,
"grad_norm": 0.23491185903549194,
"learning_rate": 0.0015,
"loss": 1.2436,
"step": 10000
},
{
"epoch": 0.5124923202949007,
"grad_norm": 0.23327389359474182,
"learning_rate": 0.0015,
"loss": 1.223,
"step": 10010
},
{
"epoch": 0.5130043006348556,
"grad_norm": 0.21225926280021667,
"learning_rate": 0.0015,
"loss": 1.2215,
"step": 10020
},
{
"epoch": 0.5135162809748106,
"grad_norm": 0.21181495487689972,
"learning_rate": 0.0015,
"loss": 1.2297,
"step": 10030
},
{
"epoch": 0.5140282613147655,
"grad_norm": 0.21177121996879578,
"learning_rate": 0.0015,
"loss": 1.2228,
"step": 10040
},
{
"epoch": 0.5145402416547205,
"grad_norm": 0.22206859290599823,
"learning_rate": 0.0015,
"loss": 1.2579,
"step": 10050
},
{
"epoch": 0.5150522219946754,
"grad_norm": 0.21502964198589325,
"learning_rate": 0.0015,
"loss": 1.2298,
"step": 10060
},
{
"epoch": 0.5155642023346303,
"grad_norm": 0.22302408516407013,
"learning_rate": 0.0015,
"loss": 1.2226,
"step": 10070
},
{
"epoch": 0.5160761826745853,
"grad_norm": 0.21490171551704407,
"learning_rate": 0.0015,
"loss": 1.2554,
"step": 10080
},
{
"epoch": 0.5165881630145402,
"grad_norm": 0.22137999534606934,
"learning_rate": 0.0015,
"loss": 1.2189,
"step": 10090
},
{
"epoch": 0.5171001433544952,
"grad_norm": 0.21363165974617004,
"learning_rate": 0.0015,
"loss": 1.2533,
"step": 10100
},
{
"epoch": 0.5176121236944501,
"grad_norm": 0.23033399879932404,
"learning_rate": 0.0015,
"loss": 1.2406,
"step": 10110
},
{
"epoch": 0.5181241040344051,
"grad_norm": 0.22692923247814178,
"learning_rate": 0.0015,
"loss": 1.2294,
"step": 10120
},
{
"epoch": 0.51863608437436,
"grad_norm": 0.23053601384162903,
"learning_rate": 0.0015,
"loss": 1.2351,
"step": 10130
},
{
"epoch": 0.519148064714315,
"grad_norm": 0.21180744469165802,
"learning_rate": 0.0015,
"loss": 1.2518,
"step": 10140
},
{
"epoch": 0.5196600450542699,
"grad_norm": 0.2388363927602768,
"learning_rate": 0.0015,
"loss": 1.2188,
"step": 10150
},
{
"epoch": 0.5201720253942249,
"grad_norm": 0.22531351447105408,
"learning_rate": 0.0015,
"loss": 1.2242,
"step": 10160
},
{
"epoch": 0.5206840057341798,
"grad_norm": 0.2166026532649994,
"learning_rate": 0.0015,
"loss": 1.2122,
"step": 10170
},
{
"epoch": 0.5211959860741348,
"grad_norm": 0.23231609165668488,
"learning_rate": 0.0015,
"loss": 1.2078,
"step": 10180
},
{
"epoch": 0.5217079664140897,
"grad_norm": 0.2189248949289322,
"learning_rate": 0.0015,
"loss": 1.2392,
"step": 10190
},
{
"epoch": 0.5222199467540446,
"grad_norm": 0.21036341786384583,
"learning_rate": 0.0015,
"loss": 1.2325,
"step": 10200
},
{
"epoch": 0.5227319270939996,
"grad_norm": 0.21162335574626923,
"learning_rate": 0.0015,
"loss": 1.2348,
"step": 10210
},
{
"epoch": 0.5232439074339545,
"grad_norm": 0.21558861434459686,
"learning_rate": 0.0015,
"loss": 1.2343,
"step": 10220
},
{
"epoch": 0.5237558877739095,
"grad_norm": 0.22100234031677246,
"learning_rate": 0.0015,
"loss": 1.2373,
"step": 10230
},
{
"epoch": 0.5242678681138644,
"grad_norm": 0.225110724568367,
"learning_rate": 0.0015,
"loss": 1.2368,
"step": 10240
},
{
"epoch": 0.5247798484538194,
"grad_norm": 0.21674303710460663,
"learning_rate": 0.0015,
"loss": 1.2365,
"step": 10250
},
{
"epoch": 0.5252918287937743,
"grad_norm": 0.23076364398002625,
"learning_rate": 0.0015,
"loss": 1.2202,
"step": 10260
},
{
"epoch": 0.5258038091337293,
"grad_norm": 0.23180685937404633,
"learning_rate": 0.0015,
"loss": 1.234,
"step": 10270
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.21580268442630768,
"learning_rate": 0.0015,
"loss": 1.2372,
"step": 10280
},
{
"epoch": 0.5268277698136391,
"grad_norm": 0.2099384069442749,
"learning_rate": 0.0015,
"loss": 1.2118,
"step": 10290
},
{
"epoch": 0.5273397501535941,
"grad_norm": 0.23586790263652802,
"learning_rate": 0.0015,
"loss": 1.2482,
"step": 10300
},
{
"epoch": 0.527851730493549,
"grad_norm": 0.2149907946586609,
"learning_rate": 0.0015,
"loss": 1.2469,
"step": 10310
},
{
"epoch": 0.528363710833504,
"grad_norm": 0.21271546185016632,
"learning_rate": 0.0015,
"loss": 1.2325,
"step": 10320
},
{
"epoch": 0.5288756911734589,
"grad_norm": 0.20998185873031616,
"learning_rate": 0.0015,
"loss": 1.247,
"step": 10330
},
{
"epoch": 0.5293876715134139,
"grad_norm": 0.23234112560749054,
"learning_rate": 0.0015,
"loss": 1.2395,
"step": 10340
},
{
"epoch": 0.5298996518533688,
"grad_norm": 0.2261328250169754,
"learning_rate": 0.0015,
"loss": 1.2244,
"step": 10350
},
{
"epoch": 0.5304116321933238,
"grad_norm": 0.2102995663881302,
"learning_rate": 0.0015,
"loss": 1.2307,
"step": 10360
},
{
"epoch": 0.5309236125332787,
"grad_norm": 0.21107365190982819,
"learning_rate": 0.0015,
"loss": 1.2195,
"step": 10370
},
{
"epoch": 0.5314355928732337,
"grad_norm": 0.2249820977449417,
"learning_rate": 0.0015,
"loss": 1.2499,
"step": 10380
},
{
"epoch": 0.5319475732131886,
"grad_norm": 0.2142641544342041,
"learning_rate": 0.0015,
"loss": 1.2329,
"step": 10390
},
{
"epoch": 0.5324595535531436,
"grad_norm": 0.2172004133462906,
"learning_rate": 0.0015,
"loss": 1.2098,
"step": 10400
},
{
"epoch": 0.5329715338930985,
"grad_norm": 0.19984416663646698,
"learning_rate": 0.0015,
"loss": 1.2135,
"step": 10410
},
{
"epoch": 0.5334835142330534,
"grad_norm": 0.22618216276168823,
"learning_rate": 0.0015,
"loss": 1.2173,
"step": 10420
},
{
"epoch": 0.5339954945730084,
"grad_norm": 0.22356146574020386,
"learning_rate": 0.0015,
"loss": 1.2423,
"step": 10430
},
{
"epoch": 0.5345074749129634,
"grad_norm": 0.2300511598587036,
"learning_rate": 0.0015,
"loss": 1.2308,
"step": 10440
},
{
"epoch": 0.5350194552529183,
"grad_norm": 0.22442519664764404,
"learning_rate": 0.0015,
"loss": 1.2435,
"step": 10450
},
{
"epoch": 0.5355314355928732,
"grad_norm": 0.21556325256824493,
"learning_rate": 0.0015,
"loss": 1.2499,
"step": 10460
},
{
"epoch": 0.5360434159328282,
"grad_norm": 0.21608006954193115,
"learning_rate": 0.0015,
"loss": 1.2367,
"step": 10470
},
{
"epoch": 0.5365553962727831,
"grad_norm": 0.22256320714950562,
"learning_rate": 0.0015,
"loss": 1.2325,
"step": 10480
},
{
"epoch": 0.5370673766127381,
"grad_norm": 0.22661398351192474,
"learning_rate": 0.0015,
"loss": 1.2253,
"step": 10490
},
{
"epoch": 0.537579356952693,
"grad_norm": 0.21327906847000122,
"learning_rate": 0.0015,
"loss": 1.215,
"step": 10500
},
{
"epoch": 0.5380913372926479,
"grad_norm": 0.21695594489574432,
"learning_rate": 0.0015,
"loss": 1.2372,
"step": 10510
},
{
"epoch": 0.5386033176326029,
"grad_norm": 0.20584948360919952,
"learning_rate": 0.0015,
"loss": 1.2491,
"step": 10520
},
{
"epoch": 0.5391152979725579,
"grad_norm": 0.2212359756231308,
"learning_rate": 0.0015,
"loss": 1.2415,
"step": 10530
},
{
"epoch": 0.5396272783125128,
"grad_norm": 0.2696838974952698,
"learning_rate": 0.0015,
"loss": 1.2254,
"step": 10540
},
{
"epoch": 0.5401392586524677,
"grad_norm": 0.21417804062366486,
"learning_rate": 0.0015,
"loss": 1.2307,
"step": 10550
},
{
"epoch": 0.5406512389924227,
"grad_norm": 0.2126997709274292,
"learning_rate": 0.0015,
"loss": 1.2134,
"step": 10560
},
{
"epoch": 0.5411632193323777,
"grad_norm": 0.21690891683101654,
"learning_rate": 0.0015,
"loss": 1.2136,
"step": 10570
},
{
"epoch": 0.5416751996723326,
"grad_norm": 0.21153941750526428,
"learning_rate": 0.0015,
"loss": 1.2157,
"step": 10580
},
{
"epoch": 0.5421871800122875,
"grad_norm": 0.21089473366737366,
"learning_rate": 0.0015,
"loss": 1.2272,
"step": 10590
},
{
"epoch": 0.5426991603522425,
"grad_norm": 0.2564721703529358,
"learning_rate": 0.0015,
"loss": 1.2026,
"step": 10600
},
{
"epoch": 0.5432111406921974,
"grad_norm": 0.2235645204782486,
"learning_rate": 0.0015,
"loss": 1.2373,
"step": 10610
},
{
"epoch": 0.5437231210321524,
"grad_norm": 0.21624423563480377,
"learning_rate": 0.0015,
"loss": 1.2208,
"step": 10620
},
{
"epoch": 0.5442351013721073,
"grad_norm": 0.22423268854618073,
"learning_rate": 0.0015,
"loss": 1.2246,
"step": 10630
},
{
"epoch": 0.5447470817120622,
"grad_norm": 0.20781590044498444,
"learning_rate": 0.0015,
"loss": 1.2197,
"step": 10640
},
{
"epoch": 0.5452590620520172,
"grad_norm": 0.21837033331394196,
"learning_rate": 0.0015,
"loss": 1.2195,
"step": 10650
},
{
"epoch": 0.5457710423919722,
"grad_norm": 0.23481489717960358,
"learning_rate": 0.0015,
"loss": 1.2221,
"step": 10660
},
{
"epoch": 0.5462830227319271,
"grad_norm": 0.20522017776966095,
"learning_rate": 0.0015,
"loss": 1.2119,
"step": 10670
},
{
"epoch": 0.546795003071882,
"grad_norm": 0.24082933366298676,
"learning_rate": 0.0015,
"loss": 1.2115,
"step": 10680
},
{
"epoch": 0.547306983411837,
"grad_norm": 0.21289277076721191,
"learning_rate": 0.0015,
"loss": 1.2386,
"step": 10690
},
{
"epoch": 0.547818963751792,
"grad_norm": 0.21003836393356323,
"learning_rate": 0.0015,
"loss": 1.2107,
"step": 10700
},
{
"epoch": 0.5483309440917469,
"grad_norm": 0.21242666244506836,
"learning_rate": 0.0015,
"loss": 1.2429,
"step": 10710
},
{
"epoch": 0.5488429244317018,
"grad_norm": 0.2271721065044403,
"learning_rate": 0.0015,
"loss": 1.2314,
"step": 10720
},
{
"epoch": 0.5493549047716567,
"grad_norm": 0.21104945242404938,
"learning_rate": 0.0015,
"loss": 1.2342,
"step": 10730
},
{
"epoch": 0.5498668851116117,
"grad_norm": 0.2085346132516861,
"learning_rate": 0.0015,
"loss": 1.2271,
"step": 10740
},
{
"epoch": 0.5503788654515667,
"grad_norm": 0.22231942415237427,
"learning_rate": 0.0015,
"loss": 1.2306,
"step": 10750
},
{
"epoch": 0.5508908457915216,
"grad_norm": 0.21245570480823517,
"learning_rate": 0.0015,
"loss": 1.2258,
"step": 10760
},
{
"epoch": 0.5514028261314765,
"grad_norm": 0.19826675951480865,
"learning_rate": 0.0015,
"loss": 1.2163,
"step": 10770
},
{
"epoch": 0.5519148064714315,
"grad_norm": 0.22163072228431702,
"learning_rate": 0.0015,
"loss": 1.229,
"step": 10780
},
{
"epoch": 0.5524267868113865,
"grad_norm": 0.21903766691684723,
"learning_rate": 0.0015,
"loss": 1.2139,
"step": 10790
},
{
"epoch": 0.5529387671513414,
"grad_norm": 0.2075222283601761,
"learning_rate": 0.0015,
"loss": 1.2129,
"step": 10800
},
{
"epoch": 0.5534507474912963,
"grad_norm": 0.21938522160053253,
"learning_rate": 0.0015,
"loss": 1.2232,
"step": 10810
},
{
"epoch": 0.5539627278312513,
"grad_norm": 0.21770595014095306,
"learning_rate": 0.0015,
"loss": 1.2465,
"step": 10820
},
{
"epoch": 0.5544747081712063,
"grad_norm": 0.20712700486183167,
"learning_rate": 0.0015,
"loss": 1.2183,
"step": 10830
},
{
"epoch": 0.5549866885111612,
"grad_norm": 0.22477000951766968,
"learning_rate": 0.0015,
"loss": 1.2186,
"step": 10840
},
{
"epoch": 0.5554986688511161,
"grad_norm": 0.21939463913440704,
"learning_rate": 0.0015,
"loss": 1.2355,
"step": 10850
},
{
"epoch": 0.556010649191071,
"grad_norm": 0.2524956464767456,
"learning_rate": 0.0015,
"loss": 1.2092,
"step": 10860
},
{
"epoch": 0.556522629531026,
"grad_norm": 0.2115110456943512,
"learning_rate": 0.0015,
"loss": 1.2137,
"step": 10870
},
{
"epoch": 0.557034609870981,
"grad_norm": 0.20509475469589233,
"learning_rate": 0.0015,
"loss": 1.2234,
"step": 10880
},
{
"epoch": 0.5575465902109359,
"grad_norm": 0.21247826516628265,
"learning_rate": 0.0015,
"loss": 1.2234,
"step": 10890
},
{
"epoch": 0.5580585705508908,
"grad_norm": 0.21064293384552002,
"learning_rate": 0.0015,
"loss": 1.2289,
"step": 10900
},
{
"epoch": 0.5585705508908458,
"grad_norm": 0.21902692317962646,
"learning_rate": 0.0015,
"loss": 1.2085,
"step": 10910
},
{
"epoch": 0.5590825312308008,
"grad_norm": 0.21347709000110626,
"learning_rate": 0.0015,
"loss": 1.2151,
"step": 10920
},
{
"epoch": 0.5595945115707557,
"grad_norm": 0.20034797489643097,
"learning_rate": 0.0015,
"loss": 1.218,
"step": 10930
},
{
"epoch": 0.5601064919107106,
"grad_norm": 0.20223546028137207,
"learning_rate": 0.0015,
"loss": 1.2176,
"step": 10940
},
{
"epoch": 0.5606184722506655,
"grad_norm": 0.23771893978118896,
"learning_rate": 0.0015,
"loss": 1.2297,
"step": 10950
},
{
"epoch": 0.5611304525906206,
"grad_norm": 0.24617038667201996,
"learning_rate": 0.0015,
"loss": 1.2331,
"step": 10960
},
{
"epoch": 0.5616424329305755,
"grad_norm": 0.2169172167778015,
"learning_rate": 0.0015,
"loss": 1.2319,
"step": 10970
},
{
"epoch": 0.5621544132705304,
"grad_norm": 0.21281367540359497,
"learning_rate": 0.0015,
"loss": 1.2205,
"step": 10980
},
{
"epoch": 0.5626663936104853,
"grad_norm": 0.21705804765224457,
"learning_rate": 0.0015,
"loss": 1.2138,
"step": 10990
},
{
"epoch": 0.5631783739504403,
"grad_norm": 0.19822140038013458,
"learning_rate": 0.0015,
"loss": 1.2339,
"step": 11000
},
{
"epoch": 0.5636903542903953,
"grad_norm": 0.20427508652210236,
"learning_rate": 0.0015,
"loss": 1.2195,
"step": 11010
},
{
"epoch": 0.5642023346303502,
"grad_norm": 0.2140669971704483,
"learning_rate": 0.0015,
"loss": 1.1975,
"step": 11020
},
{
"epoch": 0.5647143149703051,
"grad_norm": 0.20858561992645264,
"learning_rate": 0.0015,
"loss": 1.208,
"step": 11030
},
{
"epoch": 0.5652262953102601,
"grad_norm": 0.21723324060440063,
"learning_rate": 0.0015,
"loss": 1.2193,
"step": 11040
},
{
"epoch": 0.5657382756502151,
"grad_norm": 0.21611307561397552,
"learning_rate": 0.0015,
"loss": 1.2199,
"step": 11050
},
{
"epoch": 0.56625025599017,
"grad_norm": 0.21373584866523743,
"learning_rate": 0.0015,
"loss": 1.2065,
"step": 11060
},
{
"epoch": 0.5667622363301249,
"grad_norm": 0.2058737874031067,
"learning_rate": 0.0015,
"loss": 1.2019,
"step": 11070
},
{
"epoch": 0.5672742166700798,
"grad_norm": 0.22086186707019806,
"learning_rate": 0.0015,
"loss": 1.2108,
"step": 11080
},
{
"epoch": 0.5677861970100349,
"grad_norm": 0.21599149703979492,
"learning_rate": 0.0015,
"loss": 1.209,
"step": 11090
},
{
"epoch": 0.5682981773499898,
"grad_norm": 0.22241829335689545,
"learning_rate": 0.0015,
"loss": 1.2054,
"step": 11100
},
{
"epoch": 0.5688101576899447,
"grad_norm": 0.19618919491767883,
"learning_rate": 0.0015,
"loss": 1.2293,
"step": 11110
},
{
"epoch": 0.5693221380298996,
"grad_norm": 0.19986511766910553,
"learning_rate": 0.0015,
"loss": 1.1945,
"step": 11120
},
{
"epoch": 0.5698341183698546,
"grad_norm": 0.20131878554821014,
"learning_rate": 0.0015,
"loss": 1.2082,
"step": 11130
},
{
"epoch": 0.5703460987098096,
"grad_norm": 0.20655354857444763,
"learning_rate": 0.0015,
"loss": 1.2111,
"step": 11140
},
{
"epoch": 0.5708580790497645,
"grad_norm": 0.2156609296798706,
"learning_rate": 0.0015,
"loss": 1.2288,
"step": 11150
},
{
"epoch": 0.5713700593897194,
"grad_norm": 0.20367379486560822,
"learning_rate": 0.0015,
"loss": 1.2229,
"step": 11160
},
{
"epoch": 0.5718820397296743,
"grad_norm": 0.20256848633289337,
"learning_rate": 0.0015,
"loss": 1.2236,
"step": 11170
},
{
"epoch": 0.5723940200696294,
"grad_norm": 0.20862998068332672,
"learning_rate": 0.0015,
"loss": 1.2153,
"step": 11180
},
{
"epoch": 0.5729060004095843,
"grad_norm": 0.21000482141971588,
"learning_rate": 0.0015,
"loss": 1.2164,
"step": 11190
},
{
"epoch": 0.5734179807495392,
"grad_norm": 0.21778449416160583,
"learning_rate": 0.0015,
"loss": 1.2221,
"step": 11200
},
{
"epoch": 0.5739299610894941,
"grad_norm": 0.20954222977161407,
"learning_rate": 0.0015,
"loss": 1.2257,
"step": 11210
},
{
"epoch": 0.5744419414294492,
"grad_norm": 0.21105293929576874,
"learning_rate": 0.0015,
"loss": 1.2218,
"step": 11220
},
{
"epoch": 0.5749539217694041,
"grad_norm": 0.2167726457118988,
"learning_rate": 0.0015,
"loss": 1.2193,
"step": 11230
},
{
"epoch": 0.575465902109359,
"grad_norm": 0.20207858085632324,
"learning_rate": 0.0015,
"loss": 1.2243,
"step": 11240
},
{
"epoch": 0.5759778824493139,
"grad_norm": 0.21475255489349365,
"learning_rate": 0.0015,
"loss": 1.2222,
"step": 11250
},
{
"epoch": 0.576489862789269,
"grad_norm": 0.22506240010261536,
"learning_rate": 0.0015,
"loss": 1.2255,
"step": 11260
},
{
"epoch": 0.5770018431292239,
"grad_norm": 0.23033161461353302,
"learning_rate": 0.0015,
"loss": 1.2287,
"step": 11270
},
{
"epoch": 0.5775138234691788,
"grad_norm": 0.20455433428287506,
"learning_rate": 0.0015,
"loss": 1.2141,
"step": 11280
},
{
"epoch": 0.5780258038091337,
"grad_norm": 0.22457818686962128,
"learning_rate": 0.0015,
"loss": 1.2329,
"step": 11290
},
{
"epoch": 0.5785377841490886,
"grad_norm": 0.2011692076921463,
"learning_rate": 0.0015,
"loss": 1.213,
"step": 11300
},
{
"epoch": 0.5790497644890437,
"grad_norm": 0.20488318800926208,
"learning_rate": 0.0015,
"loss": 1.2224,
"step": 11310
},
{
"epoch": 0.5795617448289986,
"grad_norm": 0.22065885365009308,
"learning_rate": 0.0015,
"loss": 1.231,
"step": 11320
},
{
"epoch": 0.5800737251689535,
"grad_norm": 0.20532485842704773,
"learning_rate": 0.0015,
"loss": 1.2051,
"step": 11330
},
{
"epoch": 0.5805857055089084,
"grad_norm": 0.20642031729221344,
"learning_rate": 0.0015,
"loss": 1.215,
"step": 11340
},
{
"epoch": 0.5810976858488635,
"grad_norm": 0.20660312473773956,
"learning_rate": 0.0015,
"loss": 1.2191,
"step": 11350
},
{
"epoch": 0.5816096661888184,
"grad_norm": 0.21046073734760284,
"learning_rate": 0.0015,
"loss": 1.2142,
"step": 11360
},
{
"epoch": 0.5821216465287733,
"grad_norm": 0.21846343576908112,
"learning_rate": 0.0015,
"loss": 1.2205,
"step": 11370
},
{
"epoch": 0.5826336268687282,
"grad_norm": 0.20589517056941986,
"learning_rate": 0.0015,
"loss": 1.2057,
"step": 11380
},
{
"epoch": 0.5831456072086831,
"grad_norm": 0.20691034197807312,
"learning_rate": 0.0015,
"loss": 1.2064,
"step": 11390
},
{
"epoch": 0.5836575875486382,
"grad_norm": 0.21649305522441864,
"learning_rate": 0.0015,
"loss": 1.2032,
"step": 11400
},
{
"epoch": 0.5841695678885931,
"grad_norm": 0.2329801321029663,
"learning_rate": 0.0015,
"loss": 1.2196,
"step": 11410
},
{
"epoch": 0.584681548228548,
"grad_norm": 0.23256272077560425,
"learning_rate": 0.0015,
"loss": 1.2124,
"step": 11420
},
{
"epoch": 0.5851935285685029,
"grad_norm": 0.2036832720041275,
"learning_rate": 0.0015,
"loss": 1.2098,
"step": 11430
},
{
"epoch": 0.585705508908458,
"grad_norm": 0.21199576556682587,
"learning_rate": 0.0015,
"loss": 1.2266,
"step": 11440
},
{
"epoch": 0.5862174892484129,
"grad_norm": 0.2015303373336792,
"learning_rate": 0.0015,
"loss": 1.1916,
"step": 11450
},
{
"epoch": 0.5867294695883678,
"grad_norm": 0.2176617681980133,
"learning_rate": 0.0015,
"loss": 1.1888,
"step": 11460
},
{
"epoch": 0.5872414499283227,
"grad_norm": 0.21515142917633057,
"learning_rate": 0.0015,
"loss": 1.2096,
"step": 11470
},
{
"epoch": 0.5877534302682776,
"grad_norm": 0.21731404960155487,
"learning_rate": 0.0015,
"loss": 1.2077,
"step": 11480
},
{
"epoch": 0.5882654106082327,
"grad_norm": 0.20664644241333008,
"learning_rate": 0.0015,
"loss": 1.2027,
"step": 11490
},
{
"epoch": 0.5887773909481876,
"grad_norm": 0.20170624554157257,
"learning_rate": 0.0015,
"loss": 1.233,
"step": 11500
},
{
"epoch": 0.5892893712881425,
"grad_norm": 0.2092912346124649,
"learning_rate": 0.0015,
"loss": 1.2004,
"step": 11510
},
{
"epoch": 0.5898013516280974,
"grad_norm": 0.204396590590477,
"learning_rate": 0.0015,
"loss": 1.2052,
"step": 11520
},
{
"epoch": 0.5903133319680525,
"grad_norm": 0.2075720578432083,
"learning_rate": 0.0015,
"loss": 1.2042,
"step": 11530
},
{
"epoch": 0.5908253123080074,
"grad_norm": 0.19743815064430237,
"learning_rate": 0.0015,
"loss": 1.1974,
"step": 11540
},
{
"epoch": 0.5913372926479623,
"grad_norm": 0.19972637295722961,
"learning_rate": 0.0015,
"loss": 1.2021,
"step": 11550
},
{
"epoch": 0.5918492729879172,
"grad_norm": 0.20364214479923248,
"learning_rate": 0.0015,
"loss": 1.2149,
"step": 11560
},
{
"epoch": 0.5923612533278723,
"grad_norm": 0.20440620183944702,
"learning_rate": 0.0015,
"loss": 1.1855,
"step": 11570
},
{
"epoch": 0.5928732336678272,
"grad_norm": 0.21338412165641785,
"learning_rate": 0.0015,
"loss": 1.2022,
"step": 11580
},
{
"epoch": 0.5933852140077821,
"grad_norm": 0.2067076861858368,
"learning_rate": 0.0015,
"loss": 1.2109,
"step": 11590
},
{
"epoch": 0.593897194347737,
"grad_norm": 0.20598556101322174,
"learning_rate": 0.0015,
"loss": 1.2132,
"step": 11600
},
{
"epoch": 0.5944091746876919,
"grad_norm": 0.21331733465194702,
"learning_rate": 0.0015,
"loss": 1.2021,
"step": 11610
},
{
"epoch": 0.594921155027647,
"grad_norm": 0.23132279515266418,
"learning_rate": 0.0015,
"loss": 1.1954,
"step": 11620
},
{
"epoch": 0.5954331353676019,
"grad_norm": 0.2226603478193283,
"learning_rate": 0.0015,
"loss": 1.2055,
"step": 11630
},
{
"epoch": 0.5959451157075568,
"grad_norm": 0.19999723136425018,
"learning_rate": 0.0015,
"loss": 1.1961,
"step": 11640
},
{
"epoch": 0.5964570960475117,
"grad_norm": 0.19226787984371185,
"learning_rate": 0.0015,
"loss": 1.2056,
"step": 11650
},
{
"epoch": 0.5969690763874668,
"grad_norm": 0.20891976356506348,
"learning_rate": 0.0015,
"loss": 1.2023,
"step": 11660
},
{
"epoch": 0.5974810567274217,
"grad_norm": 0.19218876957893372,
"learning_rate": 0.0015,
"loss": 1.2027,
"step": 11670
},
{
"epoch": 0.5979930370673766,
"grad_norm": 0.20928075909614563,
"learning_rate": 0.0015,
"loss": 1.2176,
"step": 11680
},
{
"epoch": 0.5985050174073315,
"grad_norm": 0.204718217253685,
"learning_rate": 0.0015,
"loss": 1.2014,
"step": 11690
},
{
"epoch": 0.5990169977472865,
"grad_norm": 0.22869887948036194,
"learning_rate": 0.0015,
"loss": 1.1888,
"step": 11700
},
{
"epoch": 0.5995289780872415,
"grad_norm": 0.19692908227443695,
"learning_rate": 0.0015,
"loss": 1.2161,
"step": 11710
},
{
"epoch": 0.6000409584271964,
"grad_norm": 0.2099919617176056,
"learning_rate": 0.0015,
"loss": 1.1968,
"step": 11720
},
{
"epoch": 0.6005529387671513,
"grad_norm": 0.20044675469398499,
"learning_rate": 0.0015,
"loss": 1.2071,
"step": 11730
},
{
"epoch": 0.6010649191071062,
"grad_norm": 0.20645897090435028,
"learning_rate": 0.0015,
"loss": 1.2142,
"step": 11740
},
{
"epoch": 0.6015768994470613,
"grad_norm": 0.20446518063545227,
"learning_rate": 0.0015,
"loss": 1.1907,
"step": 11750
},
{
"epoch": 0.6020888797870162,
"grad_norm": 0.19793803989887238,
"learning_rate": 0.0015,
"loss": 1.2237,
"step": 11760
},
{
"epoch": 0.6026008601269711,
"grad_norm": 0.23807552456855774,
"learning_rate": 0.0015,
"loss": 1.2072,
"step": 11770
},
{
"epoch": 0.603112840466926,
"grad_norm": 0.20290285348892212,
"learning_rate": 0.0015,
"loss": 1.2048,
"step": 11780
},
{
"epoch": 0.6036248208068811,
"grad_norm": 0.21725532412528992,
"learning_rate": 0.0015,
"loss": 1.1961,
"step": 11790
},
{
"epoch": 0.604136801146836,
"grad_norm": 0.20467454195022583,
"learning_rate": 0.0015,
"loss": 1.2301,
"step": 11800
},
{
"epoch": 0.6046487814867909,
"grad_norm": 0.20618268847465515,
"learning_rate": 0.0015,
"loss": 1.2026,
"step": 11810
},
{
"epoch": 0.6051607618267458,
"grad_norm": 0.2097761183977127,
"learning_rate": 0.0015,
"loss": 1.1992,
"step": 11820
},
{
"epoch": 0.6056727421667008,
"grad_norm": 0.21861404180526733,
"learning_rate": 0.0015,
"loss": 1.2047,
"step": 11830
},
{
"epoch": 0.6061847225066558,
"grad_norm": 0.2066473513841629,
"learning_rate": 0.0015,
"loss": 1.2022,
"step": 11840
},
{
"epoch": 0.6066967028466107,
"grad_norm": 0.203571155667305,
"learning_rate": 0.0015,
"loss": 1.1729,
"step": 11850
},
{
"epoch": 0.6072086831865656,
"grad_norm": 0.20523090660572052,
"learning_rate": 0.0015,
"loss": 1.222,
"step": 11860
},
{
"epoch": 0.6077206635265205,
"grad_norm": 0.2021731734275818,
"learning_rate": 0.0015,
"loss": 1.1983,
"step": 11870
},
{
"epoch": 0.6082326438664756,
"grad_norm": 0.20643019676208496,
"learning_rate": 0.0015,
"loss": 1.2147,
"step": 11880
},
{
"epoch": 0.6087446242064305,
"grad_norm": 0.21817174553871155,
"learning_rate": 0.0015,
"loss": 1.1988,
"step": 11890
},
{
"epoch": 0.6092566045463854,
"grad_norm": 0.21849657595157623,
"learning_rate": 0.0015,
"loss": 1.1908,
"step": 11900
},
{
"epoch": 0.6097685848863403,
"grad_norm": 0.21117383241653442,
"learning_rate": 0.0015,
"loss": 1.2318,
"step": 11910
},
{
"epoch": 0.6102805652262953,
"grad_norm": 0.2120293378829956,
"learning_rate": 0.0015,
"loss": 1.2071,
"step": 11920
},
{
"epoch": 0.6107925455662503,
"grad_norm": 0.20229868590831757,
"learning_rate": 0.0015,
"loss": 1.191,
"step": 11930
},
{
"epoch": 0.6113045259062052,
"grad_norm": 0.19626636803150177,
"learning_rate": 0.0015,
"loss": 1.2172,
"step": 11940
},
{
"epoch": 0.6118165062461601,
"grad_norm": 0.21968694031238556,
"learning_rate": 0.0015,
"loss": 1.1901,
"step": 11950
},
{
"epoch": 0.612328486586115,
"grad_norm": 0.22982917726039886,
"learning_rate": 0.0015,
"loss": 1.2023,
"step": 11960
},
{
"epoch": 0.6128404669260701,
"grad_norm": 0.20328094065189362,
"learning_rate": 0.0015,
"loss": 1.193,
"step": 11970
},
{
"epoch": 0.613352447266025,
"grad_norm": 0.20781250298023224,
"learning_rate": 0.0015,
"loss": 1.1871,
"step": 11980
},
{
"epoch": 0.6138644276059799,
"grad_norm": 0.1945171356201172,
"learning_rate": 0.0015,
"loss": 1.1954,
"step": 11990
},
{
"epoch": 0.6143764079459348,
"grad_norm": 0.2018270492553711,
"learning_rate": 0.0015,
"loss": 1.1848,
"step": 12000
},
{
"epoch": 0.6148883882858899,
"grad_norm": 0.20180918276309967,
"learning_rate": 0.0015,
"loss": 1.2081,
"step": 12010
},
{
"epoch": 0.6154003686258448,
"grad_norm": 0.20221208035945892,
"learning_rate": 0.0015,
"loss": 1.2076,
"step": 12020
},
{
"epoch": 0.6159123489657997,
"grad_norm": 0.2013401836156845,
"learning_rate": 0.0015,
"loss": 1.2211,
"step": 12030
},
{
"epoch": 0.6164243293057546,
"grad_norm": 0.20016033947467804,
"learning_rate": 0.0015,
"loss": 1.2037,
"step": 12040
},
{
"epoch": 0.6169363096457096,
"grad_norm": 0.20722372829914093,
"learning_rate": 0.0015,
"loss": 1.2052,
"step": 12050
},
{
"epoch": 0.6174482899856646,
"grad_norm": 0.21285022795200348,
"learning_rate": 0.0015,
"loss": 1.2066,
"step": 12060
},
{
"epoch": 0.6179602703256195,
"grad_norm": 0.21281997859477997,
"learning_rate": 0.0015,
"loss": 1.1955,
"step": 12070
},
{
"epoch": 0.6184722506655744,
"grad_norm": 0.19675594568252563,
"learning_rate": 0.0015,
"loss": 1.2088,
"step": 12080
},
{
"epoch": 0.6189842310055294,
"grad_norm": 0.21459296345710754,
"learning_rate": 0.0015,
"loss": 1.2255,
"step": 12090
},
{
"epoch": 0.6194962113454844,
"grad_norm": 0.20511606335639954,
"learning_rate": 0.0015,
"loss": 1.2,
"step": 12100
},
{
"epoch": 0.6200081916854393,
"grad_norm": 0.20228254795074463,
"learning_rate": 0.0015,
"loss": 1.1906,
"step": 12110
},
{
"epoch": 0.6205201720253942,
"grad_norm": 0.1966087371110916,
"learning_rate": 0.0015,
"loss": 1.1771,
"step": 12120
},
{
"epoch": 0.6210321523653491,
"grad_norm": 0.2050897479057312,
"learning_rate": 0.0015,
"loss": 1.1931,
"step": 12130
},
{
"epoch": 0.6215441327053041,
"grad_norm": 0.20761296153068542,
"learning_rate": 0.0015,
"loss": 1.1796,
"step": 12140
},
{
"epoch": 0.6220561130452591,
"grad_norm": 0.19282642006874084,
"learning_rate": 0.0015,
"loss": 1.2022,
"step": 12150
},
{
"epoch": 0.622568093385214,
"grad_norm": 0.2018144577741623,
"learning_rate": 0.0015,
"loss": 1.2151,
"step": 12160
},
{
"epoch": 0.6230800737251689,
"grad_norm": 0.19583159685134888,
"learning_rate": 0.0015,
"loss": 1.2027,
"step": 12170
},
{
"epoch": 0.6235920540651239,
"grad_norm": 0.22334228456020355,
"learning_rate": 0.0015,
"loss": 1.2158,
"step": 12180
},
{
"epoch": 0.6241040344050789,
"grad_norm": 0.2306404560804367,
"learning_rate": 0.0015,
"loss": 1.1856,
"step": 12190
},
{
"epoch": 0.6246160147450338,
"grad_norm": 0.21355292201042175,
"learning_rate": 0.0015,
"loss": 1.1723,
"step": 12200
},
{
"epoch": 0.6251279950849887,
"grad_norm": 0.19845044612884521,
"learning_rate": 0.0015,
"loss": 1.2052,
"step": 12210
},
{
"epoch": 0.6256399754249437,
"grad_norm": 0.2062026709318161,
"learning_rate": 0.0015,
"loss": 1.2093,
"step": 12220
},
{
"epoch": 0.6261519557648987,
"grad_norm": 0.20521892607212067,
"learning_rate": 0.0015,
"loss": 1.1888,
"step": 12230
},
{
"epoch": 0.6266639361048536,
"grad_norm": 0.20746907591819763,
"learning_rate": 0.0015,
"loss": 1.2038,
"step": 12240
},
{
"epoch": 0.6271759164448085,
"grad_norm": 0.19719459116458893,
"learning_rate": 0.0015,
"loss": 1.1995,
"step": 12250
},
{
"epoch": 0.6276878967847634,
"grad_norm": 0.20681564509868622,
"learning_rate": 0.0015,
"loss": 1.2157,
"step": 12260
},
{
"epoch": 0.6281998771247184,
"grad_norm": 0.20236019790172577,
"learning_rate": 0.0015,
"loss": 1.1859,
"step": 12270
},
{
"epoch": 0.6287118574646734,
"grad_norm": 0.22654055058956146,
"learning_rate": 0.0015,
"loss": 1.1961,
"step": 12280
},
{
"epoch": 0.6292238378046283,
"grad_norm": 0.1928294599056244,
"learning_rate": 0.0015,
"loss": 1.1932,
"step": 12290
},
{
"epoch": 0.6297358181445832,
"grad_norm": 0.21249711513519287,
"learning_rate": 0.0015,
"loss": 1.2018,
"step": 12300
},
{
"epoch": 0.6302477984845382,
"grad_norm": 0.19809094071388245,
"learning_rate": 0.0015,
"loss": 1.1806,
"step": 12310
},
{
"epoch": 0.6307597788244932,
"grad_norm": 0.1965721845626831,
"learning_rate": 0.0015,
"loss": 1.1956,
"step": 12320
},
{
"epoch": 0.6312717591644481,
"grad_norm": 0.20646794140338898,
"learning_rate": 0.0015,
"loss": 1.1907,
"step": 12330
},
{
"epoch": 0.631783739504403,
"grad_norm": 0.19848330318927765,
"learning_rate": 0.0015,
"loss": 1.2049,
"step": 12340
},
{
"epoch": 0.632295719844358,
"grad_norm": 0.19884952902793884,
"learning_rate": 0.0015,
"loss": 1.1886,
"step": 12350
},
{
"epoch": 0.6328077001843129,
"grad_norm": 0.21490252017974854,
"learning_rate": 0.0015,
"loss": 1.2033,
"step": 12360
},
{
"epoch": 0.6333196805242679,
"grad_norm": 0.21076445281505585,
"learning_rate": 0.0015,
"loss": 1.1725,
"step": 12370
},
{
"epoch": 0.6338316608642228,
"grad_norm": 0.20743723213672638,
"learning_rate": 0.0015,
"loss": 1.2118,
"step": 12380
},
{
"epoch": 0.6343436412041777,
"grad_norm": 0.2091572880744934,
"learning_rate": 0.0015,
"loss": 1.2058,
"step": 12390
},
{
"epoch": 0.6348556215441327,
"grad_norm": 0.19593819975852966,
"learning_rate": 0.0015,
"loss": 1.1789,
"step": 12400
},
{
"epoch": 0.6353676018840877,
"grad_norm": 0.21120460331439972,
"learning_rate": 0.0015,
"loss": 1.199,
"step": 12410
},
{
"epoch": 0.6358795822240426,
"grad_norm": 0.19703616201877594,
"learning_rate": 0.0015,
"loss": 1.2062,
"step": 12420
},
{
"epoch": 0.6363915625639975,
"grad_norm": 0.2228432148694992,
"learning_rate": 0.0015,
"loss": 1.2046,
"step": 12430
},
{
"epoch": 0.6369035429039525,
"grad_norm": 0.19556592404842377,
"learning_rate": 0.0015,
"loss": 1.1958,
"step": 12440
},
{
"epoch": 0.6374155232439075,
"grad_norm": 0.2118174135684967,
"learning_rate": 0.0015,
"loss": 1.2158,
"step": 12450
},
{
"epoch": 0.6379275035838624,
"grad_norm": 0.19802866876125336,
"learning_rate": 0.0015,
"loss": 1.1889,
"step": 12460
},
{
"epoch": 0.6384394839238173,
"grad_norm": 0.2045314460992813,
"learning_rate": 0.0015,
"loss": 1.2052,
"step": 12470
},
{
"epoch": 0.6389514642637723,
"grad_norm": 0.20061345398426056,
"learning_rate": 0.0015,
"loss": 1.1859,
"step": 12480
},
{
"epoch": 0.6394634446037272,
"grad_norm": 0.19872547686100006,
"learning_rate": 0.0015,
"loss": 1.2002,
"step": 12490
},
{
"epoch": 0.6399754249436822,
"grad_norm": 0.2001519650220871,
"learning_rate": 0.0015,
"loss": 1.192,
"step": 12500
},
{
"epoch": 0.6404874052836371,
"grad_norm": 0.20049947500228882,
"learning_rate": 0.0015,
"loss": 1.1919,
"step": 12510
},
{
"epoch": 0.640999385623592,
"grad_norm": 0.20143716037273407,
"learning_rate": 0.0015,
"loss": 1.1821,
"step": 12520
},
{
"epoch": 0.641511365963547,
"grad_norm": 0.19347570836544037,
"learning_rate": 0.0015,
"loss": 1.2135,
"step": 12530
},
{
"epoch": 0.642023346303502,
"grad_norm": 0.19492658972740173,
"learning_rate": 0.0015,
"loss": 1.1891,
"step": 12540
},
{
"epoch": 0.6425353266434569,
"grad_norm": 0.19527223706245422,
"learning_rate": 0.0015,
"loss": 1.2102,
"step": 12550
},
{
"epoch": 0.6430473069834118,
"grad_norm": 0.1927892118692398,
"learning_rate": 0.0015,
"loss": 1.1714,
"step": 12560
},
{
"epoch": 0.6435592873233668,
"grad_norm": 0.2009015530347824,
"learning_rate": 0.0015,
"loss": 1.2035,
"step": 12570
},
{
"epoch": 0.6440712676633217,
"grad_norm": 0.21776844561100006,
"learning_rate": 0.0015,
"loss": 1.1777,
"step": 12580
},
{
"epoch": 0.6445832480032767,
"grad_norm": 0.19154374301433563,
"learning_rate": 0.0015,
"loss": 1.1906,
"step": 12590
},
{
"epoch": 0.6450952283432316,
"grad_norm": 0.19381144642829895,
"learning_rate": 0.0015,
"loss": 1.1778,
"step": 12600
},
{
"epoch": 0.6456072086831866,
"grad_norm": 0.19017955660820007,
"learning_rate": 0.0015,
"loss": 1.1967,
"step": 12610
},
{
"epoch": 0.6461191890231415,
"grad_norm": 0.21785299479961395,
"learning_rate": 0.0015,
"loss": 1.2088,
"step": 12620
},
{
"epoch": 0.6466311693630965,
"grad_norm": 0.2039538025856018,
"learning_rate": 0.0015,
"loss": 1.1663,
"step": 12630
},
{
"epoch": 0.6471431497030514,
"grad_norm": 0.19732427597045898,
"learning_rate": 0.0015,
"loss": 1.1913,
"step": 12640
},
{
"epoch": 0.6476551300430063,
"grad_norm": 0.1911800503730774,
"learning_rate": 0.0015,
"loss": 1.2052,
"step": 12650
},
{
"epoch": 0.6481671103829613,
"grad_norm": 0.19413244724273682,
"learning_rate": 0.0015,
"loss": 1.1804,
"step": 12660
},
{
"epoch": 0.6486790907229162,
"grad_norm": 0.1838771104812622,
"learning_rate": 0.0015,
"loss": 1.1911,
"step": 12670
},
{
"epoch": 0.6491910710628712,
"grad_norm": 0.1838536560535431,
"learning_rate": 0.0015,
"loss": 1.1991,
"step": 12680
},
{
"epoch": 0.6497030514028261,
"grad_norm": 0.20453278720378876,
"learning_rate": 0.0015,
"loss": 1.1992,
"step": 12690
},
{
"epoch": 0.6502150317427811,
"grad_norm": 0.21677398681640625,
"learning_rate": 0.0015,
"loss": 1.1811,
"step": 12700
},
{
"epoch": 0.650727012082736,
"grad_norm": 0.19484928250312805,
"learning_rate": 0.0015,
"loss": 1.1924,
"step": 12710
},
{
"epoch": 0.651238992422691,
"grad_norm": 0.1887393295764923,
"learning_rate": 0.0015,
"loss": 1.1978,
"step": 12720
},
{
"epoch": 0.6517509727626459,
"grad_norm": 0.19239051640033722,
"learning_rate": 0.0015,
"loss": 1.2051,
"step": 12730
},
{
"epoch": 0.6522629531026009,
"grad_norm": 0.20435065031051636,
"learning_rate": 0.0015,
"loss": 1.153,
"step": 12740
},
{
"epoch": 0.6527749334425558,
"grad_norm": 0.2020270824432373,
"learning_rate": 0.0015,
"loss": 1.2096,
"step": 12750
},
{
"epoch": 0.6532869137825108,
"grad_norm": 0.21720841526985168,
"learning_rate": 0.0015,
"loss": 1.1776,
"step": 12760
},
{
"epoch": 0.6537988941224657,
"grad_norm": 0.19210828840732574,
"learning_rate": 0.0015,
"loss": 1.1894,
"step": 12770
},
{
"epoch": 0.6543108744624206,
"grad_norm": 0.19044719636440277,
"learning_rate": 0.0015,
"loss": 1.1894,
"step": 12780
},
{
"epoch": 0.6548228548023756,
"grad_norm": 0.20893365144729614,
"learning_rate": 0.0015,
"loss": 1.1916,
"step": 12790
},
{
"epoch": 0.6553348351423305,
"grad_norm": 0.20288752019405365,
"learning_rate": 0.0015,
"loss": 1.2018,
"step": 12800
},
{
"epoch": 0.6558468154822855,
"grad_norm": 0.1970445066690445,
"learning_rate": 0.0015,
"loss": 1.1728,
"step": 12810
},
{
"epoch": 0.6563587958222404,
"grad_norm": 0.19928324222564697,
"learning_rate": 0.0015,
"loss": 1.1959,
"step": 12820
},
{
"epoch": 0.6568707761621954,
"grad_norm": 0.1929846554994583,
"learning_rate": 0.0015,
"loss": 1.1885,
"step": 12830
},
{
"epoch": 0.6573827565021503,
"grad_norm": 0.20633605122566223,
"learning_rate": 0.0015,
"loss": 1.2145,
"step": 12840
},
{
"epoch": 0.6578947368421053,
"grad_norm": 0.19971442222595215,
"learning_rate": 0.0015,
"loss": 1.188,
"step": 12850
},
{
"epoch": 0.6584067171820602,
"grad_norm": 0.18677356839179993,
"learning_rate": 0.0015,
"loss": 1.1943,
"step": 12860
},
{
"epoch": 0.6589186975220152,
"grad_norm": 0.1940857172012329,
"learning_rate": 0.0015,
"loss": 1.1921,
"step": 12870
},
{
"epoch": 0.6594306778619701,
"grad_norm": 0.20788009464740753,
"learning_rate": 0.0015,
"loss": 1.1922,
"step": 12880
},
{
"epoch": 0.659942658201925,
"grad_norm": 0.20371931791305542,
"learning_rate": 0.0015,
"loss": 1.1963,
"step": 12890
},
{
"epoch": 0.66045463854188,
"grad_norm": 0.19461549818515778,
"learning_rate": 0.0015,
"loss": 1.1639,
"step": 12900
},
{
"epoch": 0.6609666188818349,
"grad_norm": 0.19904249906539917,
"learning_rate": 0.0015,
"loss": 1.1708,
"step": 12910
},
{
"epoch": 0.6614785992217899,
"grad_norm": 0.2062397003173828,
"learning_rate": 0.0015,
"loss": 1.1937,
"step": 12920
},
{
"epoch": 0.6619905795617448,
"grad_norm": 0.20642533898353577,
"learning_rate": 0.0015,
"loss": 1.1929,
"step": 12930
},
{
"epoch": 0.6625025599016998,
"grad_norm": 0.19433195888996124,
"learning_rate": 0.0015,
"loss": 1.1886,
"step": 12940
},
{
"epoch": 0.6630145402416547,
"grad_norm": 0.1951138973236084,
"learning_rate": 0.0015,
"loss": 1.1847,
"step": 12950
},
{
"epoch": 0.6635265205816097,
"grad_norm": 0.19220565259456635,
"learning_rate": 0.0015,
"loss": 1.1847,
"step": 12960
},
{
"epoch": 0.6640385009215646,
"grad_norm": 0.1887965053319931,
"learning_rate": 0.0015,
"loss": 1.1791,
"step": 12970
},
{
"epoch": 0.6645504812615196,
"grad_norm": 0.18562547862529755,
"learning_rate": 0.0015,
"loss": 1.1677,
"step": 12980
},
{
"epoch": 0.6650624616014745,
"grad_norm": 0.1826203167438507,
"learning_rate": 0.0015,
"loss": 1.1796,
"step": 12990
},
{
"epoch": 0.6655744419414295,
"grad_norm": 0.18740873038768768,
"learning_rate": 0.0015,
"loss": 1.1797,
"step": 13000
},
{
"epoch": 0.6660864222813844,
"grad_norm": 0.1979881227016449,
"learning_rate": 0.0015,
"loss": 1.198,
"step": 13010
},
{
"epoch": 0.6665984026213393,
"grad_norm": 0.20608335733413696,
"learning_rate": 0.0015,
"loss": 1.1926,
"step": 13020
},
{
"epoch": 0.6671103829612943,
"grad_norm": 0.21441541612148285,
"learning_rate": 0.0015,
"loss": 1.2049,
"step": 13030
},
{
"epoch": 0.6676223633012492,
"grad_norm": 0.22678618133068085,
"learning_rate": 0.0015,
"loss": 1.1917,
"step": 13040
},
{
"epoch": 0.6681343436412042,
"grad_norm": 0.19718590378761292,
"learning_rate": 0.0015,
"loss": 1.1968,
"step": 13050
},
{
"epoch": 0.6686463239811591,
"grad_norm": 0.19607524573802948,
"learning_rate": 0.0015,
"loss": 1.1721,
"step": 13060
},
{
"epoch": 0.6691583043211141,
"grad_norm": 0.19298435747623444,
"learning_rate": 0.0015,
"loss": 1.1979,
"step": 13070
},
{
"epoch": 0.669670284661069,
"grad_norm": 0.19610482454299927,
"learning_rate": 0.0015,
"loss": 1.1919,
"step": 13080
},
{
"epoch": 0.670182265001024,
"grad_norm": 0.19872240722179413,
"learning_rate": 0.0015,
"loss": 1.183,
"step": 13090
},
{
"epoch": 0.6706942453409789,
"grad_norm": 0.1863928586244583,
"learning_rate": 0.0015,
"loss": 1.1868,
"step": 13100
},
{
"epoch": 0.6712062256809338,
"grad_norm": 0.19495519995689392,
"learning_rate": 0.0015,
"loss": 1.2084,
"step": 13110
},
{
"epoch": 0.6717182060208888,
"grad_norm": 0.19348977506160736,
"learning_rate": 0.0015,
"loss": 1.1981,
"step": 13120
},
{
"epoch": 0.6722301863608438,
"grad_norm": 0.19418825209140778,
"learning_rate": 0.0015,
"loss": 1.2081,
"step": 13130
},
{
"epoch": 0.6727421667007987,
"grad_norm": 0.19263537228107452,
"learning_rate": 0.0015,
"loss": 1.181,
"step": 13140
},
{
"epoch": 0.6732541470407536,
"grad_norm": 0.19272197782993317,
"learning_rate": 0.0015,
"loss": 1.1908,
"step": 13150
},
{
"epoch": 0.6737661273807086,
"grad_norm": 0.19103066623210907,
"learning_rate": 0.0015,
"loss": 1.164,
"step": 13160
},
{
"epoch": 0.6742781077206635,
"grad_norm": 0.19996246695518494,
"learning_rate": 0.0015,
"loss": 1.1951,
"step": 13170
},
{
"epoch": 0.6747900880606185,
"grad_norm": 0.2288653403520584,
"learning_rate": 0.0015,
"loss": 1.2188,
"step": 13180
},
{
"epoch": 0.6753020684005734,
"grad_norm": 0.1978132575750351,
"learning_rate": 0.0015,
"loss": 1.177,
"step": 13190
},
{
"epoch": 0.6758140487405284,
"grad_norm": 0.2042623907327652,
"learning_rate": 0.0015,
"loss": 1.1833,
"step": 13200
},
{
"epoch": 0.6763260290804833,
"grad_norm": 0.1838945895433426,
"learning_rate": 0.0015,
"loss": 1.1638,
"step": 13210
},
{
"epoch": 0.6768380094204383,
"grad_norm": 0.18537567555904388,
"learning_rate": 0.0015,
"loss": 1.1879,
"step": 13220
},
{
"epoch": 0.6773499897603932,
"grad_norm": 0.19888518750667572,
"learning_rate": 0.0015,
"loss": 1.1648,
"step": 13230
},
{
"epoch": 0.6778619701003481,
"grad_norm": 0.20373912155628204,
"learning_rate": 0.0015,
"loss": 1.2043,
"step": 13240
},
{
"epoch": 0.6783739504403031,
"grad_norm": 0.19218416512012482,
"learning_rate": 0.0015,
"loss": 1.1553,
"step": 13250
},
{
"epoch": 0.678885930780258,
"grad_norm": 0.1989835649728775,
"learning_rate": 0.0015,
"loss": 1.1679,
"step": 13260
},
{
"epoch": 0.679397911120213,
"grad_norm": 0.20067016780376434,
"learning_rate": 0.0015,
"loss": 1.1827,
"step": 13270
},
{
"epoch": 0.6799098914601679,
"grad_norm": 0.19568151235580444,
"learning_rate": 0.0015,
"loss": 1.1839,
"step": 13280
},
{
"epoch": 0.6804218718001229,
"grad_norm": 0.2029784619808197,
"learning_rate": 0.0015,
"loss": 1.1787,
"step": 13290
},
{
"epoch": 0.6809338521400778,
"grad_norm": 0.19807346165180206,
"learning_rate": 0.0015,
"loss": 1.1763,
"step": 13300
},
{
"epoch": 0.6814458324800328,
"grad_norm": 0.1898653358221054,
"learning_rate": 0.0015,
"loss": 1.2075,
"step": 13310
},
{
"epoch": 0.6819578128199877,
"grad_norm": 0.2038862705230713,
"learning_rate": 0.0015,
"loss": 1.1773,
"step": 13320
},
{
"epoch": 0.6824697931599426,
"grad_norm": 0.18675602972507477,
"learning_rate": 0.0015,
"loss": 1.1888,
"step": 13330
},
{
"epoch": 0.6829817734998976,
"grad_norm": 0.20663636922836304,
"learning_rate": 0.0015,
"loss": 1.169,
"step": 13340
},
{
"epoch": 0.6834937538398526,
"grad_norm": 0.1998421996831894,
"learning_rate": 0.0015,
"loss": 1.1725,
"step": 13350
},
{
"epoch": 0.6840057341798075,
"grad_norm": 0.20095355808734894,
"learning_rate": 0.0015,
"loss": 1.1727,
"step": 13360
},
{
"epoch": 0.6845177145197624,
"grad_norm": 0.19053997099399567,
"learning_rate": 0.0015,
"loss": 1.1759,
"step": 13370
},
{
"epoch": 0.6850296948597174,
"grad_norm": 0.20177049934864044,
"learning_rate": 0.0015,
"loss": 1.1845,
"step": 13380
},
{
"epoch": 0.6855416751996724,
"grad_norm": 0.19868339598178864,
"learning_rate": 0.0015,
"loss": 1.178,
"step": 13390
},
{
"epoch": 0.6860536555396273,
"grad_norm": 0.1922164112329483,
"learning_rate": 0.0015,
"loss": 1.1536,
"step": 13400
},
{
"epoch": 0.6865656358795822,
"grad_norm": 0.2025415003299713,
"learning_rate": 0.0015,
"loss": 1.1849,
"step": 13410
},
{
"epoch": 0.6870776162195372,
"grad_norm": 0.19813013076782227,
"learning_rate": 0.0015,
"loss": 1.1803,
"step": 13420
},
{
"epoch": 0.6875895965594921,
"grad_norm": 0.18536531925201416,
"learning_rate": 0.0015,
"loss": 1.1686,
"step": 13430
},
{
"epoch": 0.6881015768994471,
"grad_norm": 0.1998080015182495,
"learning_rate": 0.0015,
"loss": 1.1949,
"step": 13440
},
{
"epoch": 0.688613557239402,
"grad_norm": 0.1955641508102417,
"learning_rate": 0.0015,
"loss": 1.1758,
"step": 13450
},
{
"epoch": 0.6891255375793569,
"grad_norm": 0.19140900671482086,
"learning_rate": 0.0015,
"loss": 1.1675,
"step": 13460
},
{
"epoch": 0.6896375179193119,
"grad_norm": 0.20261794328689575,
"learning_rate": 0.0015,
"loss": 1.1802,
"step": 13470
},
{
"epoch": 0.6901494982592669,
"grad_norm": 0.19682539999485016,
"learning_rate": 0.0015,
"loss": 1.1798,
"step": 13480
},
{
"epoch": 0.6906614785992218,
"grad_norm": 0.2020127922296524,
"learning_rate": 0.0015,
"loss": 1.172,
"step": 13490
},
{
"epoch": 0.6911734589391767,
"grad_norm": 0.19824573397636414,
"learning_rate": 0.0015,
"loss": 1.1888,
"step": 13500
},
{
"epoch": 0.6916854392791317,
"grad_norm": 0.20089636743068695,
"learning_rate": 0.0015,
"loss": 1.1865,
"step": 13510
},
{
"epoch": 0.6921974196190867,
"grad_norm": 0.1954367458820343,
"learning_rate": 0.0015,
"loss": 1.1734,
"step": 13520
},
{
"epoch": 0.6927093999590416,
"grad_norm": 0.1989155411720276,
"learning_rate": 0.0015,
"loss": 1.1676,
"step": 13530
},
{
"epoch": 0.6932213802989965,
"grad_norm": 0.20354506373405457,
"learning_rate": 0.0015,
"loss": 1.1638,
"step": 13540
},
{
"epoch": 0.6937333606389514,
"grad_norm": 0.18505001068115234,
"learning_rate": 0.0015,
"loss": 1.1623,
"step": 13550
},
{
"epoch": 0.6942453409789064,
"grad_norm": 0.19758115708827972,
"learning_rate": 0.0015,
"loss": 1.1715,
"step": 13560
},
{
"epoch": 0.6947573213188614,
"grad_norm": 0.19761599600315094,
"learning_rate": 0.0015,
"loss": 1.1892,
"step": 13570
},
{
"epoch": 0.6952693016588163,
"grad_norm": 0.2028966248035431,
"learning_rate": 0.0015,
"loss": 1.1779,
"step": 13580
},
{
"epoch": 0.6957812819987712,
"grad_norm": 0.1852991133928299,
"learning_rate": 0.0015,
"loss": 1.1756,
"step": 13590
},
{
"epoch": 0.6962932623387262,
"grad_norm": 0.18972176313400269,
"learning_rate": 0.0015,
"loss": 1.1583,
"step": 13600
},
{
"epoch": 0.6968052426786812,
"grad_norm": 0.18746834993362427,
"learning_rate": 0.0015,
"loss": 1.1758,
"step": 13610
},
{
"epoch": 0.6973172230186361,
"grad_norm": 0.1831192672252655,
"learning_rate": 0.0015,
"loss": 1.1904,
"step": 13620
},
{
"epoch": 0.697829203358591,
"grad_norm": 0.21230356395244598,
"learning_rate": 0.0015,
"loss": 1.1673,
"step": 13630
},
{
"epoch": 0.698341183698546,
"grad_norm": 0.2109021544456482,
"learning_rate": 0.0015,
"loss": 1.176,
"step": 13640
},
{
"epoch": 0.698853164038501,
"grad_norm": 0.18572686612606049,
"learning_rate": 0.0015,
"loss": 1.195,
"step": 13650
},
{
"epoch": 0.6993651443784559,
"grad_norm": 0.19169217348098755,
"learning_rate": 0.0015,
"loss": 1.1865,
"step": 13660
},
{
"epoch": 0.6998771247184108,
"grad_norm": 0.18918085098266602,
"learning_rate": 0.0015,
"loss": 1.1788,
"step": 13670
},
{
"epoch": 0.7003891050583657,
"grad_norm": 0.19315798580646515,
"learning_rate": 0.0014955269451601939,
"loss": 1.1739,
"step": 13680
},
{
"epoch": 0.7009010853983207,
"grad_norm": 0.18943412601947784,
"learning_rate": 0.0014896616625957439,
"loss": 1.1649,
"step": 13690
},
{
"epoch": 0.7014130657382757,
"grad_norm": 0.19846367835998535,
"learning_rate": 0.001483819382986655,
"loss": 1.1883,
"step": 13700
},
{
"epoch": 0.7019250460782306,
"grad_norm": 0.19269226491451263,
"learning_rate": 0.001478000016118014,
"loss": 1.1775,
"step": 13710
},
{
"epoch": 0.7024370264181855,
"grad_norm": 0.19260330498218536,
"learning_rate": 0.0014722034721287212,
"loss": 1.169,
"step": 13720
},
{
"epoch": 0.7029490067581405,
"grad_norm": 0.19868920743465424,
"learning_rate": 0.0014664296615101004,
"loss": 1.1671,
"step": 13730
},
{
"epoch": 0.7034609870980955,
"grad_norm": 0.1958989053964615,
"learning_rate": 0.0014606784951045186,
"loss": 1.2049,
"step": 13740
},
{
"epoch": 0.7039729674380504,
"grad_norm": 0.194174125790596,
"learning_rate": 0.0014549498841040086,
"loss": 1.1703,
"step": 13750
},
{
"epoch": 0.7044849477780053,
"grad_norm": 0.19567228853702545,
"learning_rate": 0.0014492437400488976,
"loss": 1.1649,
"step": 13760
},
{
"epoch": 0.7049969281179602,
"grad_norm": 0.191901296377182,
"learning_rate": 0.0014435599748264416,
"loss": 1.169,
"step": 13770
},
{
"epoch": 0.7055089084579153,
"grad_norm": 0.1933002918958664,
"learning_rate": 0.0014378985006694644,
"loss": 1.1873,
"step": 13780
},
{
"epoch": 0.7060208887978702,
"grad_norm": 0.20665253698825836,
"learning_rate": 0.0014322592301550022,
"loss": 1.1773,
"step": 13790
},
{
"epoch": 0.7065328691378251,
"grad_norm": 0.19543762505054474,
"learning_rate": 0.0014266420762029542,
"loss": 1.1738,
"step": 13800
},
{
"epoch": 0.70704484947778,
"grad_norm": 0.186002716422081,
"learning_rate": 0.0014210469520747377,
"loss": 1.1783,
"step": 13810
},
{
"epoch": 0.707556829817735,
"grad_norm": 0.1872335523366928,
"learning_rate": 0.0014154737713719476,
"loss": 1.1918,
"step": 13820
},
{
"epoch": 0.70806881015769,
"grad_norm": 0.1909414827823639,
"learning_rate": 0.0014099224480350252,
"loss": 1.1587,
"step": 13830
},
{
"epoch": 0.7085807904976449,
"grad_norm": 0.1957162618637085,
"learning_rate": 0.0014043928963419256,
"loss": 1.1783,
"step": 13840
},
{
"epoch": 0.7090927708375998,
"grad_norm": 0.1931842565536499,
"learning_rate": 0.0013988850309067965,
"loss": 1.1749,
"step": 13850
},
{
"epoch": 0.7096047511775547,
"grad_norm": 0.2018897980451584,
"learning_rate": 0.0013933987666786593,
"loss": 1.1457,
"step": 13860
},
{
"epoch": 0.7101167315175098,
"grad_norm": 0.1824326366186142,
"learning_rate": 0.0013879340189400947,
"loss": 1.1861,
"step": 13870
},
{
"epoch": 0.7106287118574647,
"grad_norm": 0.19200804829597473,
"learning_rate": 0.0013824907033059355,
"loss": 1.1669,
"step": 13880
},
{
"epoch": 0.7111406921974196,
"grad_norm": 0.18873439729213715,
"learning_rate": 0.001377068735721964,
"loss": 1.1555,
"step": 13890
},
{
"epoch": 0.7116526725373745,
"grad_norm": 0.19836601614952087,
"learning_rate": 0.0013716680324636122,
"loss": 1.1536,
"step": 13900
},
{
"epoch": 0.7121646528773296,
"grad_norm": 0.2006756067276001,
"learning_rate": 0.001366288510134671,
"loss": 1.1595,
"step": 13910
},
{
"epoch": 0.7126766332172845,
"grad_norm": 0.18679478764533997,
"learning_rate": 0.0013609300856660014,
"loss": 1.1762,
"step": 13920
},
{
"epoch": 0.7131886135572394,
"grad_norm": 0.19826917350292206,
"learning_rate": 0.001355592676314251,
"loss": 1.1752,
"step": 13930
},
{
"epoch": 0.7137005938971943,
"grad_norm": 0.18885891139507294,
"learning_rate": 0.0013502761996605787,
"loss": 1.1731,
"step": 13940
},
{
"epoch": 0.7142125742371493,
"grad_norm": 0.1888403594493866,
"learning_rate": 0.0013449805736093791,
"loss": 1.1536,
"step": 13950
},
{
"epoch": 0.7147245545771043,
"grad_norm": 0.20078985393047333,
"learning_rate": 0.0013397057163870173,
"loss": 1.1545,
"step": 13960
},
{
"epoch": 0.7152365349170592,
"grad_norm": 0.19156110286712646,
"learning_rate": 0.001334451546540564,
"loss": 1.148,
"step": 13970
},
{
"epoch": 0.7157485152570141,
"grad_norm": 0.19765546917915344,
"learning_rate": 0.0013292179829365398,
"loss": 1.1776,
"step": 13980
},
{
"epoch": 0.716260495596969,
"grad_norm": 0.1948610097169876,
"learning_rate": 0.001324004944759661,
"loss": 1.1597,
"step": 13990
},
{
"epoch": 0.7167724759369241,
"grad_norm": 0.1816781461238861,
"learning_rate": 0.0013188123515115915,
"loss": 1.1484,
"step": 14000
},
{
"epoch": 0.717284456276879,
"grad_norm": 0.2072591632604599,
"learning_rate": 0.0013136401230097012,
"loss": 1.1678,
"step": 14010
},
{
"epoch": 0.7177964366168339,
"grad_norm": 0.19381676614284515,
"learning_rate": 0.0013084881793858267,
"loss": 1.1714,
"step": 14020
},
{
"epoch": 0.7183084169567888,
"grad_norm": 0.178278848528862,
"learning_rate": 0.0013033564410850373,
"loss": 1.162,
"step": 14030
},
{
"epoch": 0.7188203972967439,
"grad_norm": 0.18733732402324677,
"learning_rate": 0.001298244828864409,
"loss": 1.1565,
"step": 14040
},
{
"epoch": 0.7193323776366988,
"grad_norm": 0.18614625930786133,
"learning_rate": 0.0012931532637917983,
"loss": 1.1678,
"step": 14050
},
{
"epoch": 0.7198443579766537,
"grad_norm": 0.17618735134601593,
"learning_rate": 0.0012880816672446245,
"loss": 1.1723,
"step": 14060
},
{
"epoch": 0.7203563383166086,
"grad_norm": 0.17765553295612335,
"learning_rate": 0.0012830299609086558,
"loss": 1.1511,
"step": 14070
},
{
"epoch": 0.7208683186565635,
"grad_norm": 0.19092194736003876,
"learning_rate": 0.0012779980667767994,
"loss": 1.1679,
"step": 14080
},
{
"epoch": 0.7213802989965186,
"grad_norm": 0.18768686056137085,
"learning_rate": 0.0012729859071478975,
"loss": 1.1668,
"step": 14090
},
{
"epoch": 0.7218922793364735,
"grad_norm": 0.18770349025726318,
"learning_rate": 0.0012679934046255271,
"loss": 1.1749,
"step": 14100
},
{
"epoch": 0.7224042596764284,
"grad_norm": 0.1935562640428543,
"learning_rate": 0.0012630204821168047,
"loss": 1.1535,
"step": 14110
},
{
"epoch": 0.7229162400163833,
"grad_norm": 0.17887477576732635,
"learning_rate": 0.0012580670628311967,
"loss": 1.1541,
"step": 14120
},
{
"epoch": 0.7234282203563384,
"grad_norm": 0.18734948337078094,
"learning_rate": 0.0012531330702793323,
"loss": 1.1669,
"step": 14130
},
{
"epoch": 0.7239402006962933,
"grad_norm": 0.17879174649715424,
"learning_rate": 0.0012482184282718238,
"loss": 1.1905,
"step": 14140
},
{
"epoch": 0.7244521810362482,
"grad_norm": 0.1950501948595047,
"learning_rate": 0.0012433230609180889,
"loss": 1.1446,
"step": 14150
},
{
"epoch": 0.7249641613762031,
"grad_norm": 0.1801559329032898,
"learning_rate": 0.0012384468926251798,
"loss": 1.1367,
"step": 14160
},
{
"epoch": 0.7254761417161582,
"grad_norm": 0.17999699711799622,
"learning_rate": 0.0012335898480966146,
"loss": 1.1402,
"step": 14170
},
{
"epoch": 0.7259881220561131,
"grad_norm": 0.18279437720775604,
"learning_rate": 0.0012287518523312166,
"loss": 1.1597,
"step": 14180
},
{
"epoch": 0.726500102396068,
"grad_norm": 0.19126516580581665,
"learning_rate": 0.001223932830621954,
"loss": 1.1604,
"step": 14190
},
{
"epoch": 0.7270120827360229,
"grad_norm": 0.18581058084964752,
"learning_rate": 0.0012191327085547877,
"loss": 1.1532,
"step": 14200
},
{
"epoch": 0.7275240630759778,
"grad_norm": 0.20243413746356964,
"learning_rate": 0.0012143514120075223,
"loss": 1.1495,
"step": 14210
},
{
"epoch": 0.7280360434159329,
"grad_norm": 0.19404320418834686,
"learning_rate": 0.0012095888671486597,
"loss": 1.1567,
"step": 14220
},
{
"epoch": 0.7285480237558878,
"grad_norm": 0.18503792583942413,
"learning_rate": 0.0012048450004362614,
"loss": 1.128,
"step": 14230
},
{
"epoch": 0.7290600040958427,
"grad_norm": 0.19073212146759033,
"learning_rate": 0.0012001197386168117,
"loss": 1.1458,
"step": 14240
},
{
"epoch": 0.7295719844357976,
"grad_norm": 0.2037813812494278,
"learning_rate": 0.0011954130087240865,
"loss": 1.1741,
"step": 14250
},
{
"epoch": 0.7300839647757527,
"grad_norm": 0.18591246008872986,
"learning_rate": 0.0011907247380780264,
"loss": 1.1458,
"step": 14260
},
{
"epoch": 0.7305959451157076,
"grad_norm": 0.18210938572883606,
"learning_rate": 0.0011860548542836156,
"loss": 1.1695,
"step": 14270
},
{
"epoch": 0.7311079254556625,
"grad_norm": 0.18794593214988708,
"learning_rate": 0.0011814032852297623,
"loss": 1.1458,
"step": 14280
},
{
"epoch": 0.7316199057956174,
"grad_norm": 0.1834757775068283,
"learning_rate": 0.001176769959088186,
"loss": 1.1485,
"step": 14290
},
{
"epoch": 0.7321318861355723,
"grad_norm": 0.1770770400762558,
"learning_rate": 0.0011721548043123092,
"loss": 1.1473,
"step": 14300
},
{
"epoch": 0.7326438664755274,
"grad_norm": 0.19540582597255707,
"learning_rate": 0.0011675577496361507,
"loss": 1.14,
"step": 14310
},
{
"epoch": 0.7331558468154823,
"grad_norm": 0.18834899365901947,
"learning_rate": 0.0011629787240732272,
"loss": 1.1326,
"step": 14320
},
{
"epoch": 0.7336678271554372,
"grad_norm": 0.18618904054164886,
"learning_rate": 0.0011584176569154553,
"loss": 1.1388,
"step": 14330
},
{
"epoch": 0.7341798074953921,
"grad_norm": 0.1807902604341507,
"learning_rate": 0.0011538744777320608,
"loss": 1.1448,
"step": 14340
},
{
"epoch": 0.7346917878353472,
"grad_norm": 0.18239812552928925,
"learning_rate": 0.0011493491163684908,
"loss": 1.1355,
"step": 14350
},
{
"epoch": 0.7352037681753021,
"grad_norm": 0.18156401813030243,
"learning_rate": 0.0011448415029453305,
"loss": 1.1309,
"step": 14360
},
{
"epoch": 0.735715748515257,
"grad_norm": 0.1813691258430481,
"learning_rate": 0.0011403515678572234,
"loss": 1.134,
"step": 14370
},
{
"epoch": 0.7362277288552119,
"grad_norm": 0.18241450190544128,
"learning_rate": 0.0011358792417717981,
"loss": 1.1378,
"step": 14380
},
{
"epoch": 0.736739709195167,
"grad_norm": 0.18394464254379272,
"learning_rate": 0.001131424455628596,
"loss": 1.1497,
"step": 14390
},
{
"epoch": 0.7372516895351219,
"grad_norm": 0.18612609803676605,
"learning_rate": 0.0011269871406380059,
"loss": 1.1669,
"step": 14400
},
{
"epoch": 0.7377636698750768,
"grad_norm": 0.18373136222362518,
"learning_rate": 0.001122567228280201,
"loss": 1.1453,
"step": 14410
},
{
"epoch": 0.7382756502150317,
"grad_norm": 0.193937748670578,
"learning_rate": 0.001118164650304082,
"loss": 1.1357,
"step": 14420
},
{
"epoch": 0.7387876305549866,
"grad_norm": 0.18261444568634033,
"learning_rate": 0.0011137793387262216,
"loss": 1.169,
"step": 14430
},
{
"epoch": 0.7392996108949417,
"grad_norm": 0.19592134654521942,
"learning_rate": 0.0011094112258298167,
"loss": 1.1518,
"step": 14440
},
{
"epoch": 0.7398115912348966,
"grad_norm": 0.17495043575763702,
"learning_rate": 0.0011050602441636402,
"loss": 1.1481,
"step": 14450
},
{
"epoch": 0.7403235715748515,
"grad_norm": 0.18108507990837097,
"learning_rate": 0.001100726326541002,
"loss": 1.1327,
"step": 14460
},
{
"epoch": 0.7408355519148064,
"grad_norm": 0.1797986775636673,
"learning_rate": 0.00109640940603871,
"loss": 1.1394,
"step": 14470
},
{
"epoch": 0.7413475322547615,
"grad_norm": 0.18484458327293396,
"learning_rate": 0.001092109415996037,
"loss": 1.1188,
"step": 14480
},
{
"epoch": 0.7418595125947164,
"grad_norm": 0.1784062534570694,
"learning_rate": 0.0010878262900136915,
"loss": 1.125,
"step": 14490
},
{
"epoch": 0.7423714929346713,
"grad_norm": 0.1869814693927765,
"learning_rate": 0.0010835599619527924,
"loss": 1.1417,
"step": 14500
},
{
"epoch": 0.7428834732746262,
"grad_norm": 0.18346761167049408,
"learning_rate": 0.0010793103659338475,
"loss": 1.1182,
"step": 14510
},
{
"epoch": 0.7433954536145811,
"grad_norm": 0.188985213637352,
"learning_rate": 0.0010750774363357356,
"loss": 1.1412,
"step": 14520
},
{
"epoch": 0.7439074339545362,
"grad_norm": 0.1802164912223816,
"learning_rate": 0.0010708611077946955,
"loss": 1.1338,
"step": 14530
},
{
"epoch": 0.7444194142944911,
"grad_norm": 0.17940784990787506,
"learning_rate": 0.0010666613152033133,
"loss": 1.1477,
"step": 14540
},
{
"epoch": 0.744931394634446,
"grad_norm": 0.19481126964092255,
"learning_rate": 0.00106247799370952,
"loss": 1.1306,
"step": 14550
},
{
"epoch": 0.7454433749744009,
"grad_norm": 0.17663590610027313,
"learning_rate": 0.0010583110787155889,
"loss": 1.1395,
"step": 14560
},
{
"epoch": 0.745955355314356,
"grad_norm": 0.18392081558704376,
"learning_rate": 0.001054160505877137,
"loss": 1.1339,
"step": 14570
},
{
"epoch": 0.7464673356543109,
"grad_norm": 0.1872582733631134,
"learning_rate": 0.0010500262111021333,
"loss": 1.1271,
"step": 14580
},
{
"epoch": 0.7469793159942658,
"grad_norm": 0.18514196574687958,
"learning_rate": 0.0010459081305499078,
"loss": 1.1561,
"step": 14590
},
{
"epoch": 0.7474912963342207,
"grad_norm": 0.18902930617332458,
"learning_rate": 0.0010418062006301674,
"loss": 1.1402,
"step": 14600
},
{
"epoch": 0.7480032766741758,
"grad_norm": 0.1824546903371811,
"learning_rate": 0.0010377203580020109,
"loss": 1.1439,
"step": 14610
},
{
"epoch": 0.7485152570141307,
"grad_norm": 0.1803770363330841,
"learning_rate": 0.001033650539572954,
"loss": 1.1313,
"step": 14620
},
{
"epoch": 0.7490272373540856,
"grad_norm": 0.19267936050891876,
"learning_rate": 0.0010295966824979534,
"loss": 1.1082,
"step": 14630
},
{
"epoch": 0.7495392176940405,
"grad_norm": 0.19047097861766815,
"learning_rate": 0.0010255587241784366,
"loss": 1.122,
"step": 14640
},
{
"epoch": 0.7500511980339954,
"grad_norm": 0.1689426302909851,
"learning_rate": 0.0010215366022613358,
"loss": 1.1172,
"step": 14650
},
{
"epoch": 0.7505631783739505,
"grad_norm": 0.18644796311855316,
"learning_rate": 0.0010175302546381246,
"loss": 1.146,
"step": 14660
},
{
"epoch": 0.7510751587139054,
"grad_norm": 0.18672852218151093,
"learning_rate": 0.0010135396194438586,
"loss": 1.1386,
"step": 14670
},
{
"epoch": 0.7515871390538603,
"grad_norm": 0.19166767597198486,
"learning_rate": 0.0010095646350562206,
"loss": 1.1365,
"step": 14680
},
{
"epoch": 0.7520991193938152,
"grad_norm": 0.18109376728534698,
"learning_rate": 0.0010056052400945696,
"loss": 1.113,
"step": 14690
},
{
"epoch": 0.7526110997337703,
"grad_norm": 0.17950654029846191,
"learning_rate": 0.0010016613734189915,
"loss": 1.1474,
"step": 14700
},
{
"epoch": 0.7531230800737252,
"grad_norm": 0.184305801987648,
"learning_rate": 0.0009977329741293565,
"loss": 1.1199,
"step": 14710
},
{
"epoch": 0.7536350604136801,
"grad_norm": 0.18768514692783356,
"learning_rate": 0.0009938199815643773,
"loss": 1.1451,
"step": 14720
},
{
"epoch": 0.754147040753635,
"grad_norm": 0.17981773614883423,
"learning_rate": 0.0009899223353006738,
"loss": 1.1423,
"step": 14730
},
{
"epoch": 0.75465902109359,
"grad_norm": 0.17722870409488678,
"learning_rate": 0.0009860399751518388,
"loss": 1.1208,
"step": 14740
},
{
"epoch": 0.755171001433545,
"grad_norm": 0.18367789685726166,
"learning_rate": 0.0009821728411675095,
"loss": 1.148,
"step": 14750
},
{
"epoch": 0.7556829817734999,
"grad_norm": 0.18441089987754822,
"learning_rate": 0.0009783208736324418,
"loss": 1.1112,
"step": 14760
},
{
"epoch": 0.7561949621134548,
"grad_norm": 0.1897488385438919,
"learning_rate": 0.000974484013065587,
"loss": 1.1231,
"step": 14770
},
{
"epoch": 0.7567069424534097,
"grad_norm": 0.18716907501220703,
"learning_rate": 0.0009706622002191746,
"loss": 1.1018,
"step": 14780
},
{
"epoch": 0.7572189227933648,
"grad_norm": 0.18121209740638733,
"learning_rate": 0.0009668553760777972,
"loss": 1.1225,
"step": 14790
},
{
"epoch": 0.7577309031333197,
"grad_norm": 0.19911837577819824,
"learning_rate": 0.0009630634818574985,
"loss": 1.1266,
"step": 14800
},
{
"epoch": 0.7582428834732746,
"grad_norm": 0.169275164604187,
"learning_rate": 0.0009592864590048661,
"loss": 1.1152,
"step": 14810
},
{
"epoch": 0.7587548638132295,
"grad_norm": 0.1855994015932083,
"learning_rate": 0.0009555242491961278,
"loss": 1.1318,
"step": 14820
},
{
"epoch": 0.7592668441531846,
"grad_norm": 0.17527516186237335,
"learning_rate": 0.0009517767943362495,
"loss": 1.0988,
"step": 14830
},
{
"epoch": 0.7597788244931395,
"grad_norm": 0.18066614866256714,
"learning_rate": 0.0009480440365580401,
"loss": 1.1097,
"step": 14840
},
{
"epoch": 0.7602908048330944,
"grad_norm": 0.17801222205162048,
"learning_rate": 0.000944325918221256,
"loss": 1.1196,
"step": 14850
},
{
"epoch": 0.7608027851730493,
"grad_norm": 0.19464291632175446,
"learning_rate": 0.0009406223819117125,
"loss": 1.1319,
"step": 14860
},
{
"epoch": 0.7613147655130043,
"grad_norm": 0.1878882348537445,
"learning_rate": 0.0009369333704403964,
"loss": 1.13,
"step": 14870
},
{
"epoch": 0.7618267458529593,
"grad_norm": 0.17626269161701202,
"learning_rate": 0.0009332588268425832,
"loss": 1.1181,
"step": 14880
},
{
"epoch": 0.7623387261929142,
"grad_norm": 0.1895529329776764,
"learning_rate": 0.0009295986943769574,
"loss": 1.1333,
"step": 14890
},
{
"epoch": 0.7628507065328691,
"grad_norm": 0.1784052848815918,
"learning_rate": 0.0009259529165247364,
"loss": 1.1242,
"step": 14900
},
{
"epoch": 0.763362686872824,
"grad_norm": 0.17965124547481537,
"learning_rate": 0.0009223214369887976,
"loss": 1.1258,
"step": 14910
},
{
"epoch": 0.7638746672127791,
"grad_norm": 0.17978616058826447,
"learning_rate": 0.0009187041996928093,
"loss": 1.1125,
"step": 14920
},
{
"epoch": 0.764386647552734,
"grad_norm": 0.18885265290737152,
"learning_rate": 0.0009151011487803643,
"loss": 1.1061,
"step": 14930
},
{
"epoch": 0.7648986278926889,
"grad_norm": 0.18489712476730347,
"learning_rate": 0.0009115122286141184,
"loss": 1.127,
"step": 14940
},
{
"epoch": 0.7654106082326438,
"grad_norm": 0.17437365651130676,
"learning_rate": 0.0009079373837749296,
"loss": 1.1148,
"step": 14950
},
{
"epoch": 0.7659225885725988,
"grad_norm": 0.18147113919258118,
"learning_rate": 0.0009043765590610044,
"loss": 1.1014,
"step": 14960
},
{
"epoch": 0.7664345689125538,
"grad_norm": 0.17263419926166534,
"learning_rate": 0.0009008296994870436,
"loss": 1.1118,
"step": 14970
},
{
"epoch": 0.7669465492525087,
"grad_norm": 0.17921820282936096,
"learning_rate": 0.000897296750283394,
"loss": 1.1245,
"step": 14980
},
{
"epoch": 0.7674585295924636,
"grad_norm": 0.17663663625717163,
"learning_rate": 0.0008937776568952028,
"loss": 1.1078,
"step": 14990
},
{
"epoch": 0.7679705099324186,
"grad_norm": 0.17961500585079193,
"learning_rate": 0.0008902723649815751,
"loss": 1.0977,
"step": 15000
},
{
"epoch": 0.7684824902723736,
"grad_norm": 0.18368123471736908,
"learning_rate": 0.0008867808204147341,
"loss": 1.103,
"step": 15010
},
{
"epoch": 0.7689944706123285,
"grad_norm": 0.18269400298595428,
"learning_rate": 0.0008833029692791867,
"loss": 1.108,
"step": 15020
},
{
"epoch": 0.7695064509522834,
"grad_norm": 0.1727774292230606,
"learning_rate": 0.0008798387578708893,
"loss": 1.1033,
"step": 15030
},
{
"epoch": 0.7700184312922383,
"grad_norm": 0.18222136795520782,
"learning_rate": 0.0008763881326964195,
"loss": 1.1089,
"step": 15040
},
{
"epoch": 0.7705304116321933,
"grad_norm": 0.1899970918893814,
"learning_rate": 0.0008729510404721502,
"loss": 1.1039,
"step": 15050
},
{
"epoch": 0.7710423919721483,
"grad_norm": 0.18128469586372375,
"learning_rate": 0.0008695274281234262,
"loss": 1.1078,
"step": 15060
},
{
"epoch": 0.7715543723121032,
"grad_norm": 0.18401475250720978,
"learning_rate": 0.0008661172427837451,
"loss": 1.1023,
"step": 15070
},
{
"epoch": 0.7720663526520581,
"grad_norm": 0.18456844985485077,
"learning_rate": 0.0008627204317939403,
"loss": 1.1187,
"step": 15080
},
{
"epoch": 0.7725783329920131,
"grad_norm": 0.18838796019554138,
"learning_rate": 0.0008593369427013692,
"loss": 1.0908,
"step": 15090
},
{
"epoch": 0.7730903133319681,
"grad_norm": 0.18515382707118988,
"learning_rate": 0.0008559667232591014,
"loss": 1.1099,
"step": 15100
},
{
"epoch": 0.773602293671923,
"grad_norm": 0.18746817111968994,
"learning_rate": 0.0008526097214251135,
"loss": 1.1073,
"step": 15110
},
{
"epoch": 0.7741142740118779,
"grad_norm": 0.18683654069900513,
"learning_rate": 0.0008492658853614846,
"loss": 1.1195,
"step": 15120
},
{
"epoch": 0.7746262543518329,
"grad_norm": 0.17560458183288574,
"learning_rate": 0.0008459351634335962,
"loss": 1.0919,
"step": 15130
},
{
"epoch": 0.7751382346917879,
"grad_norm": 0.17539164423942566,
"learning_rate": 0.0008426175042093346,
"loss": 1.1082,
"step": 15140
},
{
"epoch": 0.7756502150317428,
"grad_norm": 0.17442087829113007,
"learning_rate": 0.0008393128564582973,
"loss": 1.1077,
"step": 15150
},
{
"epoch": 0.7761621953716977,
"grad_norm": 0.17610372602939606,
"learning_rate": 0.0008360211691510009,
"loss": 1.0976,
"step": 15160
},
{
"epoch": 0.7766741757116526,
"grad_norm": 0.18700052797794342,
"learning_rate": 0.0008327423914580938,
"loss": 1.1116,
"step": 15170
},
{
"epoch": 0.7771861560516076,
"grad_norm": 0.18908992409706116,
"learning_rate": 0.0008294764727495717,
"loss": 1.1266,
"step": 15180
},
{
"epoch": 0.7776981363915626,
"grad_norm": 0.17554494738578796,
"learning_rate": 0.0008262233625939947,
"loss": 1.1228,
"step": 15190
},
{
"epoch": 0.7782101167315175,
"grad_norm": 0.1848273128271103,
"learning_rate": 0.0008229830107577095,
"loss": 1.1032,
"step": 15200
},
{
"epoch": 0.7787220970714724,
"grad_norm": 0.1751490831375122,
"learning_rate": 0.0008197553672040732,
"loss": 1.1022,
"step": 15210
},
{
"epoch": 0.7792340774114274,
"grad_norm": 0.19107986986637115,
"learning_rate": 0.0008165403820926805,
"loss": 1.1107,
"step": 15220
},
{
"epoch": 0.7797460577513824,
"grad_norm": 0.17038871347904205,
"learning_rate": 0.000813338005778595,
"loss": 1.0906,
"step": 15230
},
{
"epoch": 0.7802580380913373,
"grad_norm": 0.17573246359825134,
"learning_rate": 0.0008101481888115815,
"loss": 1.1185,
"step": 15240
},
{
"epoch": 0.7807700184312922,
"grad_norm": 0.18138054013252258,
"learning_rate": 0.000806970881935343,
"loss": 1.1068,
"step": 15250
},
{
"epoch": 0.7812819987712472,
"grad_norm": 0.18504558503627777,
"learning_rate": 0.00080380603608676,
"loss": 1.1187,
"step": 15260
},
{
"epoch": 0.7817939791112021,
"grad_norm": 0.1914263665676117,
"learning_rate": 0.0008006536023951326,
"loss": 1.1028,
"step": 15270
},
{
"epoch": 0.7823059594511571,
"grad_norm": 0.17930828034877777,
"learning_rate": 0.0007975135321814267,
"loss": 1.12,
"step": 15280
},
{
"epoch": 0.782817939791112,
"grad_norm": 0.18710237741470337,
"learning_rate": 0.0007943857769575209,
"loss": 1.0943,
"step": 15290
},
{
"epoch": 0.783329920131067,
"grad_norm": 0.18522420525550842,
"learning_rate": 0.0007912702884254589,
"loss": 1.1125,
"step": 15300
},
{
"epoch": 0.7838419004710219,
"grad_norm": 0.17634257674217224,
"learning_rate": 0.0007881670184767039,
"loss": 1.0855,
"step": 15310
},
{
"epoch": 0.7843538808109769,
"grad_norm": 0.1925361305475235,
"learning_rate": 0.0007850759191913941,
"loss": 1.0957,
"step": 15320
},
{
"epoch": 0.7848658611509318,
"grad_norm": 0.18163706362247467,
"learning_rate": 0.0007819969428376047,
"loss": 1.0994,
"step": 15330
},
{
"epoch": 0.7853778414908867,
"grad_norm": 0.1802321821451187,
"learning_rate": 0.0007789300418706098,
"loss": 1.1043,
"step": 15340
},
{
"epoch": 0.7858898218308417,
"grad_norm": 0.20434251427650452,
"learning_rate": 0.0007758751689321484,
"loss": 1.0943,
"step": 15350
},
{
"epoch": 0.7864018021707967,
"grad_norm": 0.1818198412656784,
"learning_rate": 0.0007728322768496924,
"loss": 1.0916,
"step": 15360
},
{
"epoch": 0.7869137825107516,
"grad_norm": 0.18060991168022156,
"learning_rate": 0.0007698013186357197,
"loss": 1.1122,
"step": 15370
},
{
"epoch": 0.7874257628507065,
"grad_norm": 0.18546059727668762,
"learning_rate": 0.0007667822474869874,
"loss": 1.1075,
"step": 15380
},
{
"epoch": 0.7879377431906615,
"grad_norm": 0.18823228776454926,
"learning_rate": 0.0007637750167838097,
"loss": 1.1197,
"step": 15390
},
{
"epoch": 0.7884497235306164,
"grad_norm": 0.17590127885341644,
"learning_rate": 0.0007607795800893374,
"loss": 1.0865,
"step": 15400
},
{
"epoch": 0.7889617038705714,
"grad_norm": 0.18602034449577332,
"learning_rate": 0.000757795891148842,
"loss": 1.1,
"step": 15410
},
{
"epoch": 0.7894736842105263,
"grad_norm": 0.19357922673225403,
"learning_rate": 0.0007548239038889995,
"loss": 1.1015,
"step": 15420
},
{
"epoch": 0.7899856645504812,
"grad_norm": 0.17590965330600739,
"learning_rate": 0.000751863572417181,
"loss": 1.1113,
"step": 15430
},
{
"epoch": 0.7904976448904362,
"grad_norm": 0.1751716434955597,
"learning_rate": 0.0007489148510207429,
"loss": 1.0898,
"step": 15440
},
{
"epoch": 0.7910096252303912,
"grad_norm": 0.17589299380779266,
"learning_rate": 0.000745977694166321,
"loss": 1.0931,
"step": 15450
},
{
"epoch": 0.7915216055703461,
"grad_norm": 0.17544785141944885,
"learning_rate": 0.0007430520564991282,
"loss": 1.0914,
"step": 15460
},
{
"epoch": 0.792033585910301,
"grad_norm": 0.18367989361286163,
"learning_rate": 0.0007401378928422531,
"loss": 1.1043,
"step": 15470
},
{
"epoch": 0.792545566250256,
"grad_norm": 0.17736022174358368,
"learning_rate": 0.0007372351581959634,
"loss": 1.1252,
"step": 15480
},
{
"epoch": 0.7930575465902109,
"grad_norm": 0.18722687661647797,
"learning_rate": 0.0007343438077370098,
"loss": 1.095,
"step": 15490
},
{
"epoch": 0.7935695269301659,
"grad_norm": 0.1756405234336853,
"learning_rate": 0.0007314637968179351,
"loss": 1.1017,
"step": 15500
},
{
"epoch": 0.7940815072701208,
"grad_norm": 0.17875617742538452,
"learning_rate": 0.0007285950809663841,
"loss": 1.0979,
"step": 15510
},
{
"epoch": 0.7945934876100758,
"grad_norm": 0.17093615233898163,
"learning_rate": 0.0007257376158844169,
"loss": 1.0886,
"step": 15520
},
{
"epoch": 0.7951054679500307,
"grad_norm": 0.18361063301563263,
"learning_rate": 0.0007228913574478252,
"loss": 1.1089,
"step": 15530
},
{
"epoch": 0.7956174482899857,
"grad_norm": 0.1857183277606964,
"learning_rate": 0.0007200562617054503,
"loss": 1.0806,
"step": 15540
},
{
"epoch": 0.7961294286299406,
"grad_norm": 0.1974077820777893,
"learning_rate": 0.0007172322848785056,
"loss": 1.088,
"step": 15550
},
{
"epoch": 0.7966414089698955,
"grad_norm": 0.173116534948349,
"learning_rate": 0.0007144193833598987,
"loss": 1.0921,
"step": 15560
},
{
"epoch": 0.7971533893098505,
"grad_norm": 0.17753879725933075,
"learning_rate": 0.0007116175137135599,
"loss": 1.0846,
"step": 15570
},
{
"epoch": 0.7976653696498055,
"grad_norm": 0.1796150505542755,
"learning_rate": 0.0007088266326737707,
"loss": 1.0816,
"step": 15580
},
{
"epoch": 0.7981773499897604,
"grad_norm": 0.17271041870117188,
"learning_rate": 0.0007060466971444953,
"loss": 1.0875,
"step": 15590
},
{
"epoch": 0.7986893303297153,
"grad_norm": 0.1766566038131714,
"learning_rate": 0.0007032776641987162,
"loss": 1.085,
"step": 15600
},
{
"epoch": 0.7992013106696703,
"grad_norm": 0.17464908957481384,
"learning_rate": 0.0007005194910777697,
"loss": 1.0669,
"step": 15610
},
{
"epoch": 0.7997132910096252,
"grad_norm": 0.18235880136489868,
"learning_rate": 0.0006977721351906876,
"loss": 1.0983,
"step": 15620
},
{
"epoch": 0.8002252713495802,
"grad_norm": 0.17582911252975464,
"learning_rate": 0.0006950355541135377,
"loss": 1.0748,
"step": 15630
},
{
"epoch": 0.8007372516895351,
"grad_norm": 0.18529601395130157,
"learning_rate": 0.0006923097055887701,
"loss": 1.082,
"step": 15640
},
{
"epoch": 0.80124923202949,
"grad_norm": 0.18771891295909882,
"learning_rate": 0.000689594547524564,
"loss": 1.0792,
"step": 15650
},
{
"epoch": 0.801761212369445,
"grad_norm": 0.18567664921283722,
"learning_rate": 0.0006868900379941773,
"loss": 1.0929,
"step": 15660
},
{
"epoch": 0.8022731927094,
"grad_norm": 0.18062008917331696,
"learning_rate": 0.0006841961352353004,
"loss": 1.0952,
"step": 15670
},
{
"epoch": 0.8027851730493549,
"grad_norm": 0.17383413016796112,
"learning_rate": 0.0006815127976494104,
"loss": 1.1029,
"step": 15680
},
{
"epoch": 0.8032971533893098,
"grad_norm": 0.17971891164779663,
"learning_rate": 0.0006788399838011287,
"loss": 1.1032,
"step": 15690
},
{
"epoch": 0.8038091337292648,
"grad_norm": 0.17936407029628754,
"learning_rate": 0.0006761776524175815,
"loss": 1.1001,
"step": 15700
},
{
"epoch": 0.8043211140692197,
"grad_norm": 0.18222102522850037,
"learning_rate": 0.0006735257623877627,
"loss": 1.0872,
"step": 15710
},
{
"epoch": 0.8048330944091747,
"grad_norm": 0.18015074729919434,
"learning_rate": 0.0006708842727618985,
"loss": 1.0991,
"step": 15720
},
{
"epoch": 0.8053450747491296,
"grad_norm": 0.17375022172927856,
"learning_rate": 0.0006682531427508156,
"loss": 1.0623,
"step": 15730
},
{
"epoch": 0.8058570550890846,
"grad_norm": 0.1764671802520752,
"learning_rate": 0.0006656323317253108,
"loss": 1.0984,
"step": 15740
},
{
"epoch": 0.8063690354290395,
"grad_norm": 0.1692001074552536,
"learning_rate": 0.0006630217992155241,
"loss": 1.0859,
"step": 15750
},
{
"epoch": 0.8068810157689945,
"grad_norm": 0.17819392681121826,
"learning_rate": 0.0006604215049103134,
"loss": 1.0899,
"step": 15760
},
{
"epoch": 0.8073929961089494,
"grad_norm": 0.17758633196353912,
"learning_rate": 0.0006578314086566325,
"loss": 1.0826,
"step": 15770
},
{
"epoch": 0.8079049764489044,
"grad_norm": 0.17600396275520325,
"learning_rate": 0.0006552514704589104,
"loss": 1.0912,
"step": 15780
},
{
"epoch": 0.8084169567888593,
"grad_norm": 0.177523672580719,
"learning_rate": 0.0006526816504784343,
"loss": 1.0814,
"step": 15790
},
{
"epoch": 0.8089289371288143,
"grad_norm": 0.17935074865818024,
"learning_rate": 0.0006501219090327343,
"loss": 1.0859,
"step": 15800
},
{
"epoch": 0.8094409174687692,
"grad_norm": 0.18292473256587982,
"learning_rate": 0.0006475722065949703,
"loss": 1.0716,
"step": 15810
},
{
"epoch": 0.8099528978087241,
"grad_norm": 0.18235322833061218,
"learning_rate": 0.000645032503793322,
"loss": 1.085,
"step": 15820
},
{
"epoch": 0.8104648781486791,
"grad_norm": 0.18412081897258759,
"learning_rate": 0.0006425027614103806,
"loss": 1.0872,
"step": 15830
},
{
"epoch": 0.810976858488634,
"grad_norm": 0.17389538884162903,
"learning_rate": 0.0006399829403825436,
"loss": 1.0935,
"step": 15840
},
{
"epoch": 0.811488838828589,
"grad_norm": 0.17470002174377441,
"learning_rate": 0.0006374730017994116,
"loss": 1.0603,
"step": 15850
},
{
"epoch": 0.8120008191685439,
"grad_norm": 0.17814920842647552,
"learning_rate": 0.0006349729069031867,
"loss": 1.1096,
"step": 15860
},
{
"epoch": 0.8125127995084989,
"grad_norm": 0.18193413317203522,
"learning_rate": 0.000632482617088075,
"loss": 1.076,
"step": 15870
},
{
"epoch": 0.8130247798484538,
"grad_norm": 0.18022698163986206,
"learning_rate": 0.0006300020938996901,
"loss": 1.0868,
"step": 15880
},
{
"epoch": 0.8135367601884088,
"grad_norm": 0.16944915056228638,
"learning_rate": 0.0006275312990344587,
"loss": 1.0857,
"step": 15890
},
{
"epoch": 0.8140487405283637,
"grad_norm": 0.17860791087150574,
"learning_rate": 0.0006250701943390303,
"loss": 1.0885,
"step": 15900
},
{
"epoch": 0.8145607208683187,
"grad_norm": 0.169233039021492,
"learning_rate": 0.0006226187418096868,
"loss": 1.0701,
"step": 15910
},
{
"epoch": 0.8150727012082736,
"grad_norm": 0.18404126167297363,
"learning_rate": 0.0006201769035917569,
"loss": 1.0862,
"step": 15920
},
{
"epoch": 0.8155846815482285,
"grad_norm": 0.1732415407896042,
"learning_rate": 0.0006177446419790303,
"loss": 1.0552,
"step": 15930
},
{
"epoch": 0.8160966618881835,
"grad_norm": 0.17680327594280243,
"learning_rate": 0.0006153219194131765,
"loss": 1.0839,
"step": 15940
},
{
"epoch": 0.8166086422281384,
"grad_norm": 0.168556347489357,
"learning_rate": 0.000612908698483164,
"loss": 1.0628,
"step": 15950
},
{
"epoch": 0.8171206225680934,
"grad_norm": 0.1826118528842926,
"learning_rate": 0.0006105049419246835,
"loss": 1.0855,
"step": 15960
},
{
"epoch": 0.8176326029080483,
"grad_norm": 0.17182965576648712,
"learning_rate": 0.0006081106126195717,
"loss": 1.0669,
"step": 15970
},
{
"epoch": 0.8181445832480033,
"grad_norm": 0.16935127973556519,
"learning_rate": 0.0006057256735952383,
"loss": 1.083,
"step": 15980
},
{
"epoch": 0.8186565635879582,
"grad_norm": 0.17464590072631836,
"learning_rate": 0.0006033500880240954,
"loss": 1.0671,
"step": 15990
},
{
"epoch": 0.8191685439279132,
"grad_norm": 0.17747105658054352,
"learning_rate": 0.0006009838192229885,
"loss": 1.0678,
"step": 16000
},
{
"epoch": 0.8196805242678681,
"grad_norm": 0.17449192702770233,
"learning_rate": 0.0005986268306526304,
"loss": 1.0796,
"step": 16010
},
{
"epoch": 0.8201925046078231,
"grad_norm": 0.17097654938697815,
"learning_rate": 0.0005962790859170364,
"loss": 1.0778,
"step": 16020
},
{
"epoch": 0.820704484947778,
"grad_norm": 0.16904379427433014,
"learning_rate": 0.0005939405487629626,
"loss": 1.0843,
"step": 16030
},
{
"epoch": 0.821216465287733,
"grad_norm": 0.17497345805168152,
"learning_rate": 0.0005916111830793466,
"loss": 1.101,
"step": 16040
},
{
"epoch": 0.8217284456276879,
"grad_norm": 0.1789994090795517,
"learning_rate": 0.0005892909528967487,
"loss": 1.0845,
"step": 16050
},
{
"epoch": 0.8222404259676428,
"grad_norm": 0.1678200364112854,
"learning_rate": 0.0005869798223867978,
"loss": 1.0606,
"step": 16060
},
{
"epoch": 0.8227524063075978,
"grad_norm": 0.17383365333080292,
"learning_rate": 0.000584677755861637,
"loss": 1.0674,
"step": 16070
},
{
"epoch": 0.8232643866475527,
"grad_norm": 0.17335745692253113,
"learning_rate": 0.0005823847177733732,
"loss": 1.0965,
"step": 16080
},
{
"epoch": 0.8237763669875077,
"grad_norm": 0.16967058181762695,
"learning_rate": 0.0005801006727135282,
"loss": 1.0677,
"step": 16090
},
{
"epoch": 0.8242883473274626,
"grad_norm": 0.16847650706768036,
"learning_rate": 0.0005778255854124912,
"loss": 1.0791,
"step": 16100
},
{
"epoch": 0.8248003276674176,
"grad_norm": 0.17251423001289368,
"learning_rate": 0.0005755594207389755,
"loss": 1.0806,
"step": 16110
},
{
"epoch": 0.8253123080073725,
"grad_norm": 0.17555896937847137,
"learning_rate": 0.0005733021436994743,
"loss": 1.066,
"step": 16120
},
{
"epoch": 0.8258242883473275,
"grad_norm": 0.16997992992401123,
"learning_rate": 0.000571053719437722,
"loss": 1.0876,
"step": 16130
},
{
"epoch": 0.8263362686872824,
"grad_norm": 0.17845116555690765,
"learning_rate": 0.0005688141132341551,
"loss": 1.085,
"step": 16140
},
{
"epoch": 0.8268482490272373,
"grad_norm": 0.1836511194705963,
"learning_rate": 0.0005665832905053756,
"loss": 1.0769,
"step": 16150
},
{
"epoch": 0.8273602293671923,
"grad_norm": 0.1753719449043274,
"learning_rate": 0.0005643612168036182,
"loss": 1.0742,
"step": 16160
},
{
"epoch": 0.8278722097071473,
"grad_norm": 0.17152993381023407,
"learning_rate": 0.0005621478578162176,
"loss": 1.0761,
"step": 16170
},
{
"epoch": 0.8283841900471022,
"grad_norm": 0.18273817002773285,
"learning_rate": 0.0005599431793650786,
"loss": 1.0803,
"step": 16180
},
{
"epoch": 0.8288961703870571,
"grad_norm": 0.1865053027868271,
"learning_rate": 0.0005577471474061485,
"loss": 1.0695,
"step": 16190
},
{
"epoch": 0.8294081507270121,
"grad_norm": 0.16600672900676727,
"learning_rate": 0.0005555597280288918,
"loss": 1.0844,
"step": 16200
},
{
"epoch": 0.829920131066967,
"grad_norm": 0.1850479394197464,
"learning_rate": 0.0005533808874557656,
"loss": 1.0658,
"step": 16210
},
{
"epoch": 0.830432111406922,
"grad_norm": 0.17687514424324036,
"learning_rate": 0.000551210592041699,
"loss": 1.072,
"step": 16220
},
{
"epoch": 0.8309440917468769,
"grad_norm": 0.1833869218826294,
"learning_rate": 0.000549048808273573,
"loss": 1.0739,
"step": 16230
},
{
"epoch": 0.8314560720868319,
"grad_norm": 0.1750813126564026,
"learning_rate": 0.0005468955027697031,
"loss": 1.0851,
"step": 16240
},
{
"epoch": 0.8319680524267868,
"grad_norm": 0.18595030903816223,
"learning_rate": 0.0005447506422793241,
"loss": 1.0615,
"step": 16250
},
{
"epoch": 0.8324800327667418,
"grad_norm": 0.1711542159318924,
"learning_rate": 0.0005426141936820762,
"loss": 1.0689,
"step": 16260
},
{
"epoch": 0.8329920131066967,
"grad_norm": 0.18596914410591125,
"learning_rate": 0.000540486123987494,
"loss": 1.0574,
"step": 16270
},
{
"epoch": 0.8335039934466516,
"grad_norm": 0.17115946114063263,
"learning_rate": 0.0005383664003344964,
"loss": 1.0703,
"step": 16280
},
{
"epoch": 0.8340159737866066,
"grad_norm": 0.1802951842546463,
"learning_rate": 0.0005362549899908805,
"loss": 1.074,
"step": 16290
},
{
"epoch": 0.8345279541265616,
"grad_norm": 0.18504950404167175,
"learning_rate": 0.0005341518603528143,
"loss": 1.0747,
"step": 16300
},
{
"epoch": 0.8350399344665165,
"grad_norm": 0.17508040368556976,
"learning_rate": 0.000532056978944335,
"loss": 1.0784,
"step": 16310
},
{
"epoch": 0.8355519148064714,
"grad_norm": 0.1866855025291443,
"learning_rate": 0.0005299703134168463,
"loss": 1.0799,
"step": 16320
},
{
"epoch": 0.8360638951464264,
"grad_norm": 0.16678877174854279,
"learning_rate": 0.0005278918315486196,
"loss": 1.0531,
"step": 16330
},
{
"epoch": 0.8365758754863813,
"grad_norm": 0.1872544288635254,
"learning_rate": 0.000525821501244296,
"loss": 1.0768,
"step": 16340
},
{
"epoch": 0.8370878558263363,
"grad_norm": 0.17887745797634125,
"learning_rate": 0.0005237592905343908,
"loss": 1.0552,
"step": 16350
},
{
"epoch": 0.8375998361662912,
"grad_norm": 0.1764066219329834,
"learning_rate": 0.0005217051675748001,
"loss": 1.0511,
"step": 16360
},
{
"epoch": 0.8381118165062461,
"grad_norm": 0.17765092849731445,
"learning_rate": 0.0005196591006463087,
"loss": 1.0645,
"step": 16370
},
{
"epoch": 0.8386237968462011,
"grad_norm": 0.17197942733764648,
"learning_rate": 0.0005176210581541006,
"loss": 1.0561,
"step": 16380
},
{
"epoch": 0.8391357771861561,
"grad_norm": 0.1778382807970047,
"learning_rate": 0.0005155910086272709,
"loss": 1.0818,
"step": 16390
},
{
"epoch": 0.839647757526111,
"grad_norm": 0.1758384257555008,
"learning_rate": 0.00051356892071834,
"loss": 1.0755,
"step": 16400
},
{
"epoch": 0.8401597378660659,
"grad_norm": 0.17765450477600098,
"learning_rate": 0.0005115547632027694,
"loss": 1.0622,
"step": 16410
},
{
"epoch": 0.8406717182060209,
"grad_norm": 0.1722906529903412,
"learning_rate": 0.0005095485049784797,
"loss": 1.0562,
"step": 16420
},
{
"epoch": 0.8411836985459759,
"grad_norm": 0.18041284382343292,
"learning_rate": 0.0005075501150653699,
"loss": 1.0563,
"step": 16430
},
{
"epoch": 0.8416956788859308,
"grad_norm": 0.1721327304840088,
"learning_rate": 0.0005055595626048399,
"loss": 1.0872,
"step": 16440
},
{
"epoch": 0.8422076592258857,
"grad_norm": 0.17623233795166016,
"learning_rate": 0.000503576816859313,
"loss": 1.0768,
"step": 16450
},
{
"epoch": 0.8427196395658406,
"grad_norm": 0.1824178546667099,
"learning_rate": 0.000501601847211762,
"loss": 1.0773,
"step": 16460
},
{
"epoch": 0.8432316199057956,
"grad_norm": 0.17492622137069702,
"learning_rate": 0.0004996346231652357,
"loss": 1.0751,
"step": 16470
},
{
"epoch": 0.8437436002457506,
"grad_norm": 0.19331291317939758,
"learning_rate": 0.0004976751143423888,
"loss": 1.0522,
"step": 16480
},
{
"epoch": 0.8442555805857055,
"grad_norm": 0.17318172752857208,
"learning_rate": 0.0004957232904850122,
"loss": 1.0611,
"step": 16490
},
{
"epoch": 0.8447675609256604,
"grad_norm": 0.18951846659183502,
"learning_rate": 0.0004937791214535661,
"loss": 1.0584,
"step": 16500
},
{
"epoch": 0.8452795412656154,
"grad_norm": 0.17713989317417145,
"learning_rate": 0.0004918425772267145,
"loss": 1.0542,
"step": 16510
},
{
"epoch": 0.8457915216055704,
"grad_norm": 0.16759324073791504,
"learning_rate": 0.0004899136279008613,
"loss": 1.0689,
"step": 16520
},
{
"epoch": 0.8463035019455253,
"grad_norm": 0.18664461374282837,
"learning_rate": 0.000487992243689689,
"loss": 1.0732,
"step": 16530
},
{
"epoch": 0.8468154822854802,
"grad_norm": 0.17348751425743103,
"learning_rate": 0.00048607839492369886,
"loss": 1.0762,
"step": 16540
},
{
"epoch": 0.8473274626254352,
"grad_norm": 0.17233343422412872,
"learning_rate": 0.0004841720520497518,
"loss": 1.0579,
"step": 16550
},
{
"epoch": 0.8478394429653902,
"grad_norm": 0.18232837319374084,
"learning_rate": 0.0004822731856306133,
"loss": 1.0576,
"step": 16560
},
{
"epoch": 0.8483514233053451,
"grad_norm": 0.17330168187618256,
"learning_rate": 0.000480381766344498,
"loss": 1.044,
"step": 16570
},
{
"epoch": 0.8488634036453,
"grad_norm": 0.1745171695947647,
"learning_rate": 0.00047849776498461725,
"loss": 1.07,
"step": 16580
},
{
"epoch": 0.8493753839852549,
"grad_norm": 0.1749190390110016,
"learning_rate": 0.00047662115245872787,
"loss": 1.0666,
"step": 16590
},
{
"epoch": 0.84988736432521,
"grad_norm": 0.17629800736904144,
"learning_rate": 0.0004747518997886834,
"loss": 1.0694,
"step": 16600
},
{
"epoch": 0.8503993446651649,
"grad_norm": 0.17141848802566528,
"learning_rate": 0.00047288997810998585,
"loss": 1.0752,
"step": 16610
},
{
"epoch": 0.8509113250051198,
"grad_norm": 0.16317421197891235,
"learning_rate": 0.00047103535867134064,
"loss": 1.0575,
"step": 16620
},
{
"epoch": 0.8514233053450747,
"grad_norm": 0.1698952317237854,
"learning_rate": 0.0004691880128342126,
"loss": 1.054,
"step": 16630
},
{
"epoch": 0.8519352856850297,
"grad_norm": 0.17862023413181305,
"learning_rate": 0.00046734791207238334,
"loss": 1.0578,
"step": 16640
},
{
"epoch": 0.8524472660249847,
"grad_norm": 0.17291221022605896,
"learning_rate": 0.0004655150279715109,
"loss": 1.0614,
"step": 16650
},
{
"epoch": 0.8529592463649396,
"grad_norm": 0.18683776259422302,
"learning_rate": 0.0004636893322286915,
"loss": 1.0587,
"step": 16660
},
{
"epoch": 0.8534712267048945,
"grad_norm": 0.17157678306102753,
"learning_rate": 0.00046187079665202144,
"loss": 1.0876,
"step": 16670
},
{
"epoch": 0.8539832070448494,
"grad_norm": 0.16680538654327393,
"learning_rate": 0.0004600593931601628,
"loss": 1.0608,
"step": 16680
},
{
"epoch": 0.8544951873848045,
"grad_norm": 0.17904032766819,
"learning_rate": 0.00045825509378190934,
"loss": 1.0622,
"step": 16690
},
{
"epoch": 0.8550071677247594,
"grad_norm": 0.17377473413944244,
"learning_rate": 0.0004564578706557547,
"loss": 1.0761,
"step": 16700
},
{
"epoch": 0.8555191480647143,
"grad_norm": 0.17606638371944427,
"learning_rate": 0.0004546676960294617,
"loss": 1.0627,
"step": 16710
},
{
"epoch": 0.8560311284046692,
"grad_norm": 0.1655128300189972,
"learning_rate": 0.0004528845422596346,
"loss": 1.0579,
"step": 16720
},
{
"epoch": 0.8565431087446242,
"grad_norm": 0.185993954539299,
"learning_rate": 0.0004511083818112919,
"loss": 1.0604,
"step": 16730
},
{
"epoch": 0.8570550890845792,
"grad_norm": 0.18218767642974854,
"learning_rate": 0.00044933918725744066,
"loss": 1.0595,
"step": 16740
},
{
"epoch": 0.8575670694245341,
"grad_norm": 0.16947178542613983,
"learning_rate": 0.000447576931278654,
"loss": 1.0494,
"step": 16750
},
{
"epoch": 0.858079049764489,
"grad_norm": 0.17753495275974274,
"learning_rate": 0.00044582158666264793,
"loss": 1.0522,
"step": 16760
},
{
"epoch": 0.858591030104444,
"grad_norm": 0.1756090372800827,
"learning_rate": 0.0004440731263038627,
"loss": 1.074,
"step": 16770
},
{
"epoch": 0.859103010444399,
"grad_norm": 0.18287988007068634,
"learning_rate": 0.00044233152320304276,
"loss": 1.0883,
"step": 16780
},
{
"epoch": 0.8596149907843539,
"grad_norm": 0.18234935402870178,
"learning_rate": 0.0004405967504668205,
"loss": 1.0481,
"step": 16790
},
{
"epoch": 0.8601269711243088,
"grad_norm": 0.17408689856529236,
"learning_rate": 0.0004388687813073016,
"loss": 1.0672,
"step": 16800
},
{
"epoch": 0.8606389514642637,
"grad_norm": 0.1746188998222351,
"learning_rate": 0.00043714758904165,
"loss": 1.0581,
"step": 16810
},
{
"epoch": 0.8611509318042188,
"grad_norm": 0.17414236068725586,
"learning_rate": 0.0004354331470916772,
"loss": 1.0296,
"step": 16820
},
{
"epoch": 0.8616629121441737,
"grad_norm": 0.17176198959350586,
"learning_rate": 0.00043372542898343074,
"loss": 1.048,
"step": 16830
},
{
"epoch": 0.8621748924841286,
"grad_norm": 0.17366254329681396,
"learning_rate": 0.0004320244083467865,
"loss": 1.0584,
"step": 16840
},
{
"epoch": 0.8626868728240835,
"grad_norm": 0.17431634664535522,
"learning_rate": 0.0004303300589150403,
"loss": 1.0747,
"step": 16850
},
{
"epoch": 0.8631988531640385,
"grad_norm": 0.17983673512935638,
"learning_rate": 0.0004286423545245033,
"loss": 1.0477,
"step": 16860
},
{
"epoch": 0.8637108335039935,
"grad_norm": 0.17973174154758453,
"learning_rate": 0.00042696126911409766,
"loss": 1.0733,
"step": 16870
},
{
"epoch": 0.8642228138439484,
"grad_norm": 0.17209124565124512,
"learning_rate": 0.0004252867767249536,
"loss": 1.0553,
"step": 16880
},
{
"epoch": 0.8647347941839033,
"grad_norm": 0.17548377811908722,
"learning_rate": 0.0004236188515000098,
"loss": 1.0317,
"step": 16890
},
{
"epoch": 0.8652467745238582,
"grad_norm": 0.1856032758951187,
"learning_rate": 0.0004219574676836124,
"loss": 1.0645,
"step": 16900
},
{
"epoch": 0.8657587548638133,
"grad_norm": 0.171828031539917,
"learning_rate": 0.0004203025996211187,
"loss": 1.0468,
"step": 16910
},
{
"epoch": 0.8662707352037682,
"grad_norm": 0.1737641543149948,
"learning_rate": 0.00041865422175850074,
"loss": 1.0593,
"step": 16920
},
{
"epoch": 0.8667827155437231,
"grad_norm": 0.17497050762176514,
"learning_rate": 0.00041701230864194997,
"loss": 1.0558,
"step": 16930
},
{
"epoch": 0.867294695883678,
"grad_norm": 0.1742735356092453,
"learning_rate": 0.00041537683491748515,
"loss": 1.0524,
"step": 16940
},
{
"epoch": 0.8678066762236331,
"grad_norm": 0.16955190896987915,
"learning_rate": 0.00041374777533055996,
"loss": 1.0734,
"step": 16950
},
{
"epoch": 0.868318656563588,
"grad_norm": 0.17131267488002777,
"learning_rate": 0.00041212510472567404,
"loss": 1.047,
"step": 16960
},
{
"epoch": 0.8688306369035429,
"grad_norm": 0.18686212599277496,
"learning_rate": 0.00041050879804598354,
"loss": 1.0628,
"step": 16970
},
{
"epoch": 0.8693426172434978,
"grad_norm": 0.18018223345279694,
"learning_rate": 0.0004088988303329146,
"loss": 1.0727,
"step": 16980
},
{
"epoch": 0.8698545975834528,
"grad_norm": 0.17378225922584534,
"learning_rate": 0.00040729517672577834,
"loss": 1.0608,
"step": 16990
},
{
"epoch": 0.8703665779234078,
"grad_norm": 0.17299434542655945,
"learning_rate": 0.0004056978124613862,
"loss": 1.0572,
"step": 17000
},
{
"epoch": 0.8708785582633627,
"grad_norm": 0.17272843420505524,
"learning_rate": 0.0004041067128736684,
"loss": 1.068,
"step": 17010
},
{
"epoch": 0.8713905386033176,
"grad_norm": 0.17482733726501465,
"learning_rate": 0.0004025218533932921,
"loss": 1.0434,
"step": 17020
},
{
"epoch": 0.8719025189432725,
"grad_norm": 0.17604181170463562,
"learning_rate": 0.00040094320954728313,
"loss": 1.0473,
"step": 17030
},
{
"epoch": 0.8724144992832276,
"grad_norm": 0.17563997209072113,
"learning_rate": 0.000399370756958647,
"loss": 1.0326,
"step": 17040
},
{
"epoch": 0.8729264796231825,
"grad_norm": 0.17245963215827942,
"learning_rate": 0.00039780447134599286,
"loss": 1.0473,
"step": 17050
},
{
"epoch": 0.8734384599631374,
"grad_norm": 0.1761290282011032,
"learning_rate": 0.00039624432852315933,
"loss": 1.0521,
"step": 17060
},
{
"epoch": 0.8739504403030923,
"grad_norm": 0.17559461295604706,
"learning_rate": 0.0003946903043988396,
"loss": 1.0499,
"step": 17070
},
{
"epoch": 0.8744624206430474,
"grad_norm": 0.16970165073871613,
"learning_rate": 0.00039314237497621053,
"loss": 1.0653,
"step": 17080
},
{
"epoch": 0.8749744009830023,
"grad_norm": 0.1792786717414856,
"learning_rate": 0.00039160051635256165,
"loss": 1.0554,
"step": 17090
},
{
"epoch": 0.8754863813229572,
"grad_norm": 0.16863805055618286,
"learning_rate": 0.0003900647047189262,
"loss": 1.0524,
"step": 17100
},
{
"epoch": 0.8759983616629121,
"grad_norm": 0.1794777661561966,
"learning_rate": 0.0003885349163597133,
"loss": 1.0741,
"step": 17110
},
{
"epoch": 0.876510342002867,
"grad_norm": 0.1949402540922165,
"learning_rate": 0.0003870111276523419,
"loss": 1.0458,
"step": 17120
},
{
"epoch": 0.8770223223428221,
"grad_norm": 0.17837046086788177,
"learning_rate": 0.0003854933150668761,
"loss": 1.0484,
"step": 17130
},
{
"epoch": 0.877534302682777,
"grad_norm": 0.16682222485542297,
"learning_rate": 0.00038398145516566133,
"loss": 1.0643,
"step": 17140
},
{
"epoch": 0.8780462830227319,
"grad_norm": 0.17241717875003815,
"learning_rate": 0.00038247552460296324,
"loss": 1.0561,
"step": 17150
},
{
"epoch": 0.8785582633626868,
"grad_norm": 0.16557161509990692,
"learning_rate": 0.00038097550012460626,
"loss": 1.0614,
"step": 17160
},
{
"epoch": 0.8790702437026419,
"grad_norm": 0.17597849667072296,
"learning_rate": 0.00037948135856761536,
"loss": 1.0541,
"step": 17170
},
{
"epoch": 0.8795822240425968,
"grad_norm": 0.17368751764297485,
"learning_rate": 0.00037799307685985786,
"loss": 1.0482,
"step": 17180
},
{
"epoch": 0.8800942043825517,
"grad_norm": 0.17278683185577393,
"learning_rate": 0.00037651063201968706,
"loss": 1.0493,
"step": 17190
},
{
"epoch": 0.8806061847225066,
"grad_norm": 0.17373493313789368,
"learning_rate": 0.00037503400115558816,
"loss": 1.0547,
"step": 17200
},
{
"epoch": 0.8811181650624617,
"grad_norm": 0.1761094480752945,
"learning_rate": 0.0003735631614658236,
"loss": 1.0476,
"step": 17210
},
{
"epoch": 0.8816301454024166,
"grad_norm": 0.1749420464038849,
"learning_rate": 0.00037209809023808216,
"loss": 1.0313,
"step": 17220
},
{
"epoch": 0.8821421257423715,
"grad_norm": 0.1756523847579956,
"learning_rate": 0.0003706387648491272,
"loss": 1.0551,
"step": 17230
},
{
"epoch": 0.8826541060823264,
"grad_norm": 0.1767933964729309,
"learning_rate": 0.0003691851627644478,
"loss": 1.0385,
"step": 17240
},
{
"epoch": 0.8831660864222813,
"grad_norm": 0.17991852760314941,
"learning_rate": 0.00036773726153791126,
"loss": 1.0534,
"step": 17250
},
{
"epoch": 0.8836780667622364,
"grad_norm": 0.17097926139831543,
"learning_rate": 0.00036629503881141533,
"loss": 1.0424,
"step": 17260
},
{
"epoch": 0.8841900471021913,
"grad_norm": 0.1836550533771515,
"learning_rate": 0.00036485847231454427,
"loss": 1.0627,
"step": 17270
},
{
"epoch": 0.8847020274421462,
"grad_norm": 0.18745499849319458,
"learning_rate": 0.00036342753986422373,
"loss": 1.0475,
"step": 17280
},
{
"epoch": 0.8852140077821011,
"grad_norm": 0.17117556929588318,
"learning_rate": 0.00036200221936437925,
"loss": 1.0457,
"step": 17290
},
{
"epoch": 0.8857259881220562,
"grad_norm": 0.17555800080299377,
"learning_rate": 0.0003605824888055944,
"loss": 1.0505,
"step": 17300
},
{
"epoch": 0.8862379684620111,
"grad_norm": 0.17367680370807648,
"learning_rate": 0.00035916832626477105,
"loss": 1.0433,
"step": 17310
},
{
"epoch": 0.886749948801966,
"grad_norm": 0.16771985590457916,
"learning_rate": 0.0003577597099047911,
"loss": 1.0405,
"step": 17320
},
{
"epoch": 0.8872619291419209,
"grad_norm": 0.17749017477035522,
"learning_rate": 0.00035635661797417894,
"loss": 1.0326,
"step": 17330
},
{
"epoch": 0.8877739094818758,
"grad_norm": 0.1756659597158432,
"learning_rate": 0.0003549590288067658,
"loss": 1.0481,
"step": 17340
},
{
"epoch": 0.8882858898218309,
"grad_norm": 0.17804957926273346,
"learning_rate": 0.00035356692082135497,
"loss": 1.0348,
"step": 17350
},
{
"epoch": 0.8887978701617858,
"grad_norm": 0.17013497650623322,
"learning_rate": 0.000352180272521389,
"loss": 1.0444,
"step": 17360
},
{
"epoch": 0.8893098505017407,
"grad_norm": 0.16462627053260803,
"learning_rate": 0.000350799062494617,
"loss": 1.0473,
"step": 17370
},
{
"epoch": 0.8898218308416956,
"grad_norm": 0.18292909860610962,
"learning_rate": 0.00034942326941276463,
"loss": 1.0548,
"step": 17380
},
{
"epoch": 0.8903338111816507,
"grad_norm": 0.16778182983398438,
"learning_rate": 0.00034805287203120474,
"loss": 1.0486,
"step": 17390
},
{
"epoch": 0.8908457915216056,
"grad_norm": 0.17783689498901367,
"learning_rate": 0.0003466878491886288,
"loss": 1.0422,
"step": 17400
},
{
"epoch": 0.8913577718615605,
"grad_norm": 0.17219282686710358,
"learning_rate": 0.0003453281798067208,
"loss": 1.036,
"step": 17410
},
{
"epoch": 0.8918697522015154,
"grad_norm": 0.17862632870674133,
"learning_rate": 0.00034397384288983114,
"loss": 1.0441,
"step": 17420
},
{
"epoch": 0.8923817325414705,
"grad_norm": 0.17450949549674988,
"learning_rate": 0.00034262481752465293,
"loss": 1.0629,
"step": 17430
},
{
"epoch": 0.8928937128814254,
"grad_norm": 0.17378470301628113,
"learning_rate": 0.00034128108287989866,
"loss": 1.0322,
"step": 17440
},
{
"epoch": 0.8934056932213803,
"grad_norm": 0.17379970848560333,
"learning_rate": 0.00033994261820597885,
"loss": 1.0553,
"step": 17450
},
{
"epoch": 0.8939176735613352,
"grad_norm": 0.17971958220005035,
"learning_rate": 0.00033860940283468143,
"loss": 1.0532,
"step": 17460
},
{
"epoch": 0.8944296539012901,
"grad_norm": 0.17435471713542938,
"learning_rate": 0.0003372814161788526,
"loss": 1.0289,
"step": 17470
},
{
"epoch": 0.8949416342412452,
"grad_norm": 0.17900234460830688,
"learning_rate": 0.00033595863773207914,
"loss": 1.0407,
"step": 17480
},
{
"epoch": 0.8954536145812001,
"grad_norm": 0.1703522503376007,
"learning_rate": 0.00033464104706837144,
"loss": 1.0505,
"step": 17490
},
{
"epoch": 0.895965594921155,
"grad_norm": 0.1772749274969101,
"learning_rate": 0.00033332862384184833,
"loss": 1.0504,
"step": 17500
},
{
"epoch": 0.8964775752611099,
"grad_norm": 0.19156505167484283,
"learning_rate": 0.0003320213477864227,
"loss": 1.0537,
"step": 17510
},
{
"epoch": 0.896989555601065,
"grad_norm": 0.17889319360256195,
"learning_rate": 0.00033071919871548877,
"loss": 1.0371,
"step": 17520
},
{
"epoch": 0.8975015359410199,
"grad_norm": 0.17776621878147125,
"learning_rate": 0.0003294221565216104,
"loss": 1.0498,
"step": 17530
},
{
"epoch": 0.8980135162809748,
"grad_norm": 0.1731380671262741,
"learning_rate": 0.0003281302011762101,
"loss": 1.048,
"step": 17540
},
{
"epoch": 0.8985254966209297,
"grad_norm": 0.17784886062145233,
"learning_rate": 0.0003268433127292607,
"loss": 1.0477,
"step": 17550
},
{
"epoch": 0.8990374769608847,
"grad_norm": 0.17313584685325623,
"learning_rate": 0.00032556147130897615,
"loss": 1.0323,
"step": 17560
},
{
"epoch": 0.8995494573008397,
"grad_norm": 0.17907077074050903,
"learning_rate": 0.00032428465712150536,
"loss": 1.0527,
"step": 17570
},
{
"epoch": 0.9000614376407946,
"grad_norm": 0.1737951934337616,
"learning_rate": 0.0003230128504506268,
"loss": 1.036,
"step": 17580
},
{
"epoch": 0.9005734179807495,
"grad_norm": 0.17653332650661469,
"learning_rate": 0.00032174603165744314,
"loss": 1.0478,
"step": 17590
},
{
"epoch": 0.9010853983207044,
"grad_norm": 0.16936801373958588,
"learning_rate": 0.00032048418118007897,
"loss": 1.0452,
"step": 17600
},
{
"epoch": 0.9015973786606595,
"grad_norm": 0.17044688761234283,
"learning_rate": 0.00031922727953337794,
"loss": 1.0433,
"step": 17610
},
{
"epoch": 0.9021093590006144,
"grad_norm": 0.16897530853748322,
"learning_rate": 0.0003179753073086024,
"loss": 1.041,
"step": 17620
},
{
"epoch": 0.9026213393405693,
"grad_norm": 0.17904484272003174,
"learning_rate": 0.00031672824517313354,
"loss": 1.0562,
"step": 17630
},
{
"epoch": 0.9031333196805242,
"grad_norm": 0.1729121208190918,
"learning_rate": 0.0003154860738701725,
"loss": 1.0345,
"step": 17640
},
{
"epoch": 0.9036453000204792,
"grad_norm": 0.17275741696357727,
"learning_rate": 0.00031424877421844385,
"loss": 1.0494,
"step": 17650
},
{
"epoch": 0.9041572803604342,
"grad_norm": 0.16756050288677216,
"learning_rate": 0.0003130163271118985,
"loss": 1.0305,
"step": 17660
},
{
"epoch": 0.9046692607003891,
"grad_norm": 0.17867998778820038,
"learning_rate": 0.00031178871351941924,
"loss": 1.045,
"step": 17670
},
{
"epoch": 0.905181241040344,
"grad_norm": 0.17364557087421417,
"learning_rate": 0.00031056591448452663,
"loss": 1.0407,
"step": 17680
},
{
"epoch": 0.905693221380299,
"grad_norm": 0.18060193955898285,
"learning_rate": 0.0003093479111250863,
"loss": 1.0404,
"step": 17690
},
{
"epoch": 0.906205201720254,
"grad_norm": 0.17321224510669708,
"learning_rate": 0.0003081346846330176,
"loss": 1.0338,
"step": 17700
},
{
"epoch": 0.9067171820602089,
"grad_norm": 0.1827027052640915,
"learning_rate": 0.0003069262162740026,
"loss": 1.0513,
"step": 17710
},
{
"epoch": 0.9072291624001638,
"grad_norm": 0.17330406606197357,
"learning_rate": 0.0003057224873871977,
"loss": 1.0537,
"step": 17720
},
{
"epoch": 0.9077411427401187,
"grad_norm": 0.1664852797985077,
"learning_rate": 0.00030452347938494435,
"loss": 1.0385,
"step": 17730
},
{
"epoch": 0.9082531230800738,
"grad_norm": 0.1791536808013916,
"learning_rate": 0.00030332917375248324,
"loss": 1.0205,
"step": 17740
},
{
"epoch": 0.9087651034200287,
"grad_norm": 0.168918177485466,
"learning_rate": 0.0003021395520476674,
"loss": 1.0278,
"step": 17750
},
{
"epoch": 0.9092770837599836,
"grad_norm": 0.17502665519714355,
"learning_rate": 0.00030095459590067796,
"loss": 1.0533,
"step": 17760
},
{
"epoch": 0.9097890640999385,
"grad_norm": 0.17242580652236938,
"learning_rate": 0.00029977428701374024,
"loss": 1.0465,
"step": 17770
},
{
"epoch": 0.9103010444398935,
"grad_norm": 0.16884900629520416,
"learning_rate": 0.0002985986071608414,
"loss": 1.0553,
"step": 17780
},
{
"epoch": 0.9108130247798485,
"grad_norm": 0.17999139428138733,
"learning_rate": 0.00029742753818744894,
"loss": 1.052,
"step": 17790
},
{
"epoch": 0.9113250051198034,
"grad_norm": 0.19205188751220703,
"learning_rate": 0.0002962610620102301,
"loss": 1.0386,
"step": 17800
},
{
"epoch": 0.9118369854597583,
"grad_norm": 0.17089873552322388,
"learning_rate": 0.00029509916061677314,
"loss": 1.0519,
"step": 17810
},
{
"epoch": 0.9123489657997133,
"grad_norm": 0.1669624298810959,
"learning_rate": 0.0002939418160653087,
"loss": 1.045,
"step": 17820
},
{
"epoch": 0.9128609461396683,
"grad_norm": 0.1757606416940689,
"learning_rate": 0.000292789010484433,
"loss": 1.0311,
"step": 17830
},
{
"epoch": 0.9133729264796232,
"grad_norm": 0.1726016104221344,
"learning_rate": 0.00029164072607283187,
"loss": 1.0302,
"step": 17840
},
{
"epoch": 0.9138849068195781,
"grad_norm": 0.17893843352794647,
"learning_rate": 0.0002904969450990057,
"loss": 1.0236,
"step": 17850
},
{
"epoch": 0.914396887159533,
"grad_norm": 0.17613349854946136,
"learning_rate": 0.00028935764990099594,
"loss": 1.0467,
"step": 17860
},
{
"epoch": 0.914908867499488,
"grad_norm": 0.1762663722038269,
"learning_rate": 0.00028822282288611204,
"loss": 1.0143,
"step": 17870
},
{
"epoch": 0.915420847839443,
"grad_norm": 0.17385472357273102,
"learning_rate": 0.00028709244653066,
"loss": 1.0373,
"step": 17880
},
{
"epoch": 0.9159328281793979,
"grad_norm": 0.173353374004364,
"learning_rate": 0.0002859665033796716,
"loss": 1.0231,
"step": 17890
},
{
"epoch": 0.9164448085193528,
"grad_norm": 0.1739385724067688,
"learning_rate": 0.0002848449760466353,
"loss": 1.0174,
"step": 17900
},
{
"epoch": 0.9169567888593078,
"grad_norm": 0.17758533358573914,
"learning_rate": 0.000283727847213227,
"loss": 1.0271,
"step": 17910
},
{
"epoch": 0.9174687691992628,
"grad_norm": 0.17424450814723969,
"learning_rate": 0.00028261509962904325,
"loss": 1.0464,
"step": 17920
},
{
"epoch": 0.9179807495392177,
"grad_norm": 0.18018485605716705,
"learning_rate": 0.0002815067161113347,
"loss": 1.0379,
"step": 17930
},
{
"epoch": 0.9184927298791726,
"grad_norm": 0.18166567385196686,
"learning_rate": 0.0002804026795447407,
"loss": 1.0364,
"step": 17940
},
{
"epoch": 0.9190047102191276,
"grad_norm": 0.17235900461673737,
"learning_rate": 0.00027930297288102513,
"loss": 1.052,
"step": 17950
},
{
"epoch": 0.9195166905590826,
"grad_norm": 0.17493902146816254,
"learning_rate": 0.000278207579138813,
"loss": 1.0377,
"step": 17960
},
{
"epoch": 0.9200286708990375,
"grad_norm": 0.17957419157028198,
"learning_rate": 0.0002771164814033282,
"loss": 1.0392,
"step": 17970
},
{
"epoch": 0.9205406512389924,
"grad_norm": 0.178439199924469,
"learning_rate": 0.00027602966282613264,
"loss": 1.0333,
"step": 17980
},
{
"epoch": 0.9210526315789473,
"grad_norm": 0.17528565227985382,
"learning_rate": 0.0002749471066248655,
"loss": 1.035,
"step": 17990
},
{
"epoch": 0.9215646119189023,
"grad_norm": 0.18786676228046417,
"learning_rate": 0.0002738687960829849,
"loss": 1.0263,
"step": 18000
},
{
"epoch": 0.9220765922588573,
"grad_norm": 0.18565250933170319,
"learning_rate": 0.00027279471454950873,
"loss": 1.0266,
"step": 18010
},
{
"epoch": 0.9225885725988122,
"grad_norm": 0.17576780915260315,
"learning_rate": 0.00027172484543875865,
"loss": 1.0472,
"step": 18020
},
{
"epoch": 0.9231005529387671,
"grad_norm": 0.17549046874046326,
"learning_rate": 0.00027065917223010303,
"loss": 1.0357,
"step": 18030
},
{
"epoch": 0.9236125332787221,
"grad_norm": 0.17524850368499756,
"learning_rate": 0.00026959767846770227,
"loss": 1.0194,
"step": 18040
},
{
"epoch": 0.9241245136186771,
"grad_norm": 0.18681474030017853,
"learning_rate": 0.00026854034776025495,
"loss": 1.0406,
"step": 18050
},
{
"epoch": 0.924636493958632,
"grad_norm": 0.1830626130104065,
"learning_rate": 0.000267487163780744,
"loss": 1.0445,
"step": 18060
},
{
"epoch": 0.9251484742985869,
"grad_norm": 0.1787140816450119,
"learning_rate": 0.00026643811026618537,
"loss": 1.0365,
"step": 18070
},
{
"epoch": 0.9256604546385419,
"grad_norm": 0.1781841665506363,
"learning_rate": 0.00026539317101737637,
"loss": 1.0278,
"step": 18080
},
{
"epoch": 0.9261724349784968,
"grad_norm": 0.18114568293094635,
"learning_rate": 0.00026435232989864576,
"loss": 1.0273,
"step": 18090
},
{
"epoch": 0.9266844153184518,
"grad_norm": 0.18065612018108368,
"learning_rate": 0.0002633155708376045,
"loss": 1.0435,
"step": 18100
},
{
"epoch": 0.9271963956584067,
"grad_norm": 0.17828424274921417,
"learning_rate": 0.0002622828778248974,
"loss": 1.0103,
"step": 18110
},
{
"epoch": 0.9277083759983616,
"grad_norm": 0.17807289958000183,
"learning_rate": 0.0002612542349139565,
"loss": 1.0437,
"step": 18120
},
{
"epoch": 0.9282203563383166,
"grad_norm": 0.17496445775032043,
"learning_rate": 0.0002602296262207541,
"loss": 1.0219,
"step": 18130
},
{
"epoch": 0.9287323366782716,
"grad_norm": 0.17806415259838104,
"learning_rate": 0.00025920903592355785,
"loss": 1.0256,
"step": 18140
},
{
"epoch": 0.9292443170182265,
"grad_norm": 0.17231720685958862,
"learning_rate": 0.00025819244826268654,
"loss": 1.0487,
"step": 18150
},
{
"epoch": 0.9297562973581814,
"grad_norm": 0.18158575892448425,
"learning_rate": 0.00025717984754026655,
"loss": 1.0258,
"step": 18160
},
{
"epoch": 0.9302682776981364,
"grad_norm": 0.17217537760734558,
"learning_rate": 0.0002561712181199894,
"loss": 1.012,
"step": 18170
},
{
"epoch": 0.9307802580380914,
"grad_norm": 0.16844135522842407,
"learning_rate": 0.0002551665444268703,
"loss": 1.0449,
"step": 18180
},
{
"epoch": 0.9312922383780463,
"grad_norm": 0.17478111386299133,
"learning_rate": 0.0002541658109470081,
"loss": 1.0357,
"step": 18190
},
{
"epoch": 0.9318042187180012,
"grad_norm": 0.17291343212127686,
"learning_rate": 0.00025316900222734496,
"loss": 1.0406,
"step": 18200
},
{
"epoch": 0.9323161990579562,
"grad_norm": 0.17205969989299774,
"learning_rate": 0.00025217610287542845,
"loss": 1.0263,
"step": 18210
},
{
"epoch": 0.9328281793979111,
"grad_norm": 0.17579463124275208,
"learning_rate": 0.0002511870975591733,
"loss": 1.0487,
"step": 18220
},
{
"epoch": 0.9333401597378661,
"grad_norm": 0.185591459274292,
"learning_rate": 0.00025020197100662507,
"loss": 1.0289,
"step": 18230
},
{
"epoch": 0.933852140077821,
"grad_norm": 0.18697933852672577,
"learning_rate": 0.0002492207080057241,
"loss": 1.0445,
"step": 18240
},
{
"epoch": 0.934364120417776,
"grad_norm": 0.1702352613210678,
"learning_rate": 0.00024824329340407056,
"loss": 1.017,
"step": 18250
},
{
"epoch": 0.9348761007577309,
"grad_norm": 0.17386525869369507,
"learning_rate": 0.0002472697121086907,
"loss": 1.0265,
"step": 18260
},
{
"epoch": 0.9353880810976859,
"grad_norm": 0.17194058001041412,
"learning_rate": 0.0002462999490858035,
"loss": 1.0305,
"step": 18270
},
{
"epoch": 0.9359000614376408,
"grad_norm": 0.17600733041763306,
"learning_rate": 0.00024533398936058893,
"loss": 1.0161,
"step": 18280
},
{
"epoch": 0.9364120417775957,
"grad_norm": 0.17031820118427277,
"learning_rate": 0.0002443718180169563,
"loss": 1.0435,
"step": 18290
},
{
"epoch": 0.9369240221175507,
"grad_norm": 0.17277632653713226,
"learning_rate": 0.00024341342019731398,
"loss": 1.0321,
"step": 18300
},
{
"epoch": 0.9374360024575056,
"grad_norm": 0.17314958572387695,
"learning_rate": 0.00024245878110234033,
"loss": 1.0419,
"step": 18310
},
{
"epoch": 0.9379479827974606,
"grad_norm": 0.17943693697452545,
"learning_rate": 0.0002415078859907547,
"loss": 1.0455,
"step": 18320
},
{
"epoch": 0.9384599631374155,
"grad_norm": 0.17218518257141113,
"learning_rate": 0.00024056072017909026,
"loss": 1.0174,
"step": 18330
},
{
"epoch": 0.9389719434773705,
"grad_norm": 0.1672009378671646,
"learning_rate": 0.0002396172690414667,
"loss": 1.0304,
"step": 18340
},
{
"epoch": 0.9394839238173254,
"grad_norm": 0.16872192919254303,
"learning_rate": 0.00023867751800936513,
"loss": 1.0334,
"step": 18350
},
{
"epoch": 0.9399959041572804,
"grad_norm": 0.17709334194660187,
"learning_rate": 0.0002377414525714023,
"loss": 1.043,
"step": 18360
},
{
"epoch": 0.9405078844972353,
"grad_norm": 0.17235656082630157,
"learning_rate": 0.00023680905827310717,
"loss": 1.0296,
"step": 18370
},
{
"epoch": 0.9410198648371902,
"grad_norm": 0.17677216231822968,
"learning_rate": 0.0002358803207166974,
"loss": 1.0304,
"step": 18380
},
{
"epoch": 0.9415318451771452,
"grad_norm": 0.17921361327171326,
"learning_rate": 0.00023495522556085693,
"loss": 1.0287,
"step": 18390
},
{
"epoch": 0.9420438255171002,
"grad_norm": 0.18774552643299103,
"learning_rate": 0.0002340337585205149,
"loss": 1.0303,
"step": 18400
},
{
"epoch": 0.9425558058570551,
"grad_norm": 0.1885557323694229,
"learning_rate": 0.00023311590536662463,
"loss": 1.0225,
"step": 18410
},
{
"epoch": 0.94306778619701,
"grad_norm": 0.17091277241706848,
"learning_rate": 0.00023220165192594432,
"loss": 1.0216,
"step": 18420
},
{
"epoch": 0.943579766536965,
"grad_norm": 0.17530862987041473,
"learning_rate": 0.00023129098408081777,
"loss": 1.0303,
"step": 18430
},
{
"epoch": 0.9440917468769199,
"grad_norm": 0.17937549948692322,
"learning_rate": 0.00023038388776895662,
"loss": 1.0234,
"step": 18440
},
{
"epoch": 0.9446037272168749,
"grad_norm": 0.1720314472913742,
"learning_rate": 0.00022948034898322335,
"loss": 1.0304,
"step": 18450
},
{
"epoch": 0.9451157075568298,
"grad_norm": 0.1731894463300705,
"learning_rate": 0.00022858035377141452,
"loss": 1.021,
"step": 18460
},
{
"epoch": 0.9456276878967848,
"grad_norm": 0.17468558251857758,
"learning_rate": 0.00022768388823604584,
"loss": 1.0224,
"step": 18470
},
{
"epoch": 0.9461396682367397,
"grad_norm": 0.17135438323020935,
"learning_rate": 0.00022679093853413717,
"loss": 1.0392,
"step": 18480
},
{
"epoch": 0.9466516485766947,
"grad_norm": 0.1784532517194748,
"learning_rate": 0.00022590149087699918,
"loss": 1.0183,
"step": 18490
},
{
"epoch": 0.9471636289166496,
"grad_norm": 0.18522332608699799,
"learning_rate": 0.00022501553153001985,
"loss": 1.0361,
"step": 18500
},
{
"epoch": 0.9476756092566045,
"grad_norm": 0.18401268124580383,
"learning_rate": 0.00022413304681245284,
"loss": 1.0329,
"step": 18510
},
{
"epoch": 0.9481875895965595,
"grad_norm": 0.16760528087615967,
"learning_rate": 0.00022325402309720624,
"loss": 1.0199,
"step": 18520
},
{
"epoch": 0.9486995699365144,
"grad_norm": 0.18120263516902924,
"learning_rate": 0.00022237844681063175,
"loss": 1.0252,
"step": 18530
},
{
"epoch": 0.9492115502764694,
"grad_norm": 0.1899506002664566,
"learning_rate": 0.00022150630443231562,
"loss": 1.0064,
"step": 18540
},
{
"epoch": 0.9497235306164243,
"grad_norm": 0.1819719672203064,
"learning_rate": 0.00022063758249486932,
"loss": 1.0246,
"step": 18550
},
{
"epoch": 0.9502355109563793,
"grad_norm": 0.17660754919052124,
"learning_rate": 0.00021977226758372213,
"loss": 1.0305,
"step": 18560
},
{
"epoch": 0.9507474912963342,
"grad_norm": 0.17415086925029755,
"learning_rate": 0.00021891034633691347,
"loss": 1.0369,
"step": 18570
},
{
"epoch": 0.9512594716362892,
"grad_norm": 0.17310403287410736,
"learning_rate": 0.00021805180544488684,
"loss": 1.0272,
"step": 18580
},
{
"epoch": 0.9517714519762441,
"grad_norm": 0.17484420537948608,
"learning_rate": 0.0002171966316502845,
"loss": 1.028,
"step": 18590
},
{
"epoch": 0.952283432316199,
"grad_norm": 0.18543212115764618,
"learning_rate": 0.00021634481174774217,
"loss": 1.0296,
"step": 18600
},
{
"epoch": 0.952795412656154,
"grad_norm": 0.1763850450515747,
"learning_rate": 0.00021549633258368582,
"loss": 1.0307,
"step": 18610
},
{
"epoch": 0.953307392996109,
"grad_norm": 0.16824059188365936,
"learning_rate": 0.00021465118105612805,
"loss": 1.0206,
"step": 18620
},
{
"epoch": 0.9538193733360639,
"grad_norm": 0.17931176722049713,
"learning_rate": 0.00021380934411446574,
"loss": 1.016,
"step": 18630
},
{
"epoch": 0.9543313536760188,
"grad_norm": 0.18147091567516327,
"learning_rate": 0.00021297080875927913,
"loss": 1.0211,
"step": 18640
},
{
"epoch": 0.9548433340159738,
"grad_norm": 0.18163631856441498,
"learning_rate": 0.00021213556204213033,
"loss": 1.0263,
"step": 18650
},
{
"epoch": 0.9553553143559287,
"grad_norm": 0.17591601610183716,
"learning_rate": 0.00021130359106536384,
"loss": 1.0417,
"step": 18660
},
{
"epoch": 0.9558672946958837,
"grad_norm": 0.17677730321884155,
"learning_rate": 0.00021047488298190723,
"loss": 1.0299,
"step": 18670
},
{
"epoch": 0.9563792750358386,
"grad_norm": 0.17326125502586365,
"learning_rate": 0.0002096494249950729,
"loss": 1.0268,
"step": 18680
},
{
"epoch": 0.9568912553757936,
"grad_norm": 0.1793946474790573,
"learning_rate": 0.00020882720435836026,
"loss": 1.0355,
"step": 18690
},
{
"epoch": 0.9574032357157485,
"grad_norm": 0.1703524887561798,
"learning_rate": 0.00020800820837525892,
"loss": 1.005,
"step": 18700
},
{
"epoch": 0.9579152160557035,
"grad_norm": 0.17965586483478546,
"learning_rate": 0.000207192424399053,
"loss": 1.0182,
"step": 18710
},
{
"epoch": 0.9584271963956584,
"grad_norm": 0.16650822758674622,
"learning_rate": 0.00020637983983262526,
"loss": 1.0304,
"step": 18720
},
{
"epoch": 0.9589391767356134,
"grad_norm": 0.1700984239578247,
"learning_rate": 0.00020557044212826323,
"loss": 1.0103,
"step": 18730
},
{
"epoch": 0.9594511570755683,
"grad_norm": 0.18094299733638763,
"learning_rate": 0.0002047642187874647,
"loss": 1.0247,
"step": 18740
},
{
"epoch": 0.9599631374155232,
"grad_norm": 0.16972561180591583,
"learning_rate": 0.0002039611573607455,
"loss": 1.0328,
"step": 18750
},
{
"epoch": 0.9604751177554782,
"grad_norm": 0.1718764752149582,
"learning_rate": 0.0002031612454474467,
"loss": 1.0015,
"step": 18760
},
{
"epoch": 0.9609870980954331,
"grad_norm": 0.17211291193962097,
"learning_rate": 0.00020236447069554324,
"loss": 1.0485,
"step": 18770
},
{
"epoch": 0.9614990784353881,
"grad_norm": 0.17325459420681,
"learning_rate": 0.00020157082080145356,
"loss": 1.0122,
"step": 18780
},
{
"epoch": 0.962011058775343,
"grad_norm": 0.1677115559577942,
"learning_rate": 0.00020078028350984888,
"loss": 1.0144,
"step": 18790
},
{
"epoch": 0.962523039115298,
"grad_norm": 0.17302511632442474,
"learning_rate": 0.00019999284661346487,
"loss": 1.0247,
"step": 18800
},
{
"epoch": 0.9630350194552529,
"grad_norm": 0.1713932901620865,
"learning_rate": 0.00019920849795291223,
"loss": 1.0135,
"step": 18810
},
{
"epoch": 0.9635469997952079,
"grad_norm": 0.1779249906539917,
"learning_rate": 0.00019842722541648977,
"loss": 1.0166,
"step": 18820
},
{
"epoch": 0.9640589801351628,
"grad_norm": 0.17072229087352753,
"learning_rate": 0.00019764901693999665,
"loss": 1.0214,
"step": 18830
},
{
"epoch": 0.9645709604751177,
"grad_norm": 0.17682915925979614,
"learning_rate": 0.00019687386050654655,
"loss": 1.0412,
"step": 18840
},
{
"epoch": 0.9650829408150727,
"grad_norm": 0.17209376394748688,
"learning_rate": 0.00019610174414638203,
"loss": 1.0139,
"step": 18850
},
{
"epoch": 0.9655949211550277,
"grad_norm": 0.16988667845726013,
"learning_rate": 0.0001953326559366896,
"loss": 1.03,
"step": 18860
},
{
"epoch": 0.9661069014949826,
"grad_norm": 0.17056208848953247,
"learning_rate": 0.0001945665840014157,
"loss": 1.0335,
"step": 18870
},
{
"epoch": 0.9666188818349375,
"grad_norm": 0.17054276168346405,
"learning_rate": 0.0001938035165110831,
"loss": 1.0281,
"step": 18880
},
{
"epoch": 0.9671308621748925,
"grad_norm": 0.17490647733211517,
"learning_rate": 0.00019304344168260865,
"loss": 1.0401,
"step": 18890
},
{
"epoch": 0.9676428425148474,
"grad_norm": 0.17823657393455505,
"learning_rate": 0.00019228634777912089,
"loss": 1.0225,
"step": 18900
},
{
"epoch": 0.9681548228548024,
"grad_norm": 0.1651022583246231,
"learning_rate": 0.00019153222310977906,
"loss": 1.0088,
"step": 18910
},
{
"epoch": 0.9686668031947573,
"grad_norm": 0.18135780096054077,
"learning_rate": 0.00019078105602959264,
"loss": 1.0289,
"step": 18920
},
{
"epoch": 0.9691787835347123,
"grad_norm": 0.17016355693340302,
"learning_rate": 0.00019003283493924117,
"loss": 1.0111,
"step": 18930
},
{
"epoch": 0.9696907638746672,
"grad_norm": 0.17754383385181427,
"learning_rate": 0.00018928754828489555,
"loss": 1.0291,
"step": 18940
},
{
"epoch": 0.9702027442146222,
"grad_norm": 0.16962246596813202,
"learning_rate": 0.00018854518455803946,
"loss": 1.0228,
"step": 18950
},
{
"epoch": 0.9707147245545771,
"grad_norm": 0.17820075154304504,
"learning_rate": 0.00018780573229529142,
"loss": 1.0231,
"step": 18960
},
{
"epoch": 0.971226704894532,
"grad_norm": 0.16597416996955872,
"learning_rate": 0.00018706918007822834,
"loss": 1.0327,
"step": 18970
},
{
"epoch": 0.971738685234487,
"grad_norm": 0.17721499502658844,
"learning_rate": 0.00018633551653320852,
"loss": 1.0084,
"step": 18980
},
{
"epoch": 0.972250665574442,
"grad_norm": 0.17141114175319672,
"learning_rate": 0.0001856047303311967,
"loss": 1.0361,
"step": 18990
},
{
"epoch": 0.9727626459143969,
"grad_norm": 0.17473644018173218,
"learning_rate": 0.0001848768101875884,
"loss": 1.0051,
"step": 19000
},
{
"epoch": 0.9732746262543518,
"grad_norm": 0.17746561765670776,
"learning_rate": 0.00018415174486203638,
"loss": 1.0266,
"step": 19010
},
{
"epoch": 0.9737866065943068,
"grad_norm": 0.16750702261924744,
"learning_rate": 0.00018342952315827656,
"loss": 1.0282,
"step": 19020
},
{
"epoch": 0.9742985869342617,
"grad_norm": 0.1748443841934204,
"learning_rate": 0.00018271013392395522,
"loss": 1.0183,
"step": 19030
},
{
"epoch": 0.9748105672742167,
"grad_norm": 0.17715822160243988,
"learning_rate": 0.0001819935660504572,
"loss": 1.0145,
"step": 19040
},
{
"epoch": 0.9753225476141716,
"grad_norm": 0.17972363531589508,
"learning_rate": 0.0001812798084727336,
"loss": 1.0069,
"step": 19050
},
{
"epoch": 0.9758345279541265,
"grad_norm": 0.17496472597122192,
"learning_rate": 0.00018056885016913175,
"loss": 1.0074,
"step": 19060
},
{
"epoch": 0.9763465082940815,
"grad_norm": 0.18323951959609985,
"learning_rate": 0.00017986068016122433,
"loss": 1.0487,
"step": 19070
},
{
"epoch": 0.9768584886340365,
"grad_norm": 0.16890741884708405,
"learning_rate": 0.00017915528751364033,
"loss": 1.0153,
"step": 19080
},
{
"epoch": 0.9773704689739914,
"grad_norm": 0.17116831243038177,
"learning_rate": 0.0001784526613338959,
"loss": 1.0132,
"step": 19090
},
{
"epoch": 0.9778824493139463,
"grad_norm": 0.17036503553390503,
"learning_rate": 0.00017775279077222617,
"loss": 1.0228,
"step": 19100
},
{
"epoch": 0.9783944296539013,
"grad_norm": 0.17859075963497162,
"learning_rate": 0.00017705566502141802,
"loss": 1.0123,
"step": 19110
},
{
"epoch": 0.9789064099938563,
"grad_norm": 0.17719532549381256,
"learning_rate": 0.00017636127331664266,
"loss": 1.0385,
"step": 19120
},
{
"epoch": 0.9794183903338112,
"grad_norm": 0.17673194408416748,
"learning_rate": 0.00017566960493528995,
"loss": 1.0224,
"step": 19130
},
{
"epoch": 0.9799303706737661,
"grad_norm": 0.1806950718164444,
"learning_rate": 0.00017498064919680242,
"loss": 1.0111,
"step": 19140
},
{
"epoch": 0.9804423510137211,
"grad_norm": 0.16843082010746002,
"learning_rate": 0.00017429439546251066,
"loss": 1.0059,
"step": 19150
},
{
"epoch": 0.980954331353676,
"grad_norm": 0.17275168001651764,
"learning_rate": 0.00017361083313546875,
"loss": 1.037,
"step": 19160
},
{
"epoch": 0.981466311693631,
"grad_norm": 0.17250047624111176,
"learning_rate": 0.0001729299516602907,
"loss": 1.0193,
"step": 19170
},
{
"epoch": 0.9819782920335859,
"grad_norm": 0.17009197175502777,
"learning_rate": 0.00017225174052298777,
"loss": 1.0412,
"step": 19180
},
{
"epoch": 0.9824902723735408,
"grad_norm": 0.16845643520355225,
"learning_rate": 0.0001715761892508056,
"loss": 1.0268,
"step": 19190
},
{
"epoch": 0.9830022527134958,
"grad_norm": 0.16763417422771454,
"learning_rate": 0.0001709032874120629,
"loss": 1.0425,
"step": 19200
},
{
"epoch": 0.9835142330534508,
"grad_norm": 0.1747148334980011,
"learning_rate": 0.00017023302461599015,
"loss": 1.0228,
"step": 19210
},
{
"epoch": 0.9840262133934057,
"grad_norm": 0.17626087367534637,
"learning_rate": 0.0001695653905125693,
"loss": 1.0142,
"step": 19220
},
{
"epoch": 0.9845381937333606,
"grad_norm": 0.17711155116558075,
"learning_rate": 0.00016890037479237377,
"loss": 1.0238,
"step": 19230
},
{
"epoch": 0.9850501740733156,
"grad_norm": 0.1858174353837967,
"learning_rate": 0.00016823796718640937,
"loss": 1.033,
"step": 19240
},
{
"epoch": 0.9855621544132706,
"grad_norm": 0.1855236142873764,
"learning_rate": 0.0001675781574659558,
"loss": 1.0276,
"step": 19250
},
{
"epoch": 0.9860741347532255,
"grad_norm": 0.16916634142398834,
"learning_rate": 0.0001669209354424084,
"loss": 1.0208,
"step": 19260
},
{
"epoch": 0.9865861150931804,
"grad_norm": 0.18142545223236084,
"learning_rate": 0.00016626629096712137,
"loss": 1.0302,
"step": 19270
},
{
"epoch": 0.9870980954331353,
"grad_norm": 0.16748617589473724,
"learning_rate": 0.00016561421393125036,
"loss": 1.0244,
"step": 19280
},
{
"epoch": 0.9876100757730903,
"grad_norm": 0.180519700050354,
"learning_rate": 0.000164964694265597,
"loss": 1.0009,
"step": 19290
},
{
"epoch": 0.9881220561130453,
"grad_norm": 0.16856172680854797,
"learning_rate": 0.00016431772194045298,
"loss": 1.009,
"step": 19300
},
{
"epoch": 0.9886340364530002,
"grad_norm": 0.17907920479774475,
"learning_rate": 0.00016367328696544536,
"loss": 1.0182,
"step": 19310
},
{
"epoch": 0.9891460167929551,
"grad_norm": 0.18012414872646332,
"learning_rate": 0.00016303137938938238,
"loss": 1.0238,
"step": 19320
},
{
"epoch": 0.9896579971329101,
"grad_norm": 0.17940422892570496,
"learning_rate": 0.0001623919893000996,
"loss": 1.035,
"step": 19330
},
{
"epoch": 0.9901699774728651,
"grad_norm": 0.17534732818603516,
"learning_rate": 0.00016175510682430694,
"loss": 1.0282,
"step": 19340
},
{
"epoch": 0.99068195781282,
"grad_norm": 0.17742076516151428,
"learning_rate": 0.0001611207221274363,
"loss": 1.0308,
"step": 19350
},
{
"epoch": 0.9911939381527749,
"grad_norm": 0.174584299325943,
"learning_rate": 0.00016048882541348943,
"loss": 1.0433,
"step": 19360
},
{
"epoch": 0.9917059184927299,
"grad_norm": 0.17817029356956482,
"learning_rate": 0.00015985940692488709,
"loss": 1.0088,
"step": 19370
},
{
"epoch": 0.9922178988326849,
"grad_norm": 0.1764860898256302,
"learning_rate": 0.00015923245694231792,
"loss": 1.0051,
"step": 19380
},
{
"epoch": 0.9927298791726398,
"grad_norm": 0.1679990142583847,
"learning_rate": 0.00015860796578458873,
"loss": 1.0383,
"step": 19390
},
{
"epoch": 0.9932418595125947,
"grad_norm": 0.17141203582286835,
"learning_rate": 0.00015798592380847468,
"loss": 1.0367,
"step": 19400
},
{
"epoch": 0.9937538398525496,
"grad_norm": 0.17301303148269653,
"learning_rate": 0.00015736632140857067,
"loss": 1.0227,
"step": 19410
},
{
"epoch": 0.9942658201925046,
"grad_norm": 0.17585515975952148,
"learning_rate": 0.00015674914901714278,
"loss": 1.0373,
"step": 19420
},
{
"epoch": 0.9947778005324596,
"grad_norm": 0.17036980390548706,
"learning_rate": 0.0001561343971039807,
"loss": 1.0025,
"step": 19430
},
{
"epoch": 0.9952897808724145,
"grad_norm": 0.1802191138267517,
"learning_rate": 0.00015552205617625053,
"loss": 1.0378,
"step": 19440
},
{
"epoch": 0.9958017612123694,
"grad_norm": 0.17641904950141907,
"learning_rate": 0.000154912116778348,
"loss": 1.0317,
"step": 19450
},
{
"epoch": 0.9963137415523244,
"grad_norm": 0.18595443665981293,
"learning_rate": 0.0001543045694917528,
"loss": 1.0081,
"step": 19460
},
{
"epoch": 0.9968257218922794,
"grad_norm": 0.17444072663784027,
"learning_rate": 0.0001536994049348828,
"loss": 1.0242,
"step": 19470
},
{
"epoch": 0.9973377022322343,
"grad_norm": 0.17894035577774048,
"learning_rate": 0.00015309661376294953,
"loss": 1.0269,
"step": 19480
},
{
"epoch": 0.9978496825721892,
"grad_norm": 0.17125560343265533,
"learning_rate": 0.00015249618666781352,
"loss": 1.0189,
"step": 19490
},
{
"epoch": 0.9983616629121441,
"grad_norm": 0.1681634485721588,
"learning_rate": 0.0001518981143778408,
"loss": 1.0014,
"step": 19500
},
{
"epoch": 0.9988736432520992,
"grad_norm": 0.17360231280326843,
"learning_rate": 0.0001513023876577597,
"loss": 1.0033,
"step": 19510
},
{
"epoch": 0.9993856235920541,
"grad_norm": 0.17242667078971863,
"learning_rate": 0.00015070899730851815,
"loss": 1.0236,
"step": 19520
},
{
"epoch": 0.999897603932009,
"grad_norm": 0.16095665097236633,
"learning_rate": 0.0001501179341671418,
"loss": 1.0393,
"step": 19530
}
],
"logging_steps": 10,
"max_steps": 19532,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8288897328545792e+17,
"train_batch_size": 512,
"trial_name": null,
"trial_params": null
}