{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1024,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009765625,
"grad_norm": 0.6541444063186646,
"learning_rate": 0.0,
"loss": 1.0280990600585938,
"step": 1
},
{
"epoch": 0.001953125,
"grad_norm": 0.4356674551963806,
"learning_rate": 4e-05,
"loss": 0.8305179476737976,
"step": 2
},
{
"epoch": 0.0029296875,
"grad_norm": 0.3900858759880066,
"learning_rate": 8e-05,
"loss": 0.7835474014282227,
"step": 3
},
{
"epoch": 0.00390625,
"grad_norm": 0.3717947006225586,
"learning_rate": 0.00012,
"loss": 1.1571688652038574,
"step": 4
},
{
"epoch": 0.0048828125,
"grad_norm": 0.2760661542415619,
"learning_rate": 0.00016,
"loss": 0.8141135573387146,
"step": 5
},
{
"epoch": 0.005859375,
"grad_norm": 0.24524882435798645,
"learning_rate": 0.0002,
"loss": 0.29919666051864624,
"step": 6
},
{
"epoch": 0.0068359375,
"grad_norm": 0.3155483305454254,
"learning_rate": 0.00019980372914622178,
"loss": 0.916366696357727,
"step": 7
},
{
"epoch": 0.0078125,
"grad_norm": 1.0419310331344604,
"learning_rate": 0.00019960745829244357,
"loss": 0.986505389213562,
"step": 8
},
{
"epoch": 0.0087890625,
"grad_norm": 0.32395845651626587,
"learning_rate": 0.00019941118743866537,
"loss": 0.7845190167427063,
"step": 9
},
{
"epoch": 0.009765625,
"grad_norm": 0.564084529876709,
"learning_rate": 0.00019921491658488717,
"loss": 1.0922366380691528,
"step": 10
},
{
"epoch": 0.0107421875,
"grad_norm": 0.4066593647003174,
"learning_rate": 0.00019901864573110893,
"loss": 1.0279463529586792,
"step": 11
},
{
"epoch": 0.01171875,
"grad_norm": 0.43442535400390625,
"learning_rate": 0.00019882237487733073,
"loss": 0.9713175892829895,
"step": 12
},
{
"epoch": 0.0126953125,
"grad_norm": 0.26689526438713074,
"learning_rate": 0.0001986261040235525,
"loss": 0.38461241126060486,
"step": 13
},
{
"epoch": 0.013671875,
"grad_norm": 0.41254541277885437,
"learning_rate": 0.0001984298331697743,
"loss": 0.7746479511260986,
"step": 14
},
{
"epoch": 0.0146484375,
"grad_norm": 0.39432424306869507,
"learning_rate": 0.0001982335623159961,
"loss": 0.7843194603919983,
"step": 15
},
{
"epoch": 0.015625,
"grad_norm": 0.4303337037563324,
"learning_rate": 0.0001980372914622179,
"loss": 0.6613403558731079,
"step": 16
},
{
"epoch": 0.0166015625,
"grad_norm": 0.875269889831543,
"learning_rate": 0.00019784102060843966,
"loss": 1.0992671251296997,
"step": 17
},
{
"epoch": 0.017578125,
"grad_norm": 0.21415413916110992,
"learning_rate": 0.00019764474975466145,
"loss": 0.2784216105937958,
"step": 18
},
{
"epoch": 0.0185546875,
"grad_norm": 0.4318086504936218,
"learning_rate": 0.00019744847890088322,
"loss": 0.6146124005317688,
"step": 19
},
{
"epoch": 0.01953125,
"grad_norm": 0.20149515569210052,
"learning_rate": 0.00019725220804710502,
"loss": 0.3920556306838989,
"step": 20
},
{
"epoch": 0.0205078125,
"grad_norm": 0.358688622713089,
"learning_rate": 0.0001970559371933268,
"loss": 0.6672685742378235,
"step": 21
},
{
"epoch": 0.021484375,
"grad_norm": 0.5916730165481567,
"learning_rate": 0.00019685966633954858,
"loss": 1.0804443359375,
"step": 22
},
{
"epoch": 0.0224609375,
"grad_norm": 0.3139825761318207,
"learning_rate": 0.00019666339548577038,
"loss": 0.7358766794204712,
"step": 23
},
{
"epoch": 0.0234375,
"grad_norm": 0.4019712805747986,
"learning_rate": 0.00019646712463199215,
"loss": 0.7362902164459229,
"step": 24
},
{
"epoch": 0.0244140625,
"grad_norm": 0.2874290347099304,
"learning_rate": 0.00019627085377821394,
"loss": 0.6446189284324646,
"step": 25
},
{
"epoch": 0.025390625,
"grad_norm": 0.357494592666626,
"learning_rate": 0.0001960745829244357,
"loss": 0.2820976972579956,
"step": 26
},
{
"epoch": 0.0263671875,
"grad_norm": 0.22216391563415527,
"learning_rate": 0.00019587831207065753,
"loss": 0.6020435094833374,
"step": 27
},
{
"epoch": 0.02734375,
"grad_norm": 0.23284995555877686,
"learning_rate": 0.0001956820412168793,
"loss": 0.44151532649993896,
"step": 28
},
{
"epoch": 0.0283203125,
"grad_norm": 0.3594605028629303,
"learning_rate": 0.0001954857703631011,
"loss": 0.9414041042327881,
"step": 29
},
{
"epoch": 0.029296875,
"grad_norm": 0.4460504353046417,
"learning_rate": 0.00019528949950932287,
"loss": 0.7148531079292297,
"step": 30
},
{
"epoch": 0.0302734375,
"grad_norm": 0.3392362892627716,
"learning_rate": 0.00019509322865554466,
"loss": 0.7185512781143188,
"step": 31
},
{
"epoch": 0.03125,
"grad_norm": 0.3340625464916229,
"learning_rate": 0.00019489695780176643,
"loss": 0.6613262891769409,
"step": 32
},
{
"epoch": 0.0322265625,
"grad_norm": 0.26223355531692505,
"learning_rate": 0.00019470068694798826,
"loss": 0.590149462223053,
"step": 33
},
{
"epoch": 0.033203125,
"grad_norm": 0.3481689691543579,
"learning_rate": 0.00019450441609421002,
"loss": 0.5590913891792297,
"step": 34
},
{
"epoch": 0.0341796875,
"grad_norm": 0.4775488078594208,
"learning_rate": 0.00019430814524043182,
"loss": 0.927351176738739,
"step": 35
},
{
"epoch": 0.03515625,
"grad_norm": 0.4474835693836212,
"learning_rate": 0.0001941118743866536,
"loss": 0.7719380855560303,
"step": 36
},
{
"epoch": 0.0361328125,
"grad_norm": 0.3538999855518341,
"learning_rate": 0.00019391560353287536,
"loss": 1.0287561416625977,
"step": 37
},
{
"epoch": 0.037109375,
"grad_norm": 0.5018237233161926,
"learning_rate": 0.00019371933267909715,
"loss": 1.049814224243164,
"step": 38
},
{
"epoch": 0.0380859375,
"grad_norm": 0.5052743554115295,
"learning_rate": 0.00019352306182531895,
"loss": 0.39767658710479736,
"step": 39
},
{
"epoch": 0.0390625,
"grad_norm": 0.46170520782470703,
"learning_rate": 0.00019332679097154075,
"loss": 0.9849376678466797,
"step": 40
},
{
"epoch": 0.0400390625,
"grad_norm": 0.5961291193962097,
"learning_rate": 0.00019313052011776251,
"loss": 0.8527336716651917,
"step": 41
},
{
"epoch": 0.041015625,
"grad_norm": 0.4002876579761505,
"learning_rate": 0.0001929342492639843,
"loss": 0.7445047497749329,
"step": 42
},
{
"epoch": 0.0419921875,
"grad_norm": 0.6382992267608643,
"learning_rate": 0.00019273797841020608,
"loss": 0.7587878704071045,
"step": 43
},
{
"epoch": 0.04296875,
"grad_norm": 0.4204530715942383,
"learning_rate": 0.00019254170755642788,
"loss": 0.943995475769043,
"step": 44
},
{
"epoch": 0.0439453125,
"grad_norm": 0.29038068652153015,
"learning_rate": 0.00019234543670264967,
"loss": 0.4540131688117981,
"step": 45
},
{
"epoch": 0.044921875,
"grad_norm": 0.41968628764152527,
"learning_rate": 0.00019214916584887147,
"loss": 0.3900204300880432,
"step": 46
},
{
"epoch": 0.0458984375,
"grad_norm": 0.5870251059532166,
"learning_rate": 0.00019195289499509324,
"loss": 0.8700598478317261,
"step": 47
},
{
"epoch": 0.046875,
"grad_norm": 0.3120124042034149,
"learning_rate": 0.00019175662414131503,
"loss": 0.2866731882095337,
"step": 48
},
{
"epoch": 0.0478515625,
"grad_norm": 0.31891942024230957,
"learning_rate": 0.0001915603532875368,
"loss": 0.7711223363876343,
"step": 49
},
{
"epoch": 0.048828125,
"grad_norm": 0.4250207543373108,
"learning_rate": 0.0001913640824337586,
"loss": 0.7499758005142212,
"step": 50
},
{
"epoch": 0.0498046875,
"grad_norm": 0.4769924581050873,
"learning_rate": 0.0001911678115799804,
"loss": 0.8479812145233154,
"step": 51
},
{
"epoch": 0.05078125,
"grad_norm": 0.2966979146003723,
"learning_rate": 0.00019097154072620216,
"loss": 0.8125182390213013,
"step": 52
},
{
"epoch": 0.0517578125,
"grad_norm": 0.4924452006816864,
"learning_rate": 0.00019077526987242396,
"loss": 1.006331443786621,
"step": 53
},
{
"epoch": 0.052734375,
"grad_norm": 0.5558736324310303,
"learning_rate": 0.00019057899901864573,
"loss": 0.8218062520027161,
"step": 54
},
{
"epoch": 0.0537109375,
"grad_norm": 0.488903284072876,
"learning_rate": 0.00019038272816486752,
"loss": 0.7451006770133972,
"step": 55
},
{
"epoch": 0.0546875,
"grad_norm": 0.6092124581336975,
"learning_rate": 0.00019018645731108932,
"loss": 0.3371097445487976,
"step": 56
},
{
"epoch": 0.0556640625,
"grad_norm": 0.34885621070861816,
"learning_rate": 0.00018999018645731111,
"loss": 0.9263520836830139,
"step": 57
},
{
"epoch": 0.056640625,
"grad_norm": 0.41470521688461304,
"learning_rate": 0.00018979391560353288,
"loss": 0.8741390109062195,
"step": 58
},
{
"epoch": 0.0576171875,
"grad_norm": 0.32286664843559265,
"learning_rate": 0.00018959764474975468,
"loss": 0.6128658056259155,
"step": 59
},
{
"epoch": 0.05859375,
"grad_norm": 0.43667954206466675,
"learning_rate": 0.00018940137389597645,
"loss": 0.822106122970581,
"step": 60
},
{
"epoch": 0.0595703125,
"grad_norm": 0.5501149892807007,
"learning_rate": 0.00018920510304219824,
"loss": 0.2981743812561035,
"step": 61
},
{
"epoch": 0.060546875,
"grad_norm": 0.5234649777412415,
"learning_rate": 0.00018900883218842004,
"loss": 0.710310161113739,
"step": 62
},
{
"epoch": 0.0615234375,
"grad_norm": 0.5040559768676758,
"learning_rate": 0.00018881256133464184,
"loss": 1.0355676412582397,
"step": 63
},
{
"epoch": 0.0625,
"grad_norm": 0.4435643255710602,
"learning_rate": 0.0001886162904808636,
"loss": 1.031105399131775,
"step": 64
},
{
"epoch": 0.0634765625,
"grad_norm": 0.4987465441226959,
"learning_rate": 0.0001884200196270854,
"loss": 0.7753915190696716,
"step": 65
},
{
"epoch": 0.064453125,
"grad_norm": 0.3633696436882019,
"learning_rate": 0.00018822374877330717,
"loss": 1.2376799583435059,
"step": 66
},
{
"epoch": 0.0654296875,
"grad_norm": 1.0342258214950562,
"learning_rate": 0.00018802747791952894,
"loss": 0.6145737171173096,
"step": 67
},
{
"epoch": 0.06640625,
"grad_norm": 0.47045138478279114,
"learning_rate": 0.00018783120706575076,
"loss": 0.8622407913208008,
"step": 68
},
{
"epoch": 0.0673828125,
"grad_norm": 0.47864851355552673,
"learning_rate": 0.00018763493621197253,
"loss": 0.6727300882339478,
"step": 69
},
{
"epoch": 0.068359375,
"grad_norm": 0.38102060556411743,
"learning_rate": 0.00018743866535819433,
"loss": 0.7417519092559814,
"step": 70
},
{
"epoch": 0.0693359375,
"grad_norm": 0.4229515492916107,
"learning_rate": 0.0001872423945044161,
"loss": 0.46951866149902344,
"step": 71
},
{
"epoch": 0.0703125,
"grad_norm": 0.4868115186691284,
"learning_rate": 0.0001870461236506379,
"loss": 0.32457292079925537,
"step": 72
},
{
"epoch": 0.0712890625,
"grad_norm": 0.298020601272583,
"learning_rate": 0.00018684985279685966,
"loss": 0.2501494288444519,
"step": 73
},
{
"epoch": 0.072265625,
"grad_norm": 0.49870651960372925,
"learning_rate": 0.00018665358194308145,
"loss": 0.5599403381347656,
"step": 74
},
{
"epoch": 0.0732421875,
"grad_norm": 0.5717479586601257,
"learning_rate": 0.00018645731108930325,
"loss": 0.4725653827190399,
"step": 75
},
{
"epoch": 0.07421875,
"grad_norm": 0.5230128765106201,
"learning_rate": 0.00018626104023552505,
"loss": 1.0607699155807495,
"step": 76
},
{
"epoch": 0.0751953125,
"grad_norm": 0.4279435873031616,
"learning_rate": 0.00018606476938174682,
"loss": 0.5628142952919006,
"step": 77
},
{
"epoch": 0.076171875,
"grad_norm": 0.6166331171989441,
"learning_rate": 0.0001858684985279686,
"loss": 0.44837141036987305,
"step": 78
},
{
"epoch": 0.0771484375,
"grad_norm": 0.6329861879348755,
"learning_rate": 0.00018567222767419038,
"loss": 0.5013883709907532,
"step": 79
},
{
"epoch": 0.078125,
"grad_norm": 0.2921103239059448,
"learning_rate": 0.00018547595682041218,
"loss": 0.541824996471405,
"step": 80
},
{
"epoch": 0.0791015625,
"grad_norm": 0.36744800209999084,
"learning_rate": 0.00018527968596663397,
"loss": 0.3878925144672394,
"step": 81
},
{
"epoch": 0.080078125,
"grad_norm": 0.34045904874801636,
"learning_rate": 0.00018508341511285574,
"loss": 0.33476194739341736,
"step": 82
},
{
"epoch": 0.0810546875,
"grad_norm": 0.48908546566963196,
"learning_rate": 0.00018488714425907754,
"loss": 1.003555178642273,
"step": 83
},
{
"epoch": 0.08203125,
"grad_norm": 0.4683694839477539,
"learning_rate": 0.0001846908734052993,
"loss": 0.7300649285316467,
"step": 84
},
{
"epoch": 0.0830078125,
"grad_norm": 0.3560928404331207,
"learning_rate": 0.0001844946025515211,
"loss": 0.4525097608566284,
"step": 85
},
{
"epoch": 0.083984375,
"grad_norm": 1.481307864189148,
"learning_rate": 0.0001842983316977429,
"loss": 0.5444833040237427,
"step": 86
},
{
"epoch": 0.0849609375,
"grad_norm": 0.42610403895378113,
"learning_rate": 0.0001841020608439647,
"loss": 0.7340827584266663,
"step": 87
},
{
"epoch": 0.0859375,
"grad_norm": 0.6035026907920837,
"learning_rate": 0.00018390578999018646,
"loss": 0.5589049458503723,
"step": 88
},
{
"epoch": 0.0869140625,
"grad_norm": 0.6075074076652527,
"learning_rate": 0.00018370951913640826,
"loss": 0.4969009757041931,
"step": 89
},
{
"epoch": 0.087890625,
"grad_norm": 0.6751372814178467,
"learning_rate": 0.00018351324828263003,
"loss": 0.46451041102409363,
"step": 90
},
{
"epoch": 0.0888671875,
"grad_norm": 0.5816373229026794,
"learning_rate": 0.00018331697742885182,
"loss": 1.024427056312561,
"step": 91
},
{
"epoch": 0.08984375,
"grad_norm": 0.6644161939620972,
"learning_rate": 0.00018312070657507362,
"loss": 0.778592586517334,
"step": 92
},
{
"epoch": 0.0908203125,
"grad_norm": 0.652209997177124,
"learning_rate": 0.00018292443572129541,
"loss": 0.8565710783004761,
"step": 93
},
{
"epoch": 0.091796875,
"grad_norm": 0.9109074473381042,
"learning_rate": 0.00018272816486751718,
"loss": 0.6693978309631348,
"step": 94
},
{
"epoch": 0.0927734375,
"grad_norm": 0.5235186219215393,
"learning_rate": 0.00018253189401373895,
"loss": 0.8255172967910767,
"step": 95
},
{
"epoch": 0.09375,
"grad_norm": 0.8362122178077698,
"learning_rate": 0.00018233562315996075,
"loss": 0.5858157873153687,
"step": 96
},
{
"epoch": 0.0947265625,
"grad_norm": 0.6753116846084595,
"learning_rate": 0.00018213935230618254,
"loss": 0.6682421565055847,
"step": 97
},
{
"epoch": 0.095703125,
"grad_norm": 0.5394794940948486,
"learning_rate": 0.00018194308145240434,
"loss": 0.3218158781528473,
"step": 98
},
{
"epoch": 0.0966796875,
"grad_norm": 3.2796010971069336,
"learning_rate": 0.0001817468105986261,
"loss": 0.681085467338562,
"step": 99
},
{
"epoch": 0.09765625,
"grad_norm": 0.38390907645225525,
"learning_rate": 0.0001815505397448479,
"loss": 0.39554187655448914,
"step": 100
},
{
"epoch": 0.0986328125,
"grad_norm": 0.5289499759674072,
"learning_rate": 0.00018135426889106967,
"loss": 1.0264520645141602,
"step": 101
},
{
"epoch": 0.099609375,
"grad_norm": 0.8211148977279663,
"learning_rate": 0.00018115799803729147,
"loss": 0.8588113784790039,
"step": 102
},
{
"epoch": 0.1005859375,
"grad_norm": 0.4771063029766083,
"learning_rate": 0.00018096172718351327,
"loss": 0.7471244931221008,
"step": 103
},
{
"epoch": 0.1015625,
"grad_norm": 0.6326794624328613,
"learning_rate": 0.00018076545632973506,
"loss": 0.6081597805023193,
"step": 104
},
{
"epoch": 0.1025390625,
"grad_norm": 0.7229248285293579,
"learning_rate": 0.00018056918547595683,
"loss": 0.8315082788467407,
"step": 105
},
{
"epoch": 0.103515625,
"grad_norm": 0.6803163290023804,
"learning_rate": 0.00018037291462217863,
"loss": 0.8308911323547363,
"step": 106
},
{
"epoch": 0.1044921875,
"grad_norm": 0.5268850326538086,
"learning_rate": 0.0001801766437684004,
"loss": 0.8480656743049622,
"step": 107
},
{
"epoch": 0.10546875,
"grad_norm": 0.7849289178848267,
"learning_rate": 0.0001799803729146222,
"loss": 0.8200575113296509,
"step": 108
},
{
"epoch": 0.1064453125,
"grad_norm": 0.4259982407093048,
"learning_rate": 0.00017978410206084396,
"loss": 0.44367721676826477,
"step": 109
},
{
"epoch": 0.107421875,
"grad_norm": 0.4788619577884674,
"learning_rate": 0.00017958783120706576,
"loss": 0.6017763018608093,
"step": 110
},
{
"epoch": 0.1083984375,
"grad_norm": 0.34434452652931213,
"learning_rate": 0.00017939156035328755,
"loss": 0.29681769013404846,
"step": 111
},
{
"epoch": 0.109375,
"grad_norm": 1.1506884098052979,
"learning_rate": 0.00017919528949950932,
"loss": 0.6520863771438599,
"step": 112
},
{
"epoch": 0.1103515625,
"grad_norm": 0.8348999619483948,
"learning_rate": 0.00017899901864573112,
"loss": 0.6035414934158325,
"step": 113
},
{
"epoch": 0.111328125,
"grad_norm": 0.5550518035888672,
"learning_rate": 0.00017880274779195289,
"loss": 0.7711564302444458,
"step": 114
},
{
"epoch": 0.1123046875,
"grad_norm": 0.28814634680747986,
"learning_rate": 0.00017860647693817468,
"loss": 0.8325987458229065,
"step": 115
},
{
"epoch": 0.11328125,
"grad_norm": 0.3833630084991455,
"learning_rate": 0.00017841020608439648,
"loss": 0.3345921039581299,
"step": 116
},
{
"epoch": 0.1142578125,
"grad_norm": 0.8784507513046265,
"learning_rate": 0.00017821393523061827,
"loss": 0.4186948239803314,
"step": 117
},
{
"epoch": 0.115234375,
"grad_norm": 0.7263842225074768,
"learning_rate": 0.00017801766437684004,
"loss": 0.5570493936538696,
"step": 118
},
{
"epoch": 0.1162109375,
"grad_norm": 0.6391569972038269,
"learning_rate": 0.00017782139352306184,
"loss": 1.0257431268692017,
"step": 119
},
{
"epoch": 0.1171875,
"grad_norm": 0.6025450229644775,
"learning_rate": 0.0001776251226692836,
"loss": 0.8676729202270508,
"step": 120
},
{
"epoch": 0.1181640625,
"grad_norm": 0.3776579201221466,
"learning_rate": 0.0001774288518155054,
"loss": 0.5870720148086548,
"step": 121
},
{
"epoch": 0.119140625,
"grad_norm": 0.40912336111068726,
"learning_rate": 0.0001772325809617272,
"loss": 0.9210044145584106,
"step": 122
},
{
"epoch": 0.1201171875,
"grad_norm": 0.5036085247993469,
"learning_rate": 0.000177036310107949,
"loss": 0.47378072142601013,
"step": 123
},
{
"epoch": 0.12109375,
"grad_norm": 0.5508134961128235,
"learning_rate": 0.00017684003925417076,
"loss": 0.8295834064483643,
"step": 124
},
{
"epoch": 0.1220703125,
"grad_norm": 0.5522392392158508,
"learning_rate": 0.00017664376840039253,
"loss": 0.793156087398529,
"step": 125
},
{
"epoch": 0.123046875,
"grad_norm": 1.0098820924758911,
"learning_rate": 0.00017644749754661433,
"loss": 0.5780155658721924,
"step": 126
},
{
"epoch": 0.1240234375,
"grad_norm": 0.6178780198097229,
"learning_rate": 0.00017625122669283612,
"loss": 0.5129156708717346,
"step": 127
},
{
"epoch": 0.125,
"grad_norm": 0.6224352121353149,
"learning_rate": 0.00017605495583905792,
"loss": 0.8498928546905518,
"step": 128
},
{
"epoch": 0.1259765625,
"grad_norm": 0.7869983315467834,
"learning_rate": 0.0001758586849852797,
"loss": 0.9180670976638794,
"step": 129
},
{
"epoch": 0.126953125,
"grad_norm": 0.4122680127620697,
"learning_rate": 0.00017566241413150148,
"loss": 0.510919988155365,
"step": 130
},
{
"epoch": 0.1279296875,
"grad_norm": 0.7221843004226685,
"learning_rate": 0.00017546614327772325,
"loss": 0.3977488875389099,
"step": 131
},
{
"epoch": 0.12890625,
"grad_norm": 1.155800461769104,
"learning_rate": 0.00017526987242394505,
"loss": 0.6549078226089478,
"step": 132
},
{
"epoch": 0.1298828125,
"grad_norm": 0.7164724469184875,
"learning_rate": 0.00017507360157016685,
"loss": 0.8306566476821899,
"step": 133
},
{
"epoch": 0.130859375,
"grad_norm": 0.7600284814834595,
"learning_rate": 0.00017487733071638864,
"loss": 0.34278520941734314,
"step": 134
},
{
"epoch": 0.1318359375,
"grad_norm": 0.8636081218719482,
"learning_rate": 0.0001746810598626104,
"loss": 0.8881778717041016,
"step": 135
},
{
"epoch": 0.1328125,
"grad_norm": 1.0904357433319092,
"learning_rate": 0.0001744847890088322,
"loss": 0.4423227310180664,
"step": 136
},
{
"epoch": 0.1337890625,
"grad_norm": 0.5639862418174744,
"learning_rate": 0.00017428851815505397,
"loss": 0.8610935211181641,
"step": 137
},
{
"epoch": 0.134765625,
"grad_norm": 1.05929696559906,
"learning_rate": 0.00017409224730127577,
"loss": 1.1729753017425537,
"step": 138
},
{
"epoch": 0.1357421875,
"grad_norm": 1.0731761455535889,
"learning_rate": 0.00017389597644749757,
"loss": 0.6459341049194336,
"step": 139
},
{
"epoch": 0.13671875,
"grad_norm": 0.7464702725410461,
"learning_rate": 0.00017369970559371934,
"loss": 0.5368601083755493,
"step": 140
},
{
"epoch": 0.1376953125,
"grad_norm": 0.5722304582595825,
"learning_rate": 0.00017350343473994113,
"loss": 0.9642695784568787,
"step": 141
},
{
"epoch": 0.138671875,
"grad_norm": 0.5044945478439331,
"learning_rate": 0.0001733071638861629,
"loss": 0.49555253982543945,
"step": 142
},
{
"epoch": 0.1396484375,
"grad_norm": 0.8069168329238892,
"learning_rate": 0.0001731108930323847,
"loss": 0.8796389698982239,
"step": 143
},
{
"epoch": 0.140625,
"grad_norm": 0.5269959568977356,
"learning_rate": 0.00017291462217860646,
"loss": 0.9928920269012451,
"step": 144
},
{
"epoch": 0.1416015625,
"grad_norm": 0.6606360077857971,
"learning_rate": 0.0001727183513248283,
"loss": 1.0528640747070312,
"step": 145
},
{
"epoch": 0.142578125,
"grad_norm": 0.7145242691040039,
"learning_rate": 0.00017252208047105006,
"loss": 1.1252766847610474,
"step": 146
},
{
"epoch": 0.1435546875,
"grad_norm": 0.5808660984039307,
"learning_rate": 0.00017232580961727185,
"loss": 0.24914072453975677,
"step": 147
},
{
"epoch": 0.14453125,
"grad_norm": 0.8544529676437378,
"learning_rate": 0.00017212953876349362,
"loss": 0.4420434832572937,
"step": 148
},
{
"epoch": 0.1455078125,
"grad_norm": 0.899334728717804,
"learning_rate": 0.00017193326790971542,
"loss": 0.7128512263298035,
"step": 149
},
{
"epoch": 0.146484375,
"grad_norm": 0.36327579617500305,
"learning_rate": 0.00017173699705593719,
"loss": 0.5503419637680054,
"step": 150
},
{
"epoch": 0.1474609375,
"grad_norm": 0.553255021572113,
"learning_rate": 0.000171540726202159,
"loss": 0.5796535015106201,
"step": 151
},
{
"epoch": 0.1484375,
"grad_norm": 0.41036659479141235,
"learning_rate": 0.00017134445534838078,
"loss": 0.8935849666595459,
"step": 152
},
{
"epoch": 0.1494140625,
"grad_norm": 0.3723013997077942,
"learning_rate": 0.00017114818449460257,
"loss": 0.39106485247612,
"step": 153
},
{
"epoch": 0.150390625,
"grad_norm": 0.654262900352478,
"learning_rate": 0.00017095191364082434,
"loss": 1.0176405906677246,
"step": 154
},
{
"epoch": 0.1513671875,
"grad_norm": 0.5707812309265137,
"learning_rate": 0.0001707556427870461,
"loss": 0.6580768823623657,
"step": 155
},
{
"epoch": 0.15234375,
"grad_norm": 0.35879406332969666,
"learning_rate": 0.0001705593719332679,
"loss": 0.4050876200199127,
"step": 156
},
{
"epoch": 0.1533203125,
"grad_norm": 0.5701449513435364,
"learning_rate": 0.0001703631010794897,
"loss": 0.9737375974655151,
"step": 157
},
{
"epoch": 0.154296875,
"grad_norm": 0.4461202919483185,
"learning_rate": 0.0001701668302257115,
"loss": 0.9864733815193176,
"step": 158
},
{
"epoch": 0.1552734375,
"grad_norm": 0.6229621767997742,
"learning_rate": 0.00016997055937193327,
"loss": 0.35883933305740356,
"step": 159
},
{
"epoch": 0.15625,
"grad_norm": 0.5390028357505798,
"learning_rate": 0.00016977428851815506,
"loss": 0.5791765451431274,
"step": 160
},
{
"epoch": 0.1572265625,
"grad_norm": 0.7851611375808716,
"learning_rate": 0.00016957801766437683,
"loss": 0.9032300114631653,
"step": 161
},
{
"epoch": 0.158203125,
"grad_norm": 0.6211395263671875,
"learning_rate": 0.00016938174681059863,
"loss": 0.5069928765296936,
"step": 162
},
{
"epoch": 0.1591796875,
"grad_norm": 0.8290377855300903,
"learning_rate": 0.00016918547595682042,
"loss": 0.8917738795280457,
"step": 163
},
{
"epoch": 0.16015625,
"grad_norm": 0.42707324028015137,
"learning_rate": 0.00016898920510304222,
"loss": 0.606585681438446,
"step": 164
},
{
"epoch": 0.1611328125,
"grad_norm": 0.49472010135650635,
"learning_rate": 0.000168792934249264,
"loss": 1.0100075006484985,
"step": 165
},
{
"epoch": 0.162109375,
"grad_norm": 0.48441267013549805,
"learning_rate": 0.00016859666339548579,
"loss": 0.7145558595657349,
"step": 166
},
{
"epoch": 0.1630859375,
"grad_norm": 0.5181763172149658,
"learning_rate": 0.00016840039254170755,
"loss": 0.8088749647140503,
"step": 167
},
{
"epoch": 0.1640625,
"grad_norm": 0.4702328145503998,
"learning_rate": 0.00016820412168792935,
"loss": 0.5631542801856995,
"step": 168
},
{
"epoch": 0.1650390625,
"grad_norm": 0.35454344749450684,
"learning_rate": 0.00016800785083415115,
"loss": 0.31744396686553955,
"step": 169
},
{
"epoch": 0.166015625,
"grad_norm": 0.5193122029304504,
"learning_rate": 0.00016781157998037291,
"loss": 0.7338438034057617,
"step": 170
},
{
"epoch": 0.1669921875,
"grad_norm": 0.49799400568008423,
"learning_rate": 0.0001676153091265947,
"loss": 0.7910654544830322,
"step": 171
},
{
"epoch": 0.16796875,
"grad_norm": 0.4855571389198303,
"learning_rate": 0.00016741903827281648,
"loss": 0.38415610790252686,
"step": 172
},
{
"epoch": 0.1689453125,
"grad_norm": 0.8796041011810303,
"learning_rate": 0.00016722276741903828,
"loss": 0.6042807102203369,
"step": 173
},
{
"epoch": 0.169921875,
"grad_norm": 0.6005135774612427,
"learning_rate": 0.00016702649656526007,
"loss": 0.6617047786712646,
"step": 174
},
{
"epoch": 0.1708984375,
"grad_norm": 0.6359293460845947,
"learning_rate": 0.00016683022571148187,
"loss": 0.5227914452552795,
"step": 175
},
{
"epoch": 0.171875,
"grad_norm": 0.46007266640663147,
"learning_rate": 0.00016663395485770364,
"loss": 0.6881235837936401,
"step": 176
},
{
"epoch": 0.1728515625,
"grad_norm": 0.37411797046661377,
"learning_rate": 0.00016643768400392543,
"loss": 0.7384200096130371,
"step": 177
},
{
"epoch": 0.173828125,
"grad_norm": 0.4021860659122467,
"learning_rate": 0.0001662414131501472,
"loss": 1.1738500595092773,
"step": 178
},
{
"epoch": 0.1748046875,
"grad_norm": 0.3674755096435547,
"learning_rate": 0.000166045142296369,
"loss": 0.37539663910865784,
"step": 179
},
{
"epoch": 0.17578125,
"grad_norm": 0.5051441788673401,
"learning_rate": 0.0001658488714425908,
"loss": 0.6273016333580017,
"step": 180
},
{
"epoch": 0.1767578125,
"grad_norm": 0.6807597279548645,
"learning_rate": 0.0001656526005888126,
"loss": 0.4195510447025299,
"step": 181
},
{
"epoch": 0.177734375,
"grad_norm": 0.3345419466495514,
"learning_rate": 0.00016545632973503436,
"loss": 0.8546851873397827,
"step": 182
},
{
"epoch": 0.1787109375,
"grad_norm": 0.33821800351142883,
"learning_rate": 0.00016526005888125615,
"loss": 0.522655725479126,
"step": 183
},
{
"epoch": 0.1796875,
"grad_norm": 0.3145562708377838,
"learning_rate": 0.00016506378802747792,
"loss": 0.3799128532409668,
"step": 184
},
{
"epoch": 0.1806640625,
"grad_norm": 0.44908636808395386,
"learning_rate": 0.0001648675171736997,
"loss": 0.6263326406478882,
"step": 185
},
{
"epoch": 0.181640625,
"grad_norm": 0.7736865282058716,
"learning_rate": 0.00016467124631992151,
"loss": 0.3385460078716278,
"step": 186
},
{
"epoch": 0.1826171875,
"grad_norm": 0.5184527635574341,
"learning_rate": 0.00016447497546614328,
"loss": 0.7980771064758301,
"step": 187
},
{
"epoch": 0.18359375,
"grad_norm": 0.41774502396583557,
"learning_rate": 0.00016427870461236508,
"loss": 0.7745299339294434,
"step": 188
},
{
"epoch": 0.1845703125,
"grad_norm": 0.43824154138565063,
"learning_rate": 0.00016408243375858685,
"loss": 0.9190135598182678,
"step": 189
},
{
"epoch": 0.185546875,
"grad_norm": 0.4037880301475525,
"learning_rate": 0.00016388616290480864,
"loss": 0.5671911239624023,
"step": 190
},
{
"epoch": 0.1865234375,
"grad_norm": 0.3757816255092621,
"learning_rate": 0.0001636898920510304,
"loss": 0.39916592836380005,
"step": 191
},
{
"epoch": 0.1875,
"grad_norm": 0.4747844636440277,
"learning_rate": 0.00016349362119725224,
"loss": 0.9217299818992615,
"step": 192
},
{
"epoch": 0.1884765625,
"grad_norm": 0.42307209968566895,
"learning_rate": 0.000163297350343474,
"loss": 0.8852982521057129,
"step": 193
},
{
"epoch": 0.189453125,
"grad_norm": 0.47294488549232483,
"learning_rate": 0.0001631010794896958,
"loss": 1.0635476112365723,
"step": 194
},
{
"epoch": 0.1904296875,
"grad_norm": 0.3519342243671417,
"learning_rate": 0.00016290480863591757,
"loss": 0.33460623025894165,
"step": 195
},
{
"epoch": 0.19140625,
"grad_norm": 0.418151319026947,
"learning_rate": 0.00016270853778213936,
"loss": 0.8776851296424866,
"step": 196
},
{
"epoch": 0.1923828125,
"grad_norm": 0.3954712152481079,
"learning_rate": 0.00016251226692836113,
"loss": 0.9358173608779907,
"step": 197
},
{
"epoch": 0.193359375,
"grad_norm": 0.35646897554397583,
"learning_rate": 0.00016231599607458293,
"loss": 0.43795716762542725,
"step": 198
},
{
"epoch": 0.1943359375,
"grad_norm": 0.41675063967704773,
"learning_rate": 0.00016211972522080473,
"loss": 0.8348654508590698,
"step": 199
},
{
"epoch": 0.1953125,
"grad_norm": 0.5800544023513794,
"learning_rate": 0.0001619234543670265,
"loss": 0.5580507516860962,
"step": 200
},
{
"epoch": 0.1962890625,
"grad_norm": 0.44925832748413086,
"learning_rate": 0.0001617271835132483,
"loss": 0.47444453835487366,
"step": 201
},
{
"epoch": 0.197265625,
"grad_norm": 0.48447439074516296,
"learning_rate": 0.00016153091265947006,
"loss": 0.5927308797836304,
"step": 202
},
{
"epoch": 0.1982421875,
"grad_norm": 0.37814846634864807,
"learning_rate": 0.00016133464180569186,
"loss": 0.8504298329353333,
"step": 203
},
{
"epoch": 0.19921875,
"grad_norm": 0.4171026051044464,
"learning_rate": 0.00016113837095191365,
"loss": 1.0796414613723755,
"step": 204
},
{
"epoch": 0.2001953125,
"grad_norm": 0.4570372402667999,
"learning_rate": 0.00016094210009813545,
"loss": 0.6229358315467834,
"step": 205
},
{
"epoch": 0.201171875,
"grad_norm": 0.6294324994087219,
"learning_rate": 0.00016074582924435722,
"loss": 0.8749011158943176,
"step": 206
},
{
"epoch": 0.2021484375,
"grad_norm": 0.42371129989624023,
"learning_rate": 0.000160549558390579,
"loss": 0.9866290092468262,
"step": 207
},
{
"epoch": 0.203125,
"grad_norm": 0.5329370498657227,
"learning_rate": 0.00016035328753680078,
"loss": 0.7568405270576477,
"step": 208
},
{
"epoch": 0.2041015625,
"grad_norm": 0.37205901741981506,
"learning_rate": 0.00016015701668302258,
"loss": 0.7115534543991089,
"step": 209
},
{
"epoch": 0.205078125,
"grad_norm": 0.4536517262458801,
"learning_rate": 0.00015996074582924437,
"loss": 0.5152509808540344,
"step": 210
},
{
"epoch": 0.2060546875,
"grad_norm": 2.319321393966675,
"learning_rate": 0.00015976447497546617,
"loss": 0.2915653586387634,
"step": 211
},
{
"epoch": 0.20703125,
"grad_norm": 0.7047526836395264,
"learning_rate": 0.00015956820412168794,
"loss": 0.3070187568664551,
"step": 212
},
{
"epoch": 0.2080078125,
"grad_norm": 0.6068500280380249,
"learning_rate": 0.0001593719332679097,
"loss": 0.8103427290916443,
"step": 213
},
{
"epoch": 0.208984375,
"grad_norm": 0.3588794469833374,
"learning_rate": 0.0001591756624141315,
"loss": 0.4655485153198242,
"step": 214
},
{
"epoch": 0.2099609375,
"grad_norm": 0.6561040878295898,
"learning_rate": 0.0001589793915603533,
"loss": 0.5353362560272217,
"step": 215
},
{
"epoch": 0.2109375,
"grad_norm": 0.6485084891319275,
"learning_rate": 0.0001587831207065751,
"loss": 0.8601769804954529,
"step": 216
},
{
"epoch": 0.2119140625,
"grad_norm": 0.4718208909034729,
"learning_rate": 0.00015858684985279686,
"loss": 0.6897189617156982,
"step": 217
},
{
"epoch": 0.212890625,
"grad_norm": 0.7453560829162598,
"learning_rate": 0.00015839057899901866,
"loss": 1.0387171506881714,
"step": 218
},
{
"epoch": 0.2138671875,
"grad_norm": 0.41157087683677673,
"learning_rate": 0.00015819430814524043,
"loss": 0.4910873770713806,
"step": 219
},
{
"epoch": 0.21484375,
"grad_norm": 0.4198990762233734,
"learning_rate": 0.00015799803729146222,
"loss": 0.588080108165741,
"step": 220
},
{
"epoch": 0.2158203125,
"grad_norm": 0.7791650295257568,
"learning_rate": 0.00015780176643768402,
"loss": 0.754984974861145,
"step": 221
},
{
"epoch": 0.216796875,
"grad_norm": 1.4430909156799316,
"learning_rate": 0.00015760549558390581,
"loss": 0.5313946008682251,
"step": 222
},
{
"epoch": 0.2177734375,
"grad_norm": 0.4399142861366272,
"learning_rate": 0.00015740922473012758,
"loss": 0.523280918598175,
"step": 223
},
{
"epoch": 0.21875,
"grad_norm": 0.4177611470222473,
"learning_rate": 0.00015721295387634938,
"loss": 0.7598159313201904,
"step": 224
},
{
"epoch": 0.2197265625,
"grad_norm": 0.4408816397190094,
"learning_rate": 0.00015701668302257115,
"loss": 0.8131666779518127,
"step": 225
},
{
"epoch": 0.220703125,
"grad_norm": 0.4228694438934326,
"learning_rate": 0.00015682041216879294,
"loss": 1.0456180572509766,
"step": 226
},
{
"epoch": 0.2216796875,
"grad_norm": 0.6313449144363403,
"learning_rate": 0.00015662414131501474,
"loss": 0.496864914894104,
"step": 227
},
{
"epoch": 0.22265625,
"grad_norm": 0.48103493452072144,
"learning_rate": 0.0001564278704612365,
"loss": 0.5967347621917725,
"step": 228
},
{
"epoch": 0.2236328125,
"grad_norm": 0.3548172116279602,
"learning_rate": 0.0001562315996074583,
"loss": 0.3325611650943756,
"step": 229
},
{
"epoch": 0.224609375,
"grad_norm": 0.41543763875961304,
"learning_rate": 0.00015603532875368007,
"loss": 0.9223452806472778,
"step": 230
},
{
"epoch": 0.2255859375,
"grad_norm": 0.6072061061859131,
"learning_rate": 0.00015583905789990187,
"loss": 0.2860236167907715,
"step": 231
},
{
"epoch": 0.2265625,
"grad_norm": 0.3232869505882263,
"learning_rate": 0.00015564278704612364,
"loss": 0.7308738231658936,
"step": 232
},
{
"epoch": 0.2275390625,
"grad_norm": 0.5271327495574951,
"learning_rate": 0.00015544651619234546,
"loss": 1.0354498624801636,
"step": 233
},
{
"epoch": 0.228515625,
"grad_norm": 0.626105546951294,
"learning_rate": 0.00015525024533856723,
"loss": 1.0841856002807617,
"step": 234
},
{
"epoch": 0.2294921875,
"grad_norm": 0.5628311634063721,
"learning_rate": 0.00015505397448478903,
"loss": 0.8868529200553894,
"step": 235
},
{
"epoch": 0.23046875,
"grad_norm": 0.4290577471256256,
"learning_rate": 0.0001548577036310108,
"loss": 0.5887943506240845,
"step": 236
},
{
"epoch": 0.2314453125,
"grad_norm": 0.743786096572876,
"learning_rate": 0.0001546614327772326,
"loss": 0.8314348459243774,
"step": 237
},
{
"epoch": 0.232421875,
"grad_norm": 0.34498658776283264,
"learning_rate": 0.00015446516192345436,
"loss": 0.6171099543571472,
"step": 238
},
{
"epoch": 0.2333984375,
"grad_norm": 0.7894997596740723,
"learning_rate": 0.00015426889106967616,
"loss": 0.614283561706543,
"step": 239
},
{
"epoch": 0.234375,
"grad_norm": 0.4631381034851074,
"learning_rate": 0.00015407262021589795,
"loss": 0.6744101047515869,
"step": 240
},
{
"epoch": 0.2353515625,
"grad_norm": 0.44523295760154724,
"learning_rate": 0.00015387634936211975,
"loss": 0.7094103097915649,
"step": 241
},
{
"epoch": 0.236328125,
"grad_norm": 0.7059242725372314,
"learning_rate": 0.00015368007850834152,
"loss": 0.6856737732887268,
"step": 242
},
{
"epoch": 0.2373046875,
"grad_norm": 1.0360506772994995,
"learning_rate": 0.00015348380765456329,
"loss": 1.101341962814331,
"step": 243
},
{
"epoch": 0.23828125,
"grad_norm": 0.6630859375,
"learning_rate": 0.00015328753680078508,
"loss": 0.8815068006515503,
"step": 244
},
{
"epoch": 0.2392578125,
"grad_norm": 0.4162105321884155,
"learning_rate": 0.00015309126594700688,
"loss": 0.39801689982414246,
"step": 245
},
{
"epoch": 0.240234375,
"grad_norm": 0.5786510109901428,
"learning_rate": 0.00015289499509322867,
"loss": 0.5399383902549744,
"step": 246
},
{
"epoch": 0.2412109375,
"grad_norm": 0.5430185794830322,
"learning_rate": 0.00015269872423945044,
"loss": 0.5432325601577759,
"step": 247
},
{
"epoch": 0.2421875,
"grad_norm": 0.3750382959842682,
"learning_rate": 0.00015250245338567224,
"loss": 0.49265092611312866,
"step": 248
},
{
"epoch": 0.2431640625,
"grad_norm": 0.5081580877304077,
"learning_rate": 0.000152306182531894,
"loss": 0.8720104098320007,
"step": 249
},
{
"epoch": 0.244140625,
"grad_norm": 0.5619673728942871,
"learning_rate": 0.0001521099116781158,
"loss": 0.4022529125213623,
"step": 250
},
{
"epoch": 0.2451171875,
"grad_norm": 0.3996225893497467,
"learning_rate": 0.0001519136408243376,
"loss": 0.443879097700119,
"step": 251
},
{
"epoch": 0.24609375,
"grad_norm": 0.4688915014266968,
"learning_rate": 0.0001517173699705594,
"loss": 0.47562721371650696,
"step": 252
},
{
"epoch": 0.2470703125,
"grad_norm": 1.7595641613006592,
"learning_rate": 0.00015152109911678116,
"loss": 0.5174474716186523,
"step": 253
},
{
"epoch": 0.248046875,
"grad_norm": 0.47813650965690613,
"learning_rate": 0.00015132482826300296,
"loss": 0.8565359711647034,
"step": 254
},
{
"epoch": 0.2490234375,
"grad_norm": 0.49612802267074585,
"learning_rate": 0.00015112855740922473,
"loss": 0.4736977815628052,
"step": 255
},
{
"epoch": 0.25,
"grad_norm": 0.4370449483394623,
"learning_rate": 0.00015093228655544652,
"loss": 0.7566809058189392,
"step": 256
},
{
"epoch": 0.2509765625,
"grad_norm": 0.43916988372802734,
"learning_rate": 0.00015073601570166832,
"loss": 0.8396226763725281,
"step": 257
},
{
"epoch": 0.251953125,
"grad_norm": 0.7745673060417175,
"learning_rate": 0.0001505397448478901,
"loss": 0.3085971772670746,
"step": 258
},
{
"epoch": 0.2529296875,
"grad_norm": 0.4097643792629242,
"learning_rate": 0.00015034347399411188,
"loss": 0.2730502188205719,
"step": 259
},
{
"epoch": 0.25390625,
"grad_norm": 0.4131183624267578,
"learning_rate": 0.00015014720314033365,
"loss": 0.5422588586807251,
"step": 260
},
{
"epoch": 0.2548828125,
"grad_norm": 0.469498872756958,
"learning_rate": 0.00014995093228655545,
"loss": 0.6572885513305664,
"step": 261
},
{
"epoch": 0.255859375,
"grad_norm": 0.3662133514881134,
"learning_rate": 0.00014975466143277725,
"loss": 0.9272421598434448,
"step": 262
},
{
"epoch": 0.2568359375,
"grad_norm": 0.38194844126701355,
"learning_rate": 0.00014955839057899904,
"loss": 0.6010634303092957,
"step": 263
},
{
"epoch": 0.2578125,
"grad_norm": 0.3645467758178711,
"learning_rate": 0.0001493621197252208,
"loss": 0.9131143093109131,
"step": 264
},
{
"epoch": 0.2587890625,
"grad_norm": 0.3304290771484375,
"learning_rate": 0.0001491658488714426,
"loss": 0.4593530297279358,
"step": 265
},
{
"epoch": 0.259765625,
"grad_norm": 0.7529020309448242,
"learning_rate": 0.00014896957801766437,
"loss": 0.5219628810882568,
"step": 266
},
{
"epoch": 0.2607421875,
"grad_norm": 0.4974548816680908,
"learning_rate": 0.00014877330716388617,
"loss": 0.7617945075035095,
"step": 267
},
{
"epoch": 0.26171875,
"grad_norm": 0.28884655237197876,
"learning_rate": 0.00014857703631010797,
"loss": 0.4288986921310425,
"step": 268
},
{
"epoch": 0.2626953125,
"grad_norm": 0.5195730328559875,
"learning_rate": 0.00014838076545632976,
"loss": 0.830593466758728,
"step": 269
},
{
"epoch": 0.263671875,
"grad_norm": 0.40689924359321594,
"learning_rate": 0.00014818449460255153,
"loss": 0.7528857588768005,
"step": 270
},
{
"epoch": 0.2646484375,
"grad_norm": 0.33955928683280945,
"learning_rate": 0.00014798822374877333,
"loss": 0.5274187326431274,
"step": 271
},
{
"epoch": 0.265625,
"grad_norm": 1.0572726726531982,
"learning_rate": 0.0001477919528949951,
"loss": 0.7389089465141296,
"step": 272
},
{
"epoch": 0.2666015625,
"grad_norm": 0.5191348791122437,
"learning_rate": 0.00014759568204121686,
"loss": 0.4842514991760254,
"step": 273
},
{
"epoch": 0.267578125,
"grad_norm": 0.3779315650463104,
"learning_rate": 0.00014739941118743866,
"loss": 0.7406666278839111,
"step": 274
},
{
"epoch": 0.2685546875,
"grad_norm": 0.6065999865531921,
"learning_rate": 0.00014720314033366046,
"loss": 0.6771246790885925,
"step": 275
},
{
"epoch": 0.26953125,
"grad_norm": 0.537529468536377,
"learning_rate": 0.00014700686947988225,
"loss": 0.861257791519165,
"step": 276
},
{
"epoch": 0.2705078125,
"grad_norm": 0.3961732089519501,
"learning_rate": 0.00014681059862610402,
"loss": 0.9672999382019043,
"step": 277
},
{
"epoch": 0.271484375,
"grad_norm": 0.45974740386009216,
"learning_rate": 0.00014661432777232582,
"loss": 0.5789016485214233,
"step": 278
},
{
"epoch": 0.2724609375,
"grad_norm": 0.7211292386054993,
"learning_rate": 0.00014641805691854759,
"loss": 0.867314338684082,
"step": 279
},
{
"epoch": 0.2734375,
"grad_norm": 0.6938930749893188,
"learning_rate": 0.00014622178606476938,
"loss": 0.4570122957229614,
"step": 280
},
{
"epoch": 0.2744140625,
"grad_norm": 0.5093329548835754,
"learning_rate": 0.00014602551521099118,
"loss": 0.9487482309341431,
"step": 281
},
{
"epoch": 0.275390625,
"grad_norm": 0.4403358995914459,
"learning_rate": 0.00014582924435721297,
"loss": 0.5330759286880493,
"step": 282
},
{
"epoch": 0.2763671875,
"grad_norm": 0.5305198431015015,
"learning_rate": 0.00014563297350343474,
"loss": 0.8727459907531738,
"step": 283
},
{
"epoch": 0.27734375,
"grad_norm": 0.49577099084854126,
"learning_rate": 0.00014543670264965654,
"loss": 0.6166709065437317,
"step": 284
},
{
"epoch": 0.2783203125,
"grad_norm": 0.4856763780117035,
"learning_rate": 0.0001452404317958783,
"loss": 0.920722484588623,
"step": 285
},
{
"epoch": 0.279296875,
"grad_norm": 0.3397112786769867,
"learning_rate": 0.0001450441609421001,
"loss": 1.001542329788208,
"step": 286
},
{
"epoch": 0.2802734375,
"grad_norm": 0.591691792011261,
"learning_rate": 0.0001448478900883219,
"loss": 0.4898494780063629,
"step": 287
},
{
"epoch": 0.28125,
"grad_norm": 0.45293164253234863,
"learning_rate": 0.00014465161923454367,
"loss": 0.4958389401435852,
"step": 288
},
{
"epoch": 0.2822265625,
"grad_norm": 0.38414305448532104,
"learning_rate": 0.00014445534838076546,
"loss": 0.3971215784549713,
"step": 289
},
{
"epoch": 0.283203125,
"grad_norm": 0.5568608045578003,
"learning_rate": 0.00014425907752698723,
"loss": 0.7953230142593384,
"step": 290
},
{
"epoch": 0.2841796875,
"grad_norm": 0.3680984377861023,
"learning_rate": 0.00014406280667320903,
"loss": 0.703729510307312,
"step": 291
},
{
"epoch": 0.28515625,
"grad_norm": 0.4263870120048523,
"learning_rate": 0.00014386653581943082,
"loss": 0.7433100938796997,
"step": 292
},
{
"epoch": 0.2861328125,
"grad_norm": 1.3262213468551636,
"learning_rate": 0.00014367026496565262,
"loss": 0.8011248111724854,
"step": 293
},
{
"epoch": 0.287109375,
"grad_norm": 0.44766104221343994,
"learning_rate": 0.0001434739941118744,
"loss": 0.6682827472686768,
"step": 294
},
{
"epoch": 0.2880859375,
"grad_norm": 0.7399169206619263,
"learning_rate": 0.00014327772325809619,
"loss": 0.8356127142906189,
"step": 295
},
{
"epoch": 0.2890625,
"grad_norm": 0.3582242727279663,
"learning_rate": 0.00014308145240431795,
"loss": 0.7127545475959778,
"step": 296
},
{
"epoch": 0.2900390625,
"grad_norm": 0.5251145958900452,
"learning_rate": 0.00014288518155053975,
"loss": 0.7467620968818665,
"step": 297
},
{
"epoch": 0.291015625,
"grad_norm": 0.639377772808075,
"learning_rate": 0.00014268891069676155,
"loss": 0.434887170791626,
"step": 298
},
{
"epoch": 0.2919921875,
"grad_norm": 0.5007404685020447,
"learning_rate": 0.00014249263984298334,
"loss": 1.028229832649231,
"step": 299
},
{
"epoch": 0.29296875,
"grad_norm": 0.41101035475730896,
"learning_rate": 0.0001422963689892051,
"loss": 0.8766242265701294,
"step": 300
},
{
"epoch": 0.2939453125,
"grad_norm": 0.3938690721988678,
"learning_rate": 0.0001421000981354269,
"loss": 0.7176960706710815,
"step": 301
},
{
"epoch": 0.294921875,
"grad_norm": 0.5939344763755798,
"learning_rate": 0.00014190382728164868,
"loss": 0.6655953526496887,
"step": 302
},
{
"epoch": 0.2958984375,
"grad_norm": 0.47224998474121094,
"learning_rate": 0.00014170755642787047,
"loss": 0.9155608415603638,
"step": 303
},
{
"epoch": 0.296875,
"grad_norm": 0.41344454884529114,
"learning_rate": 0.00014151128557409227,
"loss": 0.6017557382583618,
"step": 304
},
{
"epoch": 0.2978515625,
"grad_norm": 0.514320969581604,
"learning_rate": 0.00014131501472031404,
"loss": 0.6184566617012024,
"step": 305
},
{
"epoch": 0.298828125,
"grad_norm": 0.5005887150764465,
"learning_rate": 0.00014111874386653583,
"loss": 0.6652892231941223,
"step": 306
},
{
"epoch": 0.2998046875,
"grad_norm": 0.5872619152069092,
"learning_rate": 0.0001409224730127576,
"loss": 0.8618959784507751,
"step": 307
},
{
"epoch": 0.30078125,
"grad_norm": 0.5114542245864868,
"learning_rate": 0.0001407262021589794,
"loss": 0.6637990474700928,
"step": 308
},
{
"epoch": 0.3017578125,
"grad_norm": 1.141750693321228,
"learning_rate": 0.00014052993130520117,
"loss": 0.6234999299049377,
"step": 309
},
{
"epoch": 0.302734375,
"grad_norm": 0.4786873459815979,
"learning_rate": 0.000140333660451423,
"loss": 0.9601540565490723,
"step": 310
},
{
"epoch": 0.3037109375,
"grad_norm": 0.6048462390899658,
"learning_rate": 0.00014013738959764476,
"loss": 0.5895652770996094,
"step": 311
},
{
"epoch": 0.3046875,
"grad_norm": 0.7435188889503479,
"learning_rate": 0.00013994111874386655,
"loss": 1.196149468421936,
"step": 312
},
{
"epoch": 0.3056640625,
"grad_norm": 0.7936303019523621,
"learning_rate": 0.00013974484789008832,
"loss": 0.6073983907699585,
"step": 313
},
{
"epoch": 0.306640625,
"grad_norm": 0.5199156403541565,
"learning_rate": 0.00013954857703631012,
"loss": 0.2734944224357605,
"step": 314
},
{
"epoch": 0.3076171875,
"grad_norm": 0.38845276832580566,
"learning_rate": 0.0001393523061825319,
"loss": 0.604506254196167,
"step": 315
},
{
"epoch": 0.30859375,
"grad_norm": 0.6925122737884521,
"learning_rate": 0.0001391560353287537,
"loss": 1.0446012020111084,
"step": 316
},
{
"epoch": 0.3095703125,
"grad_norm": 0.4950433075428009,
"learning_rate": 0.00013895976447497548,
"loss": 1.027349591255188,
"step": 317
},
{
"epoch": 0.310546875,
"grad_norm": 0.36179935932159424,
"learning_rate": 0.00013876349362119725,
"loss": 0.6760075688362122,
"step": 318
},
{
"epoch": 0.3115234375,
"grad_norm": 0.3730153739452362,
"learning_rate": 0.00013856722276741904,
"loss": 0.47779884934425354,
"step": 319
},
{
"epoch": 0.3125,
"grad_norm": 0.6181739568710327,
"learning_rate": 0.0001383709519136408,
"loss": 0.4747524857521057,
"step": 320
},
{
"epoch": 0.3134765625,
"grad_norm": 0.8233240246772766,
"learning_rate": 0.0001381746810598626,
"loss": 0.490276575088501,
"step": 321
},
{
"epoch": 0.314453125,
"grad_norm": 0.6492604613304138,
"learning_rate": 0.0001379784102060844,
"loss": 0.44847172498703003,
"step": 322
},
{
"epoch": 0.3154296875,
"grad_norm": 0.5506369471549988,
"learning_rate": 0.0001377821393523062,
"loss": 0.47955968976020813,
"step": 323
},
{
"epoch": 0.31640625,
"grad_norm": 0.4187554717063904,
"learning_rate": 0.00013758586849852797,
"loss": 0.6466250419616699,
"step": 324
},
{
"epoch": 0.3173828125,
"grad_norm": 0.3976380527019501,
"learning_rate": 0.00013738959764474977,
"loss": 0.756473183631897,
"step": 325
},
{
"epoch": 0.318359375,
"grad_norm": 0.6089552044868469,
"learning_rate": 0.00013719332679097153,
"loss": 0.9309840202331543,
"step": 326
},
{
"epoch": 0.3193359375,
"grad_norm": 0.31628501415252686,
"learning_rate": 0.00013699705593719333,
"loss": 0.7739764451980591,
"step": 327
},
{
"epoch": 0.3203125,
"grad_norm": 0.6984357237815857,
"learning_rate": 0.00013680078508341513,
"loss": 1.0047030448913574,
"step": 328
},
{
"epoch": 0.3212890625,
"grad_norm": 0.42705219984054565,
"learning_rate": 0.00013660451422963692,
"loss": 0.5215034484863281,
"step": 329
},
{
"epoch": 0.322265625,
"grad_norm": 0.3548984229564667,
"learning_rate": 0.0001364082433758587,
"loss": 0.777184009552002,
"step": 330
},
{
"epoch": 0.3232421875,
"grad_norm": 0.6042805314064026,
"learning_rate": 0.00013621197252208046,
"loss": 0.469806432723999,
"step": 331
},
{
"epoch": 0.32421875,
"grad_norm": 0.43482446670532227,
"learning_rate": 0.00013601570166830226,
"loss": 0.8123322129249573,
"step": 332
},
{
"epoch": 0.3251953125,
"grad_norm": 0.4851783812046051,
"learning_rate": 0.00013581943081452405,
"loss": 1.1560527086257935,
"step": 333
},
{
"epoch": 0.326171875,
"grad_norm": 0.681423008441925,
"learning_rate": 0.00013562315996074585,
"loss": 0.5681013464927673,
"step": 334
},
{
"epoch": 0.3271484375,
"grad_norm": 0.43838411569595337,
"learning_rate": 0.00013542688910696762,
"loss": 0.8758999109268188,
"step": 335
},
{
"epoch": 0.328125,
"grad_norm": 0.5508302450180054,
"learning_rate": 0.0001352306182531894,
"loss": 0.7725740671157837,
"step": 336
},
{
"epoch": 0.3291015625,
"grad_norm": 0.2603519856929779,
"learning_rate": 0.00013503434739941118,
"loss": 0.357033908367157,
"step": 337
},
{
"epoch": 0.330078125,
"grad_norm": 0.38098394870758057,
"learning_rate": 0.00013483807654563298,
"loss": 0.41752922534942627,
"step": 338
},
{
"epoch": 0.3310546875,
"grad_norm": 0.5308575630187988,
"learning_rate": 0.00013464180569185477,
"loss": 0.6187021732330322,
"step": 339
},
{
"epoch": 0.33203125,
"grad_norm": 0.4033392369747162,
"learning_rate": 0.00013444553483807657,
"loss": 0.9481551647186279,
"step": 340
},
{
"epoch": 0.3330078125,
"grad_norm": 0.3999135494232178,
"learning_rate": 0.00013424926398429834,
"loss": 0.6853100657463074,
"step": 341
},
{
"epoch": 0.333984375,
"grad_norm": 0.4521353840827942,
"learning_rate": 0.00013405299313052013,
"loss": 1.0335659980773926,
"step": 342
},
{
"epoch": 0.3349609375,
"grad_norm": 0.3538281321525574,
"learning_rate": 0.0001338567222767419,
"loss": 0.821506142616272,
"step": 343
},
{
"epoch": 0.3359375,
"grad_norm": 0.49575889110565186,
"learning_rate": 0.0001336604514229637,
"loss": 0.6124354004859924,
"step": 344
},
{
"epoch": 0.3369140625,
"grad_norm": 0.37985700368881226,
"learning_rate": 0.0001334641805691855,
"loss": 0.6803320646286011,
"step": 345
},
{
"epoch": 0.337890625,
"grad_norm": 0.3533600866794586,
"learning_rate": 0.00013326790971540726,
"loss": 0.7260403037071228,
"step": 346
},
{
"epoch": 0.3388671875,
"grad_norm": 0.49213504791259766,
"learning_rate": 0.00013307163886162906,
"loss": 0.9051091074943542,
"step": 347
},
{
"epoch": 0.33984375,
"grad_norm": 0.37704166769981384,
"learning_rate": 0.00013287536800785083,
"loss": 0.4471222460269928,
"step": 348
},
{
"epoch": 0.3408203125,
"grad_norm": 0.4309573471546173,
"learning_rate": 0.00013267909715407262,
"loss": 0.749025285243988,
"step": 349
},
{
"epoch": 0.341796875,
"grad_norm": 0.7491689920425415,
"learning_rate": 0.0001324828263002944,
"loss": 1.1318167448043823,
"step": 350
},
{
"epoch": 0.3427734375,
"grad_norm": 0.3965498208999634,
"learning_rate": 0.00013228655544651622,
"loss": 0.8451839685440063,
"step": 351
},
{
"epoch": 0.34375,
"grad_norm": 0.4446418285369873,
"learning_rate": 0.00013209028459273798,
"loss": 0.7875360250473022,
"step": 352
},
{
"epoch": 0.3447265625,
"grad_norm": 0.3396705985069275,
"learning_rate": 0.00013189401373895978,
"loss": 0.8446518182754517,
"step": 353
},
{
"epoch": 0.345703125,
"grad_norm": 0.3436250388622284,
"learning_rate": 0.00013169774288518155,
"loss": 0.8995112180709839,
"step": 354
},
{
"epoch": 0.3466796875,
"grad_norm": 0.33643823862075806,
"learning_rate": 0.00013150147203140334,
"loss": 0.6253601312637329,
"step": 355
},
{
"epoch": 0.34765625,
"grad_norm": 0.39978718757629395,
"learning_rate": 0.0001313052011776251,
"loss": 0.31882500648498535,
"step": 356
},
{
"epoch": 0.3486328125,
"grad_norm": 0.3054925799369812,
"learning_rate": 0.00013110893032384694,
"loss": 0.3698769807815552,
"step": 357
},
{
"epoch": 0.349609375,
"grad_norm": 0.3789948523044586,
"learning_rate": 0.0001309126594700687,
"loss": 0.9039162397384644,
"step": 358
},
{
"epoch": 0.3505859375,
"grad_norm": 0.4192582964897156,
"learning_rate": 0.0001307163886162905,
"loss": 0.7852678298950195,
"step": 359
},
{
"epoch": 0.3515625,
"grad_norm": 0.5130710601806641,
"learning_rate": 0.00013052011776251227,
"loss": 0.7745686769485474,
"step": 360
},
{
"epoch": 0.3525390625,
"grad_norm": 0.39334234595298767,
"learning_rate": 0.00013032384690873404,
"loss": 0.7644802331924438,
"step": 361
},
{
"epoch": 0.353515625,
"grad_norm": 0.6141180992126465,
"learning_rate": 0.00013012757605495583,
"loss": 0.6028044819831848,
"step": 362
},
{
"epoch": 0.3544921875,
"grad_norm": 0.33263200521469116,
"learning_rate": 0.00012993130520117763,
"loss": 0.6908546090126038,
"step": 363
},
{
"epoch": 0.35546875,
"grad_norm": 0.3901807367801666,
"learning_rate": 0.00012973503434739943,
"loss": 0.8896909952163696,
"step": 364
},
{
"epoch": 0.3564453125,
"grad_norm": 0.3889808654785156,
"learning_rate": 0.0001295387634936212,
"loss": 0.622492790222168,
"step": 365
},
{
"epoch": 0.357421875,
"grad_norm": 0.41004979610443115,
"learning_rate": 0.000129342492639843,
"loss": 0.6293104887008667,
"step": 366
},
{
"epoch": 0.3583984375,
"grad_norm": 0.32929369807243347,
"learning_rate": 0.00012914622178606476,
"loss": 0.7049382925033569,
"step": 367
},
{
"epoch": 0.359375,
"grad_norm": 0.5189999341964722,
"learning_rate": 0.00012894995093228656,
"loss": 0.9230547547340393,
"step": 368
},
{
"epoch": 0.3603515625,
"grad_norm": 0.290991872549057,
"learning_rate": 0.00012875368007850835,
"loss": 0.5716772079467773,
"step": 369
},
{
"epoch": 0.361328125,
"grad_norm": 0.3976893126964569,
"learning_rate": 0.00012855740922473015,
"loss": 0.4593455493450165,
"step": 370
},
{
"epoch": 0.3623046875,
"grad_norm": 0.38385459780693054,
"learning_rate": 0.00012836113837095192,
"loss": 0.4766542315483093,
"step": 371
},
{
"epoch": 0.36328125,
"grad_norm": 0.45652449131011963,
"learning_rate": 0.0001281648675171737,
"loss": 0.9292062520980835,
"step": 372
},
{
"epoch": 0.3642578125,
"grad_norm": 0.384463906288147,
"learning_rate": 0.00012796859666339548,
"loss": 0.7896109223365784,
"step": 373
},
{
"epoch": 0.365234375,
"grad_norm": 0.43412724137306213,
"learning_rate": 0.00012777232580961728,
"loss": 0.6185650825500488,
"step": 374
},
{
"epoch": 0.3662109375,
"grad_norm": 0.4574507772922516,
"learning_rate": 0.00012757605495583907,
"loss": 0.5614027380943298,
"step": 375
},
{
"epoch": 0.3671875,
"grad_norm": 0.2921536862850189,
"learning_rate": 0.00012737978410206084,
"loss": 0.26786333322525024,
"step": 376
},
{
"epoch": 0.3681640625,
"grad_norm": 0.5887529850006104,
"learning_rate": 0.00012718351324828264,
"loss": 0.4167410433292389,
"step": 377
},
{
"epoch": 0.369140625,
"grad_norm": 0.3651127815246582,
"learning_rate": 0.0001269872423945044,
"loss": 1.0140016078948975,
"step": 378
},
{
"epoch": 0.3701171875,
"grad_norm": 0.47206228971481323,
"learning_rate": 0.0001267909715407262,
"loss": 0.8293377757072449,
"step": 379
},
{
"epoch": 0.37109375,
"grad_norm": 0.6319689154624939,
"learning_rate": 0.000126594700686948,
"loss": 0.7301446795463562,
"step": 380
},
{
"epoch": 0.3720703125,
"grad_norm": 0.5163951516151428,
"learning_rate": 0.0001263984298331698,
"loss": 0.9944421648979187,
"step": 381
},
{
"epoch": 0.373046875,
"grad_norm": 0.519072949886322,
"learning_rate": 0.00012620215897939156,
"loss": 0.6176541447639465,
"step": 382
},
{
"epoch": 0.3740234375,
"grad_norm": 3.0750813484191895,
"learning_rate": 0.00012600588812561336,
"loss": 0.7531320452690125,
"step": 383
},
{
"epoch": 0.375,
"grad_norm": 0.3246331512928009,
"learning_rate": 0.00012580961727183513,
"loss": 0.3269459903240204,
"step": 384
},
{
"epoch": 0.3759765625,
"grad_norm": 1.1105197668075562,
"learning_rate": 0.00012561334641805692,
"loss": 0.4228656589984894,
"step": 385
},
{
"epoch": 0.376953125,
"grad_norm": 0.6776182055473328,
"learning_rate": 0.00012541707556427872,
"loss": 0.791953980922699,
"step": 386
},
{
"epoch": 0.3779296875,
"grad_norm": 0.4413786828517914,
"learning_rate": 0.00012522080471050052,
"loss": 0.7953442335128784,
"step": 387
},
{
"epoch": 0.37890625,
"grad_norm": 0.4036264419555664,
"learning_rate": 0.00012502453385672228,
"loss": 0.6062744855880737,
"step": 388
},
{
"epoch": 0.3798828125,
"grad_norm": 1.0638166666030884,
"learning_rate": 0.00012482826300294408,
"loss": 1.0578093528747559,
"step": 389
},
{
"epoch": 0.380859375,
"grad_norm": 0.2518276572227478,
"learning_rate": 0.00012463199214916585,
"loss": 0.5070685148239136,
"step": 390
},
{
"epoch": 0.3818359375,
"grad_norm": 0.3338214159011841,
"learning_rate": 0.00012443572129538762,
"loss": 0.7665579915046692,
"step": 391
},
{
"epoch": 0.3828125,
"grad_norm": 0.4730507433414459,
"learning_rate": 0.00012423945044160944,
"loss": 0.48353517055511475,
"step": 392
},
{
"epoch": 0.3837890625,
"grad_norm": 0.3488924503326416,
"learning_rate": 0.0001240431795878312,
"loss": 0.4422420561313629,
"step": 393
},
{
"epoch": 0.384765625,
"grad_norm": 0.2397361695766449,
"learning_rate": 0.000123846908734053,
"loss": 0.7025644183158875,
"step": 394
},
{
"epoch": 0.3857421875,
"grad_norm": 0.3638167679309845,
"learning_rate": 0.00012365063788027478,
"loss": 0.5372107625007629,
"step": 395
},
{
"epoch": 0.38671875,
"grad_norm": 0.4088346064090729,
"learning_rate": 0.00012345436702649657,
"loss": 0.7636011838912964,
"step": 396
},
{
"epoch": 0.3876953125,
"grad_norm": 0.36985111236572266,
"learning_rate": 0.00012325809617271834,
"loss": 0.6720612645149231,
"step": 397
},
{
"epoch": 0.388671875,
"grad_norm": 0.37556055188179016,
"learning_rate": 0.00012306182531894016,
"loss": 0.8087592124938965,
"step": 398
},
{
"epoch": 0.3896484375,
"grad_norm": 0.6851724982261658,
"learning_rate": 0.00012286555446516193,
"loss": 0.780835747718811,
"step": 399
},
{
"epoch": 0.390625,
"grad_norm": 0.3453989326953888,
"learning_rate": 0.00012266928361138373,
"loss": 0.8235517740249634,
"step": 400
},
{
"epoch": 0.3916015625,
"grad_norm": 0.43622198700904846,
"learning_rate": 0.0001224730127576055,
"loss": 0.3758167028427124,
"step": 401
},
{
"epoch": 0.392578125,
"grad_norm": 0.4364018142223358,
"learning_rate": 0.0001222767419038273,
"loss": 0.7123017907142639,
"step": 402
},
{
"epoch": 0.3935546875,
"grad_norm": 0.24169716238975525,
"learning_rate": 0.00012208047105004906,
"loss": 0.48390328884124756,
"step": 403
},
{
"epoch": 0.39453125,
"grad_norm": 3.4902851581573486,
"learning_rate": 0.00012188420019627087,
"loss": 0.8519951105117798,
"step": 404
},
{
"epoch": 0.3955078125,
"grad_norm": 0.8332751989364624,
"learning_rate": 0.00012168792934249264,
"loss": 0.7562370896339417,
"step": 405
},
{
"epoch": 0.396484375,
"grad_norm": 0.3582589030265808,
"learning_rate": 0.00012149165848871442,
"loss": 0.3723471164703369,
"step": 406
},
{
"epoch": 0.3974609375,
"grad_norm": 0.48302146792411804,
"learning_rate": 0.00012129538763493622,
"loss": 1.0008171796798706,
"step": 407
},
{
"epoch": 0.3984375,
"grad_norm": 0.3510138988494873,
"learning_rate": 0.000121099116781158,
"loss": 0.30772703886032104,
"step": 408
},
{
"epoch": 0.3994140625,
"grad_norm": 0.2771015763282776,
"learning_rate": 0.0001209028459273798,
"loss": 0.4403090178966522,
"step": 409
},
{
"epoch": 0.400390625,
"grad_norm": 0.42239415645599365,
"learning_rate": 0.00012070657507360156,
"loss": 0.5451241731643677,
"step": 410
},
{
"epoch": 0.4013671875,
"grad_norm": 0.27876874804496765,
"learning_rate": 0.00012051030421982336,
"loss": 0.3590753972530365,
"step": 411
},
{
"epoch": 0.40234375,
"grad_norm": 0.42854824662208557,
"learning_rate": 0.00012031403336604514,
"loss": 1.0192680358886719,
"step": 412
},
{
"epoch": 0.4033203125,
"grad_norm": 0.32980695366859436,
"learning_rate": 0.00012011776251226694,
"loss": 0.6476566195487976,
"step": 413
},
{
"epoch": 0.404296875,
"grad_norm": 0.45046037435531616,
"learning_rate": 0.00011992149165848872,
"loss": 0.9548048973083496,
"step": 414
},
{
"epoch": 0.4052734375,
"grad_norm": 0.4176082909107208,
"learning_rate": 0.00011972522080471052,
"loss": 0.3793225586414337,
"step": 415
},
{
"epoch": 0.40625,
"grad_norm": 0.335823118686676,
"learning_rate": 0.00011952894995093229,
"loss": 0.5807560086250305,
"step": 416
},
{
"epoch": 0.4072265625,
"grad_norm": 0.4758591651916504,
"learning_rate": 0.00011933267909715408,
"loss": 0.3924551010131836,
"step": 417
},
{
"epoch": 0.408203125,
"grad_norm": 0.21527709066867828,
"learning_rate": 0.00011913640824337586,
"loss": 0.1651245653629303,
"step": 418
},
{
"epoch": 0.4091796875,
"grad_norm": 0.31255391240119934,
"learning_rate": 0.00011894013738959766,
"loss": 0.6133516430854797,
"step": 419
},
{
"epoch": 0.41015625,
"grad_norm": 0.40668365359306335,
"learning_rate": 0.00011874386653581944,
"loss": 0.894720196723938,
"step": 420
},
{
"epoch": 0.4111328125,
"grad_norm": 0.35574087500572205,
"learning_rate": 0.00011854759568204121,
"loss": 0.9017484188079834,
"step": 421
},
{
"epoch": 0.412109375,
"grad_norm": 0.3389612138271332,
"learning_rate": 0.00011835132482826301,
"loss": 0.7961660623550415,
"step": 422
},
{
"epoch": 0.4130859375,
"grad_norm": 0.8334202766418457,
"learning_rate": 0.00011815505397448479,
"loss": 0.8654063940048218,
"step": 423
},
{
"epoch": 0.4140625,
"grad_norm": 0.5917571187019348,
"learning_rate": 0.00011795878312070659,
"loss": 0.631730318069458,
"step": 424
},
{
"epoch": 0.4150390625,
"grad_norm": 0.4908443093299866,
"learning_rate": 0.00011776251226692835,
"loss": 0.3205869495868683,
"step": 425
},
{
"epoch": 0.416015625,
"grad_norm": 0.8349789381027222,
"learning_rate": 0.00011756624141315016,
"loss": 0.8526176810264587,
"step": 426
},
{
"epoch": 0.4169921875,
"grad_norm": 0.38712671399116516,
"learning_rate": 0.00011736997055937193,
"loss": 0.6580482125282288,
"step": 427
},
{
"epoch": 0.41796875,
"grad_norm": 0.766034722328186,
"learning_rate": 0.00011717369970559373,
"loss": 0.5494309663772583,
"step": 428
},
{
"epoch": 0.4189453125,
"grad_norm": 0.33322349190711975,
"learning_rate": 0.00011697742885181551,
"loss": 0.38351887464523315,
"step": 429
},
{
"epoch": 0.419921875,
"grad_norm": 0.411155641078949,
"learning_rate": 0.00011678115799803731,
"loss": 0.8139836192131042,
"step": 430
},
{
"epoch": 0.4208984375,
"grad_norm": 0.5857217907905579,
"learning_rate": 0.00011658488714425908,
"loss": 0.5668150186538696,
"step": 431
},
{
"epoch": 0.421875,
"grad_norm": 0.8849710822105408,
"learning_rate": 0.00011638861629048087,
"loss": 0.5478008985519409,
"step": 432
},
{
"epoch": 0.4228515625,
"grad_norm": 0.6771020293235779,
"learning_rate": 0.00011619234543670265,
"loss": 0.608709454536438,
"step": 433
},
{
"epoch": 0.423828125,
"grad_norm": 0.30138713121414185,
"learning_rate": 0.00011599607458292445,
"loss": 0.8240669369697571,
"step": 434
},
{
"epoch": 0.4248046875,
"grad_norm": 0.3273598253726959,
"learning_rate": 0.00011579980372914623,
"loss": 0.6287229657173157,
"step": 435
},
{
"epoch": 0.42578125,
"grad_norm": 0.5044806003570557,
"learning_rate": 0.000115603532875368,
"loss": 0.735835075378418,
"step": 436
},
{
"epoch": 0.4267578125,
"grad_norm": 0.34495776891708374,
"learning_rate": 0.0001154072620215898,
"loss": 0.7688421010971069,
"step": 437
},
{
"epoch": 0.427734375,
"grad_norm": 0.41923069953918457,
"learning_rate": 0.00011521099116781158,
"loss": 0.679617166519165,
"step": 438
},
{
"epoch": 0.4287109375,
"grad_norm": 0.3509843945503235,
"learning_rate": 0.00011501472031403338,
"loss": 0.7478575110435486,
"step": 439
},
{
"epoch": 0.4296875,
"grad_norm": 0.4758707582950592,
"learning_rate": 0.00011481844946025514,
"loss": 0.48871147632598877,
"step": 440
},
{
"epoch": 0.4306640625,
"grad_norm": 0.30272597074508667,
"learning_rate": 0.00011462217860647695,
"loss": 0.4311315715312958,
"step": 441
},
{
"epoch": 0.431640625,
"grad_norm": 0.5226417779922485,
"learning_rate": 0.00011442590775269872,
"loss": 0.8198300004005432,
"step": 442
},
{
"epoch": 0.4326171875,
"grad_norm": 0.41183850169181824,
"learning_rate": 0.00011422963689892052,
"loss": 0.9958367347717285,
"step": 443
},
{
"epoch": 0.43359375,
"grad_norm": 0.384048193693161,
"learning_rate": 0.0001140333660451423,
"loss": 0.3194778859615326,
"step": 444
},
{
"epoch": 0.4345703125,
"grad_norm": 0.5035115480422974,
"learning_rate": 0.0001138370951913641,
"loss": 0.6455928683280945,
"step": 445
},
{
"epoch": 0.435546875,
"grad_norm": 0.4875551462173462,
"learning_rate": 0.00011364082433758587,
"loss": 0.799978494644165,
"step": 446
},
{
"epoch": 0.4365234375,
"grad_norm": 0.3395763337612152,
"learning_rate": 0.00011344455348380768,
"loss": 0.47672414779663086,
"step": 447
},
{
"epoch": 0.4375,
"grad_norm": 0.5594314932823181,
"learning_rate": 0.00011324828263002944,
"loss": 0.4325803518295288,
"step": 448
},
{
"epoch": 0.4384765625,
"grad_norm": 0.44647228717803955,
"learning_rate": 0.00011305201177625124,
"loss": 0.8119433522224426,
"step": 449
},
{
"epoch": 0.439453125,
"grad_norm": 0.3190518915653229,
"learning_rate": 0.00011285574092247302,
"loss": 0.4949466288089752,
"step": 450
},
{
"epoch": 0.4404296875,
"grad_norm": 0.5943452715873718,
"learning_rate": 0.00011265947006869479,
"loss": 0.8245764374732971,
"step": 451
},
{
"epoch": 0.44140625,
"grad_norm": 0.8067309260368347,
"learning_rate": 0.00011246319921491659,
"loss": 0.39331740140914917,
"step": 452
},
{
"epoch": 0.4423828125,
"grad_norm": 0.4130857288837433,
"learning_rate": 0.00011226692836113837,
"loss": 1.0005946159362793,
"step": 453
},
{
"epoch": 0.443359375,
"grad_norm": 0.6839224100112915,
"learning_rate": 0.00011207065750736017,
"loss": 0.453269362449646,
"step": 454
},
{
"epoch": 0.4443359375,
"grad_norm": 0.6282085180282593,
"learning_rate": 0.00011187438665358195,
"loss": 0.7137607932090759,
"step": 455
},
{
"epoch": 0.4453125,
"grad_norm": 0.49894508719444275,
"learning_rate": 0.00011167811579980374,
"loss": 0.6289803981781006,
"step": 456
},
{
"epoch": 0.4462890625,
"grad_norm": 0.3570895493030548,
"learning_rate": 0.00011148184494602551,
"loss": 0.3711976110935211,
"step": 457
},
{
"epoch": 0.447265625,
"grad_norm": 0.28931114077568054,
"learning_rate": 0.00011128557409224731,
"loss": 0.5629679560661316,
"step": 458
},
{
"epoch": 0.4482421875,
"grad_norm": 1.2492791414260864,
"learning_rate": 0.00011108930323846909,
"loss": 0.5821082592010498,
"step": 459
},
{
"epoch": 0.44921875,
"grad_norm": 0.29861876368522644,
"learning_rate": 0.00011089303238469089,
"loss": 0.4129573106765747,
"step": 460
},
{
"epoch": 0.4501953125,
"grad_norm": 0.5244950652122498,
"learning_rate": 0.00011069676153091267,
"loss": 0.8300201296806335,
"step": 461
},
{
"epoch": 0.451171875,
"grad_norm": 0.446435809135437,
"learning_rate": 0.00011050049067713446,
"loss": 0.7500958442687988,
"step": 462
},
{
"epoch": 0.4521484375,
"grad_norm": 0.4531306028366089,
"learning_rate": 0.00011030421982335623,
"loss": 0.8492609262466431,
"step": 463
},
{
"epoch": 0.453125,
"grad_norm": 0.46944308280944824,
"learning_rate": 0.00011010794896957802,
"loss": 0.6209090948104858,
"step": 464
},
{
"epoch": 0.4541015625,
"grad_norm": 0.5465651154518127,
"learning_rate": 0.00010991167811579981,
"loss": 0.5176469087600708,
"step": 465
},
{
"epoch": 0.455078125,
"grad_norm": 0.36550402641296387,
"learning_rate": 0.00010971540726202158,
"loss": 0.6358295679092407,
"step": 466
},
{
"epoch": 0.4560546875,
"grad_norm": 0.48919910192489624,
"learning_rate": 0.00010951913640824338,
"loss": 0.5903019905090332,
"step": 467
},
{
"epoch": 0.45703125,
"grad_norm": 0.4378332793712616,
"learning_rate": 0.00010932286555446516,
"loss": 0.6710047721862793,
"step": 468
},
{
"epoch": 0.4580078125,
"grad_norm": 0.3095405101776123,
"learning_rate": 0.00010912659470068696,
"loss": 0.6787213683128357,
"step": 469
},
{
"epoch": 0.458984375,
"grad_norm": 0.40901967883110046,
"learning_rate": 0.00010893032384690874,
"loss": 0.6371384859085083,
"step": 470
},
{
"epoch": 0.4599609375,
"grad_norm": 0.3962486982345581,
"learning_rate": 0.00010873405299313053,
"loss": 0.5823498964309692,
"step": 471
},
{
"epoch": 0.4609375,
"grad_norm": 0.4094708263874054,
"learning_rate": 0.0001085377821393523,
"loss": 1.0396480560302734,
"step": 472
},
{
"epoch": 0.4619140625,
"grad_norm": 0.5117614269256592,
"learning_rate": 0.0001083415112855741,
"loss": 0.6320610642433167,
"step": 473
},
{
"epoch": 0.462890625,
"grad_norm": 0.28345227241516113,
"learning_rate": 0.00010814524043179588,
"loss": 0.33279290795326233,
"step": 474
},
{
"epoch": 0.4638671875,
"grad_norm": 0.5475791096687317,
"learning_rate": 0.00010794896957801768,
"loss": 0.359570175409317,
"step": 475
},
{
"epoch": 0.46484375,
"grad_norm": 0.44176843762397766,
"learning_rate": 0.00010775269872423946,
"loss": 0.7576714158058167,
"step": 476
},
{
"epoch": 0.4658203125,
"grad_norm": 0.473562628030777,
"learning_rate": 0.00010755642787046125,
"loss": 0.8758799433708191,
"step": 477
},
{
"epoch": 0.466796875,
"grad_norm": 0.41919219493865967,
"learning_rate": 0.00010736015701668302,
"loss": 0.863654375076294,
"step": 478
},
{
"epoch": 0.4677734375,
"grad_norm": 0.4215691089630127,
"learning_rate": 0.0001071638861629048,
"loss": 0.5004569292068481,
"step": 479
},
{
"epoch": 0.46875,
"grad_norm": 0.36801034212112427,
"learning_rate": 0.0001069676153091266,
"loss": 0.9330754280090332,
"step": 480
},
{
"epoch": 0.4697265625,
"grad_norm": 0.42489972710609436,
"learning_rate": 0.00010677134445534837,
"loss": 1.0529820919036865,
"step": 481
},
{
"epoch": 0.470703125,
"grad_norm": 0.4067368507385254,
"learning_rate": 0.00010657507360157018,
"loss": 0.5453970432281494,
"step": 482
},
{
"epoch": 0.4716796875,
"grad_norm": 0.28611162304878235,
"learning_rate": 0.00010637880274779195,
"loss": 0.2348572313785553,
"step": 483
},
{
"epoch": 0.47265625,
"grad_norm": 0.40047627687454224,
"learning_rate": 0.00010618253189401374,
"loss": 0.4776308834552765,
"step": 484
},
{
"epoch": 0.4736328125,
"grad_norm": 0.5168628692626953,
"learning_rate": 0.00010598626104023553,
"loss": 0.9922167062759399,
"step": 485
},
{
"epoch": 0.474609375,
"grad_norm": 0.3620246946811676,
"learning_rate": 0.00010578999018645732,
"loss": 0.7285036444664001,
"step": 486
},
{
"epoch": 0.4755859375,
"grad_norm": 0.42711782455444336,
"learning_rate": 0.00010559371933267909,
"loss": 0.6387231349945068,
"step": 487
},
{
"epoch": 0.4765625,
"grad_norm": 0.2139827311038971,
"learning_rate": 0.0001053974484789009,
"loss": 0.4295338988304138,
"step": 488
},
{
"epoch": 0.4775390625,
"grad_norm": 0.31191739439964294,
"learning_rate": 0.00010520117762512267,
"loss": 0.42860671877861023,
"step": 489
},
{
"epoch": 0.478515625,
"grad_norm": 0.2909379303455353,
"learning_rate": 0.00010500490677134447,
"loss": 0.47065097093582153,
"step": 490
},
{
"epoch": 0.4794921875,
"grad_norm": 0.48990437388420105,
"learning_rate": 0.00010480863591756625,
"loss": 0.8870656490325928,
"step": 491
},
{
"epoch": 0.48046875,
"grad_norm": 0.5662127733230591,
"learning_rate": 0.00010461236506378804,
"loss": 0.8007984161376953,
"step": 492
},
{
"epoch": 0.4814453125,
"grad_norm": 0.3656634986400604,
"learning_rate": 0.00010441609421000981,
"loss": 0.41389334201812744,
"step": 493
},
{
"epoch": 0.482421875,
"grad_norm": 0.39840465784072876,
"learning_rate": 0.0001042198233562316,
"loss": 0.6927056908607483,
"step": 494
},
{
"epoch": 0.4833984375,
"grad_norm": 0.641647219657898,
"learning_rate": 0.00010402355250245339,
"loss": 0.7912976145744324,
"step": 495
},
{
"epoch": 0.484375,
"grad_norm": 0.4522266685962677,
"learning_rate": 0.00010382728164867517,
"loss": 0.615374743938446,
"step": 496
},
{
"epoch": 0.4853515625,
"grad_norm": 0.415444016456604,
"learning_rate": 0.00010363101079489697,
"loss": 0.8559135794639587,
"step": 497
},
{
"epoch": 0.486328125,
"grad_norm": 0.4477578401565552,
"learning_rate": 0.00010343473994111874,
"loss": 0.6109384298324585,
"step": 498
},
{
"epoch": 0.4873046875,
"grad_norm": 0.33097633719444275,
"learning_rate": 0.00010323846908734053,
"loss": 0.6325762271881104,
"step": 499
},
{
"epoch": 0.48828125,
"grad_norm": 0.38771572709083557,
"learning_rate": 0.00010304219823356232,
"loss": 0.5979640483856201,
"step": 500
},
{
"epoch": 0.4892578125,
"grad_norm": 0.3339928984642029,
"learning_rate": 0.00010284592737978411,
"loss": 0.6619001626968384,
"step": 501
},
{
"epoch": 0.490234375,
"grad_norm": 0.6400135159492493,
"learning_rate": 0.00010264965652600588,
"loss": 0.28338727355003357,
"step": 502
},
{
"epoch": 0.4912109375,
"grad_norm": 0.35763970017433167,
"learning_rate": 0.00010245338567222769,
"loss": 0.6373124122619629,
"step": 503
},
{
"epoch": 0.4921875,
"grad_norm": 0.2136622965335846,
"learning_rate": 0.00010225711481844946,
"loss": 0.2315329760313034,
"step": 504
},
{
"epoch": 0.4931640625,
"grad_norm": 0.6324110627174377,
"learning_rate": 0.00010206084396467126,
"loss": 1.0045514106750488,
"step": 505
},
{
"epoch": 0.494140625,
"grad_norm": 0.4471307694911957,
"learning_rate": 0.00010186457311089304,
"loss": 0.5188390016555786,
"step": 506
},
{
"epoch": 0.4951171875,
"grad_norm": 0.38222211599349976,
"learning_rate": 0.00010166830225711483,
"loss": 0.7351740598678589,
"step": 507
},
{
"epoch": 0.49609375,
"grad_norm": 0.41885000467300415,
"learning_rate": 0.0001014720314033366,
"loss": 0.9071688055992126,
"step": 508
},
{
"epoch": 0.4970703125,
"grad_norm": 0.8193621635437012,
"learning_rate": 0.00010127576054955839,
"loss": 0.7240473031997681,
"step": 509
},
{
"epoch": 0.498046875,
"grad_norm": 0.2846645712852478,
"learning_rate": 0.00010107948969578018,
"loss": 0.351628839969635,
"step": 510
},
{
"epoch": 0.4990234375,
"grad_norm": 0.4778954088687897,
"learning_rate": 0.00010088321884200196,
"loss": 0.7705833911895752,
"step": 511
},
{
"epoch": 0.5,
"grad_norm": 0.3384702503681183,
"learning_rate": 0.00010068694798822376,
"loss": 0.5467265248298645,
"step": 512
},
{
"epoch": 0.5009765625,
"grad_norm": 0.43917056918144226,
"learning_rate": 0.00010049067713444553,
"loss": 0.9810686707496643,
"step": 513
},
{
"epoch": 0.501953125,
"grad_norm": 0.4351615607738495,
"learning_rate": 0.00010029440628066732,
"loss": 0.9716764688491821,
"step": 514
},
{
"epoch": 0.5029296875,
"grad_norm": 0.49873459339141846,
"learning_rate": 0.00010009813542688911,
"loss": 0.9183788299560547,
"step": 515
},
{
"epoch": 0.50390625,
"grad_norm": 0.36710789799690247,
"learning_rate": 9.990186457311089e-05,
"loss": 0.49884548783302307,
"step": 516
},
{
"epoch": 0.5048828125,
"grad_norm": 0.5402531623840332,
"learning_rate": 9.970559371933269e-05,
"loss": 0.6645570993423462,
"step": 517
},
{
"epoch": 0.505859375,
"grad_norm": 0.4990559220314026,
"learning_rate": 9.950932286555447e-05,
"loss": 1.0321924686431885,
"step": 518
},
{
"epoch": 0.5068359375,
"grad_norm": 0.4634752869606018,
"learning_rate": 9.931305201177625e-05,
"loss": 0.8484972715377808,
"step": 519
},
{
"epoch": 0.5078125,
"grad_norm": 0.38584330677986145,
"learning_rate": 9.911678115799805e-05,
"loss": 0.3424939513206482,
"step": 520
},
{
"epoch": 0.5087890625,
"grad_norm": 0.41148415207862854,
"learning_rate": 9.892051030421983e-05,
"loss": 0.7890703678131104,
"step": 521
},
{
"epoch": 0.509765625,
"grad_norm": 0.35891374945640564,
"learning_rate": 9.872423945044161e-05,
"loss": 0.7387750744819641,
"step": 522
},
{
"epoch": 0.5107421875,
"grad_norm": 0.4174203872680664,
"learning_rate": 9.85279685966634e-05,
"loss": 0.5610706806182861,
"step": 523
},
{
"epoch": 0.51171875,
"grad_norm": 0.4062010645866394,
"learning_rate": 9.833169774288519e-05,
"loss": 0.6016039252281189,
"step": 524
},
{
"epoch": 0.5126953125,
"grad_norm": 0.35915061831474304,
"learning_rate": 9.813542688910697e-05,
"loss": 0.37933990359306335,
"step": 525
},
{
"epoch": 0.513671875,
"grad_norm": 0.49826234579086304,
"learning_rate": 9.793915603532877e-05,
"loss": 0.9650976657867432,
"step": 526
},
{
"epoch": 0.5146484375,
"grad_norm": 0.4122180938720703,
"learning_rate": 9.774288518155055e-05,
"loss": 0.5477824211120605,
"step": 527
},
{
"epoch": 0.515625,
"grad_norm": 0.3824058175086975,
"learning_rate": 9.754661432777233e-05,
"loss": 0.5163108706474304,
"step": 528
},
{
"epoch": 0.5166015625,
"grad_norm": 0.4485555589199066,
"learning_rate": 9.735034347399413e-05,
"loss": 0.9402418732643127,
"step": 529
},
{
"epoch": 0.517578125,
"grad_norm": 0.4053209722042084,
"learning_rate": 9.715407262021591e-05,
"loss": 0.9314478039741516,
"step": 530
},
{
"epoch": 0.5185546875,
"grad_norm": 0.3183811604976654,
"learning_rate": 9.695780176643768e-05,
"loss": 0.6706205606460571,
"step": 531
},
{
"epoch": 0.51953125,
"grad_norm": 0.40083932876586914,
"learning_rate": 9.676153091265947e-05,
"loss": 1.102424144744873,
"step": 532
},
{
"epoch": 0.5205078125,
"grad_norm": 0.5949054956436157,
"learning_rate": 9.656526005888126e-05,
"loss": 0.8396608829498291,
"step": 533
},
{
"epoch": 0.521484375,
"grad_norm": 0.41966959834098816,
"learning_rate": 9.636898920510304e-05,
"loss": 0.5641101002693176,
"step": 534
},
{
"epoch": 0.5224609375,
"grad_norm": 0.448281466960907,
"learning_rate": 9.617271835132484e-05,
"loss": 0.44873932003974915,
"step": 535
},
{
"epoch": 0.5234375,
"grad_norm": 0.47785645723342896,
"learning_rate": 9.597644749754662e-05,
"loss": 0.8799008131027222,
"step": 536
},
{
"epoch": 0.5244140625,
"grad_norm": 0.45459261536598206,
"learning_rate": 9.57801766437684e-05,
"loss": 0.8261788487434387,
"step": 537
},
{
"epoch": 0.525390625,
"grad_norm": 0.6168074607849121,
"learning_rate": 9.55839057899902e-05,
"loss": 0.9762136936187744,
"step": 538
},
{
"epoch": 0.5263671875,
"grad_norm": 0.6500818133354187,
"learning_rate": 9.538763493621198e-05,
"loss": 0.9044640064239502,
"step": 539
},
{
"epoch": 0.52734375,
"grad_norm": 0.31668490171432495,
"learning_rate": 9.519136408243376e-05,
"loss": 0.42503029108047485,
"step": 540
},
{
"epoch": 0.5283203125,
"grad_norm": 0.4041314721107483,
"learning_rate": 9.499509322865556e-05,
"loss": 0.6643175482749939,
"step": 541
},
{
"epoch": 0.529296875,
"grad_norm": 1.011020541191101,
"learning_rate": 9.479882237487734e-05,
"loss": 0.7636033892631531,
"step": 542
},
{
"epoch": 0.5302734375,
"grad_norm": 0.3690396845340729,
"learning_rate": 9.460255152109912e-05,
"loss": 1.0516947507858276,
"step": 543
},
{
"epoch": 0.53125,
"grad_norm": 0.288604199886322,
"learning_rate": 9.440628066732092e-05,
"loss": 0.3806208372116089,
"step": 544
},
{
"epoch": 0.5322265625,
"grad_norm": 0.4247501790523529,
"learning_rate": 9.42100098135427e-05,
"loss": 0.8651745319366455,
"step": 545
},
{
"epoch": 0.533203125,
"grad_norm": 1.1893255710601807,
"learning_rate": 9.401373895976447e-05,
"loss": 0.28601521253585815,
"step": 546
},
{
"epoch": 0.5341796875,
"grad_norm": 0.3229619562625885,
"learning_rate": 9.381746810598626e-05,
"loss": 0.8316909670829773,
"step": 547
},
{
"epoch": 0.53515625,
"grad_norm": 0.390278160572052,
"learning_rate": 9.362119725220805e-05,
"loss": 0.7263185977935791,
"step": 548
},
{
"epoch": 0.5361328125,
"grad_norm": 0.2949998378753662,
"learning_rate": 9.342492639842983e-05,
"loss": 0.5417062044143677,
"step": 549
},
{
"epoch": 0.537109375,
"grad_norm": 0.47482210397720337,
"learning_rate": 9.322865554465163e-05,
"loss": 0.6505849361419678,
"step": 550
},
{
"epoch": 0.5380859375,
"grad_norm": 0.3653123676776886,
"learning_rate": 9.303238469087341e-05,
"loss": 0.7270935773849487,
"step": 551
},
{
"epoch": 0.5390625,
"grad_norm": 0.5652351975440979,
"learning_rate": 9.283611383709519e-05,
"loss": 0.8330069780349731,
"step": 552
},
{
"epoch": 0.5400390625,
"grad_norm": 0.448408842086792,
"learning_rate": 9.263984298331699e-05,
"loss": 0.8804951310157776,
"step": 553
},
{
"epoch": 0.541015625,
"grad_norm": 0.7700690031051636,
"learning_rate": 9.244357212953877e-05,
"loss": 0.6466813087463379,
"step": 554
},
{
"epoch": 0.5419921875,
"grad_norm": 0.45755863189697266,
"learning_rate": 9.224730127576055e-05,
"loss": 0.5548572540283203,
"step": 555
},
{
"epoch": 0.54296875,
"grad_norm": 0.4113846719264984,
"learning_rate": 9.205103042198235e-05,
"loss": 0.9286736845970154,
"step": 556
},
{
"epoch": 0.5439453125,
"grad_norm": 0.4555431604385376,
"learning_rate": 9.185475956820413e-05,
"loss": 0.8332977890968323,
"step": 557
},
{
"epoch": 0.544921875,
"grad_norm": 0.5103408098220825,
"learning_rate": 9.165848871442591e-05,
"loss": 1.0110094547271729,
"step": 558
},
{
"epoch": 0.5458984375,
"grad_norm": 0.299912691116333,
"learning_rate": 9.146221786064771e-05,
"loss": 0.3136459290981293,
"step": 559
},
{
"epoch": 0.546875,
"grad_norm": 0.40499091148376465,
"learning_rate": 9.126594700686948e-05,
"loss": 0.6785961389541626,
"step": 560
},
{
"epoch": 0.5478515625,
"grad_norm": 0.4190375804901123,
"learning_rate": 9.106967615309127e-05,
"loss": 0.9891744256019592,
"step": 561
},
{
"epoch": 0.548828125,
"grad_norm": 0.6265519261360168,
"learning_rate": 9.087340529931305e-05,
"loss": 0.48712462186813354,
"step": 562
},
{
"epoch": 0.5498046875,
"grad_norm": 0.466420978307724,
"learning_rate": 9.067713444553484e-05,
"loss": 0.5573943257331848,
"step": 563
},
{
"epoch": 0.55078125,
"grad_norm": 0.3990301191806793,
"learning_rate": 9.048086359175663e-05,
"loss": 0.5893411040306091,
"step": 564
},
{
"epoch": 0.5517578125,
"grad_norm": 0.31471043825149536,
"learning_rate": 9.028459273797842e-05,
"loss": 0.593424379825592,
"step": 565
},
{
"epoch": 0.552734375,
"grad_norm": 0.46789905428886414,
"learning_rate": 9.00883218842002e-05,
"loss": 0.9398684501647949,
"step": 566
},
{
"epoch": 0.5537109375,
"grad_norm": 0.48358282446861267,
"learning_rate": 8.989205103042198e-05,
"loss": 0.895098865032196,
"step": 567
},
{
"epoch": 0.5546875,
"grad_norm": 0.25878453254699707,
"learning_rate": 8.969578017664378e-05,
"loss": 0.4817226231098175,
"step": 568
},
{
"epoch": 0.5556640625,
"grad_norm": 0.5319378972053528,
"learning_rate": 8.949950932286556e-05,
"loss": 0.6119651794433594,
"step": 569
},
{
"epoch": 0.556640625,
"grad_norm": 0.3002898097038269,
"learning_rate": 8.930323846908734e-05,
"loss": 0.28599199652671814,
"step": 570
},
{
"epoch": 0.5576171875,
"grad_norm": 0.37161317467689514,
"learning_rate": 8.910696761530914e-05,
"loss": 0.3307079076766968,
"step": 571
},
{
"epoch": 0.55859375,
"grad_norm": 0.4755436182022095,
"learning_rate": 8.891069676153092e-05,
"loss": 0.5868921279907227,
"step": 572
},
{
"epoch": 0.5595703125,
"grad_norm": 0.3264123499393463,
"learning_rate": 8.87144259077527e-05,
"loss": 0.6682905554771423,
"step": 573
},
{
"epoch": 0.560546875,
"grad_norm": 0.43468573689460754,
"learning_rate": 8.85181550539745e-05,
"loss": 0.6316066980361938,
"step": 574
},
{
"epoch": 0.5615234375,
"grad_norm": 0.5759416222572327,
"learning_rate": 8.832188420019627e-05,
"loss": 0.5687480568885803,
"step": 575
},
{
"epoch": 0.5625,
"grad_norm": 0.39352041482925415,
"learning_rate": 8.812561334641806e-05,
"loss": 0.3803275525569916,
"step": 576
},
{
"epoch": 0.5634765625,
"grad_norm": 0.4155440926551819,
"learning_rate": 8.792934249263984e-05,
"loss": 0.3923049569129944,
"step": 577
},
{
"epoch": 0.564453125,
"grad_norm": 0.34934133291244507,
"learning_rate": 8.773307163886163e-05,
"loss": 0.7100962996482849,
"step": 578
},
{
"epoch": 0.5654296875,
"grad_norm": 0.3993069529533386,
"learning_rate": 8.753680078508342e-05,
"loss": 0.6711176037788391,
"step": 579
},
{
"epoch": 0.56640625,
"grad_norm": 0.3445776700973511,
"learning_rate": 8.73405299313052e-05,
"loss": 0.6986067295074463,
"step": 580
},
{
"epoch": 0.5673828125,
"grad_norm": 0.45837292075157166,
"learning_rate": 8.714425907752699e-05,
"loss": 0.9020513892173767,
"step": 581
},
{
"epoch": 0.568359375,
"grad_norm": 0.3630208671092987,
"learning_rate": 8.694798822374878e-05,
"loss": 0.42499858140945435,
"step": 582
},
{
"epoch": 0.5693359375,
"grad_norm": 0.41205838322639465,
"learning_rate": 8.675171736997057e-05,
"loss": 0.5535018444061279,
"step": 583
},
{
"epoch": 0.5703125,
"grad_norm": 0.2596284747123718,
"learning_rate": 8.655544651619235e-05,
"loss": 0.3234618902206421,
"step": 584
},
{
"epoch": 0.5712890625,
"grad_norm": 0.3716956079006195,
"learning_rate": 8.635917566241414e-05,
"loss": 0.7567611932754517,
"step": 585
},
{
"epoch": 0.572265625,
"grad_norm": 0.42999619245529175,
"learning_rate": 8.616290480863593e-05,
"loss": 0.8695427179336548,
"step": 586
},
{
"epoch": 0.5732421875,
"grad_norm": 0.3309305012226105,
"learning_rate": 8.596663395485771e-05,
"loss": 0.989714503288269,
"step": 587
},
{
"epoch": 0.57421875,
"grad_norm": 0.40024474263191223,
"learning_rate": 8.57703631010795e-05,
"loss": 1.0608711242675781,
"step": 588
},
{
"epoch": 0.5751953125,
"grad_norm": 0.453950434923172,
"learning_rate": 8.557409224730129e-05,
"loss": 0.7340632677078247,
"step": 589
},
{
"epoch": 0.576171875,
"grad_norm": 0.4473342299461365,
"learning_rate": 8.537782139352306e-05,
"loss": 0.7264219522476196,
"step": 590
},
{
"epoch": 0.5771484375,
"grad_norm": 0.420469731092453,
"learning_rate": 8.518155053974485e-05,
"loss": 0.8141539692878723,
"step": 591
},
{
"epoch": 0.578125,
"grad_norm": 0.4068243205547333,
"learning_rate": 8.498527968596663e-05,
"loss": 0.5802872180938721,
"step": 592
},
{
"epoch": 0.5791015625,
"grad_norm": 0.4243272840976715,
"learning_rate": 8.478900883218842e-05,
"loss": 0.350595086812973,
"step": 593
},
{
"epoch": 0.580078125,
"grad_norm": 0.4519834518432617,
"learning_rate": 8.459273797841021e-05,
"loss": 0.7131458520889282,
"step": 594
},
{
"epoch": 0.5810546875,
"grad_norm": 0.34145745635032654,
"learning_rate": 8.4396467124632e-05,
"loss": 0.7618221640586853,
"step": 595
},
{
"epoch": 0.58203125,
"grad_norm": 0.46494174003601074,
"learning_rate": 8.420019627085378e-05,
"loss": 0.5102145075798035,
"step": 596
},
{
"epoch": 0.5830078125,
"grad_norm": 0.3305060565471649,
"learning_rate": 8.400392541707557e-05,
"loss": 0.7812811732292175,
"step": 597
},
{
"epoch": 0.583984375,
"grad_norm": 0.47092583775520325,
"learning_rate": 8.380765456329736e-05,
"loss": 0.7497634887695312,
"step": 598
},
{
"epoch": 0.5849609375,
"grad_norm": 0.38902655243873596,
"learning_rate": 8.361138370951914e-05,
"loss": 0.4198119640350342,
"step": 599
},
{
"epoch": 0.5859375,
"grad_norm": 0.43659287691116333,
"learning_rate": 8.341511285574093e-05,
"loss": 0.824333667755127,
"step": 600
},
{
"epoch": 0.5869140625,
"grad_norm": 0.4277879595756531,
"learning_rate": 8.321884200196272e-05,
"loss": 0.445267915725708,
"step": 601
},
{
"epoch": 0.587890625,
"grad_norm": 0.3186829090118408,
"learning_rate": 8.30225711481845e-05,
"loss": 0.9906235337257385,
"step": 602
},
{
"epoch": 0.5888671875,
"grad_norm": 0.2983294427394867,
"learning_rate": 8.28263002944063e-05,
"loss": 0.5342146754264832,
"step": 603
},
{
"epoch": 0.58984375,
"grad_norm": 0.4127228856086731,
"learning_rate": 8.263002944062808e-05,
"loss": 0.41288450360298157,
"step": 604
},
{
"epoch": 0.5908203125,
"grad_norm": 0.3961617052555084,
"learning_rate": 8.243375858684985e-05,
"loss": 0.43576663732528687,
"step": 605
},
{
"epoch": 0.591796875,
"grad_norm": 0.4124387502670288,
"learning_rate": 8.223748773307164e-05,
"loss": 0.5837401747703552,
"step": 606
},
{
"epoch": 0.5927734375,
"grad_norm": 0.4274151921272278,
"learning_rate": 8.204121687929342e-05,
"loss": 0.8666547536849976,
"step": 607
},
{
"epoch": 0.59375,
"grad_norm": 0.3881700932979584,
"learning_rate": 8.18449460255152e-05,
"loss": 0.9063656330108643,
"step": 608
},
{
"epoch": 0.5947265625,
"grad_norm": 0.46216556429862976,
"learning_rate": 8.1648675171737e-05,
"loss": 0.4573599696159363,
"step": 609
},
{
"epoch": 0.595703125,
"grad_norm": 0.3843960762023926,
"learning_rate": 8.145240431795878e-05,
"loss": 0.6214632391929626,
"step": 610
},
{
"epoch": 0.5966796875,
"grad_norm": 0.538301408290863,
"learning_rate": 8.125613346418057e-05,
"loss": 0.8800979852676392,
"step": 611
},
{
"epoch": 0.59765625,
"grad_norm": 0.49643319845199585,
"learning_rate": 8.105986261040236e-05,
"loss": 0.48715031147003174,
"step": 612
},
{
"epoch": 0.5986328125,
"grad_norm": 0.4753062427043915,
"learning_rate": 8.086359175662415e-05,
"loss": 0.8127011060714722,
"step": 613
},
{
"epoch": 0.599609375,
"grad_norm": 0.7572022676467896,
"learning_rate": 8.066732090284593e-05,
"loss": 0.7151535153388977,
"step": 614
},
{
"epoch": 0.6005859375,
"grad_norm": 0.35117295384407043,
"learning_rate": 8.047105004906772e-05,
"loss": 0.9221618175506592,
"step": 615
},
{
"epoch": 0.6015625,
"grad_norm": 0.2643633186817169,
"learning_rate": 8.02747791952895e-05,
"loss": 0.5025840401649475,
"step": 616
},
{
"epoch": 0.6025390625,
"grad_norm": 0.45553916692733765,
"learning_rate": 8.007850834151129e-05,
"loss": 0.452494740486145,
"step": 617
},
{
"epoch": 0.603515625,
"grad_norm": 0.386594295501709,
"learning_rate": 7.988223748773308e-05,
"loss": 0.7942792773246765,
"step": 618
},
{
"epoch": 0.6044921875,
"grad_norm": 0.3616650700569153,
"learning_rate": 7.968596663395485e-05,
"loss": 0.5697340965270996,
"step": 619
},
{
"epoch": 0.60546875,
"grad_norm": 0.3885051906108856,
"learning_rate": 7.948969578017665e-05,
"loss": 0.7082506418228149,
"step": 620
},
{
"epoch": 0.6064453125,
"grad_norm": 0.4484117329120636,
"learning_rate": 7.929342492639843e-05,
"loss": 0.5993860960006714,
"step": 621
},
{
"epoch": 0.607421875,
"grad_norm": 0.44654563069343567,
"learning_rate": 7.909715407262021e-05,
"loss": 0.5804839134216309,
"step": 622
},
{
"epoch": 0.6083984375,
"grad_norm": 0.3943687081336975,
"learning_rate": 7.890088321884201e-05,
"loss": 0.6422688364982605,
"step": 623
},
{
"epoch": 0.609375,
"grad_norm": 0.4153381288051605,
"learning_rate": 7.870461236506379e-05,
"loss": 0.6437400579452515,
"step": 624
},
{
"epoch": 0.6103515625,
"grad_norm": 0.38221171498298645,
"learning_rate": 7.850834151128557e-05,
"loss": 0.8738820552825928,
"step": 625
},
{
"epoch": 0.611328125,
"grad_norm": 0.339599609375,
"learning_rate": 7.831207065750737e-05,
"loss": 0.517478883266449,
"step": 626
},
{
"epoch": 0.6123046875,
"grad_norm": 0.7177076935768127,
"learning_rate": 7.811579980372915e-05,
"loss": 0.7372115254402161,
"step": 627
},
{
"epoch": 0.61328125,
"grad_norm": 0.47573140263557434,
"learning_rate": 7.791952894995093e-05,
"loss": 0.649010181427002,
"step": 628
},
{
"epoch": 0.6142578125,
"grad_norm": 0.44851094484329224,
"learning_rate": 7.772325809617273e-05,
"loss": 0.6269842386245728,
"step": 629
},
{
"epoch": 0.615234375,
"grad_norm": 0.3544669449329376,
"learning_rate": 7.752698724239451e-05,
"loss": 0.8870983123779297,
"step": 630
},
{
"epoch": 0.6162109375,
"grad_norm": 0.4103491008281708,
"learning_rate": 7.73307163886163e-05,
"loss": 0.8711034059524536,
"step": 631
},
{
"epoch": 0.6171875,
"grad_norm": 0.3651062548160553,
"learning_rate": 7.713444553483808e-05,
"loss": 0.8420337438583374,
"step": 632
},
{
"epoch": 0.6181640625,
"grad_norm": 0.4135638475418091,
"learning_rate": 7.693817468105987e-05,
"loss": 0.601078450679779,
"step": 633
},
{
"epoch": 0.619140625,
"grad_norm": 0.5965299010276794,
"learning_rate": 7.674190382728164e-05,
"loss": 0.604471743106842,
"step": 634
},
{
"epoch": 0.6201171875,
"grad_norm": 0.4340416491031647,
"learning_rate": 7.654563297350344e-05,
"loss": 0.905183732509613,
"step": 635
},
{
"epoch": 0.62109375,
"grad_norm": 0.361518919467926,
"learning_rate": 7.634936211972522e-05,
"loss": 0.6569675207138062,
"step": 636
},
{
"epoch": 0.6220703125,
"grad_norm": 1.04604971408844,
"learning_rate": 7.6153091265947e-05,
"loss": 0.7399482727050781,
"step": 637
},
{
"epoch": 0.623046875,
"grad_norm": 0.8039460778236389,
"learning_rate": 7.59568204121688e-05,
"loss": 0.6003617644309998,
"step": 638
},
{
"epoch": 0.6240234375,
"grad_norm": 0.5462118983268738,
"learning_rate": 7.576054955839058e-05,
"loss": 0.7750217914581299,
"step": 639
},
{
"epoch": 0.625,
"grad_norm": 0.29333505034446716,
"learning_rate": 7.556427870461236e-05,
"loss": 0.47371456027030945,
"step": 640
},
{
"epoch": 0.6259765625,
"grad_norm": 0.2468312531709671,
"learning_rate": 7.536800785083416e-05,
"loss": 0.4615188241004944,
"step": 641
},
{
"epoch": 0.626953125,
"grad_norm": 0.48467332124710083,
"learning_rate": 7.517173699705594e-05,
"loss": 0.6456693410873413,
"step": 642
},
{
"epoch": 0.6279296875,
"grad_norm": 0.5471943020820618,
"learning_rate": 7.497546614327772e-05,
"loss": 0.5899155139923096,
"step": 643
},
{
"epoch": 0.62890625,
"grad_norm": 0.3715604841709137,
"learning_rate": 7.477919528949952e-05,
"loss": 0.7910970449447632,
"step": 644
},
{
"epoch": 0.6298828125,
"grad_norm": 0.3298327922821045,
"learning_rate": 7.45829244357213e-05,
"loss": 0.5769776701927185,
"step": 645
},
{
"epoch": 0.630859375,
"grad_norm": 0.44131916761398315,
"learning_rate": 7.438665358194309e-05,
"loss": 0.8805806636810303,
"step": 646
},
{
"epoch": 0.6318359375,
"grad_norm": 0.4686948359012604,
"learning_rate": 7.419038272816488e-05,
"loss": 0.7262091636657715,
"step": 647
},
{
"epoch": 0.6328125,
"grad_norm": 0.48123931884765625,
"learning_rate": 7.399411187438666e-05,
"loss": 0.8481992483139038,
"step": 648
},
{
"epoch": 0.6337890625,
"grad_norm": 0.5582646131515503,
"learning_rate": 7.379784102060843e-05,
"loss": 0.4963653087615967,
"step": 649
},
{
"epoch": 0.634765625,
"grad_norm": 0.30464881658554077,
"learning_rate": 7.360157016683023e-05,
"loss": 0.6772556900978088,
"step": 650
},
{
"epoch": 0.6357421875,
"grad_norm": 0.44710803031921387,
"learning_rate": 7.340529931305201e-05,
"loss": 0.5476983189582825,
"step": 651
},
{
"epoch": 0.63671875,
"grad_norm": 0.35922887921333313,
"learning_rate": 7.320902845927379e-05,
"loss": 0.8256508111953735,
"step": 652
},
{
"epoch": 0.6376953125,
"grad_norm": 0.40085500478744507,
"learning_rate": 7.301275760549559e-05,
"loss": 0.5783500671386719,
"step": 653
},
{
"epoch": 0.638671875,
"grad_norm": 0.47579512000083923,
"learning_rate": 7.281648675171737e-05,
"loss": 0.5591031908988953,
"step": 654
},
{
"epoch": 0.6396484375,
"grad_norm": 0.5594353675842285,
"learning_rate": 7.262021589793915e-05,
"loss": 0.8133666515350342,
"step": 655
},
{
"epoch": 0.640625,
"grad_norm": 0.44030821323394775,
"learning_rate": 7.242394504416095e-05,
"loss": 1.0282940864562988,
"step": 656
},
{
"epoch": 0.6416015625,
"grad_norm": 0.7038627862930298,
"learning_rate": 7.222767419038273e-05,
"loss": 0.2322971373796463,
"step": 657
},
{
"epoch": 0.642578125,
"grad_norm": 0.223698228597641,
"learning_rate": 7.203140333660451e-05,
"loss": 0.7056642174720764,
"step": 658
},
{
"epoch": 0.6435546875,
"grad_norm": 0.3815765976905823,
"learning_rate": 7.183513248282631e-05,
"loss": 1.074477195739746,
"step": 659
},
{
"epoch": 0.64453125,
"grad_norm": 0.35606271028518677,
"learning_rate": 7.163886162904809e-05,
"loss": 0.4300801753997803,
"step": 660
},
{
"epoch": 0.6455078125,
"grad_norm": 0.32899999618530273,
"learning_rate": 7.144259077526988e-05,
"loss": 0.5923078060150146,
"step": 661
},
{
"epoch": 0.646484375,
"grad_norm": 0.49968358874320984,
"learning_rate": 7.124631992149167e-05,
"loss": 0.8295183181762695,
"step": 662
},
{
"epoch": 0.6474609375,
"grad_norm": 0.3393777012825012,
"learning_rate": 7.105004906771345e-05,
"loss": 0.30383622646331787,
"step": 663
},
{
"epoch": 0.6484375,
"grad_norm": 0.24977968633174896,
"learning_rate": 7.085377821393524e-05,
"loss": 0.429612934589386,
"step": 664
},
{
"epoch": 0.6494140625,
"grad_norm": 0.35886242985725403,
"learning_rate": 7.065750736015702e-05,
"loss": 0.9189084768295288,
"step": 665
},
{
"epoch": 0.650390625,
"grad_norm": 0.3856249153614044,
"learning_rate": 7.04612365063788e-05,
"loss": 0.4880048930644989,
"step": 666
},
{
"epoch": 0.6513671875,
"grad_norm": 0.4439884424209595,
"learning_rate": 7.026496565260058e-05,
"loss": 0.7537186145782471,
"step": 667
},
{
"epoch": 0.65234375,
"grad_norm": 0.29563215374946594,
"learning_rate": 7.006869479882238e-05,
"loss": 0.38701343536376953,
"step": 668
},
{
"epoch": 0.6533203125,
"grad_norm": 0.1909576952457428,
"learning_rate": 6.987242394504416e-05,
"loss": 0.15140604972839355,
"step": 669
},
{
"epoch": 0.654296875,
"grad_norm": 0.3344849944114685,
"learning_rate": 6.967615309126594e-05,
"loss": 0.527427077293396,
"step": 670
},
{
"epoch": 0.6552734375,
"grad_norm": 0.3609422743320465,
"learning_rate": 6.947988223748774e-05,
"loss": 0.29116177558898926,
"step": 671
},
{
"epoch": 0.65625,
"grad_norm": 0.4419811964035034,
"learning_rate": 6.928361138370952e-05,
"loss": 0.7166855931282043,
"step": 672
},
{
"epoch": 0.6572265625,
"grad_norm": 0.31890806555747986,
"learning_rate": 6.90873405299313e-05,
"loss": 0.5259425640106201,
"step": 673
},
{
"epoch": 0.658203125,
"grad_norm": 0.39572352170944214,
"learning_rate": 6.88910696761531e-05,
"loss": 0.5964791774749756,
"step": 674
},
{
"epoch": 0.6591796875,
"grad_norm": 0.4501058757305145,
"learning_rate": 6.869479882237488e-05,
"loss": 0.2289922833442688,
"step": 675
},
{
"epoch": 0.66015625,
"grad_norm": 0.2884235680103302,
"learning_rate": 6.849852796859666e-05,
"loss": 0.2730886936187744,
"step": 676
},
{
"epoch": 0.6611328125,
"grad_norm": 0.32970431447029114,
"learning_rate": 6.830225711481846e-05,
"loss": 0.4283568859100342,
"step": 677
},
{
"epoch": 0.662109375,
"grad_norm": 0.39025789499282837,
"learning_rate": 6.810598626104023e-05,
"loss": 0.9361288547515869,
"step": 678
},
{
"epoch": 0.6630859375,
"grad_norm": 0.48386886715888977,
"learning_rate": 6.790971540726203e-05,
"loss": 0.4907494783401489,
"step": 679
},
{
"epoch": 0.6640625,
"grad_norm": 0.41783151030540466,
"learning_rate": 6.771344455348381e-05,
"loss": 0.7485824823379517,
"step": 680
},
{
"epoch": 0.6650390625,
"grad_norm": 0.4826144278049469,
"learning_rate": 6.751717369970559e-05,
"loss": 0.6413211226463318,
"step": 681
},
{
"epoch": 0.666015625,
"grad_norm": 0.27521079778671265,
"learning_rate": 6.732090284592739e-05,
"loss": 0.5747159123420715,
"step": 682
},
{
"epoch": 0.6669921875,
"grad_norm": 0.3745660185813904,
"learning_rate": 6.712463199214917e-05,
"loss": 0.414341002702713,
"step": 683
},
{
"epoch": 0.66796875,
"grad_norm": 0.45048731565475464,
"learning_rate": 6.692836113837095e-05,
"loss": 0.3665570318698883,
"step": 684
},
{
"epoch": 0.6689453125,
"grad_norm": 0.5048633217811584,
"learning_rate": 6.673209028459275e-05,
"loss": 0.5923498272895813,
"step": 685
},
{
"epoch": 0.669921875,
"grad_norm": 0.46423155069351196,
"learning_rate": 6.653581943081453e-05,
"loss": 0.7506915330886841,
"step": 686
},
{
"epoch": 0.6708984375,
"grad_norm": 0.42965108156204224,
"learning_rate": 6.633954857703631e-05,
"loss": 0.7576399445533752,
"step": 687
},
{
"epoch": 0.671875,
"grad_norm": 0.48331597447395325,
"learning_rate": 6.614327772325811e-05,
"loss": 0.5249682068824768,
"step": 688
},
{
"epoch": 0.6728515625,
"grad_norm": 0.4685790240764618,
"learning_rate": 6.594700686947989e-05,
"loss": 0.8056750297546387,
"step": 689
},
{
"epoch": 0.673828125,
"grad_norm": 0.46440044045448303,
"learning_rate": 6.575073601570167e-05,
"loss": 0.9252493381500244,
"step": 690
},
{
"epoch": 0.6748046875,
"grad_norm": 0.46564289927482605,
"learning_rate": 6.555446516192347e-05,
"loss": 0.8182022571563721,
"step": 691
},
{
"epoch": 0.67578125,
"grad_norm": 0.4397750496864319,
"learning_rate": 6.535819430814525e-05,
"loss": 0.7928388118743896,
"step": 692
},
{
"epoch": 0.6767578125,
"grad_norm": 0.3233174681663513,
"learning_rate": 6.516192345436702e-05,
"loss": 0.5252426862716675,
"step": 693
},
{
"epoch": 0.677734375,
"grad_norm": 0.6012148857116699,
"learning_rate": 6.496565260058882e-05,
"loss": 0.44195663928985596,
"step": 694
},
{
"epoch": 0.6787109375,
"grad_norm": 0.6329052448272705,
"learning_rate": 6.47693817468106e-05,
"loss": 0.5354570150375366,
"step": 695
},
{
"epoch": 0.6796875,
"grad_norm": 0.47926270961761475,
"learning_rate": 6.457311089303238e-05,
"loss": 0.4950491786003113,
"step": 696
},
{
"epoch": 0.6806640625,
"grad_norm": 0.5051383972167969,
"learning_rate": 6.437684003925418e-05,
"loss": 0.6795849204063416,
"step": 697
},
{
"epoch": 0.681640625,
"grad_norm": 0.4022398591041565,
"learning_rate": 6.418056918547596e-05,
"loss": 1.0388166904449463,
"step": 698
},
{
"epoch": 0.6826171875,
"grad_norm": 0.4309573471546173,
"learning_rate": 6.398429833169774e-05,
"loss": 0.6022897362709045,
"step": 699
},
{
"epoch": 0.68359375,
"grad_norm": 0.3301983177661896,
"learning_rate": 6.378802747791954e-05,
"loss": 0.6451660394668579,
"step": 700
},
{
"epoch": 0.6845703125,
"grad_norm": 0.6647156476974487,
"learning_rate": 6.359175662414132e-05,
"loss": 0.9699732661247253,
"step": 701
},
{
"epoch": 0.685546875,
"grad_norm": 0.37545597553253174,
"learning_rate": 6.33954857703631e-05,
"loss": 0.43181508779525757,
"step": 702
},
{
"epoch": 0.6865234375,
"grad_norm": 0.40882429480552673,
"learning_rate": 6.31992149165849e-05,
"loss": 0.665264368057251,
"step": 703
},
{
"epoch": 0.6875,
"grad_norm": 0.46597936749458313,
"learning_rate": 6.300294406280668e-05,
"loss": 0.8813620209693909,
"step": 704
},
{
"epoch": 0.6884765625,
"grad_norm": 0.4355461597442627,
"learning_rate": 6.280667320902846e-05,
"loss": 0.595770537853241,
"step": 705
},
{
"epoch": 0.689453125,
"grad_norm": 0.45896056294441223,
"learning_rate": 6.261040235525026e-05,
"loss": 0.7571601271629333,
"step": 706
},
{
"epoch": 0.6904296875,
"grad_norm": 0.37643495202064514,
"learning_rate": 6.241413150147204e-05,
"loss": 0.47930869460105896,
"step": 707
},
{
"epoch": 0.69140625,
"grad_norm": 0.49690738320350647,
"learning_rate": 6.221786064769381e-05,
"loss": 0.3727263808250427,
"step": 708
},
{
"epoch": 0.6923828125,
"grad_norm": 0.44111907482147217,
"learning_rate": 6.20215897939156e-05,
"loss": 0.7276532649993896,
"step": 709
},
{
"epoch": 0.693359375,
"grad_norm": 0.44872644543647766,
"learning_rate": 6.182531894013739e-05,
"loss": 0.5082123279571533,
"step": 710
},
{
"epoch": 0.6943359375,
"grad_norm": 0.3345314562320709,
"learning_rate": 6.162904808635917e-05,
"loss": 0.5472716093063354,
"step": 711
},
{
"epoch": 0.6953125,
"grad_norm": 0.4269154667854309,
"learning_rate": 6.143277723258097e-05,
"loss": 0.7036910057067871,
"step": 712
},
{
"epoch": 0.6962890625,
"grad_norm": 0.5314676761627197,
"learning_rate": 6.123650637880275e-05,
"loss": 0.8663474917411804,
"step": 713
},
{
"epoch": 0.697265625,
"grad_norm": 0.2820166349411011,
"learning_rate": 6.104023552502453e-05,
"loss": 0.6397068500518799,
"step": 714
},
{
"epoch": 0.6982421875,
"grad_norm": 0.40954726934432983,
"learning_rate": 6.084396467124632e-05,
"loss": 0.5477964282035828,
"step": 715
},
{
"epoch": 0.69921875,
"grad_norm": 0.6858615279197693,
"learning_rate": 6.064769381746811e-05,
"loss": 0.694764256477356,
"step": 716
},
{
"epoch": 0.7001953125,
"grad_norm": 2.901998281478882,
"learning_rate": 6.04514229636899e-05,
"loss": 0.5803335309028625,
"step": 717
},
{
"epoch": 0.701171875,
"grad_norm": 0.6065869927406311,
"learning_rate": 6.025515210991168e-05,
"loss": 0.49790292978286743,
"step": 718
},
{
"epoch": 0.7021484375,
"grad_norm": 0.3678690195083618,
"learning_rate": 6.005888125613347e-05,
"loss": 0.38595882058143616,
"step": 719
},
{
"epoch": 0.703125,
"grad_norm": 0.32496991753578186,
"learning_rate": 5.986261040235526e-05,
"loss": 0.3554360866546631,
"step": 720
},
{
"epoch": 0.7041015625,
"grad_norm": 0.5348960161209106,
"learning_rate": 5.966633954857704e-05,
"loss": 1.0386948585510254,
"step": 721
},
{
"epoch": 0.705078125,
"grad_norm": 0.42248818278312683,
"learning_rate": 5.947006869479883e-05,
"loss": 0.4950508177280426,
"step": 722
},
{
"epoch": 0.7060546875,
"grad_norm": 0.36575669050216675,
"learning_rate": 5.9273797841020606e-05,
"loss": 0.8793643712997437,
"step": 723
},
{
"epoch": 0.70703125,
"grad_norm": 0.30802977085113525,
"learning_rate": 5.9077526987242395e-05,
"loss": 0.7557331919670105,
"step": 724
},
{
"epoch": 0.7080078125,
"grad_norm": 0.36057788133621216,
"learning_rate": 5.888125613346418e-05,
"loss": 0.793386697769165,
"step": 725
},
{
"epoch": 0.708984375,
"grad_norm": 0.5049283504486084,
"learning_rate": 5.8684985279685966e-05,
"loss": 0.3805343210697174,
"step": 726
},
{
"epoch": 0.7099609375,
"grad_norm": 0.4448167681694031,
"learning_rate": 5.8488714425907756e-05,
"loss": 0.8297110199928284,
"step": 727
},
{
"epoch": 0.7109375,
"grad_norm": 0.5144803524017334,
"learning_rate": 5.829244357212954e-05,
"loss": 0.8582932949066162,
"step": 728
},
{
"epoch": 0.7119140625,
"grad_norm": 0.48559248447418213,
"learning_rate": 5.809617271835133e-05,
"loss": 0.851997971534729,
"step": 729
},
{
"epoch": 0.712890625,
"grad_norm": 0.5277959704399109,
"learning_rate": 5.7899901864573116e-05,
"loss": 0.8560271859169006,
"step": 730
},
{
"epoch": 0.7138671875,
"grad_norm": 0.39055025577545166,
"learning_rate": 5.77036310107949e-05,
"loss": 0.5023626685142517,
"step": 731
},
{
"epoch": 0.71484375,
"grad_norm": 0.4014328718185425,
"learning_rate": 5.750736015701669e-05,
"loss": 0.7782986760139465,
"step": 732
},
{
"epoch": 0.7158203125,
"grad_norm": 0.9840988516807556,
"learning_rate": 5.731108930323848e-05,
"loss": 0.5097107887268066,
"step": 733
},
{
"epoch": 0.716796875,
"grad_norm": 0.512140691280365,
"learning_rate": 5.711481844946026e-05,
"loss": 0.5448895692825317,
"step": 734
},
{
"epoch": 0.7177734375,
"grad_norm": 0.45195046067237854,
"learning_rate": 5.691854759568205e-05,
"loss": 0.7583330273628235,
"step": 735
},
{
"epoch": 0.71875,
"grad_norm": 0.4155009090900421,
"learning_rate": 5.672227674190384e-05,
"loss": 0.5220797061920166,
"step": 736
},
{
"epoch": 0.7197265625,
"grad_norm": 0.552148699760437,
"learning_rate": 5.652600588812562e-05,
"loss": 0.8043540716171265,
"step": 737
},
{
"epoch": 0.720703125,
"grad_norm": 0.30510297417640686,
"learning_rate": 5.6329735034347396e-05,
"loss": 0.5110808610916138,
"step": 738
},
{
"epoch": 0.7216796875,
"grad_norm": 0.522339940071106,
"learning_rate": 5.6133464180569185e-05,
"loss": 1.0245096683502197,
"step": 739
},
{
"epoch": 0.72265625,
"grad_norm": 0.27751341462135315,
"learning_rate": 5.5937193326790974e-05,
"loss": 0.6376601457595825,
"step": 740
},
{
"epoch": 0.7236328125,
"grad_norm": 0.4283340573310852,
"learning_rate": 5.5740922473012756e-05,
"loss": 1.1317777633666992,
"step": 741
},
{
"epoch": 0.724609375,
"grad_norm": 0.541248619556427,
"learning_rate": 5.5544651619234545e-05,
"loss": 0.8086187839508057,
"step": 742
},
{
"epoch": 0.7255859375,
"grad_norm": 0.24750906229019165,
"learning_rate": 5.5348380765456335e-05,
"loss": 0.4873177409172058,
"step": 743
},
{
"epoch": 0.7265625,
"grad_norm": 0.42374616861343384,
"learning_rate": 5.515210991167812e-05,
"loss": 0.41606956720352173,
"step": 744
},
{
"epoch": 0.7275390625,
"grad_norm": 0.35455161333084106,
"learning_rate": 5.4955839057899906e-05,
"loss": 0.49936947226524353,
"step": 745
},
{
"epoch": 0.728515625,
"grad_norm": 0.4243617653846741,
"learning_rate": 5.475956820412169e-05,
"loss": 0.6650359630584717,
"step": 746
},
{
"epoch": 0.7294921875,
"grad_norm": 0.4106060862541199,
"learning_rate": 5.456329735034348e-05,
"loss": 0.37870654463768005,
"step": 747
},
{
"epoch": 0.73046875,
"grad_norm": 0.3536394536495209,
"learning_rate": 5.436702649656527e-05,
"loss": 1.0944924354553223,
"step": 748
},
{
"epoch": 0.7314453125,
"grad_norm": 0.3067559003829956,
"learning_rate": 5.417075564278705e-05,
"loss": 0.6380996704101562,
"step": 749
},
{
"epoch": 0.732421875,
"grad_norm": 0.40423691272735596,
"learning_rate": 5.397448478900884e-05,
"loss": 0.712358295917511,
"step": 750
},
{
"epoch": 0.7333984375,
"grad_norm": 0.451038658618927,
"learning_rate": 5.377821393523063e-05,
"loss": 0.6221305727958679,
"step": 751
},
{
"epoch": 0.734375,
"grad_norm": 0.32606229186058044,
"learning_rate": 5.35819430814524e-05,
"loss": 0.6600078344345093,
"step": 752
},
{
"epoch": 0.7353515625,
"grad_norm": 0.746896505355835,
"learning_rate": 5.3385672227674185e-05,
"loss": 0.5533967614173889,
"step": 753
},
{
"epoch": 0.736328125,
"grad_norm": 0.403277724981308,
"learning_rate": 5.3189401373895974e-05,
"loss": 0.7483388185501099,
"step": 754
},
{
"epoch": 0.7373046875,
"grad_norm": 0.6016709208488464,
"learning_rate": 5.2993130520117764e-05,
"loss": 0.539909839630127,
"step": 755
},
{
"epoch": 0.73828125,
"grad_norm": 0.39885231852531433,
"learning_rate": 5.2796859666339546e-05,
"loss": 0.7900533676147461,
"step": 756
},
{
"epoch": 0.7392578125,
"grad_norm": 0.3245362639427185,
"learning_rate": 5.2600588812561335e-05,
"loss": 0.42862433195114136,
"step": 757
},
{
"epoch": 0.740234375,
"grad_norm": 0.47334104776382446,
"learning_rate": 5.2404317958783124e-05,
"loss": 0.3249909281730652,
"step": 758
},
{
"epoch": 0.7412109375,
"grad_norm": 0.3029737174510956,
"learning_rate": 5.220804710500491e-05,
"loss": 0.4264957308769226,
"step": 759
},
{
"epoch": 0.7421875,
"grad_norm": 0.33878564834594727,
"learning_rate": 5.2011776251226696e-05,
"loss": 0.4446904957294464,
"step": 760
},
{
"epoch": 0.7431640625,
"grad_norm": 0.3307798206806183,
"learning_rate": 5.1815505397448485e-05,
"loss": 0.461605966091156,
"step": 761
},
{
"epoch": 0.744140625,
"grad_norm": 0.4146850109100342,
"learning_rate": 5.161923454367027e-05,
"loss": 0.758568525314331,
"step": 762
},
{
"epoch": 0.7451171875,
"grad_norm": 0.3531327545642853,
"learning_rate": 5.1422963689892056e-05,
"loss": 0.4580535292625427,
"step": 763
},
{
"epoch": 0.74609375,
"grad_norm": 0.3952695429325104,
"learning_rate": 5.1226692836113846e-05,
"loss": 0.333244651556015,
"step": 764
},
{
"epoch": 0.7470703125,
"grad_norm": 0.5774162411689758,
"learning_rate": 5.103042198233563e-05,
"loss": 0.6433362364768982,
"step": 765
},
{
"epoch": 0.748046875,
"grad_norm": 0.49668964743614197,
"learning_rate": 5.083415112855742e-05,
"loss": 0.8478100895881653,
"step": 766
},
{
"epoch": 0.7490234375,
"grad_norm": 0.3303810954093933,
"learning_rate": 5.063788027477919e-05,
"loss": 0.7296837568283081,
"step": 767
},
{
"epoch": 0.75,
"grad_norm": 0.27652832865715027,
"learning_rate": 5.044160942100098e-05,
"loss": 0.6442312598228455,
"step": 768
},
{
"epoch": 0.7509765625,
"grad_norm": 1.0828924179077148,
"learning_rate": 5.0245338567222764e-05,
"loss": 0.9848635196685791,
"step": 769
},
{
"epoch": 0.751953125,
"grad_norm": 0.38959333300590515,
"learning_rate": 5.0049067713444553e-05,
"loss": 0.722776472568512,
"step": 770
},
{
"epoch": 0.7529296875,
"grad_norm": 0.3470323383808136,
"learning_rate": 4.985279685966634e-05,
"loss": 0.6584157943725586,
"step": 771
},
{
"epoch": 0.75390625,
"grad_norm": 0.4060254693031311,
"learning_rate": 4.9656526005888125e-05,
"loss": 0.6276923418045044,
"step": 772
},
{
"epoch": 0.7548828125,
"grad_norm": 0.34566962718963623,
"learning_rate": 4.9460255152109914e-05,
"loss": 0.972516655921936,
"step": 773
},
{
"epoch": 0.755859375,
"grad_norm": 0.41829708218574524,
"learning_rate": 4.92639842983317e-05,
"loss": 0.6937177181243896,
"step": 774
},
{
"epoch": 0.7568359375,
"grad_norm": 0.7653974294662476,
"learning_rate": 4.9067713444553486e-05,
"loss": 0.6027823090553284,
"step": 775
},
{
"epoch": 0.7578125,
"grad_norm": 1.0477155447006226,
"learning_rate": 4.8871442590775275e-05,
"loss": 0.925806999206543,
"step": 776
},
{
"epoch": 0.7587890625,
"grad_norm": 0.43484824895858765,
"learning_rate": 4.8675171736997064e-05,
"loss": 0.7783142328262329,
"step": 777
},
{
"epoch": 0.759765625,
"grad_norm": 0.33719849586486816,
"learning_rate": 4.847890088321884e-05,
"loss": 0.6108527779579163,
"step": 778
},
{
"epoch": 0.7607421875,
"grad_norm": 0.3983028531074524,
"learning_rate": 4.828263002944063e-05,
"loss": 0.9976012706756592,
"step": 779
},
{
"epoch": 0.76171875,
"grad_norm": 0.3278787136077881,
"learning_rate": 4.808635917566242e-05,
"loss": 0.5754845142364502,
"step": 780
},
{
"epoch": 0.7626953125,
"grad_norm": 0.42433467507362366,
"learning_rate": 4.78900883218842e-05,
"loss": 0.8455826640129089,
"step": 781
},
{
"epoch": 0.763671875,
"grad_norm": 0.33245334029197693,
"learning_rate": 4.769381746810599e-05,
"loss": 0.5207083225250244,
"step": 782
},
{
"epoch": 0.7646484375,
"grad_norm": 0.4390372931957245,
"learning_rate": 4.749754661432778e-05,
"loss": 0.7208432555198669,
"step": 783
},
{
"epoch": 0.765625,
"grad_norm": 0.325720876455307,
"learning_rate": 4.730127576054956e-05,
"loss": 0.3017955422401428,
"step": 784
},
{
"epoch": 0.7666015625,
"grad_norm": 0.3036203980445862,
"learning_rate": 4.710500490677135e-05,
"loss": 0.47869423031806946,
"step": 785
},
{
"epoch": 0.767578125,
"grad_norm": 0.4316065013408661,
"learning_rate": 4.690873405299313e-05,
"loss": 0.7984920740127563,
"step": 786
},
{
"epoch": 0.7685546875,
"grad_norm": 0.46907728910446167,
"learning_rate": 4.6712463199214915e-05,
"loss": 0.7288491725921631,
"step": 787
},
{
"epoch": 0.76953125,
"grad_norm": 0.38269418478012085,
"learning_rate": 4.6516192345436704e-05,
"loss": 0.46745771169662476,
"step": 788
},
{
"epoch": 0.7705078125,
"grad_norm": 0.6045718193054199,
"learning_rate": 4.631992149165849e-05,
"loss": 0.5405256152153015,
"step": 789
},
{
"epoch": 0.771484375,
"grad_norm": 0.3303053677082062,
"learning_rate": 4.6123650637880275e-05,
"loss": 0.6721948981285095,
"step": 790
},
{
"epoch": 0.7724609375,
"grad_norm": 0.42014074325561523,
"learning_rate": 4.5927379784102065e-05,
"loss": 0.9322581887245178,
"step": 791
},
{
"epoch": 0.7734375,
"grad_norm": 0.3720149099826813,
"learning_rate": 4.5731108930323854e-05,
"loss": 0.7807843685150146,
"step": 792
},
{
"epoch": 0.7744140625,
"grad_norm": 0.31559938192367554,
"learning_rate": 4.5534838076545636e-05,
"loss": 0.8503724336624146,
"step": 793
},
{
"epoch": 0.775390625,
"grad_norm": 0.4096013903617859,
"learning_rate": 4.533856722276742e-05,
"loss": 0.6950633525848389,
"step": 794
},
{
"epoch": 0.7763671875,
"grad_norm": 0.3791837990283966,
"learning_rate": 4.514229636898921e-05,
"loss": 0.7583197951316833,
"step": 795
},
{
"epoch": 0.77734375,
"grad_norm": 0.5274584889411926,
"learning_rate": 4.494602551521099e-05,
"loss": 0.4712093770503998,
"step": 796
},
{
"epoch": 0.7783203125,
"grad_norm": 0.29654791951179504,
"learning_rate": 4.474975466143278e-05,
"loss": 0.552979588508606,
"step": 797
},
{
"epoch": 0.779296875,
"grad_norm": 0.25629475712776184,
"learning_rate": 4.455348380765457e-05,
"loss": 0.5225521922111511,
"step": 798
},
{
"epoch": 0.7802734375,
"grad_norm": 0.2676495611667633,
"learning_rate": 4.435721295387635e-05,
"loss": 0.4382556080818176,
"step": 799
},
{
"epoch": 0.78125,
"grad_norm": 0.4117366075515747,
"learning_rate": 4.416094210009813e-05,
"loss": 0.5639417767524719,
"step": 800
},
{
"epoch": 0.7822265625,
"grad_norm": 0.26305386424064636,
"learning_rate": 4.396467124631992e-05,
"loss": 0.28840768337249756,
"step": 801
},
{
"epoch": 0.783203125,
"grad_norm": 0.7253789305686951,
"learning_rate": 4.376840039254171e-05,
"loss": 0.4104336202144623,
"step": 802
},
{
"epoch": 0.7841796875,
"grad_norm": 0.371288001537323,
"learning_rate": 4.3572129538763494e-05,
"loss": 0.609147310256958,
"step": 803
},
{
"epoch": 0.78515625,
"grad_norm": 0.634273111820221,
"learning_rate": 4.337585868498528e-05,
"loss": 0.5141665935516357,
"step": 804
},
{
"epoch": 0.7861328125,
"grad_norm": 0.4442044496536255,
"learning_rate": 4.317958783120707e-05,
"loss": 0.4882044494152069,
"step": 805
},
{
"epoch": 0.787109375,
"grad_norm": 0.3099007308483124,
"learning_rate": 4.2983316977428854e-05,
"loss": 0.3148588538169861,
"step": 806
},
{
"epoch": 0.7880859375,
"grad_norm": 0.41893890500068665,
"learning_rate": 4.2787046123650643e-05,
"loss": 0.6678078174591064,
"step": 807
},
{
"epoch": 0.7890625,
"grad_norm": 0.47682809829711914,
"learning_rate": 4.2590775269872426e-05,
"loss": 0.46614763140678406,
"step": 808
},
{
"epoch": 0.7900390625,
"grad_norm": 0.25193366408348083,
"learning_rate": 4.239450441609421e-05,
"loss": 0.3707652986049652,
"step": 809
},
{
"epoch": 0.791015625,
"grad_norm": 0.3425232768058777,
"learning_rate": 4.2198233562316e-05,
"loss": 0.604179859161377,
"step": 810
},
{
"epoch": 0.7919921875,
"grad_norm": 0.31459808349609375,
"learning_rate": 4.2001962708537786e-05,
"loss": 0.748989999294281,
"step": 811
},
{
"epoch": 0.79296875,
"grad_norm": 0.3478514850139618,
"learning_rate": 4.180569185475957e-05,
"loss": 0.6651142835617065,
"step": 812
},
{
"epoch": 0.7939453125,
"grad_norm": 0.3951675295829773,
"learning_rate": 4.160942100098136e-05,
"loss": 0.7293418049812317,
"step": 813
},
{
"epoch": 0.794921875,
"grad_norm": 0.26888158917427063,
"learning_rate": 4.141315014720315e-05,
"loss": 0.2181730419397354,
"step": 814
},
{
"epoch": 0.7958984375,
"grad_norm": 0.17496585845947266,
"learning_rate": 4.121687929342492e-05,
"loss": 0.18257993459701538,
"step": 815
},
{
"epoch": 0.796875,
"grad_norm": 0.3386918306350708,
"learning_rate": 4.102060843964671e-05,
"loss": 0.43010956048965454,
"step": 816
},
{
"epoch": 0.7978515625,
"grad_norm": 0.5185137987136841,
"learning_rate": 4.08243375858685e-05,
"loss": 0.9117882251739502,
"step": 817
},
{
"epoch": 0.798828125,
"grad_norm": 0.499529093503952,
"learning_rate": 4.0628066732090283e-05,
"loss": 0.8601939678192139,
"step": 818
},
{
"epoch": 0.7998046875,
"grad_norm": 0.44401317834854126,
"learning_rate": 4.043179587831207e-05,
"loss": 0.8643960356712341,
"step": 819
},
{
"epoch": 0.80078125,
"grad_norm": 0.30553653836250305,
"learning_rate": 4.023552502453386e-05,
"loss": 0.7741817235946655,
"step": 820
},
{
"epoch": 0.8017578125,
"grad_norm": 0.443541944026947,
"learning_rate": 4.0039254170755644e-05,
"loss": 0.9571224451065063,
"step": 821
},
{
"epoch": 0.802734375,
"grad_norm": 0.2611587643623352,
"learning_rate": 3.9842983316977426e-05,
"loss": 0.4755222201347351,
"step": 822
},
{
"epoch": 0.8037109375,
"grad_norm": 0.38695722818374634,
"learning_rate": 3.9646712463199216e-05,
"loss": 0.9597996473312378,
"step": 823
},
{
"epoch": 0.8046875,
"grad_norm": 0.505346953868866,
"learning_rate": 3.9450441609421005e-05,
"loss": 0.328266441822052,
"step": 824
},
{
"epoch": 0.8056640625,
"grad_norm": 0.38910478353500366,
"learning_rate": 3.925417075564279e-05,
"loss": 0.4758382737636566,
"step": 825
},
{
"epoch": 0.806640625,
"grad_norm": 0.4268342852592468,
"learning_rate": 3.9057899901864576e-05,
"loss": 0.6131553649902344,
"step": 826
},
{
"epoch": 0.8076171875,
"grad_norm": 0.32205328345298767,
"learning_rate": 3.8861629048086365e-05,
"loss": 0.6047544479370117,
"step": 827
},
{
"epoch": 0.80859375,
"grad_norm": 0.6975948214530945,
"learning_rate": 3.866535819430815e-05,
"loss": 0.7599061727523804,
"step": 828
},
{
"epoch": 0.8095703125,
"grad_norm": 0.20186780393123627,
"learning_rate": 3.846908734052994e-05,
"loss": 0.3639545738697052,
"step": 829
},
{
"epoch": 0.810546875,
"grad_norm": 0.443435937166214,
"learning_rate": 3.827281648675172e-05,
"loss": 0.6933274269104004,
"step": 830
},
{
"epoch": 0.8115234375,
"grad_norm": 0.44157811999320984,
"learning_rate": 3.80765456329735e-05,
"loss": 0.5135524272918701,
"step": 831
},
{
"epoch": 0.8125,
"grad_norm": 0.3959600031375885,
"learning_rate": 3.788027477919529e-05,
"loss": 0.6713152527809143,
"step": 832
},
{
"epoch": 0.8134765625,
"grad_norm": 0.5439519882202148,
"learning_rate": 3.768400392541708e-05,
"loss": 0.3603706359863281,
"step": 833
},
{
"epoch": 0.814453125,
"grad_norm": 0.36693719029426575,
"learning_rate": 3.748773307163886e-05,
"loss": 0.8574247360229492,
"step": 834
},
{
"epoch": 0.8154296875,
"grad_norm": 0.3476804792881012,
"learning_rate": 3.729146221786065e-05,
"loss": 0.6845530867576599,
"step": 835
},
{
"epoch": 0.81640625,
"grad_norm": 0.48850229382514954,
"learning_rate": 3.709519136408244e-05,
"loss": 0.788569450378418,
"step": 836
},
{
"epoch": 0.8173828125,
"grad_norm": 0.5997111797332764,
"learning_rate": 3.6898920510304216e-05,
"loss": 0.5885312557220459,
"step": 837
},
{
"epoch": 0.818359375,
"grad_norm": 0.43312472105026245,
"learning_rate": 3.6702649656526005e-05,
"loss": 0.5300126075744629,
"step": 838
},
{
"epoch": 0.8193359375,
"grad_norm": 0.6505857110023499,
"learning_rate": 3.6506378802747795e-05,
"loss": 0.7164736986160278,
"step": 839
},
{
"epoch": 0.8203125,
"grad_norm": 0.34061765670776367,
"learning_rate": 3.631010794896958e-05,
"loss": 0.5405696034431458,
"step": 840
},
{
"epoch": 0.8212890625,
"grad_norm": 0.4188057780265808,
"learning_rate": 3.6113837095191366e-05,
"loss": 1.0057684183120728,
"step": 841
},
{
"epoch": 0.822265625,
"grad_norm": 0.392007052898407,
"learning_rate": 3.5917566241413155e-05,
"loss": 0.6687936782836914,
"step": 842
},
{
"epoch": 0.8232421875,
"grad_norm": 0.44254210591316223,
"learning_rate": 3.572129538763494e-05,
"loss": 0.39150726795196533,
"step": 843
},
{
"epoch": 0.82421875,
"grad_norm": 0.41756534576416016,
"learning_rate": 3.552502453385673e-05,
"loss": 0.764665961265564,
"step": 844
},
{
"epoch": 0.8251953125,
"grad_norm": 0.9839560985565186,
"learning_rate": 3.532875368007851e-05,
"loss": 0.45259296894073486,
"step": 845
},
{
"epoch": 0.826171875,
"grad_norm": 0.3465111553668976,
"learning_rate": 3.513248282630029e-05,
"loss": 0.5895928740501404,
"step": 846
},
{
"epoch": 0.8271484375,
"grad_norm": 0.4883447289466858,
"learning_rate": 3.493621197252208e-05,
"loss": 0.8401346802711487,
"step": 847
},
{
"epoch": 0.828125,
"grad_norm": 0.3590312898159027,
"learning_rate": 3.473994111874387e-05,
"loss": 0.6134470105171204,
"step": 848
},
{
"epoch": 0.8291015625,
"grad_norm": 0.48273324966430664,
"learning_rate": 3.454367026496565e-05,
"loss": 0.6351644992828369,
"step": 849
},
{
"epoch": 0.830078125,
"grad_norm": 0.32156500220298767,
"learning_rate": 3.434739941118744e-05,
"loss": 0.5098355412483215,
"step": 850
},
{
"epoch": 0.8310546875,
"grad_norm": 0.38239747285842896,
"learning_rate": 3.415112855740923e-05,
"loss": 1.0178660154342651,
"step": 851
},
{
"epoch": 0.83203125,
"grad_norm": 0.6875290274620056,
"learning_rate": 3.395485770363101e-05,
"loss": 0.4496825337409973,
"step": 852
},
{
"epoch": 0.8330078125,
"grad_norm": 0.27034860849380493,
"learning_rate": 3.3758586849852795e-05,
"loss": 0.41253381967544556,
"step": 853
},
{
"epoch": 0.833984375,
"grad_norm": 0.5166223049163818,
"learning_rate": 3.3562315996074584e-05,
"loss": 0.7344639897346497,
"step": 854
},
{
"epoch": 0.8349609375,
"grad_norm": 0.39597758650779724,
"learning_rate": 3.3366045142296373e-05,
"loss": 0.6066821217536926,
"step": 855
},
{
"epoch": 0.8359375,
"grad_norm": 0.44033098220825195,
"learning_rate": 3.3169774288518156e-05,
"loss": 0.7928174734115601,
"step": 856
},
{
"epoch": 0.8369140625,
"grad_norm": 0.3340597450733185,
"learning_rate": 3.2973503434739945e-05,
"loss": 0.4783233404159546,
"step": 857
},
{
"epoch": 0.837890625,
"grad_norm": 0.5634653568267822,
"learning_rate": 3.2777232580961734e-05,
"loss": 0.785845935344696,
"step": 858
},
{
"epoch": 0.8388671875,
"grad_norm": 0.24581296741962433,
"learning_rate": 3.258096172718351e-05,
"loss": 0.36480462551116943,
"step": 859
},
{
"epoch": 0.83984375,
"grad_norm": 0.316773384809494,
"learning_rate": 3.23846908734053e-05,
"loss": 0.886894941329956,
"step": 860
},
{
"epoch": 0.8408203125,
"grad_norm": 0.4605409502983093,
"learning_rate": 3.218842001962709e-05,
"loss": 0.7125131487846375,
"step": 861
},
{
"epoch": 0.841796875,
"grad_norm": 0.5473557114601135,
"learning_rate": 3.199214916584887e-05,
"loss": 0.45582157373428345,
"step": 862
},
{
"epoch": 0.8427734375,
"grad_norm": 0.4604926109313965,
"learning_rate": 3.179587831207066e-05,
"loss": 0.5392733812332153,
"step": 863
},
{
"epoch": 0.84375,
"grad_norm": 0.3192322552204132,
"learning_rate": 3.159960745829245e-05,
"loss": 0.3216538727283478,
"step": 864
},
{
"epoch": 0.8447265625,
"grad_norm": 0.4225713610649109,
"learning_rate": 3.140333660451423e-05,
"loss": 0.36403900384902954,
"step": 865
},
{
"epoch": 0.845703125,
"grad_norm": 0.7738484740257263,
"learning_rate": 3.120706575073602e-05,
"loss": 0.5428112149238586,
"step": 866
},
{
"epoch": 0.8466796875,
"grad_norm": 0.7795976400375366,
"learning_rate": 3.10107948969578e-05,
"loss": 0.838668704032898,
"step": 867
},
{
"epoch": 0.84765625,
"grad_norm": 0.4240044355392456,
"learning_rate": 3.0814524043179585e-05,
"loss": 0.5039677023887634,
"step": 868
},
{
"epoch": 0.8486328125,
"grad_norm": 0.7870606780052185,
"learning_rate": 3.0618253189401374e-05,
"loss": 0.2639703154563904,
"step": 869
},
{
"epoch": 0.849609375,
"grad_norm": 4.898192405700684,
"learning_rate": 3.042198233562316e-05,
"loss": 0.9641809463500977,
"step": 870
},
{
"epoch": 0.8505859375,
"grad_norm": 0.4090663194656372,
"learning_rate": 3.022571148184495e-05,
"loss": 0.5249053835868835,
"step": 871
},
{
"epoch": 0.8515625,
"grad_norm": 0.5761129856109619,
"learning_rate": 3.0029440628066735e-05,
"loss": 0.8987921476364136,
"step": 872
},
{
"epoch": 0.8525390625,
"grad_norm": 0.2440023124217987,
"learning_rate": 2.983316977428852e-05,
"loss": 0.3279159367084503,
"step": 873
},
{
"epoch": 0.853515625,
"grad_norm": 0.438519150018692,
"learning_rate": 2.9636898920510303e-05,
"loss": 0.8272308111190796,
"step": 874
},
{
"epoch": 0.8544921875,
"grad_norm": 0.4011988639831543,
"learning_rate": 2.944062806673209e-05,
"loss": 0.3140803873538971,
"step": 875
},
{
"epoch": 0.85546875,
"grad_norm": 0.5748201012611389,
"learning_rate": 2.9244357212953878e-05,
"loss": 0.6699116230010986,
"step": 876
},
{
"epoch": 0.8564453125,
"grad_norm": 0.3001462519168854,
"learning_rate": 2.9048086359175664e-05,
"loss": 0.19382989406585693,
"step": 877
},
{
"epoch": 0.857421875,
"grad_norm": 0.40844887495040894,
"learning_rate": 2.885181550539745e-05,
"loss": 0.6494845747947693,
"step": 878
},
{
"epoch": 0.8583984375,
"grad_norm": 0.3480914235115051,
"learning_rate": 2.865554465161924e-05,
"loss": 0.5555131435394287,
"step": 879
},
{
"epoch": 0.859375,
"grad_norm": 0.3903101682662964,
"learning_rate": 2.8459273797841024e-05,
"loss": 0.6830955147743225,
"step": 880
},
{
"epoch": 0.8603515625,
"grad_norm": 0.3058629333972931,
"learning_rate": 2.826300294406281e-05,
"loss": 0.3747236728668213,
"step": 881
},
{
"epoch": 0.861328125,
"grad_norm": 0.49275287985801697,
"learning_rate": 2.8066732090284592e-05,
"loss": 1.0192487239837646,
"step": 882
},
{
"epoch": 0.8623046875,
"grad_norm": 0.4016769826412201,
"learning_rate": 2.7870461236506378e-05,
"loss": 0.4012300372123718,
"step": 883
},
{
"epoch": 0.86328125,
"grad_norm": 0.4790811240673065,
"learning_rate": 2.7674190382728167e-05,
"loss": 0.6936056613922119,
"step": 884
},
{
"epoch": 0.8642578125,
"grad_norm": 0.39931413531303406,
"learning_rate": 2.7477919528949953e-05,
"loss": 0.3612633943557739,
"step": 885
},
{
"epoch": 0.865234375,
"grad_norm": 0.3250795006752014,
"learning_rate": 2.728164867517174e-05,
"loss": 0.5146504640579224,
"step": 886
},
{
"epoch": 0.8662109375,
"grad_norm": 0.5216737985610962,
"learning_rate": 2.7085377821393525e-05,
"loss": 0.6185201406478882,
"step": 887
},
{
"epoch": 0.8671875,
"grad_norm": 0.5681923031806946,
"learning_rate": 2.6889106967615314e-05,
"loss": 0.9492973685264587,
"step": 888
},
{
"epoch": 0.8681640625,
"grad_norm": 0.5284391045570374,
"learning_rate": 2.6692836113837093e-05,
"loss": 0.7801765203475952,
"step": 889
},
{
"epoch": 0.869140625,
"grad_norm": 0.42510825395584106,
"learning_rate": 2.6496565260058882e-05,
"loss": 0.4871942102909088,
"step": 890
},
{
"epoch": 0.8701171875,
"grad_norm": 0.39092326164245605,
"learning_rate": 2.6300294406280668e-05,
"loss": 0.5123960375785828,
"step": 891
},
{
"epoch": 0.87109375,
"grad_norm": 0.37694281339645386,
"learning_rate": 2.6104023552502453e-05,
"loss": 0.3543451428413391,
"step": 892
},
{
"epoch": 0.8720703125,
"grad_norm": 0.26519376039505005,
"learning_rate": 2.5907752698724242e-05,
"loss": 0.2388455718755722,
"step": 893
},
{
"epoch": 0.873046875,
"grad_norm": 0.6303861141204834,
"learning_rate": 2.5711481844946028e-05,
"loss": 0.7195224761962891,
"step": 894
},
{
"epoch": 0.8740234375,
"grad_norm": 0.4436159133911133,
"learning_rate": 2.5515210991167814e-05,
"loss": 0.8888048529624939,
"step": 895
},
{
"epoch": 0.875,
"grad_norm": 0.6473313570022583,
"learning_rate": 2.5318940137389596e-05,
"loss": 0.8557075262069702,
"step": 896
},
{
"epoch": 0.8759765625,
"grad_norm": 0.6625436544418335,
"learning_rate": 2.5122669283611382e-05,
"loss": 0.7132158279418945,
"step": 897
},
{
"epoch": 0.876953125,
"grad_norm": 0.7241202592849731,
"learning_rate": 2.492639842983317e-05,
"loss": 0.9367854595184326,
"step": 898
},
{
"epoch": 0.8779296875,
"grad_norm": 0.5321157574653625,
"learning_rate": 2.4730127576054957e-05,
"loss": 1.0013937950134277,
"step": 899
},
{
"epoch": 0.87890625,
"grad_norm": 0.3287423253059387,
"learning_rate": 2.4533856722276743e-05,
"loss": 0.4560258984565735,
"step": 900
},
{
"epoch": 0.8798828125,
"grad_norm": 0.5040727257728577,
"learning_rate": 2.4337585868498532e-05,
"loss": 0.5655212998390198,
"step": 901
},
{
"epoch": 0.880859375,
"grad_norm": 0.4150228202342987,
"learning_rate": 2.4141315014720314e-05,
"loss": 0.43106216192245483,
"step": 902
},
{
"epoch": 0.8818359375,
"grad_norm": 0.4006192684173584,
"learning_rate": 2.39450441609421e-05,
"loss": 0.4401901364326477,
"step": 903
},
{
"epoch": 0.8828125,
"grad_norm": 0.5145865678787231,
"learning_rate": 2.374877330716389e-05,
"loss": 0.9345691800117493,
"step": 904
},
{
"epoch": 0.8837890625,
"grad_norm": 0.7273013591766357,
"learning_rate": 2.3552502453385675e-05,
"loss": 0.27768659591674805,
"step": 905
},
{
"epoch": 0.884765625,
"grad_norm": 0.3039482831954956,
"learning_rate": 2.3356231599607457e-05,
"loss": 0.6196010112762451,
"step": 906
},
{
"epoch": 0.8857421875,
"grad_norm": 0.35697150230407715,
"learning_rate": 2.3159960745829247e-05,
"loss": 0.34777021408081055,
"step": 907
},
{
"epoch": 0.88671875,
"grad_norm": 0.356717050075531,
"learning_rate": 2.2963689892051032e-05,
"loss": 0.4651508331298828,
"step": 908
},
{
"epoch": 0.8876953125,
"grad_norm": 0.485963374376297,
"learning_rate": 2.2767419038272818e-05,
"loss": 0.3906201720237732,
"step": 909
},
{
"epoch": 0.888671875,
"grad_norm": 0.38827836513519287,
"learning_rate": 2.2571148184494604e-05,
"loss": 0.48782849311828613,
"step": 910
},
{
"epoch": 0.8896484375,
"grad_norm": 0.39589494466781616,
"learning_rate": 2.237487733071639e-05,
"loss": 0.5089969635009766,
"step": 911
},
{
"epoch": 0.890625,
"grad_norm": 0.6619493365287781,
"learning_rate": 2.2178606476938175e-05,
"loss": 0.9266189932823181,
"step": 912
},
{
"epoch": 0.8916015625,
"grad_norm": 0.407817542552948,
"learning_rate": 2.198233562315996e-05,
"loss": 0.3518386483192444,
"step": 913
},
{
"epoch": 0.892578125,
"grad_norm": 0.4645719826221466,
"learning_rate": 2.1786064769381747e-05,
"loss": 0.9297075271606445,
"step": 914
},
{
"epoch": 0.8935546875,
"grad_norm": 0.434517502784729,
"learning_rate": 2.1589793915603536e-05,
"loss": 0.7716128826141357,
"step": 915
},
{
"epoch": 0.89453125,
"grad_norm": 0.49387747049331665,
"learning_rate": 2.1393523061825322e-05,
"loss": 0.5475488901138306,
"step": 916
},
{
"epoch": 0.8955078125,
"grad_norm": 0.5593905448913574,
"learning_rate": 2.1197252208047104e-05,
"loss": 0.7304456233978271,
"step": 917
},
{
"epoch": 0.896484375,
"grad_norm": 0.3386078178882599,
"learning_rate": 2.1000981354268893e-05,
"loss": 0.7872465252876282,
"step": 918
},
{
"epoch": 0.8974609375,
"grad_norm": 0.2872868478298187,
"learning_rate": 2.080471050049068e-05,
"loss": 0.3295198976993561,
"step": 919
},
{
"epoch": 0.8984375,
"grad_norm": 0.4897945523262024,
"learning_rate": 2.060843964671246e-05,
"loss": 0.3939395546913147,
"step": 920
},
{
"epoch": 0.8994140625,
"grad_norm": 0.5068129897117615,
"learning_rate": 2.041216879293425e-05,
"loss": 0.4646037817001343,
"step": 921
},
{
"epoch": 0.900390625,
"grad_norm": 0.3769625425338745,
"learning_rate": 2.0215897939156036e-05,
"loss": 0.811498761177063,
"step": 922
},
{
"epoch": 0.9013671875,
"grad_norm": 0.380655974149704,
"learning_rate": 2.0019627085377822e-05,
"loss": 0.6260181665420532,
"step": 923
},
{
"epoch": 0.90234375,
"grad_norm": 0.5810602903366089,
"learning_rate": 1.9823356231599608e-05,
"loss": 0.7125158309936523,
"step": 924
},
{
"epoch": 0.9033203125,
"grad_norm": 0.4367387592792511,
"learning_rate": 1.9627085377821394e-05,
"loss": 0.7728107571601868,
"step": 925
},
{
"epoch": 0.904296875,
"grad_norm": 0.604702353477478,
"learning_rate": 1.9430814524043183e-05,
"loss": 0.5136534571647644,
"step": 926
},
{
"epoch": 0.9052734375,
"grad_norm": 0.40865615010261536,
"learning_rate": 1.923454367026497e-05,
"loss": 0.5040115714073181,
"step": 927
},
{
"epoch": 0.90625,
"grad_norm": 0.3602078855037689,
"learning_rate": 1.903827281648675e-05,
"loss": 0.4498569965362549,
"step": 928
},
{
"epoch": 0.9072265625,
"grad_norm": 0.46351152658462524,
"learning_rate": 1.884200196270854e-05,
"loss": 0.8635745644569397,
"step": 929
},
{
"epoch": 0.908203125,
"grad_norm": 0.5490495562553406,
"learning_rate": 1.8645731108930326e-05,
"loss": 0.9265761375427246,
"step": 930
},
{
"epoch": 0.9091796875,
"grad_norm": 0.4198157489299774,
"learning_rate": 1.8449460255152108e-05,
"loss": 0.8148217797279358,
"step": 931
},
{
"epoch": 0.91015625,
"grad_norm": 0.5183578729629517,
"learning_rate": 1.8253189401373897e-05,
"loss": 0.7837534546852112,
"step": 932
},
{
"epoch": 0.9111328125,
"grad_norm": 0.41839340329170227,
"learning_rate": 1.8056918547595683e-05,
"loss": 0.7239848971366882,
"step": 933
},
{
"epoch": 0.912109375,
"grad_norm": 0.49158063530921936,
"learning_rate": 1.786064769381747e-05,
"loss": 0.7751527428627014,
"step": 934
},
{
"epoch": 0.9130859375,
"grad_norm": 0.20171599090099335,
"learning_rate": 1.7664376840039255e-05,
"loss": 0.181843563914299,
"step": 935
},
{
"epoch": 0.9140625,
"grad_norm": 0.36237961053848267,
"learning_rate": 1.746810598626104e-05,
"loss": 0.5150234699249268,
"step": 936
},
{
"epoch": 0.9150390625,
"grad_norm": 0.4587535858154297,
"learning_rate": 1.7271835132482826e-05,
"loss": 0.6178685426712036,
"step": 937
},
{
"epoch": 0.916015625,
"grad_norm": 0.392635703086853,
"learning_rate": 1.7075564278704615e-05,
"loss": 0.7002321481704712,
"step": 938
},
{
"epoch": 0.9169921875,
"grad_norm": 0.28255772590637207,
"learning_rate": 1.6879293424926398e-05,
"loss": 0.6161627769470215,
"step": 939
},
{
"epoch": 0.91796875,
"grad_norm": 0.31382182240486145,
"learning_rate": 1.6683022571148187e-05,
"loss": 0.6143029928207397,
"step": 940
},
{
"epoch": 0.9189453125,
"grad_norm": 0.5099475383758545,
"learning_rate": 1.6486751717369972e-05,
"loss": 0.9116108417510986,
"step": 941
},
{
"epoch": 0.919921875,
"grad_norm": 0.4015892446041107,
"learning_rate": 1.6290480863591755e-05,
"loss": 0.7331390380859375,
"step": 942
},
{
"epoch": 0.9208984375,
"grad_norm": 0.4519053101539612,
"learning_rate": 1.6094210009813544e-05,
"loss": 0.6662384867668152,
"step": 943
},
{
"epoch": 0.921875,
"grad_norm": 0.5565328598022461,
"learning_rate": 1.589793915603533e-05,
"loss": 0.37386590242385864,
"step": 944
},
{
"epoch": 0.9228515625,
"grad_norm": 0.398419588804245,
"learning_rate": 1.5701668302257116e-05,
"loss": 0.9127399325370789,
"step": 945
},
{
"epoch": 0.923828125,
"grad_norm": 0.37491804361343384,
"learning_rate": 1.55053974484789e-05,
"loss": 0.47025924921035767,
"step": 946
},
{
"epoch": 0.9248046875,
"grad_norm": 0.49557894468307495,
"learning_rate": 1.5309126594700687e-05,
"loss": 0.6349594593048096,
"step": 947
},
{
"epoch": 0.92578125,
"grad_norm": 0.2361314743757248,
"learning_rate": 1.5112855740922475e-05,
"loss": 0.3594982922077179,
"step": 948
},
{
"epoch": 0.9267578125,
"grad_norm": 0.40022003650665283,
"learning_rate": 1.491658488714426e-05,
"loss": 0.41701436042785645,
"step": 949
},
{
"epoch": 0.927734375,
"grad_norm": 0.349528431892395,
"learning_rate": 1.4720314033366044e-05,
"loss": 0.2943156063556671,
"step": 950
},
{
"epoch": 0.9287109375,
"grad_norm": 0.4660559892654419,
"learning_rate": 1.4524043179587832e-05,
"loss": 0.3633948564529419,
"step": 951
},
{
"epoch": 0.9296875,
"grad_norm": 0.28590673208236694,
"learning_rate": 1.432777232580962e-05,
"loss": 0.4886907935142517,
"step": 952
},
{
"epoch": 0.9306640625,
"grad_norm": 0.4388448894023895,
"learning_rate": 1.4131501472031405e-05,
"loss": 0.6123654246330261,
"step": 953
},
{
"epoch": 0.931640625,
"grad_norm": 0.4807531237602234,
"learning_rate": 1.3935230618253189e-05,
"loss": 0.32400381565093994,
"step": 954
},
{
"epoch": 0.9326171875,
"grad_norm": 0.3903636932373047,
"learning_rate": 1.3738959764474977e-05,
"loss": 0.6839208006858826,
"step": 955
},
{
"epoch": 0.93359375,
"grad_norm": 0.2925507426261902,
"learning_rate": 1.3542688910696762e-05,
"loss": 0.5898708701133728,
"step": 956
},
{
"epoch": 0.9345703125,
"grad_norm": 0.39300912618637085,
"learning_rate": 1.3346418056918546e-05,
"loss": 0.3898833692073822,
"step": 957
},
{
"epoch": 0.935546875,
"grad_norm": 0.4321513772010803,
"learning_rate": 1.3150147203140334e-05,
"loss": 0.5717346668243408,
"step": 958
},
{
"epoch": 0.9365234375,
"grad_norm": 0.47681212425231934,
"learning_rate": 1.2953876349362121e-05,
"loss": 0.9711145162582397,
"step": 959
},
{
"epoch": 0.9375,
"grad_norm": 0.524958610534668,
"learning_rate": 1.2757605495583907e-05,
"loss": 0.6577808260917664,
"step": 960
},
{
"epoch": 0.9384765625,
"grad_norm": 0.40814298391342163,
"learning_rate": 1.2561334641805691e-05,
"loss": 0.5148733258247375,
"step": 961
},
{
"epoch": 0.939453125,
"grad_norm": 0.3122687041759491,
"learning_rate": 1.2365063788027479e-05,
"loss": 0.884072482585907,
"step": 962
},
{
"epoch": 0.9404296875,
"grad_norm": 0.4473840594291687,
"learning_rate": 1.2168792934249266e-05,
"loss": 0.660685658454895,
"step": 963
},
{
"epoch": 0.94140625,
"grad_norm": 0.3491450548171997,
"learning_rate": 1.197252208047105e-05,
"loss": 0.8680378794670105,
"step": 964
},
{
"epoch": 0.9423828125,
"grad_norm": 0.6323879957199097,
"learning_rate": 1.1776251226692837e-05,
"loss": 0.8196921348571777,
"step": 965
},
{
"epoch": 0.943359375,
"grad_norm": 0.354900062084198,
"learning_rate": 1.1579980372914623e-05,
"loss": 0.5380838513374329,
"step": 966
},
{
"epoch": 0.9443359375,
"grad_norm": 0.3235265612602234,
"learning_rate": 1.1383709519136409e-05,
"loss": 0.39993464946746826,
"step": 967
},
{
"epoch": 0.9453125,
"grad_norm": 0.3700491786003113,
"learning_rate": 1.1187438665358195e-05,
"loss": 0.6613435745239258,
"step": 968
},
{
"epoch": 0.9462890625,
"grad_norm": 0.29880228638648987,
"learning_rate": 1.099116781157998e-05,
"loss": 0.5756196975708008,
"step": 969
},
{
"epoch": 0.947265625,
"grad_norm": 0.4585433304309845,
"learning_rate": 1.0794896957801768e-05,
"loss": 0.5012968182563782,
"step": 970
},
{
"epoch": 0.9482421875,
"grad_norm": 0.5275799632072449,
"learning_rate": 1.0598626104023552e-05,
"loss": 0.4986013174057007,
"step": 971
},
{
"epoch": 0.94921875,
"grad_norm": 0.30642619729042053,
"learning_rate": 1.040235525024534e-05,
"loss": 0.29793277382850647,
"step": 972
},
{
"epoch": 0.9501953125,
"grad_norm": 0.7356166243553162,
"learning_rate": 1.0206084396467125e-05,
"loss": 0.6518126726150513,
"step": 973
},
{
"epoch": 0.951171875,
"grad_norm": 0.6069150567054749,
"learning_rate": 1.0009813542688911e-05,
"loss": 0.7005544900894165,
"step": 974
},
{
"epoch": 0.9521484375,
"grad_norm": 0.500067949295044,
"learning_rate": 9.813542688910697e-06,
"loss": 0.5567950010299683,
"step": 975
},
{
"epoch": 0.953125,
"grad_norm": 0.5926097631454468,
"learning_rate": 9.617271835132484e-06,
"loss": 0.6974345445632935,
"step": 976
},
{
"epoch": 0.9541015625,
"grad_norm": 0.28873002529144287,
"learning_rate": 9.42100098135427e-06,
"loss": 0.28231939673423767,
"step": 977
},
{
"epoch": 0.955078125,
"grad_norm": 0.6644822359085083,
"learning_rate": 9.224730127576054e-06,
"loss": 0.46575701236724854,
"step": 978
},
{
"epoch": 0.9560546875,
"grad_norm": 0.34748774766921997,
"learning_rate": 9.028459273797842e-06,
"loss": 0.7192713022232056,
"step": 979
},
{
"epoch": 0.95703125,
"grad_norm": 0.4444558024406433,
"learning_rate": 8.832188420019627e-06,
"loss": 0.34014150500297546,
"step": 980
},
{
"epoch": 0.9580078125,
"grad_norm": 0.4814091920852661,
"learning_rate": 8.635917566241413e-06,
"loss": 0.8042552471160889,
"step": 981
},
{
"epoch": 0.958984375,
"grad_norm": 0.5443412661552429,
"learning_rate": 8.439646712463199e-06,
"loss": 0.6534023880958557,
"step": 982
},
{
"epoch": 0.9599609375,
"grad_norm": 0.40025195479393005,
"learning_rate": 8.243375858684986e-06,
"loss": 0.9056930541992188,
"step": 983
},
{
"epoch": 0.9609375,
"grad_norm": 0.41958069801330566,
"learning_rate": 8.047105004906772e-06,
"loss": 0.5610394477844238,
"step": 984
},
{
"epoch": 0.9619140625,
"grad_norm": 0.33056482672691345,
"learning_rate": 7.850834151128558e-06,
"loss": 0.5796000361442566,
"step": 985
},
{
"epoch": 0.962890625,
"grad_norm": 0.5056169629096985,
"learning_rate": 7.654563297350344e-06,
"loss": 0.7795373201370239,
"step": 986
},
{
"epoch": 0.9638671875,
"grad_norm": 0.4030667543411255,
"learning_rate": 7.45829244357213e-06,
"loss": 0.761528491973877,
"step": 987
},
{
"epoch": 0.96484375,
"grad_norm": 0.22716952860355377,
"learning_rate": 7.262021589793916e-06,
"loss": 0.21712671220302582,
"step": 988
},
{
"epoch": 0.9658203125,
"grad_norm": 0.4826786518096924,
"learning_rate": 7.0657507360157025e-06,
"loss": 0.6192560791969299,
"step": 989
},
{
"epoch": 0.966796875,
"grad_norm": 0.3611379861831665,
"learning_rate": 6.869479882237488e-06,
"loss": 0.5660407543182373,
"step": 990
},
{
"epoch": 0.9677734375,
"grad_norm": 0.44197750091552734,
"learning_rate": 6.673209028459273e-06,
"loss": 0.8223164081573486,
"step": 991
},
{
"epoch": 0.96875,
"grad_norm": 0.45650866627693176,
"learning_rate": 6.476938174681061e-06,
"loss": 0.5810177326202393,
"step": 992
},
{
"epoch": 0.9697265625,
"grad_norm": 0.6275922060012817,
"learning_rate": 6.2806673209028455e-06,
"loss": 0.46302127838134766,
"step": 993
},
{
"epoch": 0.970703125,
"grad_norm": 0.29163289070129395,
"learning_rate": 6.084396467124633e-06,
"loss": 0.49744415283203125,
"step": 994
},
{
"epoch": 0.9716796875,
"grad_norm": 0.4289768934249878,
"learning_rate": 5.888125613346419e-06,
"loss": 0.39710360765457153,
"step": 995
},
{
"epoch": 0.97265625,
"grad_norm": 0.43311089277267456,
"learning_rate": 5.6918547595682045e-06,
"loss": 0.4934995174407959,
"step": 996
},
{
"epoch": 0.9736328125,
"grad_norm": 0.4249640703201294,
"learning_rate": 5.49558390578999e-06,
"loss": 0.6822129487991333,
"step": 997
},
{
"epoch": 0.974609375,
"grad_norm": 0.4080635607242584,
"learning_rate": 5.299313052011776e-06,
"loss": 0.2851019501686096,
"step": 998
},
{
"epoch": 0.9755859375,
"grad_norm": 0.3082174062728882,
"learning_rate": 5.103042198233563e-06,
"loss": 0.8851650357246399,
"step": 999
},
{
"epoch": 0.9765625,
"grad_norm": 0.5285578370094299,
"learning_rate": 4.906771344455348e-06,
"loss": 0.5684286952018738,
"step": 1000
},
{
"epoch": 0.9775390625,
"grad_norm": 0.37052616477012634,
"learning_rate": 4.710500490677135e-06,
"loss": 0.8170924782752991,
"step": 1001
},
{
"epoch": 0.978515625,
"grad_norm": 0.46926191449165344,
"learning_rate": 4.514229636898921e-06,
"loss": 0.665911853313446,
"step": 1002
},
{
"epoch": 0.9794921875,
"grad_norm": 0.38110095262527466,
"learning_rate": 4.3179587831207065e-06,
"loss": 0.9365942478179932,
"step": 1003
},
{
"epoch": 0.98046875,
"grad_norm": 0.3803754150867462,
"learning_rate": 4.121687929342493e-06,
"loss": 0.756361722946167,
"step": 1004
},
{
"epoch": 0.9814453125,
"grad_norm": 0.6576887965202332,
"learning_rate": 3.925417075564279e-06,
"loss": 0.6846331357955933,
"step": 1005
},
{
"epoch": 0.982421875,
"grad_norm": 0.6425113081932068,
"learning_rate": 3.729146221786065e-06,
"loss": 0.7665562629699707,
"step": 1006
},
{
"epoch": 0.9833984375,
"grad_norm": 0.28858375549316406,
"learning_rate": 3.5328753680078512e-06,
"loss": 0.2748746871948242,
"step": 1007
},
{
"epoch": 0.984375,
"grad_norm": 0.38693365454673767,
"learning_rate": 3.3366045142296366e-06,
"loss": 0.6602081060409546,
"step": 1008
},
{
"epoch": 0.9853515625,
"grad_norm": 0.39297735691070557,
"learning_rate": 3.1403336604514228e-06,
"loss": 0.43784576654434204,
"step": 1009
},
{
"epoch": 0.986328125,
"grad_norm": 0.4182215929031372,
"learning_rate": 2.9440628066732094e-06,
"loss": 0.7852948307991028,
"step": 1010
},
{
"epoch": 0.9873046875,
"grad_norm": 0.4079328775405884,
"learning_rate": 2.747791952894995e-06,
"loss": 0.5413305759429932,
"step": 1011
},
{
"epoch": 0.98828125,
"grad_norm": 0.41826963424682617,
"learning_rate": 2.5515210991167813e-06,
"loss": 0.449452668428421,
"step": 1012
},
{
"epoch": 0.9892578125,
"grad_norm": 0.31969836354255676,
"learning_rate": 2.3552502453385675e-06,
"loss": 0.26595592498779297,
"step": 1013
},
{
"epoch": 0.990234375,
"grad_norm": 0.466192364692688,
"learning_rate": 2.1589793915603533e-06,
"loss": 0.6175995469093323,
"step": 1014
},
{
"epoch": 0.9912109375,
"grad_norm": 0.4734349846839905,
"learning_rate": 1.9627085377821394e-06,
"loss": 0.6440984010696411,
"step": 1015
},
{
"epoch": 0.9921875,
"grad_norm": 0.4446095824241638,
"learning_rate": 1.7664376840039256e-06,
"loss": 0.5738557577133179,
"step": 1016
},
{
"epoch": 0.9931640625,
"grad_norm": 0.24098840355873108,
"learning_rate": 1.5701668302257114e-06,
"loss": 0.6320365071296692,
"step": 1017
},
{
"epoch": 0.994140625,
"grad_norm": 0.5342791676521301,
"learning_rate": 1.3738959764474976e-06,
"loss": 0.9431695938110352,
"step": 1018
},
{
"epoch": 0.9951171875,
"grad_norm": 0.31406712532043457,
"learning_rate": 1.1776251226692837e-06,
"loss": 0.6406105160713196,
"step": 1019
},
{
"epoch": 0.99609375,
"grad_norm": 0.5162865519523621,
"learning_rate": 9.813542688910697e-07,
"loss": 0.7935853004455566,
"step": 1020
},
{
"epoch": 0.9970703125,
"grad_norm": 0.4624859690666199,
"learning_rate": 7.850834151128557e-07,
"loss": 0.9667851328849792,
"step": 1021
},
{
"epoch": 0.998046875,
"grad_norm": 0.43549951910972595,
"learning_rate": 5.888125613346419e-07,
"loss": 0.73248291015625,
"step": 1022
},
{
"epoch": 0.9990234375,
"grad_norm": 0.6080308556556702,
"learning_rate": 3.9254170755642785e-07,
"loss": 0.5045021772384644,
"step": 1023
},
{
"epoch": 1.0,
"grad_norm": 0.3927266299724579,
"learning_rate": 1.9627085377821392e-07,
"loss": 0.37262263894081116,
"step": 1024
}
],
"logging_steps": 1,
"max_steps": 1024,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.871410239702333e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}