{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1024,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009765625,
"grad_norm": 0.6541444063186646,
"learning_rate": 0.0,
"loss": 1.0280990600585938,
"step": 1
},
{
"epoch": 0.001953125,
"grad_norm": 0.4356674551963806,
"learning_rate": 4e-05,
"loss": 0.8305179476737976,
"step": 2
},
{
"epoch": 0.0029296875,
"grad_norm": 0.3900858759880066,
"learning_rate": 8e-05,
"loss": 0.7835474014282227,
"step": 3
},
{
"epoch": 0.00390625,
"grad_norm": 0.3717947006225586,
"learning_rate": 0.00012,
"loss": 1.1571688652038574,
"step": 4
},
{
"epoch": 0.0048828125,
"grad_norm": 0.2760661542415619,
"learning_rate": 0.00016,
"loss": 0.8141135573387146,
"step": 5
},
{
"epoch": 0.005859375,
"grad_norm": 0.24524882435798645,
"learning_rate": 0.0002,
"loss": 0.29919666051864624,
"step": 6
},
{
"epoch": 0.0068359375,
"grad_norm": 0.3155483305454254,
"learning_rate": 0.00019980372914622178,
"loss": 0.916366696357727,
"step": 7
},
{
"epoch": 0.0078125,
"grad_norm": 1.0419310331344604,
"learning_rate": 0.00019960745829244357,
"loss": 0.986505389213562,
"step": 8
},
{
"epoch": 0.0087890625,
"grad_norm": 0.32395845651626587,
"learning_rate": 0.00019941118743866537,
"loss": 0.7845190167427063,
"step": 9
},
{
"epoch": 0.009765625,
"grad_norm": 0.564084529876709,
"learning_rate": 0.00019921491658488717,
"loss": 1.0922366380691528,
"step": 10
},
{
"epoch": 0.0107421875,
"grad_norm": 0.4066593647003174,
"learning_rate": 0.00019901864573110893,
"loss": 1.0279463529586792,
"step": 11
},
{
"epoch": 0.01171875,
"grad_norm": 0.43442535400390625,
"learning_rate": 0.00019882237487733073,
"loss": 0.9713175892829895,
"step": 12
},
{
"epoch": 0.0126953125,
"grad_norm": 0.26689526438713074,
"learning_rate": 0.0001986261040235525,
"loss": 0.38461241126060486,
"step": 13
},
{
"epoch": 0.013671875,
"grad_norm": 0.41254541277885437,
"learning_rate": 0.0001984298331697743,
"loss": 0.7746479511260986,
"step": 14
},
{
"epoch": 0.0146484375,
"grad_norm": 0.39432424306869507,
"learning_rate": 0.0001982335623159961,
"loss": 0.7843194603919983,
"step": 15
},
{
"epoch": 0.015625,
"grad_norm": 0.4303337037563324,
"learning_rate": 0.0001980372914622179,
"loss": 0.6613403558731079,
"step": 16
},
{
"epoch": 0.0166015625,
"grad_norm": 0.875269889831543,
"learning_rate": 0.00019784102060843966,
"loss": 1.0992671251296997,
"step": 17
},
{
"epoch": 0.017578125,
"grad_norm": 0.21415413916110992,
"learning_rate": 0.00019764474975466145,
"loss": 0.2784216105937958,
"step": 18
},
{
"epoch": 0.0185546875,
"grad_norm": 0.4318086504936218,
"learning_rate": 0.00019744847890088322,
"loss": 0.6146124005317688,
"step": 19
},
{
"epoch": 0.01953125,
"grad_norm": 0.20149515569210052,
"learning_rate": 0.00019725220804710502,
"loss": 0.3920556306838989,
"step": 20
},
{
"epoch": 0.0205078125,
"grad_norm": 0.358688622713089,
"learning_rate": 0.0001970559371933268,
"loss": 0.6672685742378235,
"step": 21
},
{
"epoch": 0.021484375,
"grad_norm": 0.5916730165481567,
"learning_rate": 0.00019685966633954858,
"loss": 1.0804443359375,
"step": 22
},
{
"epoch": 0.0224609375,
"grad_norm": 0.3139825761318207,
"learning_rate": 0.00019666339548577038,
"loss": 0.7358766794204712,
"step": 23
},
{
"epoch": 0.0234375,
"grad_norm": 0.4019712805747986,
"learning_rate": 0.00019646712463199215,
"loss": 0.7362902164459229,
"step": 24
},
{
"epoch": 0.0244140625,
"grad_norm": 0.2874290347099304,
"learning_rate": 0.00019627085377821394,
"loss": 0.6446189284324646,
"step": 25
},
{
"epoch": 0.025390625,
"grad_norm": 0.357494592666626,
"learning_rate": 0.0001960745829244357,
"loss": 0.2820976972579956,
"step": 26
},
{
"epoch": 0.0263671875,
"grad_norm": 0.22216391563415527,
"learning_rate": 0.00019587831207065753,
"loss": 0.6020435094833374,
"step": 27
},
{
"epoch": 0.02734375,
"grad_norm": 0.23284995555877686,
"learning_rate": 0.0001956820412168793,
"loss": 0.44151532649993896,
"step": 28
},
{
"epoch": 0.0283203125,
"grad_norm": 0.3594605028629303,
"learning_rate": 0.0001954857703631011,
"loss": 0.9414041042327881,
"step": 29
},
{
"epoch": 0.029296875,
"grad_norm": 0.4460504353046417,
"learning_rate": 0.00019528949950932287,
"loss": 0.7148531079292297,
"step": 30
},
{
"epoch": 0.0302734375,
"grad_norm": 0.3392362892627716,
"learning_rate": 0.00019509322865554466,
"loss": 0.7185512781143188,
"step": 31
},
{
"epoch": 0.03125,
"grad_norm": 0.3340625464916229,
"learning_rate": 0.00019489695780176643,
"loss": 0.6613262891769409,
"step": 32
},
{
"epoch": 0.0322265625,
"grad_norm": 0.26223355531692505,
"learning_rate": 0.00019470068694798826,
"loss": 0.590149462223053,
"step": 33
},
{
"epoch": 0.033203125,
"grad_norm": 0.3481689691543579,
"learning_rate": 0.00019450441609421002,
"loss": 0.5590913891792297,
"step": 34
},
{
"epoch": 0.0341796875,
"grad_norm": 0.4775488078594208,
"learning_rate": 0.00019430814524043182,
"loss": 0.927351176738739,
"step": 35
},
{
"epoch": 0.03515625,
"grad_norm": 0.4474835693836212,
"learning_rate": 0.0001941118743866536,
"loss": 0.7719380855560303,
"step": 36
},
{
"epoch": 0.0361328125,
"grad_norm": 0.3538999855518341,
"learning_rate": 0.00019391560353287536,
"loss": 1.0287561416625977,
"step": 37
},
{
"epoch": 0.037109375,
"grad_norm": 0.5018237233161926,
"learning_rate": 0.00019371933267909715,
"loss": 1.049814224243164,
"step": 38
},
{
"epoch": 0.0380859375,
"grad_norm": 0.5052743554115295,
"learning_rate": 0.00019352306182531895,
"loss": 0.39767658710479736,
"step": 39
},
{
"epoch": 0.0390625,
"grad_norm": 0.46170520782470703,
"learning_rate": 0.00019332679097154075,
"loss": 0.9849376678466797,
"step": 40
},
{
"epoch": 0.0400390625,
"grad_norm": 0.5961291193962097,
"learning_rate": 0.00019313052011776251,
"loss": 0.8527336716651917,
"step": 41
},
{
"epoch": 0.041015625,
"grad_norm": 0.4002876579761505,
"learning_rate": 0.0001929342492639843,
"loss": 0.7445047497749329,
"step": 42
},
{
"epoch": 0.0419921875,
"grad_norm": 0.6382992267608643,
"learning_rate": 0.00019273797841020608,
"loss": 0.7587878704071045,
"step": 43
},
{
"epoch": 0.04296875,
"grad_norm": 0.4204530715942383,
"learning_rate": 0.00019254170755642788,
"loss": 0.943995475769043,
"step": 44
},
{
"epoch": 0.0439453125,
"grad_norm": 0.29038068652153015,
"learning_rate": 0.00019234543670264967,
"loss": 0.4540131688117981,
"step": 45
},
{
"epoch": 0.044921875,
"grad_norm": 0.41968628764152527,
"learning_rate": 0.00019214916584887147,
"loss": 0.3900204300880432,
"step": 46
},
{
"epoch": 0.0458984375,
"grad_norm": 0.5870251059532166,
"learning_rate": 0.00019195289499509324,
"loss": 0.8700598478317261,
"step": 47
},
{
"epoch": 0.046875,
"grad_norm": 0.3120124042034149,
"learning_rate": 0.00019175662414131503,
"loss": 0.2866731882095337,
"step": 48
},
{
"epoch": 0.0478515625,
"grad_norm": 0.31891942024230957,
"learning_rate": 0.0001915603532875368,
"loss": 0.7711223363876343,
"step": 49
},
{
"epoch": 0.048828125,
"grad_norm": 0.4250207543373108,
"learning_rate": 0.0001913640824337586,
"loss": 0.7499758005142212,
"step": 50
},
{
"epoch": 0.0498046875,
"grad_norm": 0.4769924581050873,
"learning_rate": 0.0001911678115799804,
"loss": 0.8479812145233154,
"step": 51
},
{
"epoch": 0.05078125,
"grad_norm": 0.2966979146003723,
"learning_rate": 0.00019097154072620216,
"loss": 0.8125182390213013,
"step": 52
},
{
"epoch": 0.0517578125,
"grad_norm": 0.4924452006816864,
"learning_rate": 0.00019077526987242396,
"loss": 1.006331443786621,
"step": 53
},
{
"epoch": 0.052734375,
"grad_norm": 0.5558736324310303,
"learning_rate": 0.00019057899901864573,
"loss": 0.8218062520027161,
"step": 54
},
{
"epoch": 0.0537109375,
"grad_norm": 0.488903284072876,
"learning_rate": 0.00019038272816486752,
"loss": 0.7451006770133972,
"step": 55
},
{
"epoch": 0.0546875,
"grad_norm": 0.6092124581336975,
"learning_rate": 0.00019018645731108932,
"loss": 0.3371097445487976,
"step": 56
},
{
"epoch": 0.0556640625,
"grad_norm": 0.34885621070861816,
"learning_rate": 0.00018999018645731111,
"loss": 0.9263520836830139,
"step": 57
},
{
"epoch": 0.056640625,
"grad_norm": 0.41470521688461304,
"learning_rate": 0.00018979391560353288,
"loss": 0.8741390109062195,
"step": 58
},
{
"epoch": 0.0576171875,
"grad_norm": 0.32286664843559265,
"learning_rate": 0.00018959764474975468,
"loss": 0.6128658056259155,
"step": 59
},
{
"epoch": 0.05859375,
"grad_norm": 0.43667954206466675,
"learning_rate": 0.00018940137389597645,
"loss": 0.822106122970581,
"step": 60
},
{
"epoch": 0.0595703125,
"grad_norm": 0.5501149892807007,
"learning_rate": 0.00018920510304219824,
"loss": 0.2981743812561035,
"step": 61
},
{
"epoch": 0.060546875,
"grad_norm": 0.5234649777412415,
"learning_rate": 0.00018900883218842004,
"loss": 0.710310161113739,
"step": 62
},
{
"epoch": 0.0615234375,
"grad_norm": 0.5040559768676758,
"learning_rate": 0.00018881256133464184,
"loss": 1.0355676412582397,
"step": 63
},
{
"epoch": 0.0625,
"grad_norm": 0.4435643255710602,
"learning_rate": 0.0001886162904808636,
"loss": 1.031105399131775,
"step": 64
},
{
"epoch": 0.0634765625,
"grad_norm": 0.4987465441226959,
"learning_rate": 0.0001884200196270854,
"loss": 0.7753915190696716,
"step": 65
},
{
"epoch": 0.064453125,
"grad_norm": 0.3633696436882019,
"learning_rate": 0.00018822374877330717,
"loss": 1.2376799583435059,
"step": 66
},
{
"epoch": 0.0654296875,
"grad_norm": 1.0342258214950562,
"learning_rate": 0.00018802747791952894,
"loss": 0.6145737171173096,
"step": 67
},
{
"epoch": 0.06640625,
"grad_norm": 0.47045138478279114,
"learning_rate": 0.00018783120706575076,
"loss": 0.8622407913208008,
"step": 68
},
{
"epoch": 0.0673828125,
"grad_norm": 0.47864851355552673,
"learning_rate": 0.00018763493621197253,
"loss": 0.6727300882339478,
"step": 69
},
{
"epoch": 0.068359375,
"grad_norm": 0.38102060556411743,
"learning_rate": 0.00018743866535819433,
"loss": 0.7417519092559814,
"step": 70
},
{
"epoch": 0.0693359375,
"grad_norm": 0.4229515492916107,
"learning_rate": 0.0001872423945044161,
"loss": 0.46951866149902344,
"step": 71
},
{
"epoch": 0.0703125,
"grad_norm": 0.4868115186691284,
"learning_rate": 0.0001870461236506379,
"loss": 0.32457292079925537,
"step": 72
},
{
"epoch": 0.0712890625,
"grad_norm": 0.298020601272583,
"learning_rate": 0.00018684985279685966,
"loss": 0.2501494288444519,
"step": 73
},
{
"epoch": 0.072265625,
"grad_norm": 0.49870651960372925,
"learning_rate": 0.00018665358194308145,
"loss": 0.5599403381347656,
"step": 74
},
{
"epoch": 0.0732421875,
"grad_norm": 0.5717479586601257,
"learning_rate": 0.00018645731108930325,
"loss": 0.4725653827190399,
"step": 75
},
{
"epoch": 0.07421875,
"grad_norm": 0.5230128765106201,
"learning_rate": 0.00018626104023552505,
"loss": 1.0607699155807495,
"step": 76
},
{
"epoch": 0.0751953125,
"grad_norm": 0.4279435873031616,
"learning_rate": 0.00018606476938174682,
"loss": 0.5628142952919006,
"step": 77
},
{
"epoch": 0.076171875,
"grad_norm": 0.6166331171989441,
"learning_rate": 0.0001858684985279686,
"loss": 0.44837141036987305,
"step": 78
},
{
"epoch": 0.0771484375,
"grad_norm": 0.6329861879348755,
"learning_rate": 0.00018567222767419038,
"loss": 0.5013883709907532,
"step": 79
},
{
"epoch": 0.078125,
"grad_norm": 0.2921103239059448,
"learning_rate": 0.00018547595682041218,
"loss": 0.541824996471405,
"step": 80
},
{
"epoch": 0.0791015625,
"grad_norm": 0.36744800209999084,
"learning_rate": 0.00018527968596663397,
"loss": 0.3878925144672394,
"step": 81
},
{
"epoch": 0.080078125,
"grad_norm": 0.34045904874801636,
"learning_rate": 0.00018508341511285574,
"loss": 0.33476194739341736,
"step": 82
},
{
"epoch": 0.0810546875,
"grad_norm": 0.48908546566963196,
"learning_rate": 0.00018488714425907754,
"loss": 1.003555178642273,
"step": 83
},
{
"epoch": 0.08203125,
"grad_norm": 0.4683694839477539,
"learning_rate": 0.0001846908734052993,
"loss": 0.7300649285316467,
"step": 84
},
{
"epoch": 0.0830078125,
"grad_norm": 0.3560928404331207,
"learning_rate": 0.0001844946025515211,
"loss": 0.4525097608566284,
"step": 85
},
{
"epoch": 0.083984375,
"grad_norm": 1.481307864189148,
"learning_rate": 0.0001842983316977429,
"loss": 0.5444833040237427,
"step": 86
},
{
"epoch": 0.0849609375,
"grad_norm": 0.42610403895378113,
"learning_rate": 0.0001841020608439647,
"loss": 0.7340827584266663,
"step": 87
},
{
"epoch": 0.0859375,
"grad_norm": 0.6035026907920837,
"learning_rate": 0.00018390578999018646,
"loss": 0.5589049458503723,
"step": 88
},
{
"epoch": 0.0869140625,
"grad_norm": 0.6075074076652527,
"learning_rate": 0.00018370951913640826,
"loss": 0.4969009757041931,
"step": 89
},
{
"epoch": 0.087890625,
"grad_norm": 0.6751372814178467,
"learning_rate": 0.00018351324828263003,
"loss": 0.46451041102409363,
"step": 90
},
{
"epoch": 0.0888671875,
"grad_norm": 0.5816373229026794,
"learning_rate": 0.00018331697742885182,
"loss": 1.024427056312561,
"step": 91
},
{
"epoch": 0.08984375,
"grad_norm": 0.6644161939620972,
"learning_rate": 0.00018312070657507362,
"loss": 0.778592586517334,
"step": 92
},
{
"epoch": 0.0908203125,
"grad_norm": 0.652209997177124,
"learning_rate": 0.00018292443572129541,
"loss": 0.8565710783004761,
"step": 93
},
{
"epoch": 0.091796875,
"grad_norm": 0.9109074473381042,
"learning_rate": 0.00018272816486751718,
"loss": 0.6693978309631348,
"step": 94
},
{
"epoch": 0.0927734375,
"grad_norm": 0.5235186219215393,
"learning_rate": 0.00018253189401373895,
"loss": 0.8255172967910767,
"step": 95
},
{
"epoch": 0.09375,
"grad_norm": 0.8362122178077698,
"learning_rate": 0.00018233562315996075,
"loss": 0.5858157873153687,
"step": 96
},
{
"epoch": 0.0947265625,
"grad_norm": 0.6753116846084595,
"learning_rate": 0.00018213935230618254,
"loss": 0.6682421565055847,
"step": 97
},
{
"epoch": 0.095703125,
"grad_norm": 0.5394794940948486,
"learning_rate": 0.00018194308145240434,
"loss": 0.3218158781528473,
"step": 98
},
{
"epoch": 0.0966796875,
"grad_norm": 3.2796010971069336,
"learning_rate": 0.0001817468105986261,
"loss": 0.681085467338562,
"step": 99
},
{
"epoch": 0.09765625,
"grad_norm": 0.38390907645225525,
"learning_rate": 0.0001815505397448479,
"loss": 0.39554187655448914,
"step": 100
},
{
"epoch": 0.0986328125,
"grad_norm": 0.5289499759674072,
"learning_rate": 0.00018135426889106967,
"loss": 1.0264520645141602,
"step": 101
},
{
"epoch": 0.099609375,
"grad_norm": 0.8211148977279663,
"learning_rate": 0.00018115799803729147,
"loss": 0.8588113784790039,
"step": 102
},
{
"epoch": 0.1005859375,
"grad_norm": 0.4771063029766083,
"learning_rate": 0.00018096172718351327,
"loss": 0.7471244931221008,
"step": 103
},
{
"epoch": 0.1015625,
"grad_norm": 0.6326794624328613,
"learning_rate": 0.00018076545632973506,
"loss": 0.6081597805023193,
"step": 104
},
{
"epoch": 0.1025390625,
"grad_norm": 0.7229248285293579,
"learning_rate": 0.00018056918547595683,
"loss": 0.8315082788467407,
"step": 105
},
{
"epoch": 0.103515625,
"grad_norm": 0.6803163290023804,
"learning_rate": 0.00018037291462217863,
"loss": 0.8308911323547363,
"step": 106
},
{
"epoch": 0.1044921875,
"grad_norm": 0.5268850326538086,
"learning_rate": 0.0001801766437684004,
"loss": 0.8480656743049622,
"step": 107
},
{
"epoch": 0.10546875,
"grad_norm": 0.7849289178848267,
"learning_rate": 0.0001799803729146222,
"loss": 0.8200575113296509,
"step": 108
},
{
"epoch": 0.1064453125,
"grad_norm": 0.4259982407093048,
"learning_rate": 0.00017978410206084396,
"loss": 0.44367721676826477,
"step": 109
},
{
"epoch": 0.107421875,
"grad_norm": 0.4788619577884674,
"learning_rate": 0.00017958783120706576,
"loss": 0.6017763018608093,
"step": 110
},
{
"epoch": 0.1083984375,
"grad_norm": 0.34434452652931213,
"learning_rate": 0.00017939156035328755,
"loss": 0.29681769013404846,
"step": 111
},
{
"epoch": 0.109375,
"grad_norm": 1.1506884098052979,
"learning_rate": 0.00017919528949950932,
"loss": 0.6520863771438599,
"step": 112
},
{
"epoch": 0.1103515625,
"grad_norm": 0.8348999619483948,
"learning_rate": 0.00017899901864573112,
"loss": 0.6035414934158325,
"step": 113
},
{
"epoch": 0.111328125,
"grad_norm": 0.5550518035888672,
"learning_rate": 0.00017880274779195289,
"loss": 0.7711564302444458,
"step": 114
},
{
"epoch": 0.1123046875,
"grad_norm": 0.28814634680747986,
"learning_rate": 0.00017860647693817468,
"loss": 0.8325987458229065,
"step": 115
},
{
"epoch": 0.11328125,
"grad_norm": 0.3833630084991455,
"learning_rate": 0.00017841020608439648,
"loss": 0.3345921039581299,
"step": 116
},
{
"epoch": 0.1142578125,
"grad_norm": 0.8784507513046265,
"learning_rate": 0.00017821393523061827,
"loss": 0.4186948239803314,
"step": 117
},
{
"epoch": 0.115234375,
"grad_norm": 0.7263842225074768,
"learning_rate": 0.00017801766437684004,
"loss": 0.5570493936538696,
"step": 118
},
{
"epoch": 0.1162109375,
"grad_norm": 0.6391569972038269,
"learning_rate": 0.00017782139352306184,
"loss": 1.0257431268692017,
"step": 119
},
{
"epoch": 0.1171875,
"grad_norm": 0.6025450229644775,
"learning_rate": 0.0001776251226692836,
"loss": 0.8676729202270508,
"step": 120
},
{
"epoch": 0.1181640625,
"grad_norm": 0.3776579201221466,
"learning_rate": 0.0001774288518155054,
"loss": 0.5870720148086548,
"step": 121
},
{
"epoch": 0.119140625,
"grad_norm": 0.40912336111068726,
"learning_rate": 0.0001772325809617272,
"loss": 0.9210044145584106,
"step": 122
},
{
"epoch": 0.1201171875,
"grad_norm": 0.5036085247993469,
"learning_rate": 0.000177036310107949,
"loss": 0.47378072142601013,
"step": 123
},
{
"epoch": 0.12109375,
"grad_norm": 0.5508134961128235,
"learning_rate": 0.00017684003925417076,
"loss": 0.8295834064483643,
"step": 124
},
{
"epoch": 0.1220703125,
"grad_norm": 0.5522392392158508,
"learning_rate": 0.00017664376840039253,
"loss": 0.793156087398529,
"step": 125
},
{
"epoch": 0.123046875,
"grad_norm": 1.0098820924758911,
"learning_rate": 0.00017644749754661433,
"loss": 0.5780155658721924,
"step": 126
},
{
"epoch": 0.1240234375,
"grad_norm": 0.6178780198097229,
"learning_rate": 0.00017625122669283612,
"loss": 0.5129156708717346,
"step": 127
},
{
"epoch": 0.125,
"grad_norm": 0.6224352121353149,
"learning_rate": 0.00017605495583905792,
"loss": 0.8498928546905518,
"step": 128
},
{
"epoch": 0.1259765625,
"grad_norm": 0.7869983315467834,
"learning_rate": 0.0001758586849852797,
"loss": 0.9180670976638794,
"step": 129
},
{
"epoch": 0.126953125,
"grad_norm": 0.4122680127620697,
"learning_rate": 0.00017566241413150148,
"loss": 0.510919988155365,
"step": 130
},
{
"epoch": 0.1279296875,
"grad_norm": 0.7221843004226685,
"learning_rate": 0.00017546614327772325,
"loss": 0.3977488875389099,
"step": 131
},
{
"epoch": 0.12890625,
"grad_norm": 1.155800461769104,
"learning_rate": 0.00017526987242394505,
"loss": 0.6549078226089478,
"step": 132
},
{
"epoch": 0.1298828125,
"grad_norm": 0.7164724469184875,
"learning_rate": 0.00017507360157016685,
"loss": 0.8306566476821899,
"step": 133
},
{
"epoch": 0.130859375,
"grad_norm": 0.7600284814834595,
"learning_rate": 0.00017487733071638864,
"loss": 0.34278520941734314,
"step": 134
},
{
"epoch": 0.1318359375,
"grad_norm": 0.8636081218719482,
"learning_rate": 0.0001746810598626104,
"loss": 0.8881778717041016,
"step": 135
},
{
"epoch": 0.1328125,
"grad_norm": 1.0904357433319092,
"learning_rate": 0.0001744847890088322,
"loss": 0.4423227310180664,
"step": 136
},
{
"epoch": 0.1337890625,
"grad_norm": 0.5639862418174744,
"learning_rate": 0.00017428851815505397,
"loss": 0.8610935211181641,
"step": 137
},
{
"epoch": 0.134765625,
"grad_norm": 1.05929696559906,
"learning_rate": 0.00017409224730127577,
"loss": 1.1729753017425537,
"step": 138
},
{
"epoch": 0.1357421875,
"grad_norm": 1.0731761455535889,
"learning_rate": 0.00017389597644749757,
"loss": 0.6459341049194336,
"step": 139
},
{
"epoch": 0.13671875,
"grad_norm": 0.7464702725410461,
"learning_rate": 0.00017369970559371934,
"loss": 0.5368601083755493,
"step": 140
},
{
"epoch": 0.1376953125,
"grad_norm": 0.5722304582595825,
"learning_rate": 0.00017350343473994113,
"loss": 0.9642695784568787,
"step": 141
},
{
"epoch": 0.138671875,
"grad_norm": 0.5044945478439331,
"learning_rate": 0.0001733071638861629,
"loss": 0.49555253982543945,
"step": 142
},
{
"epoch": 0.1396484375,
"grad_norm": 0.8069168329238892,
"learning_rate": 0.0001731108930323847,
"loss": 0.8796389698982239,
"step": 143
},
{
"epoch": 0.140625,
"grad_norm": 0.5269959568977356,
"learning_rate": 0.00017291462217860646,
"loss": 0.9928920269012451,
"step": 144
},
{
"epoch": 0.1416015625,
"grad_norm": 0.6606360077857971,
"learning_rate": 0.0001727183513248283,
"loss": 1.0528640747070312,
"step": 145
},
{
"epoch": 0.142578125,
"grad_norm": 0.7145242691040039,
"learning_rate": 0.00017252208047105006,
"loss": 1.1252766847610474,
"step": 146
},
{
"epoch": 0.1435546875,
"grad_norm": 0.5808660984039307,
"learning_rate": 0.00017232580961727185,
"loss": 0.24914072453975677,
"step": 147
},
{
"epoch": 0.14453125,
"grad_norm": 0.8544529676437378,
"learning_rate": 0.00017212953876349362,
"loss": 0.4420434832572937,
"step": 148
},
{
"epoch": 0.1455078125,
"grad_norm": 0.899334728717804,
"learning_rate": 0.00017193326790971542,
"loss": 0.7128512263298035,
"step": 149
},
{
"epoch": 0.146484375,
"grad_norm": 0.36327579617500305,
"learning_rate": 0.00017173699705593719,
"loss": 0.5503419637680054,
"step": 150
},
{
"epoch": 0.1474609375,
"grad_norm": 0.553255021572113,
"learning_rate": 0.000171540726202159,
"loss": 0.5796535015106201,
"step": 151
},
{
"epoch": 0.1484375,
"grad_norm": 0.41036659479141235,
"learning_rate": 0.00017134445534838078,
"loss": 0.8935849666595459,
"step": 152
},
{
"epoch": 0.1494140625,
"grad_norm": 0.3723013997077942,
"learning_rate": 0.00017114818449460257,
"loss": 0.39106485247612,
"step": 153
},
{
"epoch": 0.150390625,
"grad_norm": 0.654262900352478,
"learning_rate": 0.00017095191364082434,
"loss": 1.0176405906677246,
"step": 154
},
{
"epoch": 0.1513671875,
"grad_norm": 0.5707812309265137,
"learning_rate": 0.0001707556427870461,
"loss": 0.6580768823623657,
"step": 155
},
{
"epoch": 0.15234375,
"grad_norm": 0.35879406332969666,
"learning_rate": 0.0001705593719332679,
"loss": 0.4050876200199127,
"step": 156
},
{
"epoch": 0.1533203125,
"grad_norm": 0.5701449513435364,
"learning_rate": 0.0001703631010794897,
"loss": 0.9737375974655151,
"step": 157
},
{
"epoch": 0.154296875,
"grad_norm": 0.4461202919483185,
"learning_rate": 0.0001701668302257115,
"loss": 0.9864733815193176,
"step": 158
},
{
"epoch": 0.1552734375,
"grad_norm": 0.6229621767997742,
"learning_rate": 0.00016997055937193327,
"loss": 0.35883933305740356,
"step": 159
},
{
"epoch": 0.15625,
"grad_norm": 0.5390028357505798,
"learning_rate": 0.00016977428851815506,
"loss": 0.5791765451431274,
"step": 160
},
{
"epoch": 0.1572265625,
"grad_norm": 0.7851611375808716,
"learning_rate": 0.00016957801766437683,
"loss": 0.9032300114631653,
"step": 161
},
{
"epoch": 0.158203125,
"grad_norm": 0.6211395263671875,
"learning_rate": 0.00016938174681059863,
"loss": 0.5069928765296936,
"step": 162
},
{
"epoch": 0.1591796875,
"grad_norm": 0.8290377855300903,
"learning_rate": 0.00016918547595682042,
"loss": 0.8917738795280457,
"step": 163
},
{
"epoch": 0.16015625,
"grad_norm": 0.42707324028015137,
"learning_rate": 0.00016898920510304222,
"loss": 0.606585681438446,
"step": 164
},
{
"epoch": 0.1611328125,
"grad_norm": 0.49472010135650635,
"learning_rate": 0.000168792934249264,
"loss": 1.0100075006484985,
"step": 165
},
{
"epoch": 0.162109375,
"grad_norm": 0.48441267013549805,
"learning_rate": 0.00016859666339548579,
"loss": 0.7145558595657349,
"step": 166
},
{
"epoch": 0.1630859375,
"grad_norm": 0.5181763172149658,
"learning_rate": 0.00016840039254170755,
"loss": 0.8088749647140503,
"step": 167
},
{
"epoch": 0.1640625,
"grad_norm": 0.4702328145503998,
"learning_rate": 0.00016820412168792935,
"loss": 0.5631542801856995,
"step": 168
},
{
"epoch": 0.1650390625,
"grad_norm": 0.35454344749450684,
"learning_rate": 0.00016800785083415115,
"loss": 0.31744396686553955,
"step": 169
},
{
"epoch": 0.166015625,
"grad_norm": 0.5193122029304504,
"learning_rate": 0.00016781157998037291,
"loss": 0.7338438034057617,
"step": 170
},
{
"epoch": 0.1669921875,
"grad_norm": 0.49799400568008423,
"learning_rate": 0.0001676153091265947,
"loss": 0.7910654544830322,
"step": 171
},
{
"epoch": 0.16796875,
"grad_norm": 0.4855571389198303,
"learning_rate": 0.00016741903827281648,
"loss": 0.38415610790252686,
"step": 172
},
{
"epoch": 0.1689453125,
"grad_norm": 0.8796041011810303,
"learning_rate": 0.00016722276741903828,
"loss": 0.6042807102203369,
"step": 173
},
{
"epoch": 0.169921875,
"grad_norm": 0.6005135774612427,
"learning_rate": 0.00016702649656526007,
"loss": 0.6617047786712646,
"step": 174
},
{
"epoch": 0.1708984375,
"grad_norm": 0.6359293460845947,
"learning_rate": 0.00016683022571148187,
"loss": 0.5227914452552795,
"step": 175
},
{
"epoch": 0.171875,
"grad_norm": 0.46007266640663147,
"learning_rate": 0.00016663395485770364,
"loss": 0.6881235837936401,
"step": 176
},
{
"epoch": 0.1728515625,
"grad_norm": 0.37411797046661377,
"learning_rate": 0.00016643768400392543,
"loss": 0.7384200096130371,
"step": 177
},
{
"epoch": 0.173828125,
"grad_norm": 0.4021860659122467,
"learning_rate": 0.0001662414131501472,
"loss": 1.1738500595092773,
"step": 178
},
{
"epoch": 0.1748046875,
"grad_norm": 0.3674755096435547,
"learning_rate": 0.000166045142296369,
"loss": 0.37539663910865784,
"step": 179
},
{
"epoch": 0.17578125,
"grad_norm": 0.5051441788673401,
"learning_rate": 0.0001658488714425908,
"loss": 0.6273016333580017,
"step": 180
},
{
"epoch": 0.1767578125,
"grad_norm": 0.6807597279548645,
"learning_rate": 0.0001656526005888126,
"loss": 0.4195510447025299,
"step": 181
},
{
"epoch": 0.177734375,
"grad_norm": 0.3345419466495514,
"learning_rate": 0.00016545632973503436,
"loss": 0.8546851873397827,
"step": 182
},
{
"epoch": 0.1787109375,
"grad_norm": 0.33821800351142883,
"learning_rate": 0.00016526005888125615,
"loss": 0.522655725479126,
"step": 183
},
{
"epoch": 0.1796875,
"grad_norm": 0.3145562708377838,
"learning_rate": 0.00016506378802747792,
"loss": 0.3799128532409668,
"step": 184
},
{
"epoch": 0.1806640625,
"grad_norm": 0.44908636808395386,
"learning_rate": 0.0001648675171736997,
"loss": 0.6263326406478882,
"step": 185
},
{
"epoch": 0.181640625,
"grad_norm": 0.7736865282058716,
"learning_rate": 0.00016467124631992151,
"loss": 0.3385460078716278,
"step": 186
},
{
"epoch": 0.1826171875,
"grad_norm": 0.5184527635574341,
"learning_rate": 0.00016447497546614328,
"loss": 0.7980771064758301,
"step": 187
},
{
"epoch": 0.18359375,
"grad_norm": 0.41774502396583557,
"learning_rate": 0.00016427870461236508,
"loss": 0.7745299339294434,
"step": 188
},
{
"epoch": 0.1845703125,
"grad_norm": 0.43824154138565063,
"learning_rate": 0.00016408243375858685,
"loss": 0.9190135598182678,
"step": 189
},
{
"epoch": 0.185546875,
"grad_norm": 0.4037880301475525,
"learning_rate": 0.00016388616290480864,
"loss": 0.5671911239624023,
"step": 190
},
{
"epoch": 0.1865234375,
"grad_norm": 0.3757816255092621,
"learning_rate": 0.0001636898920510304,
"loss": 0.39916592836380005,
"step": 191
},
{
"epoch": 0.1875,
"grad_norm": 0.4747844636440277,
"learning_rate": 0.00016349362119725224,
"loss": 0.9217299818992615,
"step": 192
},
{
"epoch": 0.1884765625,
"grad_norm": 0.42307209968566895,
"learning_rate": 0.000163297350343474,
"loss": 0.8852982521057129,
"step": 193
},
{
"epoch": 0.189453125,
"grad_norm": 0.47294488549232483,
"learning_rate": 0.0001631010794896958,
"loss": 1.0635476112365723,
"step": 194
},
{
"epoch": 0.1904296875,
"grad_norm": 0.3519342243671417,
"learning_rate": 0.00016290480863591757,
"loss": 0.33460623025894165,
"step": 195
},
{
"epoch": 0.19140625,
"grad_norm": 0.418151319026947,
"learning_rate": 0.00016270853778213936,
"loss": 0.8776851296424866,
"step": 196
},
{
"epoch": 0.1923828125,
"grad_norm": 0.3954712152481079,
"learning_rate": 0.00016251226692836113,
"loss": 0.9358173608779907,
"step": 197
},
{
"epoch": 0.193359375,
"grad_norm": 0.35646897554397583,
"learning_rate": 0.00016231599607458293,
"loss": 0.43795716762542725,
"step": 198
},
{
"epoch": 0.1943359375,
"grad_norm": 0.41675063967704773,
"learning_rate": 0.00016211972522080473,
"loss": 0.8348654508590698,
"step": 199
},
{
"epoch": 0.1953125,
"grad_norm": 0.5800544023513794,
"learning_rate": 0.0001619234543670265,
"loss": 0.5580507516860962,
"step": 200
},
{
"epoch": 0.1962890625,
"grad_norm": 0.44925832748413086,
"learning_rate": 0.0001617271835132483,
"loss": 0.47444453835487366,
"step": 201
},
{
"epoch": 0.197265625,
"grad_norm": 0.48447439074516296,
"learning_rate": 0.00016153091265947006,
"loss": 0.5927308797836304,
"step": 202
},
{
"epoch": 0.1982421875,
"grad_norm": 0.37814846634864807,
"learning_rate": 0.00016133464180569186,
"loss": 0.8504298329353333,
"step": 203
},
{
"epoch": 0.19921875,
"grad_norm": 0.4171026051044464,
"learning_rate": 0.00016113837095191365,
"loss": 1.0796414613723755,
"step": 204
},
{
"epoch": 0.2001953125,
"grad_norm": 0.4570372402667999,
"learning_rate": 0.00016094210009813545,
"loss": 0.6229358315467834,
"step": 205
},
{
"epoch": 0.201171875,
"grad_norm": 0.6294324994087219,
"learning_rate": 0.00016074582924435722,
"loss": 0.8749011158943176,
"step": 206
},
{
"epoch": 0.2021484375,
"grad_norm": 0.42371129989624023,
"learning_rate": 0.000160549558390579,
"loss": 0.9866290092468262,
"step": 207
},
{
"epoch": 0.203125,
"grad_norm": 0.5329370498657227,
"learning_rate": 0.00016035328753680078,
"loss": 0.7568405270576477,
"step": 208
},
{
"epoch": 0.2041015625,
"grad_norm": 0.37205901741981506,
"learning_rate": 0.00016015701668302258,
"loss": 0.7115534543991089,
"step": 209
},
{
"epoch": 0.205078125,
"grad_norm": 0.4536517262458801,
"learning_rate": 0.00015996074582924437,
"loss": 0.5152509808540344,
"step": 210
},
{
"epoch": 0.2060546875,
"grad_norm": 2.319321393966675,
"learning_rate": 0.00015976447497546617,
"loss": 0.2915653586387634,
"step": 211
},
{
"epoch": 0.20703125,
"grad_norm": 0.7047526836395264,
"learning_rate": 0.00015956820412168794,
"loss": 0.3070187568664551,
"step": 212
},
{
"epoch": 0.2080078125,
"grad_norm": 0.6068500280380249,
"learning_rate": 0.0001593719332679097,
"loss": 0.8103427290916443,
"step": 213
},
{
"epoch": 0.208984375,
"grad_norm": 0.3588794469833374,
"learning_rate": 0.0001591756624141315,
"loss": 0.4655485153198242,
"step": 214
},
{
"epoch": 0.2099609375,
"grad_norm": 0.6561040878295898,
"learning_rate": 0.0001589793915603533,
"loss": 0.5353362560272217,
"step": 215
},
{
"epoch": 0.2109375,
"grad_norm": 0.6485084891319275,
"learning_rate": 0.0001587831207065751,
"loss": 0.8601769804954529,
"step": 216
},
{
"epoch": 0.2119140625,
"grad_norm": 0.4718208909034729,
"learning_rate": 0.00015858684985279686,
"loss": 0.6897189617156982,
"step": 217
},
{
"epoch": 0.212890625,
"grad_norm": 0.7453560829162598,
"learning_rate": 0.00015839057899901866,
"loss": 1.0387171506881714,
"step": 218
},
{
"epoch": 0.2138671875,
"grad_norm": 0.41157087683677673,
"learning_rate": 0.00015819430814524043,
"loss": 0.4910873770713806,
"step": 219
},
{
"epoch": 0.21484375,
"grad_norm": 0.4198990762233734,
"learning_rate": 0.00015799803729146222,
"loss": 0.588080108165741,
"step": 220
},
{
"epoch": 0.2158203125,
"grad_norm": 0.7791650295257568,
"learning_rate": 0.00015780176643768402,
"loss": 0.754984974861145,
"step": 221
},
{
"epoch": 0.216796875,
"grad_norm": 1.4430909156799316,
"learning_rate": 0.00015760549558390581,
"loss": 0.5313946008682251,
"step": 222
},
{
"epoch": 0.2177734375,
"grad_norm": 0.4399142861366272,
"learning_rate": 0.00015740922473012758,
"loss": 0.523280918598175,
"step": 223
},
{
"epoch": 0.21875,
"grad_norm": 0.4177611470222473,
"learning_rate": 0.00015721295387634938,
"loss": 0.7598159313201904,
"step": 224
},
{
"epoch": 0.2197265625,
"grad_norm": 0.4408816397190094,
"learning_rate": 0.00015701668302257115,
"loss": 0.8131666779518127,
"step": 225
},
{
"epoch": 0.220703125,
"grad_norm": 0.4228694438934326,
"learning_rate": 0.00015682041216879294,
"loss": 1.0456180572509766,
"step": 226
},
{
"epoch": 0.2216796875,
"grad_norm": 0.6313449144363403,
"learning_rate": 0.00015662414131501474,
"loss": 0.496864914894104,
"step": 227
},
{
"epoch": 0.22265625,
"grad_norm": 0.48103493452072144,
"learning_rate": 0.0001564278704612365,
"loss": 0.5967347621917725,
"step": 228
},
{
"epoch": 0.2236328125,
"grad_norm": 0.3548172116279602,
"learning_rate": 0.0001562315996074583,
"loss": 0.3325611650943756,
"step": 229
},
{
"epoch": 0.224609375,
"grad_norm": 0.41543763875961304,
"learning_rate": 0.00015603532875368007,
"loss": 0.9223452806472778,
"step": 230
},
{
"epoch": 0.2255859375,
"grad_norm": 0.6072061061859131,
"learning_rate": 0.00015583905789990187,
"loss": 0.2860236167907715,
"step": 231
},
{
"epoch": 0.2265625,
"grad_norm": 0.3232869505882263,
"learning_rate": 0.00015564278704612364,
"loss": 0.7308738231658936,
"step": 232
},
{
"epoch": 0.2275390625,
"grad_norm": 0.5271327495574951,
"learning_rate": 0.00015544651619234546,
"loss": 1.0354498624801636,
"step": 233
},
{
"epoch": 0.228515625,
"grad_norm": 0.626105546951294,
"learning_rate": 0.00015525024533856723,
"loss": 1.0841856002807617,
"step": 234
},
{
"epoch": 0.2294921875,
"grad_norm": 0.5628311634063721,
"learning_rate": 0.00015505397448478903,
"loss": 0.8868529200553894,
"step": 235
},
{
"epoch": 0.23046875,
"grad_norm": 0.4290577471256256,
"learning_rate": 0.0001548577036310108,
"loss": 0.5887943506240845,
"step": 236
},
{
"epoch": 0.2314453125,
"grad_norm": 0.743786096572876,
"learning_rate": 0.0001546614327772326,
"loss": 0.8314348459243774,
"step": 237
},
{
"epoch": 0.232421875,
"grad_norm": 0.34498658776283264,
"learning_rate": 0.00015446516192345436,
"loss": 0.6171099543571472,
"step": 238
},
{
"epoch": 0.2333984375,
"grad_norm": 0.7894997596740723,
"learning_rate": 0.00015426889106967616,
"loss": 0.614283561706543,
"step": 239
},
{
"epoch": 0.234375,
"grad_norm": 0.4631381034851074,
"learning_rate": 0.00015407262021589795,
"loss": 0.6744101047515869,
"step": 240
},
{
"epoch": 0.2353515625,
"grad_norm": 0.44523295760154724,
"learning_rate": 0.00015387634936211975,
"loss": 0.7094103097915649,
"step": 241
},
{
"epoch": 0.236328125,
"grad_norm": 0.7059242725372314,
"learning_rate": 0.00015368007850834152,
"loss": 0.6856737732887268,
"step": 242
},
{
"epoch": 0.2373046875,
"grad_norm": 1.0360506772994995,
"learning_rate": 0.00015348380765456329,
"loss": 1.101341962814331,
"step": 243
},
{
"epoch": 0.23828125,
"grad_norm": 0.6630859375,
"learning_rate": 0.00015328753680078508,
"loss": 0.8815068006515503,
"step": 244
},
{
"epoch": 0.2392578125,
"grad_norm": 0.4162105321884155,
"learning_rate": 0.00015309126594700688,
"loss": 0.39801689982414246,
"step": 245
},
{
"epoch": 0.240234375,
"grad_norm": 0.5786510109901428,
"learning_rate": 0.00015289499509322867,
"loss": 0.5399383902549744,
"step": 246
},
{
"epoch": 0.2412109375,
"grad_norm": 0.5430185794830322,
"learning_rate": 0.00015269872423945044,
"loss": 0.5432325601577759,
"step": 247
},
{
"epoch": 0.2421875,
"grad_norm": 0.3750382959842682,
"learning_rate": 0.00015250245338567224,
"loss": 0.49265092611312866,
"step": 248
},
{
"epoch": 0.2431640625,
"grad_norm": 0.5081580877304077,
"learning_rate": 0.000152306182531894,
"loss": 0.8720104098320007,
"step": 249
},
{
"epoch": 0.244140625,
"grad_norm": 0.5619673728942871,
"learning_rate": 0.0001521099116781158,
"loss": 0.4022529125213623,
"step": 250
},
{
"epoch": 0.2451171875,
"grad_norm": 0.3996225893497467,
"learning_rate": 0.0001519136408243376,
"loss": 0.443879097700119,
"step": 251
},
{
"epoch": 0.24609375,
"grad_norm": 0.4688915014266968,
"learning_rate": 0.0001517173699705594,
"loss": 0.47562721371650696,
"step": 252
},
{
"epoch": 0.2470703125,
"grad_norm": 1.7595641613006592,
"learning_rate": 0.00015152109911678116,
"loss": 0.5174474716186523,
"step": 253
},
{
"epoch": 0.248046875,
"grad_norm": 0.47813650965690613,
"learning_rate": 0.00015132482826300296,
"loss": 0.8565359711647034,
"step": 254
},
{
"epoch": 0.2490234375,
"grad_norm": 0.49612802267074585,
"learning_rate": 0.00015112855740922473,
"loss": 0.4736977815628052,
"step": 255
},
{
"epoch": 0.25,
"grad_norm": 0.4370449483394623,
"learning_rate": 0.00015093228655544652,
"loss": 0.7566809058189392,
"step": 256
},
{
"epoch": 0.2509765625,
"grad_norm": 0.43916988372802734,
"learning_rate": 0.00015073601570166832,
"loss": 0.8396226763725281,
"step": 257
},
{
"epoch": 0.251953125,
"grad_norm": 0.7745673060417175,
"learning_rate": 0.0001505397448478901,
"loss": 0.3085971772670746,
"step": 258
},
{
"epoch": 0.2529296875,
"grad_norm": 0.4097643792629242,
"learning_rate": 0.00015034347399411188,
"loss": 0.2730502188205719,
"step": 259
},
{
"epoch": 0.25390625,
"grad_norm": 0.4131183624267578,
"learning_rate": 0.00015014720314033365,
"loss": 0.5422588586807251,
"step": 260
},
{
"epoch": 0.2548828125,
"grad_norm": 0.469498872756958,
"learning_rate": 0.00014995093228655545,
"loss": 0.6572885513305664,
"step": 261
},
{
"epoch": 0.255859375,
"grad_norm": 0.3662133514881134,
"learning_rate": 0.00014975466143277725,
"loss": 0.9272421598434448,
"step": 262
},
{
"epoch": 0.2568359375,
"grad_norm": 0.38194844126701355,
"learning_rate": 0.00014955839057899904,
"loss": 0.6010634303092957,
"step": 263
},
{
"epoch": 0.2578125,
"grad_norm": 0.3645467758178711,
"learning_rate": 0.0001493621197252208,
"loss": 0.9131143093109131,
"step": 264
},
{
"epoch": 0.2587890625,
"grad_norm": 0.3304290771484375,
"learning_rate": 0.0001491658488714426,
"loss": 0.4593530297279358,
"step": 265
},
{
"epoch": 0.259765625,
"grad_norm": 0.7529020309448242,
"learning_rate": 0.00014896957801766437,
"loss": 0.5219628810882568,
"step": 266
},
{
"epoch": 0.2607421875,
"grad_norm": 0.4974548816680908,
"learning_rate": 0.00014877330716388617,
"loss": 0.7617945075035095,
"step": 267
},
{
"epoch": 0.26171875,
"grad_norm": 0.28884655237197876,
"learning_rate": 0.00014857703631010797,
"loss": 0.4288986921310425,
"step": 268
},
{
"epoch": 0.2626953125,
"grad_norm": 0.5195730328559875,
"learning_rate": 0.00014838076545632976,
"loss": 0.830593466758728,
"step": 269
},
{
"epoch": 0.263671875,
"grad_norm": 0.40689924359321594,
"learning_rate": 0.00014818449460255153,
"loss": 0.7528857588768005,
"step": 270
},
{
"epoch": 0.2646484375,
"grad_norm": 0.33955928683280945,
"learning_rate": 0.00014798822374877333,
"loss": 0.5274187326431274,
"step": 271
},
{
"epoch": 0.265625,
"grad_norm": 1.0572726726531982,
"learning_rate": 0.0001477919528949951,
"loss": 0.7389089465141296,
"step": 272
},
{
"epoch": 0.2666015625,
"grad_norm": 0.5191348791122437,
"learning_rate": 0.00014759568204121686,
"loss": 0.4842514991760254,
"step": 273
},
{
"epoch": 0.267578125,
"grad_norm": 0.3779315650463104,
"learning_rate": 0.00014739941118743866,
"loss": 0.7406666278839111,
"step": 274
},
{
"epoch": 0.2685546875,
"grad_norm": 0.6065999865531921,
"learning_rate": 0.00014720314033366046,
"loss": 0.6771246790885925,
"step": 275
},
{
"epoch": 0.26953125,
"grad_norm": 0.537529468536377,
"learning_rate": 0.00014700686947988225,
"loss": 0.861257791519165,
"step": 276
},
{
"epoch": 0.2705078125,
"grad_norm": 0.3961732089519501,
"learning_rate": 0.00014681059862610402,
"loss": 0.9672999382019043,
"step": 277
},
{
"epoch": 0.271484375,
"grad_norm": 0.45974740386009216,
"learning_rate": 0.00014661432777232582,
"loss": 0.5789016485214233,
"step": 278
},
{
"epoch": 0.2724609375,
"grad_norm": 0.7211292386054993,
"learning_rate": 0.00014641805691854759,
"loss": 0.867314338684082,
"step": 279
},
{
"epoch": 0.2734375,
"grad_norm": 0.6938930749893188,
"learning_rate": 0.00014622178606476938,
"loss": 0.4570122957229614,
"step": 280
},
{
"epoch": 0.2744140625,
"grad_norm": 0.5093329548835754,
"learning_rate": 0.00014602551521099118,
"loss": 0.9487482309341431,
"step": 281
},
{
"epoch": 0.275390625,
"grad_norm": 0.4403358995914459,
"learning_rate": 0.00014582924435721297,
"loss": 0.5330759286880493,
"step": 282
},
{
"epoch": 0.2763671875,
"grad_norm": 0.5305198431015015,
"learning_rate": 0.00014563297350343474,
"loss": 0.8727459907531738,
"step": 283
},
{
"epoch": 0.27734375,
"grad_norm": 0.49577099084854126,
"learning_rate": 0.00014543670264965654,
"loss": 0.6166709065437317,
"step": 284
},
{
"epoch": 0.2783203125,
"grad_norm": 0.4856763780117035,
"learning_rate": 0.0001452404317958783,
"loss": 0.920722484588623,
"step": 285
},
{
"epoch": 0.279296875,
"grad_norm": 0.3397112786769867,
"learning_rate": 0.0001450441609421001,
"loss": 1.001542329788208,
"step": 286
},
{
"epoch": 0.2802734375,
"grad_norm": 0.591691792011261,
"learning_rate": 0.0001448478900883219,
"loss": 0.4898494780063629,
"step": 287
},
{
"epoch": 0.28125,
"grad_norm": 0.45293164253234863,
"learning_rate": 0.00014465161923454367,
"loss": 0.4958389401435852,
"step": 288
},
{
"epoch": 0.2822265625,
"grad_norm": 0.38414305448532104,
"learning_rate": 0.00014445534838076546,
"loss": 0.3971215784549713,
"step": 289
},
{
"epoch": 0.283203125,
"grad_norm": 0.5568608045578003,
"learning_rate": 0.00014425907752698723,
"loss": 0.7953230142593384,
"step": 290
},
{
"epoch": 0.2841796875,
"grad_norm": 0.3680984377861023,
"learning_rate": 0.00014406280667320903,
"loss": 0.703729510307312,
"step": 291
},
{
"epoch": 0.28515625,
"grad_norm": 0.4263870120048523,
"learning_rate": 0.00014386653581943082,
"loss": 0.7433100938796997,
"step": 292
},
{
"epoch": 0.2861328125,
"grad_norm": 1.3262213468551636,
"learning_rate": 0.00014367026496565262,
"loss": 0.8011248111724854,
"step": 293
},
{
"epoch": 0.287109375,
"grad_norm": 0.44766104221343994,
"learning_rate": 0.0001434739941118744,
"loss": 0.6682827472686768,
"step": 294
},
{
"epoch": 0.2880859375,
"grad_norm": 0.7399169206619263,
"learning_rate": 0.00014327772325809619,
"loss": 0.8356127142906189,
"step": 295
},
{
"epoch": 0.2890625,
"grad_norm": 0.3582242727279663,
"learning_rate": 0.00014308145240431795,
"loss": 0.7127545475959778,
"step": 296
},
{
"epoch": 0.2900390625,
"grad_norm": 0.5251145958900452,
"learning_rate": 0.00014288518155053975,
"loss": 0.7467620968818665,
"step": 297
},
{
"epoch": 0.291015625,
"grad_norm": 0.639377772808075,
"learning_rate": 0.00014268891069676155,
"loss": 0.434887170791626,
"step": 298
},
{
"epoch": 0.2919921875,
"grad_norm": 0.5007404685020447,
"learning_rate": 0.00014249263984298334,
"loss": 1.028229832649231,
"step": 299
},
{
"epoch": 0.29296875,
"grad_norm": 0.41101035475730896,
"learning_rate": 0.0001422963689892051,
"loss": 0.8766242265701294,
"step": 300
},
{
"epoch": 0.2939453125,
"grad_norm": 0.3938690721988678,
"learning_rate": 0.0001421000981354269,
"loss": 0.7176960706710815,
"step": 301
},
{
"epoch": 0.294921875,
"grad_norm": 0.5939344763755798,
"learning_rate": 0.00014190382728164868,
"loss": 0.6655953526496887,
"step": 302
},
{
"epoch": 0.2958984375,
"grad_norm": 0.47224998474121094,
"learning_rate": 0.00014170755642787047,
"loss": 0.9155608415603638,
"step": 303
},
{
"epoch": 0.296875,
"grad_norm": 0.41344454884529114,
"learning_rate": 0.00014151128557409227,
"loss": 0.6017557382583618,
"step": 304
},
{
"epoch": 0.2978515625,
"grad_norm": 0.514320969581604,
"learning_rate": 0.00014131501472031404,
"loss": 0.6184566617012024,
"step": 305
},
{
"epoch": 0.298828125,
"grad_norm": 0.5005887150764465,
"learning_rate": 0.00014111874386653583,
"loss": 0.6652892231941223,
"step": 306
},
{
"epoch": 0.2998046875,
"grad_norm": 0.5872619152069092,
"learning_rate": 0.0001409224730127576,
"loss": 0.8618959784507751,
"step": 307
},
{
"epoch": 0.30078125,
"grad_norm": 0.5114542245864868,
"learning_rate": 0.0001407262021589794,
"loss": 0.6637990474700928,
"step": 308
},
{
"epoch": 0.3017578125,
"grad_norm": 1.141750693321228,
"learning_rate": 0.00014052993130520117,
"loss": 0.6234999299049377,
"step": 309
},
{
"epoch": 0.302734375,
"grad_norm": 0.4786873459815979,
"learning_rate": 0.000140333660451423,
"loss": 0.9601540565490723,
"step": 310
},
{
"epoch": 0.3037109375,
"grad_norm": 0.6048462390899658,
"learning_rate": 0.00014013738959764476,
"loss": 0.5895652770996094,
"step": 311
},
{
"epoch": 0.3046875,
"grad_norm": 0.7435188889503479,
"learning_rate": 0.00013994111874386655,
"loss": 1.196149468421936,
"step": 312
},
{
"epoch": 0.3056640625,
"grad_norm": 0.7936303019523621,
"learning_rate": 0.00013974484789008832,
"loss": 0.6073983907699585,
"step": 313
},
{
"epoch": 0.306640625,
"grad_norm": 0.5199156403541565,
"learning_rate": 0.00013954857703631012,
"loss": 0.2734944224357605,
"step": 314
},
{
"epoch": 0.3076171875,
"grad_norm": 0.38845276832580566,
"learning_rate": 0.0001393523061825319,
"loss": 0.604506254196167,
"step": 315
},
{
"epoch": 0.30859375,
"grad_norm": 0.6925122737884521,
"learning_rate": 0.0001391560353287537,
"loss": 1.0446012020111084,
"step": 316
},
{
"epoch": 0.3095703125,
"grad_norm": 0.4950433075428009,
"learning_rate": 0.00013895976447497548,
"loss": 1.027349591255188,
"step": 317
},
{
"epoch": 0.310546875,
"grad_norm": 0.36179935932159424,
"learning_rate": 0.00013876349362119725,
"loss": 0.6760075688362122,
"step": 318
},
{
"epoch": 0.3115234375,
"grad_norm": 0.3730153739452362,
"learning_rate": 0.00013856722276741904,
"loss": 0.47779884934425354,
"step": 319
},
{
"epoch": 0.3125,
"grad_norm": 0.6181739568710327,
"learning_rate": 0.0001383709519136408,
"loss": 0.4747524857521057,
"step": 320
},
{
"epoch": 0.3134765625,
"grad_norm": 0.8233240246772766,
"learning_rate": 0.0001381746810598626,
"loss": 0.490276575088501,
"step": 321
},
{
"epoch": 0.314453125,
"grad_norm": 0.6492604613304138,
"learning_rate": 0.0001379784102060844,
"loss": 0.44847172498703003,
"step": 322
},
{
"epoch": 0.3154296875,
"grad_norm": 0.5506369471549988,
"learning_rate": 0.0001377821393523062,
"loss": 0.47955968976020813,
"step": 323
},
{
"epoch": 0.31640625,
"grad_norm": 0.4187554717063904,
"learning_rate": 0.00013758586849852797,
"loss": 0.6466250419616699,
"step": 324
},
{
"epoch": 0.3173828125,
"grad_norm": 0.3976380527019501,
"learning_rate": 0.00013738959764474977,
"loss": 0.756473183631897,
"step": 325
},
{
"epoch": 0.318359375,
"grad_norm": 0.6089552044868469,
"learning_rate": 0.00013719332679097153,
"loss": 0.9309840202331543,
"step": 326
},
{
"epoch": 0.3193359375,
"grad_norm": 0.31628501415252686,
"learning_rate": 0.00013699705593719333,
"loss": 0.7739764451980591,
"step": 327
},
{
"epoch": 0.3203125,
"grad_norm": 0.6984357237815857,
"learning_rate": 0.00013680078508341513,
"loss": 1.0047030448913574,
"step": 328
},
{
"epoch": 0.3212890625,
"grad_norm": 0.42705219984054565,
"learning_rate": 0.00013660451422963692,
"loss": 0.5215034484863281,
"step": 329
},
{
"epoch": 0.322265625,
"grad_norm": 0.3548984229564667,
"learning_rate": 0.0001364082433758587,
"loss": 0.777184009552002,
"step": 330
},
{
"epoch": 0.3232421875,
"grad_norm": 0.6042805314064026,
"learning_rate": 0.00013621197252208046,
"loss": 0.469806432723999,
"step": 331
},
{
"epoch": 0.32421875,
"grad_norm": 0.43482446670532227,
"learning_rate": 0.00013601570166830226,
"loss": 0.8123322129249573,
"step": 332
},
{
"epoch": 0.3251953125,
"grad_norm": 0.4851783812046051,
"learning_rate": 0.00013581943081452405,
"loss": 1.1560527086257935,
"step": 333
},
{
"epoch": 0.326171875,
"grad_norm": 0.681423008441925,
"learning_rate": 0.00013562315996074585,
"loss": 0.5681013464927673,
"step": 334
},
{
"epoch": 0.3271484375,
"grad_norm": 0.43838411569595337,
"learning_rate": 0.00013542688910696762,
"loss": 0.8758999109268188,
"step": 335
},
{
"epoch": 0.328125,
"grad_norm": 0.5508302450180054,
"learning_rate": 0.0001352306182531894,
"loss": 0.7725740671157837,
"step": 336
},
{
"epoch": 0.3291015625,
"grad_norm": 0.2603519856929779,
"learning_rate": 0.00013503434739941118,
"loss": 0.357033908367157,
"step": 337
},
{
"epoch": 0.330078125,
"grad_norm": 0.38098394870758057,
"learning_rate": 0.00013483807654563298,
"loss": 0.41752922534942627,
"step": 338
},
{
"epoch": 0.3310546875,
"grad_norm": 0.5308575630187988,
"learning_rate": 0.00013464180569185477,
"loss": 0.6187021732330322,
"step": 339
},
{
"epoch": 0.33203125,
"grad_norm": 0.4033392369747162,
"learning_rate": 0.00013444553483807657,
"loss": 0.9481551647186279,
"step": 340
},
{
"epoch": 0.3330078125,
"grad_norm": 0.3999135494232178,
"learning_rate": 0.00013424926398429834,
"loss": 0.6853100657463074,
"step": 341
},
{
"epoch": 0.333984375,
"grad_norm": 0.4521353840827942,
"learning_rate": 0.00013405299313052013,
"loss": 1.0335659980773926,
"step": 342
},
{
"epoch": 0.3349609375,
"grad_norm": 0.3538281321525574,
"learning_rate": 0.0001338567222767419,
"loss": 0.821506142616272,
"step": 343
},
{
"epoch": 0.3359375,
"grad_norm": 0.49575889110565186,
"learning_rate": 0.0001336604514229637,
"loss": 0.6124354004859924,
"step": 344
},
{
"epoch": 0.3369140625,
"grad_norm": 0.37985700368881226,
"learning_rate": 0.0001334641805691855,
"loss": 0.6803320646286011,
"step": 345
},
{
"epoch": 0.337890625,
"grad_norm": 0.3533600866794586,
"learning_rate": 0.00013326790971540726,
"loss": 0.7260403037071228,
"step": 346
},
{
"epoch": 0.3388671875,
"grad_norm": 0.49213504791259766,
"learning_rate": 0.00013307163886162906,
"loss": 0.9051091074943542,
"step": 347
},
{
"epoch": 0.33984375,
"grad_norm": 0.37704166769981384,
"learning_rate": 0.00013287536800785083,
"loss": 0.4471222460269928,
"step": 348
},
{
"epoch": 0.3408203125,
"grad_norm": 0.4309573471546173,
"learning_rate": 0.00013267909715407262,
"loss": 0.749025285243988,
"step": 349
},
{
"epoch": 0.341796875,
"grad_norm": 0.7491689920425415,
"learning_rate": 0.0001324828263002944,
"loss": 1.1318167448043823,
"step": 350
},
{
"epoch": 0.3427734375,
"grad_norm": 0.3965498208999634,
"learning_rate": 0.00013228655544651622,
"loss": 0.8451839685440063,
"step": 351
},
{
"epoch": 0.34375,
"grad_norm": 0.4446418285369873,
"learning_rate": 0.00013209028459273798,
"loss": 0.7875360250473022,
"step": 352
},
{
"epoch": 0.3447265625,
"grad_norm": 0.3396705985069275,
"learning_rate": 0.00013189401373895978,
"loss": 0.8446518182754517,
"step": 353
},
{
"epoch": 0.345703125,
"grad_norm": 0.3436250388622284,
"learning_rate": 0.00013169774288518155,
"loss": 0.8995112180709839,
"step": 354
},
{
"epoch": 0.3466796875,
"grad_norm": 0.33643823862075806,
"learning_rate": 0.00013150147203140334,
"loss": 0.6253601312637329,
"step": 355
},
{
"epoch": 0.34765625,
"grad_norm": 0.39978718757629395,
"learning_rate": 0.0001313052011776251,
"loss": 0.31882500648498535,
"step": 356
},
{
"epoch": 0.3486328125,
"grad_norm": 0.3054925799369812,
"learning_rate": 0.00013110893032384694,
"loss": 0.3698769807815552,
"step": 357
},
{
"epoch": 0.349609375,
"grad_norm": 0.3789948523044586,
"learning_rate": 0.0001309126594700687,
"loss": 0.9039162397384644,
"step": 358
},
{
"epoch": 0.3505859375,
"grad_norm": 0.4192582964897156,
"learning_rate": 0.0001307163886162905,
"loss": 0.7852678298950195,
"step": 359
},
{
"epoch": 0.3515625,
"grad_norm": 0.5130710601806641,
"learning_rate": 0.00013052011776251227,
"loss": 0.7745686769485474,
"step": 360
},
{
"epoch": 0.3525390625,
"grad_norm": 0.39334234595298767,
"learning_rate": 0.00013032384690873404,
"loss": 0.7644802331924438,
"step": 361
},
{
"epoch": 0.353515625,
"grad_norm": 0.6141180992126465,
"learning_rate": 0.00013012757605495583,
"loss": 0.6028044819831848,
"step": 362
},
{
"epoch": 0.3544921875,
"grad_norm": 0.33263200521469116,
"learning_rate": 0.00012993130520117763,
"loss": 0.6908546090126038,
"step": 363
},
{
"epoch": 0.35546875,
"grad_norm": 0.3901807367801666,
"learning_rate": 0.00012973503434739943,
"loss": 0.8896909952163696,
"step": 364
},
{
"epoch": 0.3564453125,
"grad_norm": 0.3889808654785156,
"learning_rate": 0.0001295387634936212,
"loss": 0.622492790222168,
"step": 365
},
{
"epoch": 0.357421875,
"grad_norm": 0.41004979610443115,
"learning_rate": 0.000129342492639843,
"loss": 0.6293104887008667,
"step": 366
},
{
"epoch": 0.3583984375,
"grad_norm": 0.32929369807243347,
"learning_rate": 0.00012914622178606476,
"loss": 0.7049382925033569,
"step": 367
},
{
"epoch": 0.359375,
"grad_norm": 0.5189999341964722,
"learning_rate": 0.00012894995093228656,
"loss": 0.9230547547340393,
"step": 368
},
{
"epoch": 0.3603515625,
"grad_norm": 0.290991872549057,
"learning_rate": 0.00012875368007850835,
"loss": 0.5716772079467773,
"step": 369
},
{
"epoch": 0.361328125,
"grad_norm": 0.3976893126964569,
"learning_rate": 0.00012855740922473015,
"loss": 0.4593455493450165,
"step": 370
},
{
"epoch": 0.3623046875,
"grad_norm": 0.38385459780693054,
"learning_rate": 0.00012836113837095192,
"loss": 0.4766542315483093,
"step": 371
},
{
"epoch": 0.36328125,
"grad_norm": 0.45652449131011963,
"learning_rate": 0.0001281648675171737,
"loss": 0.9292062520980835,
"step": 372
},
{
"epoch": 0.3642578125,
"grad_norm": 0.384463906288147,
"learning_rate": 0.00012796859666339548,
"loss": 0.7896109223365784,
"step": 373
},
{
"epoch": 0.365234375,
"grad_norm": 0.43412724137306213,
"learning_rate": 0.00012777232580961728,
"loss": 0.6185650825500488,
"step": 374
},
{
"epoch": 0.3662109375,
"grad_norm": 0.4574507772922516,
"learning_rate": 0.00012757605495583907,
"loss": 0.5614027380943298,
"step": 375
},
{
"epoch": 0.3671875,
"grad_norm": 0.2921536862850189,
"learning_rate": 0.00012737978410206084,
"loss": 0.26786333322525024,
"step": 376
},
{
"epoch": 0.3681640625,
"grad_norm": 0.5887529850006104,
"learning_rate": 0.00012718351324828264,
"loss": 0.4167410433292389,
"step": 377
},
{
"epoch": 0.369140625,
"grad_norm": 0.3651127815246582,
"learning_rate": 0.0001269872423945044,
"loss": 1.0140016078948975,
"step": 378
},
{
"epoch": 0.3701171875,
"grad_norm": 0.47206228971481323,
"learning_rate": 0.0001267909715407262,
"loss": 0.8293377757072449,
"step": 379
},
{
"epoch": 0.37109375,
"grad_norm": 0.6319689154624939,
"learning_rate": 0.000126594700686948,
"loss": 0.7301446795463562,
"step": 380
},
{
"epoch": 0.3720703125,
"grad_norm": 0.5163951516151428,
"learning_rate": 0.0001263984298331698,
"loss": 0.9944421648979187,
"step": 381
},
{
"epoch": 0.373046875,
"grad_norm": 0.519072949886322,
"learning_rate": 0.00012620215897939156,
"loss": 0.6176541447639465,
"step": 382
},
{
"epoch": 0.3740234375,
"grad_norm": 3.0750813484191895,
"learning_rate": 0.00012600588812561336,
"loss": 0.7531320452690125,
"step": 383
},
{
"epoch": 0.375,
"grad_norm": 0.3246331512928009,
"learning_rate": 0.00012580961727183513,
"loss": 0.3269459903240204,
"step": 384
},
{
"epoch": 0.3759765625,
"grad_norm": 1.1105197668075562,
"learning_rate": 0.00012561334641805692,
"loss": 0.4228656589984894,
"step": 385
},
{
"epoch": 0.376953125,
"grad_norm": 0.6776182055473328,
"learning_rate": 0.00012541707556427872,
"loss": 0.791953980922699,
"step": 386
},
{
"epoch": 0.3779296875,
"grad_norm": 0.4413786828517914,
"learning_rate": 0.00012522080471050052,
"loss": 0.7953442335128784,
"step": 387
},
{
"epoch": 0.37890625,
"grad_norm": 0.4036264419555664,
"learning_rate": 0.00012502453385672228,
"loss": 0.6062744855880737,
"step": 388
},
{
"epoch": 0.3798828125,
"grad_norm": 1.0638166666030884,
"learning_rate": 0.00012482826300294408,
"loss": 1.0578093528747559,
"step": 389
},
{
"epoch": 0.380859375,
"grad_norm": 0.2518276572227478,
"learning_rate": 0.00012463199214916585,
"loss": 0.5070685148239136,
"step": 390
},
{
"epoch": 0.3818359375,
"grad_norm": 0.3338214159011841,
"learning_rate": 0.00012443572129538762,
"loss": 0.7665579915046692,
"step": 391
},
{
"epoch": 0.3828125,
"grad_norm": 0.4730507433414459,
"learning_rate": 0.00012423945044160944,
"loss": 0.48353517055511475,
"step": 392
},
{
"epoch": 0.3837890625,
"grad_norm": 0.3488924503326416,
"learning_rate": 0.0001240431795878312,
"loss": 0.4422420561313629,
"step": 393
},
{
"epoch": 0.384765625,
"grad_norm": 0.2397361695766449,
"learning_rate": 0.000123846908734053,
"loss": 0.7025644183158875,
"step": 394
},
{
"epoch": 0.3857421875,
"grad_norm": 0.3638167679309845,
"learning_rate": 0.00012365063788027478,
"loss": 0.5372107625007629,
"step": 395
},
{
"epoch": 0.38671875,
"grad_norm": 0.4088346064090729,
"learning_rate": 0.00012345436702649657,
"loss": 0.7636011838912964,
"step": 396
},
{
"epoch": 0.3876953125,
"grad_norm": 0.36985111236572266,
"learning_rate": 0.00012325809617271834,
"loss": 0.6720612645149231,
"step": 397
},
{
"epoch": 0.388671875,
"grad_norm": 0.37556055188179016,
"learning_rate": 0.00012306182531894016,
"loss": 0.8087592124938965,
"step": 398
},
{
"epoch": 0.3896484375,
"grad_norm": 0.6851724982261658,
"learning_rate": 0.00012286555446516193,
"loss": 0.780835747718811,
"step": 399
},
{
"epoch": 0.390625,
"grad_norm": 0.3453989326953888,
"learning_rate": 0.00012266928361138373,
"loss": 0.8235517740249634,
"step": 400
},
{
"epoch": 0.3916015625,
"grad_norm": 0.43622198700904846,
"learning_rate": 0.0001224730127576055,
"loss": 0.3758167028427124,
"step": 401
},
{
"epoch": 0.392578125,
"grad_norm": 0.4364018142223358,
"learning_rate": 0.0001222767419038273,
"loss": 0.7123017907142639,
"step": 402
},
{
"epoch": 0.3935546875,
"grad_norm": 0.24169716238975525,
"learning_rate": 0.00012208047105004906,
"loss": 0.48390328884124756,
"step": 403
},
{
"epoch": 0.39453125,
"grad_norm": 3.4902851581573486,
"learning_rate": 0.00012188420019627087,
"loss": 0.8519951105117798,
"step": 404
},
{
"epoch": 0.3955078125,
"grad_norm": 0.8332751989364624,
"learning_rate": 0.00012168792934249264,
"loss": 0.7562370896339417,
"step": 405
},
{
"epoch": 0.396484375,
"grad_norm": 0.3582589030265808,
"learning_rate": 0.00012149165848871442,
"loss": 0.3723471164703369,
"step": 406
},
{
"epoch": 0.3974609375,
"grad_norm": 0.48302146792411804,
"learning_rate": 0.00012129538763493622,
"loss": 1.0008171796798706,
"step": 407
},
{
"epoch": 0.3984375,
"grad_norm": 0.3510138988494873,
"learning_rate": 0.000121099116781158,
"loss": 0.30772703886032104,
"step": 408
},
{
"epoch": 0.3994140625,
"grad_norm": 0.2771015763282776,
"learning_rate": 0.0001209028459273798,
"loss": 0.4403090178966522,
"step": 409
},
{
"epoch": 0.400390625,
"grad_norm": 0.42239415645599365,
"learning_rate": 0.00012070657507360156,
"loss": 0.5451241731643677,
"step": 410
},
{
"epoch": 0.4013671875,
"grad_norm": 0.27876874804496765,
"learning_rate": 0.00012051030421982336,
"loss": 0.3590753972530365,
"step": 411
},
{
"epoch": 0.40234375,
"grad_norm": 0.42854824662208557,
"learning_rate": 0.00012031403336604514,
"loss": 1.0192680358886719,
"step": 412
},
{
"epoch": 0.4033203125,
"grad_norm": 0.32980695366859436,
"learning_rate": 0.00012011776251226694,
"loss": 0.6476566195487976,
"step": 413
},
{
"epoch": 0.404296875,
"grad_norm": 0.45046037435531616,
"learning_rate": 0.00011992149165848872,
"loss": 0.9548048973083496,
"step": 414
},
{
"epoch": 0.4052734375,
"grad_norm": 0.4176082909107208,
"learning_rate": 0.00011972522080471052,
"loss": 0.3793225586414337,
"step": 415
},
{
"epoch": 0.40625,
"grad_norm": 0.335823118686676,
"learning_rate": 0.00011952894995093229,
"loss": 0.5807560086250305,
"step": 416
},
{
"epoch": 0.4072265625,
"grad_norm": 0.4758591651916504,
"learning_rate": 0.00011933267909715408,
"loss": 0.3924551010131836,
"step": 417
},
{
"epoch": 0.408203125,
"grad_norm": 0.21527709066867828,
"learning_rate": 0.00011913640824337586,
"loss": 0.1651245653629303,
"step": 418
},
{
"epoch": 0.4091796875,
"grad_norm": 0.31255391240119934,
"learning_rate": 0.00011894013738959766,
"loss": 0.6133516430854797,
"step": 419
},
{
"epoch": 0.41015625,
"grad_norm": 0.40668365359306335,
"learning_rate": 0.00011874386653581944,
"loss": 0.894720196723938,
"step": 420
},
{
"epoch": 0.4111328125,
"grad_norm": 0.35574087500572205,
"learning_rate": 0.00011854759568204121,
"loss": 0.9017484188079834,
"step": 421
},
{
"epoch": 0.412109375,
"grad_norm": 0.3389612138271332,
"learning_rate": 0.00011835132482826301,
"loss": 0.7961660623550415,
"step": 422
},
{
"epoch": 0.4130859375,
"grad_norm": 0.8334202766418457,
"learning_rate": 0.00011815505397448479,
"loss": 0.8654063940048218,
"step": 423
},
{
"epoch": 0.4140625,
"grad_norm": 0.5917571187019348,
"learning_rate": 0.00011795878312070659,
"loss": 0.631730318069458,
"step": 424
},
{
"epoch": 0.4150390625,
"grad_norm": 0.4908443093299866,
"learning_rate": 0.00011776251226692835,
"loss": 0.3205869495868683,
"step": 425
},
{
"epoch": 0.416015625,
"grad_norm": 0.8349789381027222,
"learning_rate": 0.00011756624141315016,
"loss": 0.8526176810264587,
"step": 426
},
{
"epoch": 0.4169921875,
"grad_norm": 0.38712671399116516,
"learning_rate": 0.00011736997055937193,
"loss": 0.6580482125282288,
"step": 427
},
{
"epoch": 0.41796875,
"grad_norm": 0.766034722328186,
"learning_rate": 0.00011717369970559373,
"loss": 0.5494309663772583,
"step": 428
},
{
"epoch": 0.4189453125,
"grad_norm": 0.33322349190711975,
"learning_rate": 0.00011697742885181551,
"loss": 0.38351887464523315,
"step": 429
},
{
"epoch": 0.419921875,
"grad_norm": 0.411155641078949,
"learning_rate": 0.00011678115799803731,
"loss": 0.8139836192131042,
"step": 430
},
{
"epoch": 0.4208984375,
"grad_norm": 0.5857217907905579,
"learning_rate": 0.00011658488714425908,
"loss": 0.5668150186538696,
"step": 431
},
{
"epoch": 0.421875,
"grad_norm": 0.8849710822105408,
"learning_rate": 0.00011638861629048087,
"loss": 0.5478008985519409,
"step": 432
},
{
"epoch": 0.4228515625,
"grad_norm": 0.6771020293235779,
"learning_rate": 0.00011619234543670265,
"loss": 0.608709454536438,
"step": 433
},
{
"epoch": 0.423828125,
"grad_norm": 0.30138713121414185,
"learning_rate": 0.00011599607458292445,
"loss": 0.8240669369697571,
"step": 434
},
{
"epoch": 0.4248046875,
"grad_norm": 0.3273598253726959,
"learning_rate": 0.00011579980372914623,
"loss": 0.6287229657173157,
"step": 435
},
{
"epoch": 0.42578125,
"grad_norm": 0.5044806003570557,
"learning_rate": 0.000115603532875368,
"loss": 0.735835075378418,
"step": 436
},
{
"epoch": 0.4267578125,
"grad_norm": 0.34495776891708374,
"learning_rate": 0.0001154072620215898,
"loss": 0.7688421010971069,
"step": 437
},
{
"epoch": 0.427734375,
"grad_norm": 0.41923069953918457,
"learning_rate": 0.00011521099116781158,
"loss": 0.679617166519165,
"step": 438
},
{
"epoch": 0.4287109375,
"grad_norm": 0.3509843945503235,
"learning_rate": 0.00011501472031403338,
"loss": 0.7478575110435486,
"step": 439
},
{
"epoch": 0.4296875,
"grad_norm": 0.4758707582950592,
"learning_rate": 0.00011481844946025514,
"loss": 0.48871147632598877,
"step": 440
},
{
"epoch": 0.4306640625,
"grad_norm": 0.30272597074508667,
"learning_rate": 0.00011462217860647695,
"loss": 0.4311315715312958,
"step": 441
},
{
"epoch": 0.431640625,
"grad_norm": 0.5226417779922485,
"learning_rate": 0.00011442590775269872,
"loss": 0.8198300004005432,
"step": 442
},
{
"epoch": 0.4326171875,
"grad_norm": 0.41183850169181824,
"learning_rate": 0.00011422963689892052,
"loss": 0.9958367347717285,
"step": 443
},
{
"epoch": 0.43359375,
"grad_norm": 0.384048193693161,
"learning_rate": 0.0001140333660451423,
"loss": 0.3194778859615326,
"step": 444
},
{
"epoch": 0.4345703125,
"grad_norm": 0.5035115480422974,
"learning_rate": 0.0001138370951913641,
"loss": 0.6455928683280945,
"step": 445
},
{
"epoch": 0.435546875,
"grad_norm": 0.4875551462173462,
"learning_rate": 0.00011364082433758587,
"loss": 0.799978494644165,
"step": 446
},
{
"epoch": 0.4365234375,
"grad_norm": 0.3395763337612152,
"learning_rate": 0.00011344455348380768,
"loss": 0.47672414779663086,
"step": 447
},
{
"epoch": 0.4375,
"grad_norm": 0.5594314932823181,
"learning_rate": 0.00011324828263002944,
"loss": 0.4325803518295288,
"step": 448
},
{
"epoch": 0.4384765625,
"grad_norm": 0.44647228717803955,
"learning_rate": 0.00011305201177625124,
"loss": 0.8119433522224426,
"step": 449
},
{
"epoch": 0.439453125,
"grad_norm": 0.3190518915653229,
"learning_rate": 0.00011285574092247302,
"loss": 0.4949466288089752,
"step": 450
},
{
"epoch": 0.4404296875,
"grad_norm": 0.5943452715873718,
"learning_rate": 0.00011265947006869479,
"loss": 0.8245764374732971,
"step": 451
},
{
"epoch": 0.44140625,
"grad_norm": 0.8067309260368347,
"learning_rate": 0.00011246319921491659,
"loss": 0.39331740140914917,
"step": 452
},
{
"epoch": 0.4423828125,
"grad_norm": 0.4130857288837433,
"learning_rate": 0.00011226692836113837,
"loss": 1.0005946159362793,
"step": 453
},
{
"epoch": 0.443359375,
"grad_norm": 0.6839224100112915,
"learning_rate": 0.00011207065750736017,
"loss": 0.453269362449646,
"step": 454
},
{
"epoch": 0.4443359375,
"grad_norm": 0.6282085180282593,
"learning_rate": 0.00011187438665358195,
"loss": 0.7137607932090759,
"step": 455
},
{
"epoch": 0.4453125,
"grad_norm": 0.49894508719444275,
"learning_rate": 0.00011167811579980374,
"loss": 0.6289803981781006,
"step": 456
},
{
"epoch": 0.4462890625,
"grad_norm": 0.3570895493030548,
"learning_rate": 0.00011148184494602551,
"loss": 0.3711976110935211,
"step": 457
},
{
"epoch": 0.447265625,
"grad_norm": 0.28931114077568054,
"learning_rate": 0.00011128557409224731,
"loss": 0.5629679560661316,
"step": 458
},
{
"epoch": 0.4482421875,
"grad_norm": 1.2492791414260864,
"learning_rate": 0.00011108930323846909,
"loss": 0.5821082592010498,
"step": 459
},
{
"epoch": 0.44921875,
"grad_norm": 0.29861876368522644,
"learning_rate": 0.00011089303238469089,
"loss": 0.4129573106765747,
"step": 460
},
{
"epoch": 0.4501953125,
"grad_norm": 0.5244950652122498,
"learning_rate": 0.00011069676153091267,
"loss": 0.8300201296806335,
"step": 461
},
{
"epoch": 0.451171875,
"grad_norm": 0.446435809135437,
"learning_rate": 0.00011050049067713446,
"loss": 0.7500958442687988,
"step": 462
},
{
"epoch": 0.4521484375,
"grad_norm": 0.4531306028366089,
"learning_rate": 0.00011030421982335623,
"loss": 0.8492609262466431,
"step": 463
},
{
"epoch": 0.453125,
"grad_norm": 0.46944308280944824,
"learning_rate": 0.00011010794896957802,
"loss": 0.6209090948104858,
"step": 464
},
{
"epoch": 0.4541015625,
"grad_norm": 0.5465651154518127,
"learning_rate": 0.00010991167811579981,
"loss": 0.5176469087600708,
"step": 465
},
{
"epoch": 0.455078125,
"grad_norm": 0.36550402641296387,
"learning_rate": 0.00010971540726202158,
"loss": 0.6358295679092407,
"step": 466
},
{
"epoch": 0.4560546875,
"grad_norm": 0.48919910192489624,
"learning_rate": 0.00010951913640824338,
"loss": 0.5903019905090332,
"step": 467
},
{
"epoch": 0.45703125,
"grad_norm": 0.4378332793712616,
"learning_rate": 0.00010932286555446516,
"loss": 0.6710047721862793,
"step": 468
},
{
"epoch": 0.4580078125,
"grad_norm": 0.3095405101776123,
"learning_rate": 0.00010912659470068696,
"loss": 0.6787213683128357,
"step": 469
},
{
"epoch": 0.458984375,
"grad_norm": 0.40901967883110046,
"learning_rate": 0.00010893032384690874,
"loss": 0.6371384859085083,
"step": 470
},
{
"epoch": 0.4599609375,
"grad_norm": 0.3962486982345581,
"learning_rate": 0.00010873405299313053,
"loss": 0.5823498964309692,
"step": 471
},
{
"epoch": 0.4609375,
"grad_norm": 0.4094708263874054,
"learning_rate": 0.0001085377821393523,
"loss": 1.0396480560302734,
"step": 472
},
{
"epoch": 0.4619140625,
"grad_norm": 0.5117614269256592,
"learning_rate": 0.0001083415112855741,
"loss": 0.6320610642433167,
"step": 473
},
{
"epoch": 0.462890625,
"grad_norm": 0.28345227241516113,
"learning_rate": 0.00010814524043179588,
"loss": 0.33279290795326233,
"step": 474
},
{
"epoch": 0.4638671875,
"grad_norm": 0.5475791096687317,
"learning_rate": 0.00010794896957801768,
"loss": 0.359570175409317,
"step": 475
},
{
"epoch": 0.46484375,
"grad_norm": 0.44176843762397766,
"learning_rate": 0.00010775269872423946,
"loss": 0.7576714158058167,
"step": 476
},
{
"epoch": 0.4658203125,
"grad_norm": 0.473562628030777,
"learning_rate": 0.00010755642787046125,
"loss": 0.8758799433708191,
"step": 477
},
{
"epoch": 0.466796875,
"grad_norm": 0.41919219493865967,
"learning_rate": 0.00010736015701668302,
"loss": 0.863654375076294,
"step": 478
},
{
"epoch": 0.4677734375,
"grad_norm": 0.4215691089630127,
"learning_rate": 0.0001071638861629048,
"loss": 0.5004569292068481,
"step": 479
},
{
"epoch": 0.46875,
"grad_norm": 0.36801034212112427,
"learning_rate": 0.0001069676153091266,
"loss": 0.9330754280090332,
"step": 480
},
{
"epoch": 0.4697265625,
"grad_norm": 0.42489972710609436,
"learning_rate": 0.00010677134445534837,
"loss": 1.0529820919036865,
"step": 481
},
{
"epoch": 0.470703125,
"grad_norm": 0.4067368507385254,
"learning_rate": 0.00010657507360157018,
"loss": 0.5453970432281494,
"step": 482
},
{
"epoch": 0.4716796875,
"grad_norm": 0.28611162304878235,
"learning_rate": 0.00010637880274779195,
"loss": 0.2348572313785553,
"step": 483
},
{
"epoch": 0.47265625,
"grad_norm": 0.40047627687454224,
"learning_rate": 0.00010618253189401374,
"loss": 0.4776308834552765,
"step": 484
},
{
"epoch": 0.4736328125,
"grad_norm": 0.5168628692626953,
"learning_rate": 0.00010598626104023553,
"loss": 0.9922167062759399,
"step": 485
},
{
"epoch": 0.474609375,
"grad_norm": 0.3620246946811676,
"learning_rate": 0.00010578999018645732,
"loss": 0.7285036444664001,
"step": 486
},
{
"epoch": 0.4755859375,
"grad_norm": 0.42711782455444336,
"learning_rate": 0.00010559371933267909,
"loss": 0.6387231349945068,
"step": 487
},
{
"epoch": 0.4765625,
"grad_norm": 0.2139827311038971,
"learning_rate": 0.0001053974484789009,
"loss": 0.4295338988304138,
"step": 488
},
{
"epoch": 0.4775390625,
"grad_norm": 0.31191739439964294,
"learning_rate": 0.00010520117762512267,
"loss": 0.42860671877861023,
"step": 489
},
{
"epoch": 0.478515625,
"grad_norm": 0.2909379303455353,
"learning_rate": 0.00010500490677134447,
"loss": 0.47065097093582153,
"step": 490
},
{
"epoch": 0.4794921875,
"grad_norm": 0.48990437388420105,
"learning_rate": 0.00010480863591756625,
"loss": 0.8870656490325928,
"step": 491
},
{
"epoch": 0.48046875,
"grad_norm": 0.5662127733230591,
"learning_rate": 0.00010461236506378804,
"loss": 0.8007984161376953,
"step": 492
},
{
"epoch": 0.4814453125,
"grad_norm": 0.3656634986400604,
"learning_rate": 0.00010441609421000981,
"loss": 0.41389334201812744,
"step": 493
},
{
"epoch": 0.482421875,
"grad_norm": 0.39840465784072876,
"learning_rate": 0.0001042198233562316,
"loss": 0.6927056908607483,
"step": 494
},
{
"epoch": 0.4833984375,
"grad_norm": 0.641647219657898,
"learning_rate": 0.00010402355250245339,
"loss": 0.7912976145744324,
"step": 495
},
{
"epoch": 0.484375,
"grad_norm": 0.4522266685962677,
"learning_rate": 0.00010382728164867517,
"loss": 0.615374743938446,
"step": 496
},
{
"epoch": 0.4853515625,
"grad_norm": 0.415444016456604,
"learning_rate": 0.00010363101079489697,
"loss": 0.8559135794639587,
"step": 497
},
{
"epoch": 0.486328125,
"grad_norm": 0.4477578401565552,
"learning_rate": 0.00010343473994111874,
"loss": 0.6109384298324585,
"step": 498
},
{
"epoch": 0.4873046875,
"grad_norm": 0.33097633719444275,
"learning_rate": 0.00010323846908734053,
"loss": 0.6325762271881104,
"step": 499
},
{
"epoch": 0.48828125,
"grad_norm": 0.38771572709083557,
"learning_rate": 0.00010304219823356232,
"loss": 0.5979640483856201,
"step": 500
},
{
"epoch": 0.4892578125,
"grad_norm": 0.3339928984642029,
"learning_rate": 0.00010284592737978411,
"loss": 0.6619001626968384,
"step": 501
},
{
"epoch": 0.490234375,
"grad_norm": 0.6400135159492493,
"learning_rate": 0.00010264965652600588,
"loss": 0.28338727355003357,
"step": 502
},
{
"epoch": 0.4912109375,
"grad_norm": 0.35763970017433167,
"learning_rate": 0.00010245338567222769,
"loss": 0.6373124122619629,
"step": 503
},
{
"epoch": 0.4921875,
"grad_norm": 0.2136622965335846,
"learning_rate": 0.00010225711481844946,
"loss": 0.2315329760313034,
"step": 504
},
{
"epoch": 0.4931640625,
"grad_norm": 0.6324110627174377,
"learning_rate": 0.00010206084396467126,
"loss": 1.0045514106750488,
"step": 505
},
{
"epoch": 0.494140625,
"grad_norm": 0.4471307694911957,
"learning_rate": 0.00010186457311089304,
"loss": 0.5188390016555786,
"step": 506
},
{
"epoch": 0.4951171875,
"grad_norm": 0.38222211599349976,
"learning_rate": 0.00010166830225711483,
"loss": 0.7351740598678589,
"step": 507
},
{
"epoch": 0.49609375,
"grad_norm": 0.41885000467300415,
"learning_rate": 0.0001014720314033366,
"loss": 0.9071688055992126,
"step": 508
},
{
"epoch": 0.4970703125,
"grad_norm": 0.8193621635437012,
"learning_rate": 0.00010127576054955839,
"loss": 0.7240473031997681,
"step": 509
},
{
"epoch": 0.498046875,
"grad_norm": 0.2846645712852478,
"learning_rate": 0.00010107948969578018,
"loss": 0.351628839969635,
"step": 510
},
{
"epoch": 0.4990234375,
"grad_norm": 0.4778954088687897,
"learning_rate": 0.00010088321884200196,
"loss": 0.7705833911895752,
"step": 511
},
{
"epoch": 0.5,
"grad_norm": 0.3384702503681183,
"learning_rate": 0.00010068694798822376,
"loss": 0.5467265248298645,
"step": 512
},
{
"epoch": 0.5009765625,
"grad_norm": 0.43917056918144226,
"learning_rate": 0.00010049067713444553,
"loss": 0.9810686707496643,
"step": 513
},
{
"epoch": 0.501953125,
"grad_norm": 0.4351615607738495,
"learning_rate": 0.00010029440628066732,
"loss": 0.9716764688491821,
"step": 514
},
{
"epoch": 0.5029296875,
"grad_norm": 0.49873459339141846,
"learning_rate": 0.00010009813542688911,
"loss": 0.9183788299560547,
"step": 515
},
{
"epoch": 0.50390625,
"grad_norm": 0.36710789799690247,
"learning_rate": 9.990186457311089e-05,
"loss": 0.49884548783302307,
"step": 516
},
{
"epoch": 0.5048828125,
"grad_norm": 0.5402531623840332,
"learning_rate": 9.970559371933269e-05,
"loss": 0.6645570993423462,
"step": 517
},
{
"epoch": 0.505859375,
"grad_norm": 0.4990559220314026,
"learning_rate": 9.950932286555447e-05,
"loss": 1.0321924686431885,
"step": 518
},
{
"epoch": 0.5068359375,
"grad_norm": 0.4634752869606018,
"learning_rate": 9.931305201177625e-05,
"loss": 0.8484972715377808,
"step": 519
},
{
"epoch": 0.5078125,
"grad_norm": 0.38584330677986145,
"learning_rate": 9.911678115799805e-05,
"loss": 0.3424939513206482,
"step": 520
},
{
"epoch": 0.5087890625,
"grad_norm": 0.41148415207862854,
"learning_rate": 9.892051030421983e-05,
"loss": 0.7890703678131104,
"step": 521
},
{
"epoch": 0.509765625,
"grad_norm": 0.35891374945640564,
"learning_rate": 9.872423945044161e-05,
"loss": 0.7387750744819641,
"step": 522
},
{
"epoch": 0.5107421875,
"grad_norm": 0.4174203872680664,
"learning_rate": 9.85279685966634e-05,
"loss": 0.5610706806182861,
"step": 523
},
{
"epoch": 0.51171875,
"grad_norm": 0.4062010645866394,
"learning_rate": 9.833169774288519e-05,
"loss": 0.6016039252281189,
"step": 524
},
{
"epoch": 0.5126953125,
"grad_norm": 0.35915061831474304,
"learning_rate": 9.813542688910697e-05,
"loss": 0.37933990359306335,
"step": 525
},
{
"epoch": 0.513671875,
"grad_norm": 0.49826234579086304,
"learning_rate": 9.793915603532877e-05,
"loss": 0.9650976657867432,
"step": 526
},
{
"epoch": 0.5146484375,
"grad_norm": 0.4122180938720703,
"learning_rate": 9.774288518155055e-05,
"loss": 0.5477824211120605,
"step": 527
},
{
"epoch": 0.515625,
"grad_norm": 0.3824058175086975,
"learning_rate": 9.754661432777233e-05,
"loss": 0.5163108706474304,
"step": 528
},
{
"epoch": 0.5166015625,
"grad_norm": 0.4485555589199066,
"learning_rate": 9.735034347399413e-05,
"loss": 0.9402418732643127,
"step": 529
},
{
"epoch": 0.517578125,
"grad_norm": 0.4053209722042084,
"learning_rate": 9.715407262021591e-05,
"loss": 0.9314478039741516,
"step": 530
},
{
"epoch": 0.5185546875,
"grad_norm": 0.3183811604976654,
"learning_rate": 9.695780176643768e-05,
"loss": 0.6706205606460571,
"step": 531
},
{
"epoch": 0.51953125,
"grad_norm": 0.40083932876586914,
"learning_rate": 9.676153091265947e-05,
"loss": 1.102424144744873,
"step": 532
},
{
"epoch": 0.5205078125,
"grad_norm": 0.5949054956436157,
"learning_rate": 9.656526005888126e-05,
"loss": 0.8396608829498291,
"step": 533
},
{
"epoch": 0.521484375,
"grad_norm": 0.41966959834098816,
"learning_rate": 9.636898920510304e-05,
"loss": 0.5641101002693176,
"step": 534
},
{
"epoch": 0.5224609375,
"grad_norm": 0.448281466960907,
"learning_rate": 9.617271835132484e-05,
"loss": 0.44873932003974915,
"step": 535
},
{
"epoch": 0.5234375,
"grad_norm": 0.47785645723342896,
"learning_rate": 9.597644749754662e-05,
"loss": 0.8799008131027222,
"step": 536
},
{
"epoch": 0.5244140625,
"grad_norm": 0.45459261536598206,
"learning_rate": 9.57801766437684e-05,
"loss": 0.8261788487434387,
"step": 537
},
{
"epoch": 0.525390625,
"grad_norm": 0.6168074607849121,
"learning_rate": 9.55839057899902e-05,
"loss": 0.9762136936187744,
"step": 538
},
{
"epoch": 0.5263671875,
"grad_norm": 0.6500818133354187,
"learning_rate": 9.538763493621198e-05,
"loss": 0.9044640064239502,
"step": 539
},
{
"epoch": 0.52734375,
"grad_norm": 0.31668490171432495,
"learning_rate": 9.519136408243376e-05,
"loss": 0.42503029108047485,
"step": 540
},
{
"epoch": 0.5283203125,
"grad_norm": 0.4041314721107483,
"learning_rate": 9.499509322865556e-05,
"loss": 0.6643175482749939,
"step": 541
},
{
"epoch": 0.529296875,
"grad_norm": 1.011020541191101,
"learning_rate": 9.479882237487734e-05,
"loss": 0.7636033892631531,
"step": 542
},
{
"epoch": 0.5302734375,
"grad_norm": 0.3690396845340729,
"learning_rate": 9.460255152109912e-05,
"loss": 1.0516947507858276,
"step": 543
},
{
"epoch": 0.53125,
"grad_norm": 0.288604199886322,
"learning_rate": 9.440628066732092e-05,
"loss": 0.3806208372116089,
"step": 544
},
{
"epoch": 0.5322265625,
"grad_norm": 0.4247501790523529,
"learning_rate": 9.42100098135427e-05,
"loss": 0.8651745319366455,
"step": 545
},
{
"epoch": 0.533203125,
"grad_norm": 1.1893255710601807,
"learning_rate": 9.401373895976447e-05,
"loss": 0.28601521253585815,
"step": 546
},
{
"epoch": 0.5341796875,
"grad_norm": 0.3229619562625885,
"learning_rate": 9.381746810598626e-05,
"loss": 0.8316909670829773,
"step": 547
},
{
"epoch": 0.53515625,
"grad_norm": 0.390278160572052,
"learning_rate": 9.362119725220805e-05,
"loss": 0.7263185977935791,
"step": 548
},
{
"epoch": 0.5361328125,
"grad_norm": 0.2949998378753662,
"learning_rate": 9.342492639842983e-05,
"loss": 0.5417062044143677,
"step": 549
},
{
"epoch": 0.537109375,
"grad_norm": 0.47482210397720337,
"learning_rate": 9.322865554465163e-05,
"loss": 0.6505849361419678,
"step": 550
},
{
"epoch": 0.5380859375,
"grad_norm": 0.3653123676776886,
"learning_rate": 9.303238469087341e-05,
"loss": 0.7270935773849487,
"step": 551
},
{
"epoch": 0.5390625,
"grad_norm": 0.5652351975440979,
"learning_rate": 9.283611383709519e-05,
"loss": 0.8330069780349731,
"step": 552
},
{
"epoch": 0.5400390625,
"grad_norm": 0.448408842086792,
"learning_rate": 9.263984298331699e-05,
"loss": 0.8804951310157776,
"step": 553
},
{
"epoch": 0.541015625,
"grad_norm": 0.7700690031051636,
"learning_rate": 9.244357212953877e-05,
"loss": 0.6466813087463379,
"step": 554
},
{
"epoch": 0.5419921875,
"grad_norm": 0.45755863189697266,
"learning_rate": 9.224730127576055e-05,
"loss": 0.5548572540283203,
"step": 555
},
{
"epoch": 0.54296875,
"grad_norm": 0.4113846719264984,
"learning_rate": 9.205103042198235e-05,
"loss": 0.9286736845970154,
"step": 556
},
{
"epoch": 0.5439453125,
"grad_norm": 0.4555431604385376,
"learning_rate": 9.185475956820413e-05,
"loss": 0.8332977890968323,
"step": 557
},
{
"epoch": 0.544921875,
"grad_norm": 0.5103408098220825,
"learning_rate": 9.165848871442591e-05,
"loss": 1.0110094547271729,
"step": 558
},
{
"epoch": 0.5458984375,
"grad_norm": 0.299912691116333,
"learning_rate": 9.146221786064771e-05,
"loss": 0.3136459290981293,
"step": 559
},
{
"epoch": 0.546875,
"grad_norm": 0.40499091148376465,
"learning_rate": 9.126594700686948e-05,
"loss": 0.6785961389541626,
"step": 560
},
{
"epoch": 0.5478515625,
"grad_norm": 0.4190375804901123,
"learning_rate": 9.106967615309127e-05,
"loss": 0.9891744256019592,
"step": 561
},
{
"epoch": 0.548828125,
"grad_norm": 0.6265519261360168,
"learning_rate": 9.087340529931305e-05,
"loss": 0.48712462186813354,
"step": 562
},
{
"epoch": 0.5498046875,
"grad_norm": 0.466420978307724,
"learning_rate": 9.067713444553484e-05,
"loss": 0.5573943257331848,
"step": 563
},
{
"epoch": 0.55078125,
"grad_norm": 0.3990301191806793,
"learning_rate": 9.048086359175663e-05,
"loss": 0.5893411040306091,
"step": 564
},
{
"epoch": 0.5517578125,
"grad_norm": 0.31471043825149536,
"learning_rate": 9.028459273797842e-05,
"loss": 0.593424379825592,
"step": 565
},
{
"epoch": 0.552734375,
"grad_norm": 0.46789905428886414,
"learning_rate": 9.00883218842002e-05,
"loss": 0.9398684501647949,
"step": 566
},
{
"epoch": 0.5537109375,
"grad_norm": 0.48358282446861267,
"learning_rate": 8.989205103042198e-05,
"loss": 0.895098865032196,
"step": 567
},
{
"epoch": 0.5546875,
"grad_norm": 0.25878453254699707,
"learning_rate": 8.969578017664378e-05,
"loss": 0.4817226231098175,
"step": 568
},
{
"epoch": 0.5556640625,
"grad_norm": 0.5319378972053528,
"learning_rate": 8.949950932286556e-05,
"loss": 0.6119651794433594,
"step": 569
},
{
"epoch": 0.556640625,
"grad_norm": 0.3002898097038269,
"learning_rate": 8.930323846908734e-05,
"loss": 0.28599199652671814,
"step": 570
},
{
"epoch": 0.5576171875,
"grad_norm": 0.37161317467689514,
"learning_rate": 8.910696761530914e-05,
"loss": 0.3307079076766968,
"step": 571
},
{
"epoch": 0.55859375,
"grad_norm": 0.4755436182022095,
"learning_rate": 8.891069676153092e-05,
"loss": 0.5868921279907227,
"step": 572
},
{
"epoch": 0.5595703125,
"grad_norm": 0.3264123499393463,
"learning_rate": 8.87144259077527e-05,
"loss": 0.6682905554771423,
"step": 573
},
{
"epoch": 0.560546875,
"grad_norm": 0.43468573689460754,
"learning_rate": 8.85181550539745e-05,
"loss": 0.6316066980361938,
"step": 574
},
{
"epoch": 0.5615234375,
"grad_norm": 0.5759416222572327,
"learning_rate": 8.832188420019627e-05,
"loss": 0.5687480568885803,
"step": 575
},
{
"epoch": 0.5625,
"grad_norm": 0.39352041482925415,
"learning_rate": 8.812561334641806e-05,
"loss": 0.3803275525569916,
"step": 576
},
{
"epoch": 0.5634765625,
"grad_norm": 0.4155440926551819,
"learning_rate": 8.792934249263984e-05,
"loss": 0.3923049569129944,
"step": 577
},
{
"epoch": 0.564453125,
"grad_norm": 0.34934133291244507,
"learning_rate": 8.773307163886163e-05,
"loss": 0.7100962996482849,
"step": 578
},
{
"epoch": 0.5654296875,
"grad_norm": 0.3993069529533386,
"learning_rate": 8.753680078508342e-05,
"loss": 0.6711176037788391,
"step": 579
},
{
"epoch": 0.56640625,
"grad_norm": 0.3445776700973511,
"learning_rate": 8.73405299313052e-05,
"loss": 0.6986067295074463,
"step": 580
},
{
"epoch": 0.5673828125,
"grad_norm": 0.45837292075157166,
"learning_rate": 8.714425907752699e-05,
"loss": 0.9020513892173767,
"step": 581
},
{
"epoch": 0.568359375,
"grad_norm": 0.3630208671092987,
"learning_rate": 8.694798822374878e-05,
"loss": 0.42499858140945435,
"step": 582
},
{
"epoch": 0.5693359375,
"grad_norm": 0.41205838322639465,
"learning_rate": 8.675171736997057e-05,
"loss": 0.5535018444061279,
"step": 583
},
{
"epoch": 0.5703125,
"grad_norm": 0.2596284747123718,
"learning_rate": 8.655544651619235e-05,
"loss": 0.3234618902206421,
"step": 584
},
{
"epoch": 0.5712890625,
"grad_norm": 0.3716956079006195,
"learning_rate": 8.635917566241414e-05,
"loss": 0.7567611932754517,
"step": 585
},
{
"epoch": 0.572265625,
"grad_norm": 0.42999619245529175,
"learning_rate": 8.616290480863593e-05,
"loss": 0.8695427179336548,
"step": 586
},
{
"epoch": 0.5732421875,
"grad_norm": 0.3309305012226105,
"learning_rate": 8.596663395485771e-05,
"loss": 0.989714503288269,
"step": 587
},
{
"epoch": 0.57421875,
"grad_norm": 0.40024474263191223,
"learning_rate": 8.57703631010795e-05,
"loss": 1.0608711242675781,
"step": 588
},
{
"epoch": 0.5751953125,
"grad_norm": 0.453950434923172,
"learning_rate": 8.557409224730129e-05,
"loss": 0.7340632677078247,
"step": 589
},
{
"epoch": 0.576171875,
"grad_norm": 0.4473342299461365,
"learning_rate": 8.537782139352306e-05,
"loss": 0.7264219522476196,
"step": 590
},
{
"epoch": 0.5771484375,
"grad_norm": 0.420469731092453,
"learning_rate": 8.518155053974485e-05,
"loss": 0.8141539692878723,
"step": 591
},
{
"epoch": 0.578125,
"grad_norm": 0.4068243205547333,
"learning_rate": 8.498527968596663e-05,
"loss": 0.5802872180938721,
"step": 592
},
{
"epoch": 0.5791015625,
"grad_norm": 0.4243272840976715,
"learning_rate": 8.478900883218842e-05,
"loss": 0.350595086812973,
"step": 593
},
{
"epoch": 0.580078125,
"grad_norm": 0.4519834518432617,
"learning_rate": 8.459273797841021e-05,
"loss": 0.7131458520889282,
"step": 594
},
{
"epoch": 0.5810546875,
"grad_norm": 0.34145745635032654,
"learning_rate": 8.4396467124632e-05,
"loss": 0.7618221640586853,
"step": 595
},
{
"epoch": 0.58203125,
"grad_norm": 0.46494174003601074,
"learning_rate": 8.420019627085378e-05,
"loss": 0.5102145075798035,
"step": 596
},
{
"epoch": 0.5830078125,
"grad_norm": 0.3305060565471649,
"learning_rate": 8.400392541707557e-05,
"loss": 0.7812811732292175,
"step": 597
},
{
"epoch": 0.583984375,
"grad_norm": 0.47092583775520325,
"learning_rate": 8.380765456329736e-05,
"loss": 0.7497634887695312,
"step": 598
},
{
"epoch": 0.5849609375,
"grad_norm": 0.38902655243873596,
"learning_rate": 8.361138370951914e-05,
"loss": 0.4198119640350342,
"step": 599
},
{
"epoch": 0.5859375,
"grad_norm": 0.43659287691116333,
"learning_rate": 8.341511285574093e-05,
"loss": 0.824333667755127,
"step": 600
},
{
"epoch": 0.5869140625,
"grad_norm": 0.4277879595756531,
"learning_rate": 8.321884200196272e-05,
"loss": 0.445267915725708,
"step": 601
},
{
"epoch": 0.587890625,
"grad_norm": 0.3186829090118408,
"learning_rate": 8.30225711481845e-05,
"loss": 0.9906235337257385,
"step": 602
},
{
"epoch": 0.5888671875,
"grad_norm": 0.2983294427394867,
"learning_rate": 8.28263002944063e-05,
"loss": 0.5342146754264832,
"step": 603
},
{
"epoch": 0.58984375,
"grad_norm": 0.4127228856086731,
"learning_rate": 8.263002944062808e-05,
"loss": 0.41288450360298157,
"step": 604
},
{
"epoch": 0.5908203125,
"grad_norm": 0.3961617052555084,
"learning_rate": 8.243375858684985e-05,
"loss": 0.43576663732528687,
"step": 605
},
{
"epoch": 0.591796875,
"grad_norm": 0.4124387502670288,
"learning_rate": 8.223748773307164e-05,
"loss": 0.5837401747703552,
"step": 606
},
{
"epoch": 0.5927734375,
"grad_norm": 0.4274151921272278,
"learning_rate": 8.204121687929342e-05,
"loss": 0.8666547536849976,
"step": 607
},
{
"epoch": 0.59375,
"grad_norm": 0.3881700932979584,
"learning_rate": 8.18449460255152e-05,
"loss": 0.9063656330108643,
"step": 608
},
{
"epoch": 0.5947265625,
"grad_norm": 0.46216556429862976,
"learning_rate": 8.1648675171737e-05,
"loss": 0.4573599696159363,
"step": 609
},
{
"epoch": 0.595703125,
"grad_norm": 0.3843960762023926,
"learning_rate": 8.145240431795878e-05,
"loss": 0.6214632391929626,
"step": 610
},
{
"epoch": 0.5966796875,
"grad_norm": 0.538301408290863,
"learning_rate": 8.125613346418057e-05,
"loss": 0.8800979852676392,
"step": 611
},
{
"epoch": 0.59765625,
"grad_norm": 0.49643319845199585,
"learning_rate": 8.105986261040236e-05,
"loss": 0.48715031147003174,
"step": 612
},
{
"epoch": 0.5986328125,
"grad_norm": 0.4753062427043915,
"learning_rate": 8.086359175662415e-05,
"loss": 0.8127011060714722,
"step": 613
},
{
"epoch": 0.599609375,
"grad_norm": 0.7572022676467896,
"learning_rate": 8.066732090284593e-05,
"loss": 0.7151535153388977,
"step": 614
},
{
"epoch": 0.6005859375,
"grad_norm": 0.35117295384407043,
"learning_rate": 8.047105004906772e-05,
"loss": 0.9221618175506592,
"step": 615
},
{
"epoch": 0.6015625,
"grad_norm": 0.2643633186817169,
"learning_rate": 8.02747791952895e-05,
"loss": 0.5025840401649475,
"step": 616
},
{
"epoch": 0.6025390625,
"grad_norm": 0.45553916692733765,
"learning_rate": 8.007850834151129e-05,
"loss": 0.452494740486145,
"step": 617
},
{
"epoch": 0.603515625,
"grad_norm": 0.386594295501709,
"learning_rate": 7.988223748773308e-05,
"loss": 0.7942792773246765,
"step": 618
},
{
"epoch": 0.6044921875,
"grad_norm": 0.3616650700569153,
"learning_rate": 7.968596663395485e-05,
"loss": 0.5697340965270996,
"step": 619
},
{
"epoch": 0.60546875,
"grad_norm": 0.3885051906108856,
"learning_rate": 7.948969578017665e-05,
"loss": 0.7082506418228149,
"step": 620
},
{
"epoch": 0.6064453125,
"grad_norm": 0.4484117329120636,
"learning_rate": 7.929342492639843e-05,
"loss": 0.5993860960006714,
"step": 621
},
{
"epoch": 0.607421875,
"grad_norm": 0.44654563069343567,
"learning_rate": 7.909715407262021e-05,
"loss": 0.5804839134216309,
"step": 622
},
{
"epoch": 0.6083984375,
"grad_norm": 0.3943687081336975,
"learning_rate": 7.890088321884201e-05,
"loss": 0.6422688364982605,
"step": 623
},
{
"epoch": 0.609375,
"grad_norm": 0.4153381288051605,
"learning_rate": 7.870461236506379e-05,
"loss": 0.6437400579452515,
"step": 624
},
{
"epoch": 0.6103515625,
"grad_norm": 0.38221171498298645,
"learning_rate": 7.850834151128557e-05,
"loss": 0.8738820552825928,
"step": 625
},
{
"epoch": 0.611328125,
"grad_norm": 0.339599609375,
"learning_rate": 7.831207065750737e-05,
"loss": 0.517478883266449,
"step": 626
},
{
"epoch": 0.6123046875,
"grad_norm": 0.7177076935768127,
"learning_rate": 7.811579980372915e-05,
"loss": 0.7372115254402161,
"step": 627
},
{
"epoch": 0.61328125,
"grad_norm": 0.47573140263557434,
"learning_rate": 7.791952894995093e-05,
"loss": 0.649010181427002,
"step": 628
},
{
"epoch": 0.6142578125,
"grad_norm": 0.44851094484329224,
"learning_rate": 7.772325809617273e-05,
"loss": 0.6269842386245728,
"step": 629
},
{
"epoch": 0.615234375,
"grad_norm": 0.3544669449329376,
"learning_rate": 7.752698724239451e-05,
"loss": 0.8870983123779297,
"step": 630
},
{
"epoch": 0.6162109375,
"grad_norm": 0.4103491008281708,
"learning_rate": 7.73307163886163e-05,
"loss": 0.8711034059524536,
"step": 631
},
{
"epoch": 0.6171875,
"grad_norm": 0.3651062548160553,
"learning_rate": 7.713444553483808e-05,
"loss": 0.8420337438583374,
"step": 632
},
{
"epoch": 0.6181640625,
"grad_norm": 0.4135638475418091,
"learning_rate": 7.693817468105987e-05,
"loss": 0.601078450679779,
"step": 633
},
{
"epoch": 0.619140625,
"grad_norm": 0.5965299010276794,
"learning_rate": 7.674190382728164e-05,
"loss": 0.604471743106842,
"step": 634
},
{
"epoch": 0.6201171875,
"grad_norm": 0.4340416491031647,
"learning_rate": 7.654563297350344e-05,
"loss": 0.905183732509613,
"step": 635
},
{
"epoch": 0.62109375,
"grad_norm": 0.361518919467926,
"learning_rate": 7.634936211972522e-05,
"loss": 0.6569675207138062,
"step": 636
},
{
"epoch": 0.6220703125,
"grad_norm": 1.04604971408844,
"learning_rate": 7.6153091265947e-05,
"loss": 0.7399482727050781,
"step": 637
},
{
"epoch": 0.623046875,
"grad_norm": 0.8039460778236389,
"learning_rate": 7.59568204121688e-05,
"loss": 0.6003617644309998,
"step": 638
},
{
"epoch": 0.6240234375,
"grad_norm": 0.5462118983268738,
"learning_rate": 7.576054955839058e-05,
"loss": 0.7750217914581299,
"step": 639
},
{
"epoch": 0.625,
"grad_norm": 0.29333505034446716,
"learning_rate": 7.556427870461236e-05,
"loss": 0.47371456027030945,
"step": 640
},
{
"epoch": 0.6259765625,
"grad_norm": 0.2468312531709671,
"learning_rate": 7.536800785083416e-05,
"loss": 0.4615188241004944,
"step": 641
},
{
"epoch": 0.626953125,
"grad_norm": 0.48467332124710083,
"learning_rate": 7.517173699705594e-05,
"loss": 0.6456693410873413,
"step": 642
},
{
"epoch": 0.6279296875,
"grad_norm": 0.5471943020820618,
"learning_rate": 7.497546614327772e-05,
"loss": 0.5899155139923096,
"step": 643
},
{
"epoch": 0.62890625,
"grad_norm": 0.3715604841709137,
"learning_rate": 7.477919528949952e-05,
"loss": 0.7910970449447632,
"step": 644
},
{
"epoch": 0.6298828125,
"grad_norm": 0.3298327922821045,
"learning_rate": 7.45829244357213e-05,
"loss": 0.5769776701927185,
"step": 645
},
{
"epoch": 0.630859375,
"grad_norm": 0.44131916761398315,
"learning_rate": 7.438665358194309e-05,
"loss": 0.8805806636810303,
"step": 646
},
{
"epoch": 0.6318359375,
"grad_norm": 0.4686948359012604,
"learning_rate": 7.419038272816488e-05,
"loss": 0.7262091636657715,
"step": 647
},
{
"epoch": 0.6328125,
"grad_norm": 0.48123931884765625,
"learning_rate": 7.399411187438666e-05,
"loss": 0.8481992483139038,
"step": 648
},
{
"epoch": 0.6337890625,
"grad_norm": 0.5582646131515503,
"learning_rate": 7.379784102060843e-05,
"loss": 0.4963653087615967,
"step": 649
},
{
"epoch": 0.634765625,
"grad_norm": 0.30464881658554077,
"learning_rate": 7.360157016683023e-05,
"loss": 0.6772556900978088,
"step": 650
},
{
"epoch": 0.6357421875,
"grad_norm": 0.44710803031921387,
"learning_rate": 7.340529931305201e-05,
"loss": 0.5476983189582825,
"step": 651
},
{
"epoch": 0.63671875,
"grad_norm": 0.35922887921333313,
"learning_rate": 7.320902845927379e-05,
"loss": 0.8256508111953735,
"step": 652
},
{
"epoch": 0.6376953125,
"grad_norm": 0.40085500478744507,
"learning_rate": 7.301275760549559e-05,
"loss": 0.5783500671386719,
"step": 653
},
{
"epoch": 0.638671875,
"grad_norm": 0.47579512000083923,
"learning_rate": 7.281648675171737e-05,
"loss": 0.5591031908988953,
"step": 654
},
{
"epoch": 0.6396484375,
"grad_norm": 0.5594353675842285,
"learning_rate": 7.262021589793915e-05,
"loss": 0.8133666515350342,
"step": 655
},
{
"epoch": 0.640625,
"grad_norm": 0.44030821323394775,
"learning_rate": 7.242394504416095e-05,
"loss": 1.0282940864562988,
"step": 656
},
{
"epoch": 0.6416015625,
"grad_norm": 0.7038627862930298,
"learning_rate": 7.222767419038273e-05,
"loss": 0.2322971373796463,
"step": 657
},
{
"epoch": 0.642578125,
"grad_norm": 0.223698228597641,
"learning_rate": 7.203140333660451e-05,
"loss": 0.7056642174720764,
"step": 658
},
{
"epoch": 0.6435546875,
"grad_norm": 0.3815765976905823,
"learning_rate": 7.183513248282631e-05,
"loss": 1.074477195739746,
"step": 659
},
{
"epoch": 0.64453125,
"grad_norm": 0.35606271028518677,
"learning_rate": 7.163886162904809e-05,
"loss": 0.4300801753997803,
"step": 660
},
{
"epoch": 0.6455078125,
"grad_norm": 0.32899999618530273,
"learning_rate": 7.144259077526988e-05,
"loss": 0.5923078060150146,
"step": 661
},
{
"epoch": 0.646484375,
"grad_norm": 0.49968358874320984,
"learning_rate": 7.124631992149167e-05,
"loss": 0.8295183181762695,
"step": 662
},
{
"epoch": 0.6474609375,
"grad_norm": 0.3393777012825012,
"learning_rate": 7.105004906771345e-05,
"loss": 0.30383622646331787,
"step": 663
},
{
"epoch": 0.6484375,
"grad_norm": 0.24977968633174896,
"learning_rate": 7.085377821393524e-05,
"loss": 0.429612934589386,
"step": 664
},
{
"epoch": 0.6494140625,
"grad_norm": 0.35886242985725403,
"learning_rate": 7.065750736015702e-05,
"loss": 0.9189084768295288,
"step": 665
},
{
"epoch": 0.650390625,
"grad_norm": 0.3856249153614044,
"learning_rate": 7.04612365063788e-05,
"loss": 0.4880048930644989,
"step": 666
},
{
"epoch": 0.6513671875,
"grad_norm": 0.4439884424209595,
"learning_rate": 7.026496565260058e-05,
"loss": 0.7537186145782471,
"step": 667
},
{
"epoch": 0.65234375,
"grad_norm": 0.29563215374946594,
"learning_rate": 7.006869479882238e-05,
"loss": 0.38701343536376953,
"step": 668
},
{
"epoch": 0.6533203125,
"grad_norm": 0.1909576952457428,
"learning_rate": 6.987242394504416e-05,
"loss": 0.15140604972839355,
"step": 669
},
{
"epoch": 0.654296875,
"grad_norm": 0.3344849944114685,
"learning_rate": 6.967615309126594e-05,
"loss": 0.527427077293396,
"step": 670
},
{
"epoch": 0.6552734375,
"grad_norm": 0.3609422743320465,
"learning_rate": 6.947988223748774e-05,
"loss": 0.29116177558898926,
"step": 671
},
{
"epoch": 0.65625,
"grad_norm": 0.4419811964035034,
"learning_rate": 6.928361138370952e-05,
"loss": 0.7166855931282043,
"step": 672
},
{
"epoch": 0.6572265625,
"grad_norm": 0.31890806555747986,
"learning_rate": 6.90873405299313e-05,
"loss": 0.5259425640106201,
"step": 673
},
{
"epoch": 0.658203125,
"grad_norm": 0.39572352170944214,
"learning_rate": 6.88910696761531e-05,
"loss": 0.5964791774749756,
"step": 674
},
{
"epoch": 0.6591796875,
"grad_norm": 0.4501058757305145,
"learning_rate": 6.869479882237488e-05,
"loss": 0.2289922833442688,
"step": 675
},
{
"epoch": 0.66015625,
"grad_norm": 0.2884235680103302,
"learning_rate": 6.849852796859666e-05,
"loss": 0.2730886936187744,
"step": 676
},
{
"epoch": 0.6611328125,
"grad_norm": 0.32970431447029114,
"learning_rate": 6.830225711481846e-05,
"loss": 0.4283568859100342,
"step": 677
},
{
"epoch": 0.662109375,
"grad_norm": 0.39025789499282837,
"learning_rate": 6.810598626104023e-05,
"loss": 0.9361288547515869,
"step": 678
},
{
"epoch": 0.6630859375,
"grad_norm": 0.48386886715888977,
"learning_rate": 6.790971540726203e-05,
"loss": 0.4907494783401489,
"step": 679
},
{
"epoch": 0.6640625,
"grad_norm": 0.41783151030540466,
"learning_rate": 6.771344455348381e-05,
"loss": 0.7485824823379517,
"step": 680
},
{
"epoch": 0.6650390625,
"grad_norm": 0.4826144278049469,
"learning_rate": 6.751717369970559e-05,
"loss": 0.6413211226463318,
"step": 681
},
{
"epoch": 0.666015625,
"grad_norm": 0.27521079778671265,
"learning_rate": 6.732090284592739e-05,
"loss": 0.5747159123420715,
"step": 682
},
{
"epoch": 0.6669921875,
"grad_norm": 0.3745660185813904,
"learning_rate": 6.712463199214917e-05,
"loss": 0.414341002702713,
"step": 683
},
{
"epoch": 0.66796875,
"grad_norm": 0.45048731565475464,
"learning_rate": 6.692836113837095e-05,
"loss": 0.3665570318698883,
"step": 684
},
{
"epoch": 0.6689453125,
"grad_norm": 0.5048633217811584,
"learning_rate": 6.673209028459275e-05,
"loss": 0.5923498272895813,
"step": 685
},
{
"epoch": 0.669921875,
"grad_norm": 0.46423155069351196,
"learning_rate": 6.653581943081453e-05,
"loss": 0.7506915330886841,
"step": 686
},
{
"epoch": 0.6708984375,
"grad_norm": 0.42965108156204224,
"learning_rate": 6.633954857703631e-05,
"loss": 0.7576399445533752,
"step": 687
},
{
"epoch": 0.671875,
"grad_norm": 0.48331597447395325,
"learning_rate": 6.614327772325811e-05,
"loss": 0.5249682068824768,
"step": 688
},
{
"epoch": 0.6728515625,
"grad_norm": 0.4685790240764618,
"learning_rate": 6.594700686947989e-05,
"loss": 0.8056750297546387,
"step": 689
},
{
"epoch": 0.673828125,
"grad_norm": 0.46440044045448303,
"learning_rate": 6.575073601570167e-05,
"loss": 0.9252493381500244,
"step": 690
},
{
"epoch": 0.6748046875,
"grad_norm": 0.46564289927482605,
"learning_rate": 6.555446516192347e-05,
"loss": 0.8182022571563721,
"step": 691
},
{
"epoch": 0.67578125,
"grad_norm": 0.4397750496864319,
"learning_rate": 6.535819430814525e-05,
"loss": 0.7928388118743896,
"step": 692
},
{
"epoch": 0.6767578125,
"grad_norm": 0.3233174681663513,
"learning_rate": 6.516192345436702e-05,
"loss": 0.5252426862716675,
"step": 693
},
{
"epoch": 0.677734375,
"grad_norm": 0.6012148857116699,
"learning_rate": 6.496565260058882e-05,
"loss": 0.44195663928985596,
"step": 694
},
{
"epoch": 0.6787109375,
"grad_norm": 0.6329052448272705,
"learning_rate": 6.47693817468106e-05,
"loss": 0.5354570150375366,
"step": 695
},
{
"epoch": 0.6796875,
"grad_norm": 0.47926270961761475,
"learning_rate": 6.457311089303238e-05,
"loss": 0.4950491786003113,
"step": 696
},
{
"epoch": 0.6806640625,
"grad_norm": 0.5051383972167969,
"learning_rate": 6.437684003925418e-05,
"loss": 0.6795849204063416,
"step": 697
},
{
"epoch": 0.681640625,
"grad_norm": 0.4022398591041565,
"learning_rate": 6.418056918547596e-05,
"loss": 1.0388166904449463,
"step": 698
},
{
"epoch": 0.6826171875,
"grad_norm": 0.4309573471546173,
"learning_rate": 6.398429833169774e-05,
"loss": 0.6022897362709045,
"step": 699
},
{
"epoch": 0.68359375,
"grad_norm": 0.3301983177661896,
"learning_rate": 6.378802747791954e-05,
"loss": 0.6451660394668579,
"step": 700
},
{
"epoch": 0.6845703125,
"grad_norm": 0.6647156476974487,
"learning_rate": 6.359175662414132e-05,
"loss": 0.9699732661247253,
"step": 701
},
{
"epoch": 0.685546875,
"grad_norm": 0.37545597553253174,
"learning_rate": 6.33954857703631e-05,
"loss": 0.43181508779525757,
"step": 702
},
{
"epoch": 0.6865234375,
"grad_norm": 0.40882429480552673,
"learning_rate": 6.31992149165849e-05,
"loss": 0.665264368057251,
"step": 703
},
{
"epoch": 0.6875,
"grad_norm": 0.46597936749458313,
"learning_rate": 6.300294406280668e-05,
"loss": 0.8813620209693909,
"step": 704
},
{
"epoch": 0.6884765625,
"grad_norm": 0.4355461597442627,
"learning_rate": 6.280667320902846e-05,
"loss": 0.595770537853241,
"step": 705
},
{
"epoch": 0.689453125,
"grad_norm": 0.45896056294441223,
"learning_rate": 6.261040235525026e-05,
"loss": 0.7571601271629333,
"step": 706
},
{
"epoch": 0.6904296875,
"grad_norm": 0.37643495202064514,
"learning_rate": 6.241413150147204e-05,
"loss": 0.47930869460105896,
"step": 707
},
{
"epoch": 0.69140625,
"grad_norm": 0.49690738320350647,
"learning_rate": 6.221786064769381e-05,
"loss": 0.3727263808250427,
"step": 708
},
{
"epoch": 0.6923828125,
"grad_norm": 0.44111907482147217,
"learning_rate": 6.20215897939156e-05,
"loss": 0.7276532649993896,
"step": 709
},
{
"epoch": 0.693359375,
"grad_norm": 0.44872644543647766,
"learning_rate": 6.182531894013739e-05,
"loss": 0.5082123279571533,
"step": 710
},
{
"epoch": 0.6943359375,
"grad_norm": 0.3345314562320709,
"learning_rate": 6.162904808635917e-05,
"loss": 0.5472716093063354,
"step": 711
},
{
"epoch": 0.6953125,
"grad_norm": 0.4269154667854309,
"learning_rate": 6.143277723258097e-05,
"loss": 0.7036910057067871,
"step": 712
},
{
"epoch": 0.6962890625,
"grad_norm": 0.5314676761627197,
"learning_rate": 6.123650637880275e-05,
"loss": 0.8663474917411804,
"step": 713
},
{
"epoch": 0.697265625,
"grad_norm": 0.2820166349411011,
"learning_rate": 6.104023552502453e-05,
"loss": 0.6397068500518799,
"step": 714
},
{
"epoch": 0.6982421875,
"grad_norm": 0.40954726934432983,
"learning_rate": 6.084396467124632e-05,
"loss": 0.5477964282035828,
"step": 715
},
{
"epoch": 0.69921875,
"grad_norm": 0.6858615279197693,
"learning_rate": 6.064769381746811e-05,
"loss": 0.694764256477356,
"step": 716
},
{
"epoch": 0.7001953125,
"grad_norm": 2.901998281478882,
"learning_rate": 6.04514229636899e-05,
"loss": 0.5803335309028625,
"step": 717
},
{
"epoch": 0.701171875,
"grad_norm": 0.6065869927406311,
"learning_rate": 6.025515210991168e-05,
"loss": 0.49790292978286743,
"step": 718
},
{
"epoch": 0.7021484375,
"grad_norm": 0.3678690195083618,
"learning_rate": 6.005888125613347e-05,
"loss": 0.38595882058143616,
"step": 719
},
{
"epoch": 0.703125,
"grad_norm": 0.32496991753578186,
"learning_rate": 5.986261040235526e-05,
"loss": 0.3554360866546631,
"step": 720
},
{
"epoch": 0.7041015625,
"grad_norm": 0.5348960161209106,
"learning_rate": 5.966633954857704e-05,
"loss": 1.0386948585510254,
"step": 721
},
{
"epoch": 0.705078125,
"grad_norm": 0.42248818278312683,
"learning_rate": 5.947006869479883e-05,
"loss": 0.4950508177280426,
"step": 722
},
{
"epoch": 0.7060546875,
"grad_norm": 0.36575669050216675,
"learning_rate": 5.9273797841020606e-05,
"loss": 0.8793643712997437,
"step": 723
},
{
"epoch": 0.70703125,
"grad_norm": 0.30802977085113525,
"learning_rate": 5.9077526987242395e-05,
"loss": 0.7557331919670105,
"step": 724
},
{
"epoch": 0.7080078125,
"grad_norm": 0.36057788133621216,
"learning_rate": 5.888125613346418e-05,
"loss": 0.793386697769165,
"step": 725
},
{
"epoch": 0.708984375,
"grad_norm": 0.5049283504486084,
"learning_rate": 5.8684985279685966e-05,
"loss": 0.3805343210697174,
"step": 726
},
{
"epoch": 0.7099609375,
"grad_norm": 0.4448167681694031,
"learning_rate": 5.8488714425907756e-05,
"loss": 0.8297110199928284,
"step": 727
},
{
"epoch": 0.7109375,
"grad_norm": 0.5144803524017334,
"learning_rate": 5.829244357212954e-05,
"loss": 0.8582932949066162,
"step": 728
},
{
"epoch": 0.7119140625,
"grad_norm": 0.48559248447418213,
"learning_rate": 5.809617271835133e-05,
"loss": 0.851997971534729,
"step": 729
},
{
"epoch": 0.712890625,
"grad_norm": 0.5277959704399109,
"learning_rate": 5.7899901864573116e-05,
"loss": 0.8560271859169006,
"step": 730
},
{
"epoch": 0.7138671875,
"grad_norm": 0.39055025577545166,
"learning_rate": 5.77036310107949e-05,
"loss": 0.5023626685142517,
"step": 731
},
{
"epoch": 0.71484375,
"grad_norm": 0.4014328718185425,
"learning_rate": 5.750736015701669e-05,
"loss": 0.7782986760139465,
"step": 732
},
{
"epoch": 0.7158203125,
"grad_norm": 0.9840988516807556,
"learning_rate": 5.731108930323848e-05,
"loss": 0.5097107887268066,
"step": 733
},
{
"epoch": 0.716796875,
"grad_norm": 0.512140691280365,
"learning_rate": 5.711481844946026e-05,
"loss": 0.5448895692825317,
"step": 734
},
{
"epoch": 0.7177734375,
"grad_norm": 0.45195046067237854,
"learning_rate": 5.691854759568205e-05,
"loss": 0.7583330273628235,
"step": 735
},
{
"epoch": 0.71875,
"grad_norm": 0.4155009090900421,
"learning_rate": 5.672227674190384e-05,
"loss": 0.5220797061920166,
"step": 736
},
{
"epoch": 0.7197265625,
"grad_norm": 0.552148699760437,
"learning_rate": 5.652600588812562e-05,
"loss": 0.8043540716171265,
"step": 737
},
{
"epoch": 0.720703125,
"grad_norm": 0.30510297417640686,
"learning_rate": 5.6329735034347396e-05,
"loss": 0.5110808610916138,
"step": 738
},
{
"epoch": 0.7216796875,
"grad_norm": 0.522339940071106,
"learning_rate": 5.6133464180569185e-05,
"loss": 1.0245096683502197,
"step": 739
},
{
"epoch": 0.72265625,
"grad_norm": 0.27751341462135315,
"learning_rate": 5.5937193326790974e-05,
"loss": 0.6376601457595825,
"step": 740
},
{
"epoch": 0.7236328125,
"grad_norm": 0.4283340573310852,
"learning_rate": 5.5740922473012756e-05,
"loss": 1.1317777633666992,
"step": 741
},
{
"epoch": 0.724609375,
"grad_norm": 0.541248619556427,
"learning_rate": 5.5544651619234545e-05,
"loss": 0.8086187839508057,
"step": 742
},
{
"epoch": 0.7255859375,
"grad_norm": 0.24750906229019165,
"learning_rate": 5.5348380765456335e-05,
"loss": 0.4873177409172058,
"step": 743
},
{
"epoch": 0.7265625,
"grad_norm": 0.42374616861343384,
"learning_rate": 5.515210991167812e-05,
"loss": 0.41606956720352173,
"step": 744
},
{
"epoch": 0.7275390625,
"grad_norm": 0.35455161333084106,
"learning_rate": 5.4955839057899906e-05,
"loss": 0.49936947226524353,
"step": 745
},
{
"epoch": 0.728515625,
"grad_norm": 0.4243617653846741,
"learning_rate": 5.475956820412169e-05,
"loss": 0.6650359630584717,
"step": 746
},
{
"epoch": 0.7294921875,
"grad_norm": 0.4106060862541199,
"learning_rate": 5.456329735034348e-05,
"loss": 0.37870654463768005,
"step": 747
},
{
"epoch": 0.73046875,
"grad_norm": 0.3536394536495209,
"learning_rate": 5.436702649656527e-05,
"loss": 1.0944924354553223,
"step": 748
},
{
"epoch": 0.7314453125,
"grad_norm": 0.3067559003829956,
"learning_rate": 5.417075564278705e-05,
"loss": 0.6380996704101562,
"step": 749
},
{
"epoch": 0.732421875,
"grad_norm": 0.40423691272735596,
"learning_rate": 5.397448478900884e-05,
"loss": 0.712358295917511,
"step": 750
},
{
"epoch": 0.7333984375,
"grad_norm": 0.451038658618927,
"learning_rate": 5.377821393523063e-05,
"loss": 0.6221305727958679,
"step": 751
},
{
"epoch": 0.734375,
"grad_norm": 0.32606229186058044,
"learning_rate": 5.35819430814524e-05,
"loss": 0.6600078344345093,
"step": 752
},
{
"epoch": 0.7353515625,
"grad_norm": 0.746896505355835,
"learning_rate": 5.3385672227674185e-05,
"loss": 0.5533967614173889,
"step": 753
},
{
"epoch": 0.736328125,
"grad_norm": 0.403277724981308,
"learning_rate": 5.3189401373895974e-05,
"loss": 0.7483388185501099,
"step": 754
},
{
"epoch": 0.7373046875,
"grad_norm": 0.6016709208488464,
"learning_rate": 5.2993130520117764e-05,
"loss": 0.539909839630127,
"step": 755
},
{
"epoch": 0.73828125,
"grad_norm": 0.39885231852531433,
"learning_rate": 5.2796859666339546e-05,
"loss": 0.7900533676147461,
"step": 756
},
{
"epoch": 0.7392578125,
"grad_norm": 0.3245362639427185,
"learning_rate": 5.2600588812561335e-05,
"loss": 0.42862433195114136,
"step": 757
},
{
"epoch": 0.740234375,
"grad_norm": 0.47334104776382446,
"learning_rate": 5.2404317958783124e-05,
"loss": 0.3249909281730652,
"step": 758
},
{
"epoch": 0.7412109375,
"grad_norm": 0.3029737174510956,
"learning_rate": 5.220804710500491e-05,
"loss": 0.4264957308769226,
"step": 759
},
{
"epoch": 0.7421875,
"grad_norm": 0.33878564834594727,
"learning_rate": 5.2011776251226696e-05,
"loss": 0.4446904957294464,
"step": 760
},
{
"epoch": 0.7431640625,
"grad_norm": 0.3307798206806183,
"learning_rate": 5.1815505397448485e-05,
"loss": 0.461605966091156,
"step": 761
},
{
"epoch": 0.744140625,
"grad_norm": 0.4146850109100342,
"learning_rate": 5.161923454367027e-05,
"loss": 0.758568525314331,
"step": 762
},
{
"epoch": 0.7451171875,
"grad_norm": 0.3531327545642853,
"learning_rate": 5.1422963689892056e-05,
"loss": 0.4580535292625427,
"step": 763
},
{
"epoch": 0.74609375,
"grad_norm": 0.3952695429325104,
"learning_rate": 5.1226692836113846e-05,
"loss": 0.333244651556015,
"step": 764
},
{
"epoch": 0.7470703125,
"grad_norm": 0.5774162411689758,
"learning_rate": 5.103042198233563e-05,
"loss": 0.6433362364768982,
"step": 765
},
{
"epoch": 0.748046875,
"grad_norm": 0.49668964743614197,
"learning_rate": 5.083415112855742e-05,
"loss": 0.8478100895881653,
"step": 766
},
{
"epoch": 0.7490234375,
"grad_norm": 0.3303810954093933,
"learning_rate": 5.063788027477919e-05,
"loss": 0.7296837568283081,
"step": 767
},
{
"epoch": 0.75,
"grad_norm": 0.27652832865715027,
"learning_rate": 5.044160942100098e-05,
"loss": 0.6442312598228455,
"step": 768
},
{
"epoch": 0.7509765625,
"grad_norm": 1.0828924179077148,
"learning_rate": 5.0245338567222764e-05,
"loss": 0.9848635196685791,
"step": 769
},
{
"epoch": 0.751953125,
"grad_norm": 0.38959333300590515,
"learning_rate": 5.0049067713444553e-05,
"loss": 0.722776472568512,
"step": 770
},
{
"epoch": 0.7529296875,
"grad_norm": 0.3470323383808136,
"learning_rate": 4.985279685966634e-05,
"loss": 0.6584157943725586,
"step": 771
},
{
"epoch": 0.75390625,
"grad_norm": 0.4060254693031311,
"learning_rate": 4.9656526005888125e-05,
"loss": 0.6276923418045044,
"step": 772
},
{
"epoch": 0.7548828125,
"grad_norm": 0.34566962718963623,
"learning_rate": 4.9460255152109914e-05,
"loss": 0.972516655921936,
"step": 773
},
{
"epoch": 0.755859375,
"grad_norm": 0.41829708218574524,
"learning_rate": 4.92639842983317e-05,
"loss": 0.6937177181243896,
"step": 774
},
{
"epoch": 0.7568359375,
"grad_norm": 0.7653974294662476,
"learning_rate": 4.9067713444553486e-05,
"loss": 0.6027823090553284,
"step": 775
},
{
"epoch": 0.7578125,
"grad_norm": 1.0477155447006226,
"learning_rate": 4.8871442590775275e-05,
"loss": 0.925806999206543,
"step": 776
},
{
"epoch": 0.7587890625,
"grad_norm": 0.43484824895858765,
"learning_rate": 4.8675171736997064e-05,
"loss": 0.7783142328262329,
"step": 777
},
{
"epoch": 0.759765625,
"grad_norm": 0.33719849586486816,
"learning_rate": 4.847890088321884e-05,
"loss": 0.6108527779579163,
"step": 778
},
{
"epoch": 0.7607421875,
"grad_norm": 0.3983028531074524,
"learning_rate": 4.828263002944063e-05,
"loss": 0.9976012706756592,
"step": 779
},
{
"epoch": 0.76171875,
"grad_norm": 0.3278787136077881,
"learning_rate": 4.808635917566242e-05,
"loss": 0.5754845142364502,
"step": 780
},
{
"epoch": 0.7626953125,
"grad_norm": 0.42433467507362366,
"learning_rate": 4.78900883218842e-05,
"loss": 0.8455826640129089,
"step": 781
},
{
"epoch": 0.763671875,
"grad_norm": 0.33245334029197693,
"learning_rate": 4.769381746810599e-05,
"loss": 0.5207083225250244,
"step": 782
},
{
"epoch": 0.7646484375,
"grad_norm": 0.4390372931957245,
"learning_rate": 4.749754661432778e-05,
"loss": 0.7208432555198669,
"step": 783
},
{
"epoch": 0.765625,
"grad_norm": 0.325720876455307,
"learning_rate": 4.730127576054956e-05,
"loss": 0.3017955422401428,
"step": 784
},
{
"epoch": 0.7666015625,
"grad_norm": 0.3036203980445862,
"learning_rate": 4.710500490677135e-05,
"loss": 0.47869423031806946,
"step": 785
},
{
"epoch": 0.767578125,
"grad_norm": 0.4316065013408661,
"learning_rate": 4.690873405299313e-05,
"loss": 0.7984920740127563,
"step": 786
},
{
"epoch": 0.7685546875,
"grad_norm": 0.46907728910446167,
"learning_rate": 4.6712463199214915e-05,
"loss": 0.7288491725921631,
"step": 787
},
{
"epoch": 0.76953125,
"grad_norm": 0.38269418478012085,
"learning_rate": 4.6516192345436704e-05,
"loss": 0.46745771169662476,
"step": 788
},
{
"epoch": 0.7705078125,
"grad_norm": 0.6045718193054199,
"learning_rate": 4.631992149165849e-05,
"loss": 0.5405256152153015,
"step": 789
},
{
"epoch": 0.771484375,
"grad_norm": 0.3303053677082062,
"learning_rate": 4.6123650637880275e-05,
"loss": 0.6721948981285095,
"step": 790
},
{
"epoch": 0.7724609375,
"grad_norm": 0.42014074325561523,
"learning_rate": 4.5927379784102065e-05,
"loss": 0.9322581887245178,
"step": 791
},
{
"epoch": 0.7734375,
"grad_norm": 0.3720149099826813,
"learning_rate": 4.5731108930323854e-05,
"loss": 0.7807843685150146,
"step": 792
},
{
"epoch": 0.7744140625,
"grad_norm": 0.31559938192367554,
"learning_rate": 4.5534838076545636e-05,
"loss": 0.8503724336624146,
"step": 793
},
{
"epoch": 0.775390625,
"grad_norm": 0.4096013903617859,
"learning_rate": 4.533856722276742e-05,
"loss": 0.6950633525848389,
"step": 794
},
{
"epoch": 0.7763671875,
"grad_norm": 0.3791837990283966,
"learning_rate": 4.514229636898921e-05,
"loss": 0.7583197951316833,
"step": 795
},
{
"epoch": 0.77734375,
"grad_norm": 0.5274584889411926,
"learning_rate": 4.494602551521099e-05,
"loss": 0.4712093770503998,
"step": 796
},
{
"epoch": 0.7783203125,
"grad_norm": 0.29654791951179504,
"learning_rate": 4.474975466143278e-05,
"loss": 0.552979588508606,
"step": 797
},
{
"epoch": 0.779296875,
"grad_norm": 0.25629475712776184,
"learning_rate": 4.455348380765457e-05,
"loss": 0.5225521922111511,
"step": 798
},
{
"epoch": 0.7802734375,
"grad_norm": 0.2676495611667633,
"learning_rate": 4.435721295387635e-05,
"loss": 0.4382556080818176,
"step": 799
},
{
"epoch": 0.78125,
"grad_norm": 0.4117366075515747,
"learning_rate": 4.416094210009813e-05,
"loss": 0.5639417767524719,
"step": 800
},
{
"epoch": 0.7822265625,
"grad_norm": 0.26305386424064636,
"learning_rate": 4.396467124631992e-05,
"loss": 0.28840768337249756,
"step": 801
},
{
"epoch": 0.783203125,
"grad_norm": 0.7253789305686951,
"learning_rate": 4.376840039254171e-05,
"loss": 0.4104336202144623,
"step": 802
},
{
"epoch": 0.7841796875,
"grad_norm": 0.371288001537323,
"learning_rate": 4.3572129538763494e-05,
"loss": 0.609147310256958,
"step": 803
},
{
"epoch": 0.78515625,
"grad_norm": 0.634273111820221,
"learning_rate": 4.337585868498528e-05,
"loss": 0.5141665935516357,
"step": 804
},
{
"epoch": 0.7861328125,
"grad_norm": 0.4442044496536255,
"learning_rate": 4.317958783120707e-05,
"loss": 0.4882044494152069,
"step": 805
},
{
"epoch": 0.787109375,
"grad_norm": 0.3099007308483124,
"learning_rate": 4.2983316977428854e-05,
"loss": 0.3148588538169861,
"step": 806
},
{
"epoch": 0.7880859375,
"grad_norm": 0.41893890500068665,
"learning_rate": 4.2787046123650643e-05,
"loss": 0.6678078174591064,
"step": 807
},
{
"epoch": 0.7890625,
"grad_norm": 0.47682809829711914,
"learning_rate": 4.2590775269872426e-05,
"loss": 0.46614763140678406,
"step": 808
},
{
"epoch": 0.7900390625,
"grad_norm": 0.25193366408348083,
"learning_rate": 4.239450441609421e-05,
"loss": 0.3707652986049652,
"step": 809
},
{
"epoch": 0.791015625,
"grad_norm": 0.3425232768058777,
"learning_rate": 4.2198233562316e-05,
"loss": 0.604179859161377,
"step": 810
},
{
"epoch": 0.7919921875,
"grad_norm": 0.31459808349609375,
"learning_rate": 4.2001962708537786e-05,
"loss": 0.748989999294281,
"step": 811
},
{
"epoch": 0.79296875,
"grad_norm": 0.3478514850139618,
"learning_rate": 4.180569185475957e-05,
"loss": 0.6651142835617065,
"step": 812
},
{
"epoch": 0.7939453125,
"grad_norm": 0.3951675295829773,
"learning_rate": 4.160942100098136e-05,
"loss": 0.7293418049812317,
"step": 813
},
{
"epoch": 0.794921875,
"grad_norm": 0.26888158917427063,
"learning_rate": 4.141315014720315e-05,
"loss": 0.2181730419397354,
"step": 814
},
{
"epoch": 0.7958984375,
"grad_norm": 0.17496585845947266,
"learning_rate": 4.121687929342492e-05,
"loss": 0.18257993459701538,
"step": 815
},
{
"epoch": 0.796875,
"grad_norm": 0.3386918306350708,
"learning_rate": 4.102060843964671e-05,
"loss": 0.43010956048965454,
"step": 816
},
{
"epoch": 0.7978515625,
"grad_norm": 0.5185137987136841,
"learning_rate": 4.08243375858685e-05,
"loss": 0.9117882251739502,
"step": 817
},
{
"epoch": 0.798828125,
"grad_norm": 0.499529093503952,
"learning_rate": 4.0628066732090283e-05,
"loss": 0.8601939678192139,
"step": 818
},
{
"epoch": 0.7998046875,
"grad_norm": 0.44401317834854126,
"learning_rate": 4.043179587831207e-05,
"loss": 0.8643960356712341,
"step": 819
},
{
"epoch": 0.80078125,
"grad_norm": 0.30553653836250305,
"learning_rate": 4.023552502453386e-05,
"loss": 0.7741817235946655,
"step": 820
},
{
"epoch": 0.8017578125,
"grad_norm": 0.443541944026947,
"learning_rate": 4.0039254170755644e-05,
"loss": 0.9571224451065063,
"step": 821
},
{
"epoch": 0.802734375,
"grad_norm": 0.2611587643623352,
"learning_rate": 3.9842983316977426e-05,
"loss": 0.4755222201347351,
"step": 822
},
{
"epoch": 0.8037109375,
"grad_norm": 0.38695722818374634,
"learning_rate": 3.9646712463199216e-05,
"loss": 0.9597996473312378,
"step": 823
},
{
"epoch": 0.8046875,
"grad_norm": 0.505346953868866,
"learning_rate": 3.9450441609421005e-05,
"loss": 0.328266441822052,
"step": 824
},
{
"epoch": 0.8056640625,
"grad_norm": 0.38910478353500366,
"learning_rate": 3.925417075564279e-05,
"loss": 0.4758382737636566,
"step": 825
},
{
"epoch": 0.806640625,
"grad_norm": 0.4268342852592468,
"learning_rate": 3.9057899901864576e-05,
"loss": 0.6131553649902344,
"step": 826
},
{
"epoch": 0.8076171875,
"grad_norm": 0.32205328345298767,
"learning_rate": 3.8861629048086365e-05,
"loss": 0.6047544479370117,
"step": 827
},
{
"epoch": 0.80859375,
"grad_norm": 0.6975948214530945,
"learning_rate": 3.866535819430815e-05,
"loss": 0.7599061727523804,
"step": 828
},
{
"epoch": 0.8095703125,
"grad_norm": 0.20186780393123627,
"learning_rate": 3.846908734052994e-05,
"loss": 0.3639545738697052,
"step": 829
},
{
"epoch": 0.810546875,
"grad_norm": 0.443435937166214,
"learning_rate": 3.827281648675172e-05,
"loss": 0.6933274269104004,
"step": 830
},
{
"epoch": 0.8115234375,
"grad_norm": 0.44157811999320984,
"learning_rate": 3.80765456329735e-05,
"loss": 0.5135524272918701,
"step": 831
},
{
"epoch": 0.8125,
"grad_norm": 0.3959600031375885,
"learning_rate": 3.788027477919529e-05,
"loss": 0.6713152527809143,
"step": 832
},
{
"epoch": 0.8134765625,
"grad_norm": 0.5439519882202148,
"learning_rate": 3.768400392541708e-05,
"loss": 0.3603706359863281,
"step": 833
},
{
"epoch": 0.814453125,
"grad_norm": 0.36693719029426575,
"learning_rate": 3.748773307163886e-05,
"loss": 0.8574247360229492,
"step": 834
},
{
"epoch": 0.8154296875,
"grad_norm": 0.3476804792881012,
"learning_rate": 3.729146221786065e-05,
"loss": 0.6845530867576599,
"step": 835
},
{
"epoch": 0.81640625,
"grad_norm": 0.48850229382514954,
"learning_rate": 3.709519136408244e-05,
"loss": 0.788569450378418,
"step": 836
},
{
"epoch": 0.8173828125,
"grad_norm": 0.5997111797332764,
"learning_rate": 3.6898920510304216e-05,
"loss": 0.5885312557220459,
"step": 837
},
{
"epoch": 0.818359375,
"grad_norm": 0.43312472105026245,
"learning_rate": 3.6702649656526005e-05,
"loss": 0.5300126075744629,
"step": 838
},
{
"epoch": 0.8193359375,
"grad_norm": 0.6505857110023499,
"learning_rate": 3.6506378802747795e-05,
"loss": 0.7164736986160278,
"step": 839
},
{
"epoch": 0.8203125,
"grad_norm": 0.34061765670776367,
"learning_rate": 3.631010794896958e-05,
"loss": 0.5405696034431458,
"step": 840
},
{
"epoch": 0.8212890625,
"grad_norm": 0.4188057780265808,
"learning_rate": 3.6113837095191366e-05,
"loss": 1.0057684183120728,
"step": 841
},
{
"epoch": 0.822265625,
"grad_norm": 0.392007052898407,
"learning_rate": 3.5917566241413155e-05,
"loss": 0.6687936782836914,
"step": 842
},
{
"epoch": 0.8232421875,
"grad_norm": 0.44254210591316223,
"learning_rate": 3.572129538763494e-05,
"loss": 0.39150726795196533,
"step": 843
},
{
"epoch": 0.82421875,
"grad_norm": 0.41756534576416016,
"learning_rate": 3.552502453385673e-05,
"loss": 0.764665961265564,
"step": 844
},
{
"epoch": 0.8251953125,
"grad_norm": 0.9839560985565186,
"learning_rate": 3.532875368007851e-05,
"loss": 0.45259296894073486,
"step": 845
},
{
"epoch": 0.826171875,
"grad_norm": 0.3465111553668976,
"learning_rate": 3.513248282630029e-05,
"loss": 0.5895928740501404,
"step": 846
},
{
"epoch": 0.8271484375,
"grad_norm": 0.4883447289466858,
"learning_rate": 3.493621197252208e-05,
"loss": 0.8401346802711487,
"step": 847
},
{
"epoch": 0.828125,
"grad_norm": 0.3590312898159027,
"learning_rate": 3.473994111874387e-05,
"loss": 0.6134470105171204,
"step": 848
},
{
"epoch": 0.8291015625,
"grad_norm": 0.48273324966430664,
"learning_rate": 3.454367026496565e-05,
"loss": 0.6351644992828369,
"step": 849
},
{
"epoch": 0.830078125,
"grad_norm": 0.32156500220298767,
"learning_rate": 3.434739941118744e-05,
"loss": 0.5098355412483215,
"step": 850
},
{
"epoch": 0.8310546875,
"grad_norm": 0.38239747285842896,
"learning_rate": 3.415112855740923e-05,
"loss": 1.0178660154342651,
"step": 851
},
{
"epoch": 0.83203125,
"grad_norm": 0.6875290274620056,
"learning_rate": 3.395485770363101e-05,
"loss": 0.4496825337409973,
"step": 852
},
{
"epoch": 0.8330078125,
"grad_norm": 0.27034860849380493,
"learning_rate": 3.3758586849852795e-05,
"loss": 0.41253381967544556,
"step": 853
},
{
"epoch": 0.833984375,
"grad_norm": 0.5166223049163818,
"learning_rate": 3.3562315996074584e-05,
"loss": 0.7344639897346497,
"step": 854
},
{
"epoch": 0.8349609375,
"grad_norm": 0.39597758650779724,
"learning_rate": 3.3366045142296373e-05,
"loss": 0.6066821217536926,
"step": 855
},
{
"epoch": 0.8359375,
"grad_norm": 0.44033098220825195,
"learning_rate": 3.3169774288518156e-05,
"loss": 0.7928174734115601,
"step": 856
},
{
"epoch": 0.8369140625,
"grad_norm": 0.3340597450733185,
"learning_rate": 3.2973503434739945e-05,
"loss": 0.4783233404159546,
"step": 857
},
{
"epoch": 0.837890625,
"grad_norm": 0.5634653568267822,
"learning_rate": 3.2777232580961734e-05,
"loss": 0.785845935344696,
"step": 858
},
{
"epoch": 0.8388671875,
"grad_norm": 0.24581296741962433,
"learning_rate": 3.258096172718351e-05,
"loss": 0.36480462551116943,
"step": 859
},
{
"epoch": 0.83984375,
"grad_norm": 0.316773384809494,
"learning_rate": 3.23846908734053e-05,
"loss": 0.886894941329956,
"step": 860
},
{
"epoch": 0.8408203125,
"grad_norm": 0.4605409502983093,
"learning_rate": 3.218842001962709e-05,
"loss": 0.7125131487846375,
"step": 861
},
{
"epoch": 0.841796875,
"grad_norm": 0.5473557114601135,
"learning_rate": 3.199214916584887e-05,
"loss": 0.45582157373428345,
"step": 862
},
{
"epoch": 0.8427734375,
"grad_norm": 0.4604926109313965,
"learning_rate": 3.179587831207066e-05,
"loss": 0.5392733812332153,
"step": 863
},
{
"epoch": 0.84375,
"grad_norm": 0.3192322552204132,
"learning_rate": 3.159960745829245e-05,
"loss": 0.3216538727283478,
"step": 864
},
{
"epoch": 0.8447265625,
"grad_norm": 0.4225713610649109,
"learning_rate": 3.140333660451423e-05,
"loss": 0.36403900384902954,
"step": 865
},
{
"epoch": 0.845703125,
"grad_norm": 0.7738484740257263,
"learning_rate": 3.120706575073602e-05,
"loss": 0.5428112149238586,
"step": 866
},
{
"epoch": 0.8466796875,
"grad_norm": 0.7795976400375366,
"learning_rate": 3.10107948969578e-05,
"loss": 0.838668704032898,
"step": 867
},
{
"epoch": 0.84765625,
"grad_norm": 0.4240044355392456,
"learning_rate": 3.0814524043179585e-05,
"loss": 0.5039677023887634,
"step": 868
},
{
"epoch": 0.8486328125,
"grad_norm": 0.7870606780052185,
"learning_rate": 3.0618253189401374e-05,
"loss": 0.2639703154563904,
"step": 869
},
{
"epoch": 0.849609375,
"grad_norm": 4.898192405700684,
"learning_rate": 3.042198233562316e-05,
"loss": 0.9641809463500977,
"step": 870
},
{
"epoch": 0.8505859375,
"grad_norm": 0.4090663194656372,
"learning_rate": 3.022571148184495e-05,
"loss": 0.5249053835868835,
"step": 871
},
{
"epoch": 0.8515625,
"grad_norm": 0.5761129856109619,
"learning_rate": 3.0029440628066735e-05,
"loss": 0.8987921476364136,
"step": 872
},
{
"epoch": 0.8525390625,
"grad_norm": 0.2440023124217987,
"learning_rate": 2.983316977428852e-05,
"loss": 0.3279159367084503,
"step": 873
},
{
"epoch": 0.853515625,
"grad_norm": 0.438519150018692,
"learning_rate": 2.9636898920510303e-05,
"loss": 0.8272308111190796,
"step": 874
},
{
"epoch": 0.8544921875,
"grad_norm": 0.4011988639831543,
"learning_rate": 2.944062806673209e-05,
"loss": 0.3140803873538971,
"step": 875
},
{
"epoch": 0.85546875,
"grad_norm": 0.5748201012611389,
"learning_rate": 2.9244357212953878e-05,
"loss": 0.6699116230010986,
"step": 876
},
{
"epoch": 0.8564453125,
"grad_norm": 0.3001462519168854,
"learning_rate": 2.9048086359175664e-05,
"loss": 0.19382989406585693,
"step": 877
},
{
"epoch": 0.857421875,
"grad_norm": 0.40844887495040894,
"learning_rate": 2.885181550539745e-05,
"loss": 0.6494845747947693,
"step": 878
},
{
"epoch": 0.8583984375,
"grad_norm": 0.3480914235115051,
"learning_rate": 2.865554465161924e-05,
"loss": 0.5555131435394287,
"step": 879
},
{
"epoch": 0.859375,
"grad_norm": 0.3903101682662964,
"learning_rate": 2.8459273797841024e-05,
"loss": 0.6830955147743225,
"step": 880
},
{
"epoch": 0.8603515625,
"grad_norm": 0.3058629333972931,
"learning_rate": 2.826300294406281e-05,
"loss": 0.3747236728668213,
"step": 881
},
{
"epoch": 0.861328125,
"grad_norm": 0.49275287985801697,
"learning_rate": 2.8066732090284592e-05,
"loss": 1.0192487239837646,
"step": 882
},
{
"epoch": 0.8623046875,
"grad_norm": 0.4016769826412201,
"learning_rate": 2.7870461236506378e-05,
"loss": 0.4012300372123718,
"step": 883
},
{
"epoch": 0.86328125,
"grad_norm": 0.4790811240673065,
"learning_rate": 2.7674190382728167e-05,
"loss": 0.6936056613922119,
"step": 884
},
{
"epoch": 0.8642578125,
"grad_norm": 0.39931413531303406,
"learning_rate": 2.7477919528949953e-05,
"loss": 0.3612633943557739,
"step": 885
},
{
"epoch": 0.865234375,
"grad_norm": 0.3250795006752014,
"learning_rate": 2.728164867517174e-05,
"loss": 0.5146504640579224,
"step": 886
},
{
"epoch": 0.8662109375,
"grad_norm": 0.5216737985610962,
"learning_rate": 2.7085377821393525e-05,
"loss": 0.6185201406478882,
"step": 887
},
{
"epoch": 0.8671875,
"grad_norm": 0.5681923031806946,
"learning_rate": 2.6889106967615314e-05,
"loss": 0.9492973685264587,
"step": 888
},
{
"epoch": 0.8681640625,
"grad_norm": 0.5284391045570374,
"learning_rate": 2.6692836113837093e-05,
"loss": 0.7801765203475952,
"step": 889
},
{
"epoch": 0.869140625,
"grad_norm": 0.42510825395584106,
"learning_rate": 2.6496565260058882e-05,
"loss": 0.4871942102909088,
"step": 890
},
{
"epoch": 0.8701171875,
"grad_norm": 0.39092326164245605,
"learning_rate": 2.6300294406280668e-05,
"loss": 0.5123960375785828,
"step": 891
},
{
"epoch": 0.87109375,
"grad_norm": 0.37694281339645386,
"learning_rate": 2.6104023552502453e-05,
"loss": 0.3543451428413391,
"step": 892
},
{
"epoch": 0.8720703125,
"grad_norm": 0.26519376039505005,
"learning_rate": 2.5907752698724242e-05,
"loss": 0.2388455718755722,
"step": 893
},
{
"epoch": 0.873046875,
"grad_norm": 0.6303861141204834,
"learning_rate": 2.5711481844946028e-05,
"loss": 0.7195224761962891,
"step": 894
},
{
"epoch": 0.8740234375,
"grad_norm": 0.4436159133911133,
"learning_rate": 2.5515210991167814e-05,
"loss": 0.8888048529624939,
"step": 895
},
{
"epoch": 0.875,
"grad_norm": 0.6473313570022583,
"learning_rate": 2.5318940137389596e-05,
"loss": 0.8557075262069702,
"step": 896
},
{
"epoch": 0.8759765625,
"grad_norm": 0.6625436544418335,
"learning_rate": 2.5122669283611382e-05,
"loss": 0.7132158279418945,
"step": 897
},
{
"epoch": 0.876953125,
"grad_norm": 0.7241202592849731,
"learning_rate": 2.492639842983317e-05,
"loss": 0.9367854595184326,
"step": 898
},
{
"epoch": 0.8779296875,
"grad_norm": 0.5321157574653625,
"learning_rate": 2.4730127576054957e-05,
"loss": 1.0013937950134277,
"step": 899
},
{
"epoch": 0.87890625,
"grad_norm": 0.3287423253059387,
"learning_rate": 2.4533856722276743e-05,
"loss": 0.4560258984565735,
"step": 900
},
{
"epoch": 0.8798828125,
"grad_norm": 0.5040727257728577,
"learning_rate": 2.4337585868498532e-05,
"loss": 0.5655212998390198,
"step": 901
},
{
"epoch": 0.880859375,
"grad_norm": 0.4150228202342987,
"learning_rate": 2.4141315014720314e-05,
"loss": 0.43106216192245483,
"step": 902
},
{
"epoch": 0.8818359375,
"grad_norm": 0.4006192684173584,
"learning_rate": 2.39450441609421e-05,
"loss": 0.4401901364326477,
"step": 903
},
{
"epoch": 0.8828125,
"grad_norm": 0.5145865678787231,
"learning_rate": 2.374877330716389e-05,
"loss": 0.9345691800117493,
"step": 904
},
{
"epoch": 0.8837890625,
"grad_norm": 0.7273013591766357,
"learning_rate": 2.3552502453385675e-05,
"loss": 0.27768659591674805,
"step": 905
},
{
"epoch": 0.884765625,
"grad_norm": 0.3039482831954956,
"learning_rate": 2.3356231599607457e-05,
"loss": 0.6196010112762451,
"step": 906
},
{
"epoch": 0.8857421875,
"grad_norm": 0.35697150230407715,
"learning_rate": 2.3159960745829247e-05,
"loss": 0.34777021408081055,
"step": 907
},
{
"epoch": 0.88671875,
"grad_norm": 0.356717050075531,
"learning_rate": 2.2963689892051032e-05,
"loss": 0.4651508331298828,
"step": 908
},
{
"epoch": 0.8876953125,
"grad_norm": 0.485963374376297,
"learning_rate": 2.2767419038272818e-05,
"loss": 0.3906201720237732,
"step": 909
},
{
"epoch": 0.888671875,
"grad_norm": 0.38827836513519287,
"learning_rate": 2.2571148184494604e-05,
"loss": 0.48782849311828613,
"step": 910
},
{
"epoch": 0.8896484375,
"grad_norm": 0.39589494466781616,
"learning_rate": 2.237487733071639e-05,
"loss": 0.5089969635009766,
"step": 911
},
{
"epoch": 0.890625,
"grad_norm": 0.6619493365287781,
"learning_rate": 2.2178606476938175e-05,
"loss": 0.9266189932823181,
"step": 912
},
{
"epoch": 0.8916015625,
"grad_norm": 0.407817542552948,
"learning_rate": 2.198233562315996e-05,
"loss": 0.3518386483192444,
"step": 913
},
{
"epoch": 0.892578125,
"grad_norm": 0.4645719826221466,
"learning_rate": 2.1786064769381747e-05,
"loss": 0.9297075271606445,
"step": 914
},
{
"epoch": 0.8935546875,
"grad_norm": 0.434517502784729,
"learning_rate": 2.1589793915603536e-05,
"loss": 0.7716128826141357,
"step": 915
},
{
"epoch": 0.89453125,
"grad_norm": 0.49387747049331665,
"learning_rate": 2.1393523061825322e-05,
"loss": 0.5475488901138306,
"step": 916
},
{
"epoch": 0.8955078125,
"grad_norm": 0.5593905448913574,
"learning_rate": 2.1197252208047104e-05,
"loss": 0.7304456233978271,
"step": 917
},
{
"epoch": 0.896484375,
"grad_norm": 0.3386078178882599,
"learning_rate": 2.1000981354268893e-05,
"loss": 0.7872465252876282,
"step": 918
},
{
"epoch": 0.8974609375,
"grad_norm": 0.2872868478298187,
"learning_rate": 2.080471050049068e-05,
"loss": 0.3295198976993561,
"step": 919
},
{
"epoch": 0.8984375,
"grad_norm": 0.4897945523262024,
"learning_rate": 2.060843964671246e-05,
"loss": 0.3939395546913147,
"step": 920
},
{
"epoch": 0.8994140625,
"grad_norm": 0.5068129897117615,
"learning_rate": 2.041216879293425e-05,
"loss": 0.4646037817001343,
"step": 921
},
{
"epoch": 0.900390625,
"grad_norm": 0.3769625425338745,
"learning_rate": 2.0215897939156036e-05,
"loss": 0.811498761177063,
"step": 922
},
{
"epoch": 0.9013671875,
"grad_norm": 0.380655974149704,
"learning_rate": 2.0019627085377822e-05,
"loss": 0.6260181665420532,
"step": 923
},
{
"epoch": 0.90234375,
"grad_norm": 0.5810602903366089,
"learning_rate": 1.9823356231599608e-05,
"loss": 0.7125158309936523,
"step": 924
},
{
"epoch": 0.9033203125,
"grad_norm": 0.4367387592792511,
"learning_rate": 1.9627085377821394e-05,
"loss": 0.7728107571601868,
"step": 925
},
{
"epoch": 0.904296875,
"grad_norm": 0.604702353477478,
"learning_rate": 1.9430814524043183e-05,
"loss": 0.5136534571647644,
"step": 926
},
{
"epoch": 0.9052734375,
"grad_norm": 0.40865615010261536,
"learning_rate": 1.923454367026497e-05,
"loss": 0.5040115714073181,
"step": 927
},
{
"epoch": 0.90625,
"grad_norm": 0.3602078855037689,
"learning_rate": 1.903827281648675e-05,
"loss": 0.4498569965362549,
"step": 928
},
{
"epoch": 0.9072265625,
"grad_norm": 0.46351152658462524,
"learning_rate": 1.884200196270854e-05,
"loss": 0.8635745644569397,
"step": 929
},
{
"epoch": 0.908203125,
"grad_norm": 0.5490495562553406,
"learning_rate": 1.8645731108930326e-05,
"loss": 0.9265761375427246,
"step": 930
},
{
"epoch": 0.9091796875,
"grad_norm": 0.4198157489299774,
"learning_rate": 1.8449460255152108e-05,
"loss": 0.8148217797279358,
"step": 931
},
{
"epoch": 0.91015625,
"grad_norm": 0.5183578729629517,
"learning_rate": 1.8253189401373897e-05,
"loss": 0.7837534546852112,
"step": 932
},
{
"epoch": 0.9111328125,
"grad_norm": 0.41839340329170227,
"learning_rate": 1.8056918547595683e-05,
"loss": 0.7239848971366882,
"step": 933
},
{
"epoch": 0.912109375,
"grad_norm": 0.49158063530921936,
"learning_rate": 1.786064769381747e-05,
"loss": 0.7751527428627014,
"step": 934
},
{
"epoch": 0.9130859375,
"grad_norm": 0.20171599090099335,
"learning_rate": 1.7664376840039255e-05,
"loss": 0.181843563914299,
"step": 935
},
{
"epoch": 0.9140625,
"grad_norm": 0.36237961053848267,
"learning_rate": 1.746810598626104e-05,
"loss": 0.5150234699249268,
"step": 936
},
{
"epoch": 0.9150390625,
"grad_norm": 0.4587535858154297,
"learning_rate": 1.7271835132482826e-05,
"loss": 0.6178685426712036,
"step": 937
},
{
"epoch": 0.916015625,
"grad_norm": 0.392635703086853,
"learning_rate": 1.7075564278704615e-05,
"loss": 0.7002321481704712,
"step": 938
},
{
"epoch": 0.9169921875,
"grad_norm": 0.28255772590637207,
"learning_rate": 1.6879293424926398e-05,
"loss": 0.6161627769470215,
"step": 939
},
{
"epoch": 0.91796875,
"grad_norm": 0.31382182240486145,
"learning_rate": 1.6683022571148187e-05,
"loss": 0.6143029928207397,
"step": 940
},
{
"epoch": 0.9189453125,
"grad_norm": 0.5099475383758545,
"learning_rate": 1.6486751717369972e-05,
"loss": 0.9116108417510986,
"step": 941
},
{
"epoch": 0.919921875,
"grad_norm": 0.4015892446041107,
"learning_rate": 1.6290480863591755e-05,
"loss": 0.7331390380859375,
"step": 942
},
{
"epoch": 0.9208984375,
"grad_norm": 0.4519053101539612,
"learning_rate": 1.6094210009813544e-05,
"loss": 0.6662384867668152,
"step": 943
},
{
"epoch": 0.921875,
"grad_norm": 0.5565328598022461,
"learning_rate": 1.589793915603533e-05,
"loss": 0.37386590242385864,
"step": 944
},
{
"epoch": 0.9228515625,
"grad_norm": 0.398419588804245,
"learning_rate": 1.5701668302257116e-05,
"loss": 0.9127399325370789,
"step": 945
},
{
"epoch": 0.923828125,
"grad_norm": 0.37491804361343384,
"learning_rate": 1.55053974484789e-05,
"loss": 0.47025924921035767,
"step": 946
},
{
"epoch": 0.9248046875,
"grad_norm": 0.49557894468307495,
"learning_rate": 1.5309126594700687e-05,
"loss": 0.6349594593048096,
"step": 947
},
{
"epoch": 0.92578125,
"grad_norm": 0.2361314743757248,
"learning_rate": 1.5112855740922475e-05,
"loss": 0.3594982922077179,
"step": 948
},
{
"epoch": 0.9267578125,
"grad_norm": 0.40022003650665283,
"learning_rate": 1.491658488714426e-05,
"loss": 0.41701436042785645,
"step": 949
},
{
"epoch": 0.927734375,
"grad_norm": 0.349528431892395,
"learning_rate": 1.4720314033366044e-05,
"loss": 0.2943156063556671,
"step": 950
},
{
"epoch": 0.9287109375,
"grad_norm": 0.4660559892654419,
"learning_rate": 1.4524043179587832e-05,
"loss": 0.3633948564529419,
"step": 951
},
{
"epoch": 0.9296875,
"grad_norm": 0.28590673208236694,
"learning_rate": 1.432777232580962e-05,
"loss": 0.4886907935142517,
"step": 952
},
{
"epoch": 0.9306640625,
"grad_norm": 0.4388448894023895,
"learning_rate": 1.4131501472031405e-05,
"loss": 0.6123654246330261,
"step": 953
},
{
"epoch": 0.931640625,
"grad_norm": 0.4807531237602234,
"learning_rate": 1.3935230618253189e-05,
"loss": 0.32400381565093994,
"step": 954
},
{
"epoch": 0.9326171875,
"grad_norm": 0.3903636932373047,
"learning_rate": 1.3738959764474977e-05,
"loss": 0.6839208006858826,
"step": 955
},
{
"epoch": 0.93359375,
"grad_norm": 0.2925507426261902,
"learning_rate": 1.3542688910696762e-05,
"loss": 0.5898708701133728,
"step": 956
},
{
"epoch": 0.9345703125,
"grad_norm": 0.39300912618637085,
"learning_rate": 1.3346418056918546e-05,
"loss": 0.3898833692073822,
"step": 957
},
{
"epoch": 0.935546875,
"grad_norm": 0.4321513772010803,
"learning_rate": 1.3150147203140334e-05,
"loss": 0.5717346668243408,
"step": 958
},
{
"epoch": 0.9365234375,
"grad_norm": 0.47681212425231934,
"learning_rate": 1.2953876349362121e-05,
"loss": 0.9711145162582397,
"step": 959
},
{
"epoch": 0.9375,
"grad_norm": 0.524958610534668,
"learning_rate": 1.2757605495583907e-05,
"loss": 0.6577808260917664,
"step": 960
},
{
"epoch": 0.9384765625,
"grad_norm": 0.40814298391342163,
"learning_rate": 1.2561334641805691e-05,
"loss": 0.5148733258247375,
"step": 961
},
{
"epoch": 0.939453125,
"grad_norm": 0.3122687041759491,
"learning_rate": 1.2365063788027479e-05,
"loss": 0.884072482585907,
"step": 962
},
{
"epoch": 0.9404296875,
"grad_norm": 0.4473840594291687,
"learning_rate": 1.2168792934249266e-05,
"loss": 0.660685658454895,
"step": 963
},
{
"epoch": 0.94140625,
"grad_norm": 0.3491450548171997,
"learning_rate": 1.197252208047105e-05,
"loss": 0.8680378794670105,
"step": 964
},
{
"epoch": 0.9423828125,
"grad_norm": 0.6323879957199097,
"learning_rate": 1.1776251226692837e-05,
"loss": 0.8196921348571777,
"step": 965
},
{
"epoch": 0.943359375,
"grad_norm": 0.354900062084198,
"learning_rate": 1.1579980372914623e-05,
"loss": 0.5380838513374329,
"step": 966
},
{
"epoch": 0.9443359375,
"grad_norm": 0.3235265612602234,
"learning_rate": 1.1383709519136409e-05,
"loss": 0.39993464946746826,
"step": 967
},
{
"epoch": 0.9453125,
"grad_norm": 0.3700491786003113,
"learning_rate": 1.1187438665358195e-05,
"loss": 0.6613435745239258,
"step": 968
},
{
"epoch": 0.9462890625,
"grad_norm": 0.29880228638648987,
"learning_rate": 1.099116781157998e-05,
"loss": 0.5756196975708008,
"step": 969
},
{
"epoch": 0.947265625,
"grad_norm": 0.4585433304309845,
"learning_rate": 1.0794896957801768e-05,
"loss": 0.5012968182563782,
"step": 970
},
{
"epoch": 0.9482421875,
"grad_norm": 0.5275799632072449,
"learning_rate": 1.0598626104023552e-05,
"loss": 0.4986013174057007,
"step": 971
},
{
"epoch": 0.94921875,
"grad_norm": 0.30642619729042053,
"learning_rate": 1.040235525024534e-05,
"loss": 0.29793277382850647,
"step": 972
},
{
"epoch": 0.9501953125,
"grad_norm": 0.7356166243553162,
"learning_rate": 1.0206084396467125e-05,
"loss": 0.6518126726150513,
"step": 973
},
{
"epoch": 0.951171875,
"grad_norm": 0.6069150567054749,
"learning_rate": 1.0009813542688911e-05,
"loss": 0.7005544900894165,
"step": 974
},
{
"epoch": 0.9521484375,
"grad_norm": 0.500067949295044,
"learning_rate": 9.813542688910697e-06,
"loss": 0.5567950010299683,
"step": 975
},
{
"epoch": 0.953125,
"grad_norm": 0.5926097631454468,
"learning_rate": 9.617271835132484e-06,
"loss": 0.6974345445632935,
"step": 976
},
{
"epoch": 0.9541015625,
"grad_norm": 0.28873002529144287,
"learning_rate": 9.42100098135427e-06,
"loss": 0.28231939673423767,
"step": 977
},
{
"epoch": 0.955078125,
"grad_norm": 0.6644822359085083,
"learning_rate": 9.224730127576054e-06,
"loss": 0.46575701236724854,
"step": 978
},
{
"epoch": 0.9560546875,
"grad_norm": 0.34748774766921997,
"learning_rate": 9.028459273797842e-06,
"loss": 0.7192713022232056,
"step": 979
},
{
"epoch": 0.95703125,
"grad_norm": 0.4444558024406433,
"learning_rate": 8.832188420019627e-06,
"loss": 0.34014150500297546,
"step": 980
},
{
"epoch": 0.9580078125,
"grad_norm": 0.4814091920852661,
"learning_rate": 8.635917566241413e-06,
"loss": 0.8042552471160889,
"step": 981
},
{
"epoch": 0.958984375,
"grad_norm": 0.5443412661552429,
"learning_rate": 8.439646712463199e-06,
"loss": 0.6534023880958557,
"step": 982
},
{
"epoch": 0.9599609375,
"grad_norm": 0.40025195479393005,
"learning_rate": 8.243375858684986e-06,
"loss": 0.9056930541992188,
"step": 983
},
{
"epoch": 0.9609375,
"grad_norm": 0.41958069801330566,
"learning_rate": 8.047105004906772e-06,
"loss": 0.5610394477844238,
"step": 984
},
{
"epoch": 0.9619140625,
"grad_norm": 0.33056482672691345,
"learning_rate": 7.850834151128558e-06,
"loss": 0.5796000361442566,
"step": 985
},
{
"epoch": 0.962890625,
"grad_norm": 0.5056169629096985,
"learning_rate": 7.654563297350344e-06,
"loss": 0.7795373201370239,
"step": 986
},
{
"epoch": 0.9638671875,
"grad_norm": 0.4030667543411255,
"learning_rate": 7.45829244357213e-06,
"loss": 0.761528491973877,
"step": 987
},
{
"epoch": 0.96484375,
"grad_norm": 0.22716952860355377,
"learning_rate": 7.262021589793916e-06,
"loss": 0.21712671220302582,
"step": 988
},
{
"epoch": 0.9658203125,
"grad_norm": 0.4826786518096924,
"learning_rate": 7.0657507360157025e-06,
"loss": 0.6192560791969299,
"step": 989
},
{
"epoch": 0.966796875,
"grad_norm": 0.3611379861831665,
"learning_rate": 6.869479882237488e-06,
"loss": 0.5660407543182373,
"step": 990
},
{
"epoch": 0.9677734375,
"grad_norm": 0.44197750091552734,
"learning_rate": 6.673209028459273e-06,
"loss": 0.8223164081573486,
"step": 991
},
{
"epoch": 0.96875,
"grad_norm": 0.45650866627693176,
"learning_rate": 6.476938174681061e-06,
"loss": 0.5810177326202393,
"step": 992
},
{
"epoch": 0.9697265625,
"grad_norm": 0.6275922060012817,
"learning_rate": 6.2806673209028455e-06,
"loss": 0.46302127838134766,
"step": 993
},
{
"epoch": 0.970703125,
"grad_norm": 0.29163289070129395,
"learning_rate": 6.084396467124633e-06,
"loss": 0.49744415283203125,
"step": 994
},
{
"epoch": 0.9716796875,
"grad_norm": 0.4289768934249878,
"learning_rate": 5.888125613346419e-06,
"loss": 0.39710360765457153,
"step": 995
},
{
"epoch": 0.97265625,
"grad_norm": 0.43311089277267456,
"learning_rate": 5.6918547595682045e-06,
"loss": 0.4934995174407959,
"step": 996
},
{
"epoch": 0.9736328125,
"grad_norm": 0.4249640703201294,
"learning_rate": 5.49558390578999e-06,
"loss": 0.6822129487991333,
"step": 997
},
{
"epoch": 0.974609375,
"grad_norm": 0.4080635607242584,
"learning_rate": 5.299313052011776e-06,
"loss": 0.2851019501686096,
"step": 998
},
{
"epoch": 0.9755859375,
"grad_norm": 0.3082174062728882,
"learning_rate": 5.103042198233563e-06,
"loss": 0.8851650357246399,
"step": 999
},
{
"epoch": 0.9765625,
"grad_norm": 0.5285578370094299,
"learning_rate": 4.906771344455348e-06,
"loss": 0.5684286952018738,
"step": 1000
},
{
"epoch": 0.9775390625,
"grad_norm": 0.37052616477012634,
"learning_rate": 4.710500490677135e-06,
"loss": 0.8170924782752991,
"step": 1001
},
{
"epoch": 0.978515625,
"grad_norm": 0.46926191449165344,
"learning_rate": 4.514229636898921e-06,
"loss": 0.665911853313446,
"step": 1002
},
{
"epoch": 0.9794921875,
"grad_norm": 0.38110095262527466,
"learning_rate": 4.3179587831207065e-06,
"loss": 0.9365942478179932,
"step": 1003
},
{
"epoch": 0.98046875,
"grad_norm": 0.3803754150867462,
"learning_rate": 4.121687929342493e-06,
"loss": 0.756361722946167,
"step": 1004
},
{
"epoch": 0.9814453125,
"grad_norm": 0.6576887965202332,
"learning_rate": 3.925417075564279e-06,
"loss": 0.6846331357955933,
"step": 1005
},
{
"epoch": 0.982421875,
"grad_norm": 0.6425113081932068,
"learning_rate": 3.729146221786065e-06,
"loss": 0.7665562629699707,
"step": 1006
},
{
"epoch": 0.9833984375,
"grad_norm": 0.28858375549316406,
"learning_rate": 3.5328753680078512e-06,
"loss": 0.2748746871948242,
"step": 1007
},
{
"epoch": 0.984375,
"grad_norm": 0.38693365454673767,
"learning_rate": 3.3366045142296366e-06,
"loss": 0.6602081060409546,
"step": 1008
},
{
"epoch": 0.9853515625,
"grad_norm": 0.39297735691070557,
"learning_rate": 3.1403336604514228e-06,
"loss": 0.43784576654434204,
"step": 1009
},
{
"epoch": 0.986328125,
"grad_norm": 0.4182215929031372,
"learning_rate": 2.9440628066732094e-06,
"loss": 0.7852948307991028,
"step": 1010
},
{
"epoch": 0.9873046875,
"grad_norm": 0.4079328775405884,
"learning_rate": 2.747791952894995e-06,
"loss": 0.5413305759429932,
"step": 1011
},
{
"epoch": 0.98828125,
"grad_norm": 0.41826963424682617,
"learning_rate": 2.5515210991167813e-06,
"loss": 0.449452668428421,
"step": 1012
},
{
"epoch": 0.9892578125,
"grad_norm": 0.31969836354255676,
"learning_rate": 2.3552502453385675e-06,
"loss": 0.26595592498779297,
"step": 1013
},
{
"epoch": 0.990234375,
"grad_norm": 0.466192364692688,
"learning_rate": 2.1589793915603533e-06,
"loss": 0.6175995469093323,
"step": 1014
},
{
"epoch": 0.9912109375,
"grad_norm": 0.4734349846839905,
"learning_rate": 1.9627085377821394e-06,
"loss": 0.6440984010696411,
"step": 1015
},
{
"epoch": 0.9921875,
"grad_norm": 0.4446095824241638,
"learning_rate": 1.7664376840039256e-06,
"loss": 0.5738557577133179,
"step": 1016
},
{
"epoch": 0.9931640625,
"grad_norm": 0.24098840355873108,
"learning_rate": 1.5701668302257114e-06,
"loss": 0.6320365071296692,
"step": 1017
},
{
"epoch": 0.994140625,
"grad_norm": 0.5342791676521301,
"learning_rate": 1.3738959764474976e-06,
"loss": 0.9431695938110352,
"step": 1018
},
{
"epoch": 0.9951171875,
"grad_norm": 0.31406712532043457,
"learning_rate": 1.1776251226692837e-06,
"loss": 0.6406105160713196,
"step": 1019
},
{
"epoch": 0.99609375,
"grad_norm": 0.5162865519523621,
"learning_rate": 9.813542688910697e-07,
"loss": 0.7935853004455566,
"step": 1020
},
{
"epoch": 0.9970703125,
"grad_norm": 0.4624859690666199,
"learning_rate": 7.850834151128557e-07,
"loss": 0.9667851328849792,
"step": 1021
},
{
"epoch": 0.998046875,
"grad_norm": 0.43549951910972595,
"learning_rate": 5.888125613346419e-07,
"loss": 0.73248291015625,
"step": 1022
},
{
"epoch": 0.9990234375,
"grad_norm": 0.6080308556556702,
"learning_rate": 3.9254170755642785e-07,
"loss": 0.5045021772384644,
"step": 1023
},
{
"epoch": 1.0,
"grad_norm": 0.3927266299724579,
"learning_rate": 1.9627085377821392e-07,
"loss": 0.37262263894081116,
"step": 1024
}
],
"logging_steps": 1,
"max_steps": 1024,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.871410239702333e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}