hafith / trainer_state.json
mdnaseif's picture
Initial model upload
12788cd verified
Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON
{
"best_global_step": 110000,
"best_metric": 0.07368261883895177,
"best_model_checkpoint": "./TrOCR_SigLIP2_Aranizer_41K_AND_COMBINED/stage2/checkpoint-110000",
"epoch": 50.0,
"eval_steps": 10000,
"global_step": 115050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.021733224667209998,
"grad_norm": 190.25732421875,
"learning_rate": 4.900000000000001e-07,
"loss": 17.665,
"step": 50
},
{
"epoch": 0.043466449334419996,
"grad_norm": 101.98078918457031,
"learning_rate": 9.9e-07,
"loss": 17.0,
"step": 100
},
{
"epoch": 0.06519967400162999,
"grad_norm": 61.98678970336914,
"learning_rate": 1.4900000000000001e-06,
"loss": 16.6227,
"step": 150
},
{
"epoch": 0.08693289866883999,
"grad_norm": 77.35042572021484,
"learning_rate": 1.9900000000000004e-06,
"loss": 16.5637,
"step": 200
},
{
"epoch": 0.10866612333604998,
"grad_norm": 91.11700439453125,
"learning_rate": 2.4900000000000003e-06,
"loss": 16.7798,
"step": 250
},
{
"epoch": 0.13039934800325997,
"grad_norm": 95.30789184570312,
"learning_rate": 2.99e-06,
"loss": 16.4945,
"step": 300
},
{
"epoch": 0.15213257267046998,
"grad_norm": 81.99232482910156,
"learning_rate": 3.49e-06,
"loss": 16.2094,
"step": 350
},
{
"epoch": 0.17386579733767998,
"grad_norm": 65.64993286132812,
"learning_rate": 3.990000000000001e-06,
"loss": 16.4501,
"step": 400
},
{
"epoch": 0.19559902200489,
"grad_norm": 98.58848571777344,
"learning_rate": 4.49e-06,
"loss": 16.386,
"step": 450
},
{
"epoch": 0.21733224667209997,
"grad_norm": 85.1087646484375,
"learning_rate": 4.9900000000000005e-06,
"loss": 16.453,
"step": 500
},
{
"epoch": 0.23906547133930997,
"grad_norm": 68.43016052246094,
"learning_rate": 5.490000000000001e-06,
"loss": 16.245,
"step": 550
},
{
"epoch": 0.26079869600651995,
"grad_norm": 71.09822082519531,
"learning_rate": 5.99e-06,
"loss": 16.0378,
"step": 600
},
{
"epoch": 0.28253192067372995,
"grad_norm": 71.08029174804688,
"learning_rate": 6.4900000000000005e-06,
"loss": 16.7085,
"step": 650
},
{
"epoch": 0.30426514534093996,
"grad_norm": 66.14205169677734,
"learning_rate": 6.99e-06,
"loss": 16.3454,
"step": 700
},
{
"epoch": 0.32599837000814996,
"grad_norm": 93.4423599243164,
"learning_rate": 7.49e-06,
"loss": 16.4424,
"step": 750
},
{
"epoch": 0.34773159467535997,
"grad_norm": 80.68280029296875,
"learning_rate": 7.990000000000001e-06,
"loss": 16.5934,
"step": 800
},
{
"epoch": 0.36946481934257,
"grad_norm": 50.99578094482422,
"learning_rate": 8.49e-06,
"loss": 16.4154,
"step": 850
},
{
"epoch": 0.39119804400978,
"grad_norm": 73.9505386352539,
"learning_rate": 8.99e-06,
"loss": 16.4743,
"step": 900
},
{
"epoch": 0.4129312686769899,
"grad_norm": 53.72673034667969,
"learning_rate": 9.49e-06,
"loss": 16.5231,
"step": 950
},
{
"epoch": 0.43466449334419993,
"grad_norm": 80.76425170898438,
"learning_rate": 9.990000000000001e-06,
"loss": 16.3367,
"step": 1000
},
{
"epoch": 0.45639771801140994,
"grad_norm": 88.06133270263672,
"learning_rate": 9.995703638754933e-06,
"loss": 16.7444,
"step": 1050
},
{
"epoch": 0.47813094267861994,
"grad_norm": 49.88163757324219,
"learning_rate": 9.991319596668128e-06,
"loss": 16.4815,
"step": 1100
},
{
"epoch": 0.49986416734582995,
"grad_norm": 66.88523864746094,
"learning_rate": 9.986935554581326e-06,
"loss": 16.467,
"step": 1150
},
{
"epoch": 0.5215973920130399,
"grad_norm": 65.38529968261719,
"learning_rate": 9.982551512494521e-06,
"loss": 16.4785,
"step": 1200
},
{
"epoch": 0.54333061668025,
"grad_norm": 54.72123336791992,
"learning_rate": 9.978167470407716e-06,
"loss": 16.5669,
"step": 1250
},
{
"epoch": 0.5650638413474599,
"grad_norm": 52.83816909790039,
"learning_rate": 9.973783428320912e-06,
"loss": 16.3767,
"step": 1300
},
{
"epoch": 0.58679706601467,
"grad_norm": 72.39130401611328,
"learning_rate": 9.969399386234109e-06,
"loss": 16.2847,
"step": 1350
},
{
"epoch": 0.6085302906818799,
"grad_norm": 52.192020416259766,
"learning_rate": 9.965015344147304e-06,
"loss": 16.3576,
"step": 1400
},
{
"epoch": 0.63026351534909,
"grad_norm": 50.59873962402344,
"learning_rate": 9.960631302060501e-06,
"loss": 15.9625,
"step": 1450
},
{
"epoch": 0.6519967400162999,
"grad_norm": 253.75856018066406,
"learning_rate": 9.956247259973697e-06,
"loss": 16.3934,
"step": 1500
},
{
"epoch": 0.6737299646835099,
"grad_norm": 43.8497314453125,
"learning_rate": 9.951863217886892e-06,
"loss": 16.686,
"step": 1550
},
{
"epoch": 0.6954631893507199,
"grad_norm": 48.21563720703125,
"learning_rate": 9.947479175800089e-06,
"loss": 16.4491,
"step": 1600
},
{
"epoch": 0.7171964140179299,
"grad_norm": 59.72194290161133,
"learning_rate": 9.943095133713284e-06,
"loss": 16.1068,
"step": 1650
},
{
"epoch": 0.73892963868514,
"grad_norm": 117.0466079711914,
"learning_rate": 9.93871109162648e-06,
"loss": 16.3469,
"step": 1700
},
{
"epoch": 0.7606628633523499,
"grad_norm": 48.334346771240234,
"learning_rate": 9.934327049539675e-06,
"loss": 16.3334,
"step": 1750
},
{
"epoch": 0.78239608801956,
"grad_norm": 54.45792007446289,
"learning_rate": 9.929943007452872e-06,
"loss": 16.2452,
"step": 1800
},
{
"epoch": 0.8041293126867699,
"grad_norm": 39.005428314208984,
"learning_rate": 9.925558965366068e-06,
"loss": 16.0999,
"step": 1850
},
{
"epoch": 0.8258625373539799,
"grad_norm": 67.72175598144531,
"learning_rate": 9.921174923279265e-06,
"loss": 16.2755,
"step": 1900
},
{
"epoch": 0.8475957620211899,
"grad_norm": 72.75259399414062,
"learning_rate": 9.91679088119246e-06,
"loss": 16.5987,
"step": 1950
},
{
"epoch": 0.8693289866883999,
"grad_norm": 58.86764144897461,
"learning_rate": 9.912406839105656e-06,
"loss": 16.4092,
"step": 2000
},
{
"epoch": 0.8910622113556099,
"grad_norm": 73.46177673339844,
"learning_rate": 9.908022797018853e-06,
"loss": 16.637,
"step": 2050
},
{
"epoch": 0.9127954360228199,
"grad_norm": 62.41428756713867,
"learning_rate": 9.903638754932048e-06,
"loss": 16.528,
"step": 2100
},
{
"epoch": 0.9345286606900299,
"grad_norm": 65.01278686523438,
"learning_rate": 9.899254712845245e-06,
"loss": 16.5003,
"step": 2150
},
{
"epoch": 0.9562618853572399,
"grad_norm": 57.43757247924805,
"learning_rate": 9.894870670758439e-06,
"loss": 16.3343,
"step": 2200
},
{
"epoch": 0.9779951100244498,
"grad_norm": 55.26877975463867,
"learning_rate": 9.890486628671636e-06,
"loss": 16.4804,
"step": 2250
},
{
"epoch": 0.9997283346916599,
"grad_norm": 59.66270446777344,
"learning_rate": 9.886102586584833e-06,
"loss": 16.1125,
"step": 2300
},
{
"epoch": 1.0212985601738658,
"grad_norm": 65.677490234375,
"learning_rate": 9.881718544498028e-06,
"loss": 15.5081,
"step": 2350
},
{
"epoch": 1.0430317848410757,
"grad_norm": 46.62354278564453,
"learning_rate": 9.877334502411224e-06,
"loss": 15.5581,
"step": 2400
},
{
"epoch": 1.0647650095082857,
"grad_norm": 47.7025032043457,
"learning_rate": 9.87295046032442e-06,
"loss": 15.2401,
"step": 2450
},
{
"epoch": 1.0864982341754958,
"grad_norm": 57.22602081298828,
"learning_rate": 9.868566418237616e-06,
"loss": 15.5116,
"step": 2500
},
{
"epoch": 1.1082314588427058,
"grad_norm": 51.149818420410156,
"learning_rate": 9.864182376150812e-06,
"loss": 15.5938,
"step": 2550
},
{
"epoch": 1.1299646835099157,
"grad_norm": 80.70169067382812,
"learning_rate": 9.859798334064009e-06,
"loss": 15.5891,
"step": 2600
},
{
"epoch": 1.1516979081771257,
"grad_norm": 59.62293243408203,
"learning_rate": 9.855414291977204e-06,
"loss": 15.6038,
"step": 2650
},
{
"epoch": 1.1734311328443359,
"grad_norm": 109.22635650634766,
"learning_rate": 9.8510302498904e-06,
"loss": 15.4956,
"step": 2700
},
{
"epoch": 1.1951643575115458,
"grad_norm": 54.90534591674805,
"learning_rate": 9.846646207803597e-06,
"loss": 15.5951,
"step": 2750
},
{
"epoch": 1.2168975821787558,
"grad_norm": 130.99798583984375,
"learning_rate": 9.842262165716792e-06,
"loss": 15.5201,
"step": 2800
},
{
"epoch": 1.2386308068459657,
"grad_norm": 45.625389099121094,
"learning_rate": 9.837878123629987e-06,
"loss": 15.4576,
"step": 2850
},
{
"epoch": 1.2603640315131757,
"grad_norm": 36.033836364746094,
"learning_rate": 9.833494081543183e-06,
"loss": 15.4927,
"step": 2900
},
{
"epoch": 1.2820972561803858,
"grad_norm": 52.81075668334961,
"learning_rate": 9.82911003945638e-06,
"loss": 15.4669,
"step": 2950
},
{
"epoch": 1.3038304808475958,
"grad_norm": 43.44648361206055,
"learning_rate": 9.824725997369575e-06,
"loss": 15.5326,
"step": 3000
},
{
"epoch": 1.3255637055148057,
"grad_norm": 40.79172134399414,
"learning_rate": 9.820341955282772e-06,
"loss": 15.6252,
"step": 3050
},
{
"epoch": 1.3472969301820157,
"grad_norm": 54.189910888671875,
"learning_rate": 9.815957913195968e-06,
"loss": 15.5897,
"step": 3100
},
{
"epoch": 1.3690301548492259,
"grad_norm": 55.73503112792969,
"learning_rate": 9.811573871109163e-06,
"loss": 15.6081,
"step": 3150
},
{
"epoch": 1.3907633795164358,
"grad_norm": 67.98750305175781,
"learning_rate": 9.80718982902236e-06,
"loss": 15.5154,
"step": 3200
},
{
"epoch": 1.4124966041836458,
"grad_norm": 61.99040222167969,
"learning_rate": 9.802805786935556e-06,
"loss": 15.464,
"step": 3250
},
{
"epoch": 1.4342298288508557,
"grad_norm": 42.3632926940918,
"learning_rate": 9.798421744848751e-06,
"loss": 15.3425,
"step": 3300
},
{
"epoch": 1.4559630535180657,
"grad_norm": 46.5098991394043,
"learning_rate": 9.794037702761946e-06,
"loss": 15.5891,
"step": 3350
},
{
"epoch": 1.4776962781852756,
"grad_norm": 59.43826675415039,
"learning_rate": 9.789653660675143e-06,
"loss": 15.6337,
"step": 3400
},
{
"epoch": 1.4994295028524858,
"grad_norm": 76.57585906982422,
"learning_rate": 9.785269618588339e-06,
"loss": 15.4892,
"step": 3450
},
{
"epoch": 1.5211627275196957,
"grad_norm": 65.46538543701172,
"learning_rate": 9.780885576501536e-06,
"loss": 15.6873,
"step": 3500
},
{
"epoch": 1.542895952186906,
"grad_norm": 65.29698181152344,
"learning_rate": 9.776501534414731e-06,
"loss": 15.5051,
"step": 3550
},
{
"epoch": 1.5646291768541158,
"grad_norm": 46.745784759521484,
"learning_rate": 9.772117492327927e-06,
"loss": 15.6602,
"step": 3600
},
{
"epoch": 1.5863624015213258,
"grad_norm": 44.605228424072266,
"learning_rate": 9.767733450241124e-06,
"loss": 15.5229,
"step": 3650
},
{
"epoch": 1.6080956261885357,
"grad_norm": 47.4207649230957,
"learning_rate": 9.763349408154319e-06,
"loss": 15.5634,
"step": 3700
},
{
"epoch": 1.6298288508557457,
"grad_norm": 43.18611145019531,
"learning_rate": 9.758965366067516e-06,
"loss": 15.5222,
"step": 3750
},
{
"epoch": 1.6515620755229556,
"grad_norm": 39.6898078918457,
"learning_rate": 9.754581323980712e-06,
"loss": 15.5259,
"step": 3800
},
{
"epoch": 1.6732953001901656,
"grad_norm": 71.0409164428711,
"learning_rate": 9.750197281893907e-06,
"loss": 15.5971,
"step": 3850
},
{
"epoch": 1.6950285248573758,
"grad_norm": 53.462467193603516,
"learning_rate": 9.745813239807102e-06,
"loss": 15.4515,
"step": 3900
},
{
"epoch": 1.7167617495245857,
"grad_norm": 40.28457260131836,
"learning_rate": 9.7414291977203e-06,
"loss": 15.4006,
"step": 3950
},
{
"epoch": 1.7384949741917957,
"grad_norm": 50.27900695800781,
"learning_rate": 9.737045155633495e-06,
"loss": 15.3051,
"step": 4000
},
{
"epoch": 1.7602281988590058,
"grad_norm": 44.33418655395508,
"learning_rate": 9.73266111354669e-06,
"loss": 15.7606,
"step": 4050
},
{
"epoch": 1.7819614235262158,
"grad_norm": 65.12041473388672,
"learning_rate": 9.728277071459887e-06,
"loss": 15.4747,
"step": 4100
},
{
"epoch": 1.8036946481934257,
"grad_norm": 50.64781951904297,
"learning_rate": 9.723893029373083e-06,
"loss": 15.5251,
"step": 4150
},
{
"epoch": 1.8254278728606357,
"grad_norm": 37.71573257446289,
"learning_rate": 9.71950898728628e-06,
"loss": 15.4422,
"step": 4200
},
{
"epoch": 1.8471610975278456,
"grad_norm": 53.08781433105469,
"learning_rate": 9.715124945199475e-06,
"loss": 15.6055,
"step": 4250
},
{
"epoch": 1.8688943221950556,
"grad_norm": 40.412384033203125,
"learning_rate": 9.71074090311267e-06,
"loss": 15.3275,
"step": 4300
},
{
"epoch": 1.8906275468622655,
"grad_norm": 81.10236358642578,
"learning_rate": 9.706356861025866e-06,
"loss": 15.391,
"step": 4350
},
{
"epoch": 1.9123607715294757,
"grad_norm": 73.39491271972656,
"learning_rate": 9.701972818939063e-06,
"loss": 15.7053,
"step": 4400
},
{
"epoch": 1.9340939961966856,
"grad_norm": 42.71440124511719,
"learning_rate": 9.697588776852258e-06,
"loss": 15.484,
"step": 4450
},
{
"epoch": 1.9558272208638958,
"grad_norm": 73.08609008789062,
"learning_rate": 9.693204734765454e-06,
"loss": 15.5822,
"step": 4500
},
{
"epoch": 1.9775604455311058,
"grad_norm": 66.59615325927734,
"learning_rate": 9.68882069267865e-06,
"loss": 15.6825,
"step": 4550
},
{
"epoch": 1.9992936701983157,
"grad_norm": 63.10333251953125,
"learning_rate": 9.684436650591846e-06,
"loss": 15.5203,
"step": 4600
},
{
"epoch": 2.0208638956805216,
"grad_norm": 57.844970703125,
"learning_rate": 9.680052608505043e-06,
"loss": 14.5109,
"step": 4650
},
{
"epoch": 2.0425971203477316,
"grad_norm": 36.37318420410156,
"learning_rate": 9.675668566418239e-06,
"loss": 14.7488,
"step": 4700
},
{
"epoch": 2.0643303450149415,
"grad_norm": 72.80779266357422,
"learning_rate": 9.671284524331434e-06,
"loss": 14.9111,
"step": 4750
},
{
"epoch": 2.0860635696821515,
"grad_norm": 71.37971496582031,
"learning_rate": 9.66690048224463e-06,
"loss": 14.6878,
"step": 4800
},
{
"epoch": 2.1077967943493614,
"grad_norm": 42.20883560180664,
"learning_rate": 9.662516440157827e-06,
"loss": 14.7469,
"step": 4850
},
{
"epoch": 2.1295300190165714,
"grad_norm": 53.63486862182617,
"learning_rate": 9.658132398071022e-06,
"loss": 14.6422,
"step": 4900
},
{
"epoch": 2.1512632436837817,
"grad_norm": 54.38608932495117,
"learning_rate": 9.653748355984219e-06,
"loss": 14.6238,
"step": 4950
},
{
"epoch": 2.1729964683509917,
"grad_norm": 44.58712387084961,
"learning_rate": 9.649364313897414e-06,
"loss": 14.6619,
"step": 5000
},
{
"epoch": 2.1947296930182016,
"grad_norm": 46.281524658203125,
"learning_rate": 9.64498027181061e-06,
"loss": 14.8443,
"step": 5050
},
{
"epoch": 2.2164629176854116,
"grad_norm": 38.51953887939453,
"learning_rate": 9.640596229723807e-06,
"loss": 14.9273,
"step": 5100
},
{
"epoch": 2.2381961423526215,
"grad_norm": 53.27817153930664,
"learning_rate": 9.636212187637002e-06,
"loss": 14.6411,
"step": 5150
},
{
"epoch": 2.2599293670198315,
"grad_norm": 43.47584533691406,
"learning_rate": 9.631828145550198e-06,
"loss": 14.7459,
"step": 5200
},
{
"epoch": 2.2816625916870414,
"grad_norm": 37.26194381713867,
"learning_rate": 9.627444103463393e-06,
"loss": 14.9103,
"step": 5250
},
{
"epoch": 2.3033958163542514,
"grad_norm": 38.729373931884766,
"learning_rate": 9.62306006137659e-06,
"loss": 14.5397,
"step": 5300
},
{
"epoch": 2.3251290410214613,
"grad_norm": 33.352901458740234,
"learning_rate": 9.618676019289785e-06,
"loss": 14.9044,
"step": 5350
},
{
"epoch": 2.3468622656886717,
"grad_norm": 47.63081359863281,
"learning_rate": 9.614291977202983e-06,
"loss": 14.5471,
"step": 5400
},
{
"epoch": 2.3685954903558817,
"grad_norm": 125.63179016113281,
"learning_rate": 9.609907935116178e-06,
"loss": 14.8267,
"step": 5450
},
{
"epoch": 2.3903287150230916,
"grad_norm": 49.1522216796875,
"learning_rate": 9.605523893029373e-06,
"loss": 14.7433,
"step": 5500
},
{
"epoch": 2.4120619396903016,
"grad_norm": 43.327091217041016,
"learning_rate": 9.60113985094257e-06,
"loss": 14.908,
"step": 5550
},
{
"epoch": 2.4337951643575115,
"grad_norm": 30.76859474182129,
"learning_rate": 9.596755808855766e-06,
"loss": 14.7341,
"step": 5600
},
{
"epoch": 2.4555283890247215,
"grad_norm": 42.72526550292969,
"learning_rate": 9.592371766768961e-06,
"loss": 15.0516,
"step": 5650
},
{
"epoch": 2.4772616136919314,
"grad_norm": 58.9193000793457,
"learning_rate": 9.587987724682157e-06,
"loss": 14.8745,
"step": 5700
},
{
"epoch": 2.4989948383591414,
"grad_norm": 53.90520095825195,
"learning_rate": 9.583603682595354e-06,
"loss": 14.8219,
"step": 5750
},
{
"epoch": 2.5207280630263513,
"grad_norm": 61.370452880859375,
"learning_rate": 9.579219640508549e-06,
"loss": 14.6925,
"step": 5800
},
{
"epoch": 2.5424612876935617,
"grad_norm": 47.58317184448242,
"learning_rate": 9.574835598421746e-06,
"loss": 14.8654,
"step": 5850
},
{
"epoch": 2.5641945123607717,
"grad_norm": 51.90703582763672,
"learning_rate": 9.570451556334942e-06,
"loss": 14.8152,
"step": 5900
},
{
"epoch": 2.5859277370279816,
"grad_norm": 42.62101364135742,
"learning_rate": 9.566067514248137e-06,
"loss": 15.0139,
"step": 5950
},
{
"epoch": 2.6076609616951916,
"grad_norm": 58.69119644165039,
"learning_rate": 9.561683472161334e-06,
"loss": 14.8962,
"step": 6000
},
{
"epoch": 2.6293941863624015,
"grad_norm": 58.02621841430664,
"learning_rate": 9.55729943007453e-06,
"loss": 15.0422,
"step": 6050
},
{
"epoch": 2.6511274110296115,
"grad_norm": 45.985225677490234,
"learning_rate": 9.552915387987726e-06,
"loss": 14.8361,
"step": 6100
},
{
"epoch": 2.6728606356968214,
"grad_norm": 58.74437713623047,
"learning_rate": 9.548531345900922e-06,
"loss": 14.9231,
"step": 6150
},
{
"epoch": 2.6945938603640314,
"grad_norm": 54.490962982177734,
"learning_rate": 9.544147303814117e-06,
"loss": 14.7987,
"step": 6200
},
{
"epoch": 2.7163270850312413,
"grad_norm": 44.067710876464844,
"learning_rate": 9.539763261727313e-06,
"loss": 14.8596,
"step": 6250
},
{
"epoch": 2.7380603096984517,
"grad_norm": 56.0435676574707,
"learning_rate": 9.53537921964051e-06,
"loss": 14.7602,
"step": 6300
},
{
"epoch": 2.7597935343656617,
"grad_norm": 68.08670806884766,
"learning_rate": 9.530995177553705e-06,
"loss": 14.78,
"step": 6350
},
{
"epoch": 2.7815267590328716,
"grad_norm": 55.21569061279297,
"learning_rate": 9.5266111354669e-06,
"loss": 14.908,
"step": 6400
},
{
"epoch": 2.8032599837000816,
"grad_norm": 48.79258346557617,
"learning_rate": 9.522227093380098e-06,
"loss": 14.8478,
"step": 6450
},
{
"epoch": 2.8249932083672915,
"grad_norm": 61.38957214355469,
"learning_rate": 9.517843051293293e-06,
"loss": 14.9716,
"step": 6500
},
{
"epoch": 2.8467264330345015,
"grad_norm": 53.00950622558594,
"learning_rate": 9.51345900920649e-06,
"loss": 14.7508,
"step": 6550
},
{
"epoch": 2.8684596577017114,
"grad_norm": 43.13687515258789,
"learning_rate": 9.509074967119685e-06,
"loss": 14.8466,
"step": 6600
},
{
"epoch": 2.8901928823689214,
"grad_norm": 54.39565658569336,
"learning_rate": 9.50469092503288e-06,
"loss": 14.8025,
"step": 6650
},
{
"epoch": 2.9119261070361313,
"grad_norm": 47.21046447753906,
"learning_rate": 9.500306882946078e-06,
"loss": 14.8096,
"step": 6700
},
{
"epoch": 2.9336593317033417,
"grad_norm": 51.13401412963867,
"learning_rate": 9.495922840859273e-06,
"loss": 14.9482,
"step": 6750
},
{
"epoch": 2.955392556370551,
"grad_norm": 47.619503021240234,
"learning_rate": 9.491538798772469e-06,
"loss": 14.9805,
"step": 6800
},
{
"epoch": 2.9771257810377616,
"grad_norm": 40.876808166503906,
"learning_rate": 9.487154756685664e-06,
"loss": 14.9048,
"step": 6850
},
{
"epoch": 2.9988590057049715,
"grad_norm": 52.037567138671875,
"learning_rate": 9.482770714598861e-06,
"loss": 14.9026,
"step": 6900
},
{
"epoch": 3.0204292311871774,
"grad_norm": 41.4274787902832,
"learning_rate": 9.478386672512057e-06,
"loss": 14.1481,
"step": 6950
},
{
"epoch": 3.0421624558543874,
"grad_norm": 51.49604797363281,
"learning_rate": 9.474002630425254e-06,
"loss": 14.1383,
"step": 7000
},
{
"epoch": 3.0638956805215973,
"grad_norm": 53.052005767822266,
"learning_rate": 9.469618588338449e-06,
"loss": 14.2031,
"step": 7050
},
{
"epoch": 3.0856289051888073,
"grad_norm": 29.748735427856445,
"learning_rate": 9.465234546251644e-06,
"loss": 14.2273,
"step": 7100
},
{
"epoch": 3.1073621298560172,
"grad_norm": 41.33003616333008,
"learning_rate": 9.460850504164841e-06,
"loss": 14.2338,
"step": 7150
},
{
"epoch": 3.129095354523227,
"grad_norm": 39.78664779663086,
"learning_rate": 9.456466462078037e-06,
"loss": 14.1309,
"step": 7200
},
{
"epoch": 3.1508285791904376,
"grad_norm": 35.99256896972656,
"learning_rate": 9.452082419991234e-06,
"loss": 14.2261,
"step": 7250
},
{
"epoch": 3.1725618038576475,
"grad_norm": 40.001197814941406,
"learning_rate": 9.44769837790443e-06,
"loss": 14.1562,
"step": 7300
},
{
"epoch": 3.1942950285248575,
"grad_norm": 48.2380256652832,
"learning_rate": 9.443314335817625e-06,
"loss": 14.0423,
"step": 7350
},
{
"epoch": 3.2160282531920674,
"grad_norm": 44.41048812866211,
"learning_rate": 9.43893029373082e-06,
"loss": 14.0881,
"step": 7400
},
{
"epoch": 3.2377614778592774,
"grad_norm": 29.655723571777344,
"learning_rate": 9.434546251644017e-06,
"loss": 14.2163,
"step": 7450
},
{
"epoch": 3.2594947025264873,
"grad_norm": 40.9448356628418,
"learning_rate": 9.430162209557213e-06,
"loss": 14.2057,
"step": 7500
},
{
"epoch": 3.2812279271936973,
"grad_norm": 50.84467315673828,
"learning_rate": 9.425778167470408e-06,
"loss": 14.2386,
"step": 7550
},
{
"epoch": 3.302961151860907,
"grad_norm": 46.98764419555664,
"learning_rate": 9.421394125383605e-06,
"loss": 14.2241,
"step": 7600
},
{
"epoch": 3.324694376528117,
"grad_norm": 46.322166442871094,
"learning_rate": 9.4170100832968e-06,
"loss": 14.1992,
"step": 7650
},
{
"epoch": 3.3464276011953276,
"grad_norm": 45.123985290527344,
"learning_rate": 9.412626041209997e-06,
"loss": 14.2106,
"step": 7700
},
{
"epoch": 3.3681608258625375,
"grad_norm": 50.508583068847656,
"learning_rate": 9.408241999123193e-06,
"loss": 14.0418,
"step": 7750
},
{
"epoch": 3.3898940505297475,
"grad_norm": 42.03702926635742,
"learning_rate": 9.403857957036388e-06,
"loss": 14.298,
"step": 7800
},
{
"epoch": 3.4116272751969574,
"grad_norm": 49.16743469238281,
"learning_rate": 9.399473914949584e-06,
"loss": 14.2654,
"step": 7850
},
{
"epoch": 3.4333604998641674,
"grad_norm": 47.92793273925781,
"learning_rate": 9.39508987286278e-06,
"loss": 14.3558,
"step": 7900
},
{
"epoch": 3.4550937245313773,
"grad_norm": 37.79042434692383,
"learning_rate": 9.390705830775976e-06,
"loss": 14.2301,
"step": 7950
},
{
"epoch": 3.4768269491985873,
"grad_norm": 41.851051330566406,
"learning_rate": 9.386321788689171e-06,
"loss": 14.2025,
"step": 8000
},
{
"epoch": 3.498560173865797,
"grad_norm": 58.03968811035156,
"learning_rate": 9.381937746602369e-06,
"loss": 14.3552,
"step": 8050
},
{
"epoch": 3.520293398533007,
"grad_norm": 34.9418830871582,
"learning_rate": 9.377553704515564e-06,
"loss": 14.3336,
"step": 8100
},
{
"epoch": 3.5420266232002175,
"grad_norm": 41.05316162109375,
"learning_rate": 9.373169662428761e-06,
"loss": 14.2819,
"step": 8150
},
{
"epoch": 3.563759847867427,
"grad_norm": 45.65940856933594,
"learning_rate": 9.368785620341956e-06,
"loss": 14.2237,
"step": 8200
},
{
"epoch": 3.5854930725346374,
"grad_norm": 37.271751403808594,
"learning_rate": 9.364401578255152e-06,
"loss": 14.0594,
"step": 8250
},
{
"epoch": 3.6072262972018474,
"grad_norm": 41.95325469970703,
"learning_rate": 9.360017536168347e-06,
"loss": 14.1691,
"step": 8300
},
{
"epoch": 3.6289595218690573,
"grad_norm": 91.28557586669922,
"learning_rate": 9.355633494081544e-06,
"loss": 14.1016,
"step": 8350
},
{
"epoch": 3.6506927465362673,
"grad_norm": 56.508670806884766,
"learning_rate": 9.35124945199474e-06,
"loss": 14.2686,
"step": 8400
},
{
"epoch": 3.6724259712034772,
"grad_norm": 69.8916015625,
"learning_rate": 9.346865409907935e-06,
"loss": 14.3426,
"step": 8450
},
{
"epoch": 3.694159195870687,
"grad_norm": 51.414215087890625,
"learning_rate": 9.342481367821132e-06,
"loss": 14.3489,
"step": 8500
},
{
"epoch": 3.715892420537897,
"grad_norm": 51.891639709472656,
"learning_rate": 9.338097325734328e-06,
"loss": 14.2614,
"step": 8550
},
{
"epoch": 3.7376256452051075,
"grad_norm": 55.276275634765625,
"learning_rate": 9.333713283647525e-06,
"loss": 14.1835,
"step": 8600
},
{
"epoch": 3.759358869872317,
"grad_norm": 38.33846664428711,
"learning_rate": 9.32932924156072e-06,
"loss": 14.5973,
"step": 8650
},
{
"epoch": 3.7810920945395274,
"grad_norm": 46.052513122558594,
"learning_rate": 9.324945199473915e-06,
"loss": 14.3903,
"step": 8700
},
{
"epoch": 3.8028253192067374,
"grad_norm": 35.333560943603516,
"learning_rate": 9.32056115738711e-06,
"loss": 14.3516,
"step": 8750
},
{
"epoch": 3.8245585438739473,
"grad_norm": 52.49406051635742,
"learning_rate": 9.316177115300308e-06,
"loss": 14.371,
"step": 8800
},
{
"epoch": 3.8462917685411573,
"grad_norm": 48.86211013793945,
"learning_rate": 9.311793073213503e-06,
"loss": 14.2632,
"step": 8850
},
{
"epoch": 3.8680249932083672,
"grad_norm": 48.95231628417969,
"learning_rate": 9.3074090311267e-06,
"loss": 14.1847,
"step": 8900
},
{
"epoch": 3.889758217875577,
"grad_norm": 37.594696044921875,
"learning_rate": 9.303024989039896e-06,
"loss": 14.3286,
"step": 8950
},
{
"epoch": 3.911491442542787,
"grad_norm": 47.66452407836914,
"learning_rate": 9.298640946953091e-06,
"loss": 14.3358,
"step": 9000
},
{
"epoch": 3.9332246672099975,
"grad_norm": 40.61109161376953,
"learning_rate": 9.294256904866288e-06,
"loss": 14.4558,
"step": 9050
},
{
"epoch": 3.954957891877207,
"grad_norm": 34.296836853027344,
"learning_rate": 9.289872862779484e-06,
"loss": 14.3049,
"step": 9100
},
{
"epoch": 3.9766911165444174,
"grad_norm": 43.91560363769531,
"learning_rate": 9.285488820692679e-06,
"loss": 14.3231,
"step": 9150
},
{
"epoch": 3.9984243412116274,
"grad_norm": 37.4168586730957,
"learning_rate": 9.281104778605874e-06,
"loss": 14.2418,
"step": 9200
},
{
"epoch": 4.019994566693833,
"grad_norm": 34.46104049682617,
"learning_rate": 9.276720736519071e-06,
"loss": 13.6035,
"step": 9250
},
{
"epoch": 4.041727791361043,
"grad_norm": 38.560298919677734,
"learning_rate": 9.272336694432267e-06,
"loss": 13.6357,
"step": 9300
},
{
"epoch": 4.063461016028254,
"grad_norm": 35.547752380371094,
"learning_rate": 9.267952652345464e-06,
"loss": 13.798,
"step": 9350
},
{
"epoch": 4.085194240695463,
"grad_norm": 36.332298278808594,
"learning_rate": 9.26356861025866e-06,
"loss": 13.6992,
"step": 9400
},
{
"epoch": 4.1069274653626735,
"grad_norm": 40.322715759277344,
"learning_rate": 9.259184568171855e-06,
"loss": 13.7247,
"step": 9450
},
{
"epoch": 4.128660690029883,
"grad_norm": 27.05885887145996,
"learning_rate": 9.254800526085052e-06,
"loss": 13.801,
"step": 9500
},
{
"epoch": 4.150393914697093,
"grad_norm": 38.66703414916992,
"learning_rate": 9.250416483998247e-06,
"loss": 13.7814,
"step": 9550
},
{
"epoch": 4.172127139364303,
"grad_norm": 37.8776969909668,
"learning_rate": 9.246032441911443e-06,
"loss": 13.7403,
"step": 9600
},
{
"epoch": 4.193860364031513,
"grad_norm": 36.977317810058594,
"learning_rate": 9.241648399824638e-06,
"loss": 13.8831,
"step": 9650
},
{
"epoch": 4.215593588698723,
"grad_norm": 43.09788131713867,
"learning_rate": 9.237264357737835e-06,
"loss": 13.7397,
"step": 9700
},
{
"epoch": 4.237326813365933,
"grad_norm": 33.9801139831543,
"learning_rate": 9.23288031565103e-06,
"loss": 13.8114,
"step": 9750
},
{
"epoch": 4.259060038033143,
"grad_norm": 32.65711212158203,
"learning_rate": 9.228496273564227e-06,
"loss": 13.7081,
"step": 9800
},
{
"epoch": 4.280793262700353,
"grad_norm": 71.91608428955078,
"learning_rate": 9.224112231477423e-06,
"loss": 13.7953,
"step": 9850
},
{
"epoch": 4.3025264873675635,
"grad_norm": 28.490583419799805,
"learning_rate": 9.219728189390618e-06,
"loss": 13.7322,
"step": 9900
},
{
"epoch": 4.324259712034773,
"grad_norm": 49.53886795043945,
"learning_rate": 9.215344147303815e-06,
"loss": 13.8046,
"step": 9950
},
{
"epoch": 4.345992936701983,
"grad_norm": 40.42410659790039,
"learning_rate": 9.21096010521701e-06,
"loss": 13.735,
"step": 10000
},
{
"epoch": 4.345992936701983,
"eval_cer": 0.07540147877501142,
"eval_loss": 2.2973904609680176,
"eval_runtime": 396.0823,
"eval_samples_per_second": 13.649,
"eval_steps_per_second": 3.413,
"eval_wer": 0.22808918197519235,
"step": 10000
},
{
"epoch": 4.367726161369193,
"grad_norm": 32.75251388549805,
"learning_rate": 9.206576063130208e-06,
"loss": 13.8058,
"step": 10050
},
{
"epoch": 4.389459386036403,
"grad_norm": 35.6936149597168,
"learning_rate": 9.202192021043403e-06,
"loss": 13.7489,
"step": 10100
},
{
"epoch": 4.411192610703613,
"grad_norm": 39.304100036621094,
"learning_rate": 9.197807978956599e-06,
"loss": 13.8124,
"step": 10150
},
{
"epoch": 4.432925835370823,
"grad_norm": 39.43230438232422,
"learning_rate": 9.193423936869794e-06,
"loss": 13.9531,
"step": 10200
},
{
"epoch": 4.454659060038033,
"grad_norm": 37.89631652832031,
"learning_rate": 9.189039894782991e-06,
"loss": 13.7975,
"step": 10250
},
{
"epoch": 4.476392284705243,
"grad_norm": 36.32379150390625,
"learning_rate": 9.184655852696186e-06,
"loss": 13.9208,
"step": 10300
},
{
"epoch": 4.4981255093724535,
"grad_norm": 39.24440002441406,
"learning_rate": 9.180271810609382e-06,
"loss": 13.88,
"step": 10350
},
{
"epoch": 4.519858734039663,
"grad_norm": 32.791900634765625,
"learning_rate": 9.175887768522579e-06,
"loss": 13.8944,
"step": 10400
},
{
"epoch": 4.541591958706873,
"grad_norm": 33.695865631103516,
"learning_rate": 9.171503726435774e-06,
"loss": 13.8637,
"step": 10450
},
{
"epoch": 4.563325183374083,
"grad_norm": 33.961647033691406,
"learning_rate": 9.167119684348971e-06,
"loss": 13.7873,
"step": 10500
},
{
"epoch": 4.585058408041293,
"grad_norm": 101.09957122802734,
"learning_rate": 9.162735642262167e-06,
"loss": 13.848,
"step": 10550
},
{
"epoch": 4.606791632708503,
"grad_norm": 42.666595458984375,
"learning_rate": 9.158351600175362e-06,
"loss": 13.9049,
"step": 10600
},
{
"epoch": 4.628524857375713,
"grad_norm": 44.05756378173828,
"learning_rate": 9.153967558088558e-06,
"loss": 13.9251,
"step": 10650
},
{
"epoch": 4.650258082042923,
"grad_norm": 44.468162536621094,
"learning_rate": 9.149583516001755e-06,
"loss": 13.7975,
"step": 10700
},
{
"epoch": 4.671991306710133,
"grad_norm": 35.0707893371582,
"learning_rate": 9.14519947391495e-06,
"loss": 13.9261,
"step": 10750
},
{
"epoch": 4.6937245313773435,
"grad_norm": 40.042274475097656,
"learning_rate": 9.140815431828145e-06,
"loss": 13.8594,
"step": 10800
},
{
"epoch": 4.715457756044553,
"grad_norm": 29.434371948242188,
"learning_rate": 9.136431389741342e-06,
"loss": 13.8838,
"step": 10850
},
{
"epoch": 4.737190980711763,
"grad_norm": 37.34782409667969,
"learning_rate": 9.132047347654538e-06,
"loss": 13.7635,
"step": 10900
},
{
"epoch": 4.758924205378973,
"grad_norm": 221.30532836914062,
"learning_rate": 9.127663305567735e-06,
"loss": 13.76,
"step": 10950
},
{
"epoch": 4.780657430046183,
"grad_norm": 35.67972946166992,
"learning_rate": 9.12327926348093e-06,
"loss": 13.8596,
"step": 11000
},
{
"epoch": 4.802390654713393,
"grad_norm": 52.167972564697266,
"learning_rate": 9.118895221394126e-06,
"loss": 13.8706,
"step": 11050
},
{
"epoch": 4.824123879380603,
"grad_norm": 47.18834686279297,
"learning_rate": 9.114511179307321e-06,
"loss": 13.8763,
"step": 11100
},
{
"epoch": 4.845857104047813,
"grad_norm": 42.99448776245117,
"learning_rate": 9.110127137220518e-06,
"loss": 13.9622,
"step": 11150
},
{
"epoch": 4.867590328715023,
"grad_norm": 55.08070755004883,
"learning_rate": 9.105743095133715e-06,
"loss": 13.8151,
"step": 11200
},
{
"epoch": 4.8893235533822335,
"grad_norm": 32.91100311279297,
"learning_rate": 9.10135905304691e-06,
"loss": 13.8262,
"step": 11250
},
{
"epoch": 4.911056778049443,
"grad_norm": 34.78753662109375,
"learning_rate": 9.096975010960106e-06,
"loss": 13.928,
"step": 11300
},
{
"epoch": 4.932790002716653,
"grad_norm": 40.1533203125,
"learning_rate": 9.092590968873301e-06,
"loss": 13.8169,
"step": 11350
},
{
"epoch": 4.954523227383863,
"grad_norm": 41.97115707397461,
"learning_rate": 9.088206926786498e-06,
"loss": 13.8194,
"step": 11400
},
{
"epoch": 4.976256452051073,
"grad_norm": 30.406110763549805,
"learning_rate": 9.083822884699694e-06,
"loss": 13.9941,
"step": 11450
},
{
"epoch": 4.997989676718283,
"grad_norm": 44.02429962158203,
"learning_rate": 9.07943884261289e-06,
"loss": 13.9272,
"step": 11500
},
{
"epoch": 5.019559902200489,
"grad_norm": 42.15421676635742,
"learning_rate": 9.075054800526086e-06,
"loss": 13.386,
"step": 11550
},
{
"epoch": 5.041293126867699,
"grad_norm": 28.951597213745117,
"learning_rate": 9.070670758439282e-06,
"loss": 13.4325,
"step": 11600
},
{
"epoch": 5.0630263515349085,
"grad_norm": 59.380374908447266,
"learning_rate": 9.066286716352479e-06,
"loss": 13.409,
"step": 11650
},
{
"epoch": 5.084759576202119,
"grad_norm": 56.05976867675781,
"learning_rate": 9.061902674265674e-06,
"loss": 13.5315,
"step": 11700
},
{
"epoch": 5.106492800869329,
"grad_norm": 36.069583892822266,
"learning_rate": 9.05751863217887e-06,
"loss": 13.3532,
"step": 11750
},
{
"epoch": 5.128226025536539,
"grad_norm": 39.289833068847656,
"learning_rate": 9.053134590092065e-06,
"loss": 13.3686,
"step": 11800
},
{
"epoch": 5.149959250203749,
"grad_norm": 37.062931060791016,
"learning_rate": 9.048750548005262e-06,
"loss": 13.4362,
"step": 11850
},
{
"epoch": 5.171692474870959,
"grad_norm": 56.210750579833984,
"learning_rate": 9.044366505918457e-06,
"loss": 13.4053,
"step": 11900
},
{
"epoch": 5.193425699538169,
"grad_norm": 26.70563507080078,
"learning_rate": 9.039982463831653e-06,
"loss": 13.3191,
"step": 11950
},
{
"epoch": 5.215158924205379,
"grad_norm": 39.95426940917969,
"learning_rate": 9.03559842174485e-06,
"loss": 13.3161,
"step": 12000
},
{
"epoch": 5.236892148872589,
"grad_norm": 29.761014938354492,
"learning_rate": 9.031214379658045e-06,
"loss": 13.5076,
"step": 12050
},
{
"epoch": 5.2586253735397985,
"grad_norm": 32.707786560058594,
"learning_rate": 9.026830337571242e-06,
"loss": 13.4865,
"step": 12100
},
{
"epoch": 5.280358598207009,
"grad_norm": 30.934314727783203,
"learning_rate": 9.022446295484438e-06,
"loss": 13.3348,
"step": 12150
},
{
"epoch": 5.302091822874219,
"grad_norm": 38.97114562988281,
"learning_rate": 9.018062253397633e-06,
"loss": 13.4458,
"step": 12200
},
{
"epoch": 5.323825047541429,
"grad_norm": 52.749507904052734,
"learning_rate": 9.013678211310829e-06,
"loss": 13.5492,
"step": 12250
},
{
"epoch": 5.345558272208639,
"grad_norm": 37.54782485961914,
"learning_rate": 9.009294169224026e-06,
"loss": 13.3087,
"step": 12300
},
{
"epoch": 5.367291496875849,
"grad_norm": 40.16310501098633,
"learning_rate": 9.004910127137221e-06,
"loss": 13.4691,
"step": 12350
},
{
"epoch": 5.389024721543059,
"grad_norm": 58.52961349487305,
"learning_rate": 9.000526085050418e-06,
"loss": 13.5101,
"step": 12400
},
{
"epoch": 5.410757946210269,
"grad_norm": 31.150737762451172,
"learning_rate": 8.996142042963613e-06,
"loss": 13.3933,
"step": 12450
},
{
"epoch": 5.432491170877479,
"grad_norm": 31.380889892578125,
"learning_rate": 8.991758000876809e-06,
"loss": 13.5171,
"step": 12500
},
{
"epoch": 5.4542243955446885,
"grad_norm": 45.46767044067383,
"learning_rate": 8.987373958790006e-06,
"loss": 13.4807,
"step": 12550
},
{
"epoch": 5.475957620211899,
"grad_norm": 42.970542907714844,
"learning_rate": 8.982989916703201e-06,
"loss": 13.4787,
"step": 12600
},
{
"epoch": 5.497690844879109,
"grad_norm": 51.134578704833984,
"learning_rate": 8.978605874616397e-06,
"loss": 13.4804,
"step": 12650
},
{
"epoch": 5.519424069546319,
"grad_norm": 37.2877082824707,
"learning_rate": 8.974221832529592e-06,
"loss": 13.5335,
"step": 12700
},
{
"epoch": 5.541157294213529,
"grad_norm": 41.327144622802734,
"learning_rate": 8.96983779044279e-06,
"loss": 13.5202,
"step": 12750
},
{
"epoch": 5.562890518880739,
"grad_norm": 41.8232421875,
"learning_rate": 8.965453748355985e-06,
"loss": 13.4273,
"step": 12800
},
{
"epoch": 5.584623743547949,
"grad_norm": 34.09703063964844,
"learning_rate": 8.961069706269182e-06,
"loss": 13.5441,
"step": 12850
},
{
"epoch": 5.606356968215159,
"grad_norm": 34.51966094970703,
"learning_rate": 8.956685664182377e-06,
"loss": 13.5746,
"step": 12900
},
{
"epoch": 5.628090192882369,
"grad_norm": 44.580360412597656,
"learning_rate": 8.952301622095572e-06,
"loss": 13.5962,
"step": 12950
},
{
"epoch": 5.6498234175495785,
"grad_norm": 49.46404266357422,
"learning_rate": 8.94791758000877e-06,
"loss": 13.5788,
"step": 13000
},
{
"epoch": 5.671556642216789,
"grad_norm": 33.00864028930664,
"learning_rate": 8.943533537921965e-06,
"loss": 13.4571,
"step": 13050
},
{
"epoch": 5.693289866883999,
"grad_norm": 31.570575714111328,
"learning_rate": 8.93914949583516e-06,
"loss": 13.512,
"step": 13100
},
{
"epoch": 5.715023091551209,
"grad_norm": 31.16398048400879,
"learning_rate": 8.934765453748356e-06,
"loss": 13.5388,
"step": 13150
},
{
"epoch": 5.736756316218419,
"grad_norm": 31.840078353881836,
"learning_rate": 8.930381411661553e-06,
"loss": 13.5697,
"step": 13200
},
{
"epoch": 5.758489540885629,
"grad_norm": 41.02314376831055,
"learning_rate": 8.925997369574748e-06,
"loss": 13.5952,
"step": 13250
},
{
"epoch": 5.780222765552839,
"grad_norm": 38.16290283203125,
"learning_rate": 8.921613327487945e-06,
"loss": 13.6544,
"step": 13300
},
{
"epoch": 5.801955990220049,
"grad_norm": 34.18564224243164,
"learning_rate": 8.91722928540114e-06,
"loss": 13.6995,
"step": 13350
},
{
"epoch": 5.823689214887259,
"grad_norm": 27.264175415039062,
"learning_rate": 8.912845243314336e-06,
"loss": 13.5539,
"step": 13400
},
{
"epoch": 5.8454224395544685,
"grad_norm": 39.271888732910156,
"learning_rate": 8.908461201227533e-06,
"loss": 13.6108,
"step": 13450
},
{
"epoch": 5.867155664221679,
"grad_norm": 25.51955223083496,
"learning_rate": 8.904077159140728e-06,
"loss": 13.5095,
"step": 13500
},
{
"epoch": 5.888888888888889,
"grad_norm": 37.255367279052734,
"learning_rate": 8.899693117053926e-06,
"loss": 13.5916,
"step": 13550
},
{
"epoch": 5.910622113556099,
"grad_norm": 36.901702880859375,
"learning_rate": 8.89530907496712e-06,
"loss": 13.5283,
"step": 13600
},
{
"epoch": 5.932355338223309,
"grad_norm": 36.892799377441406,
"learning_rate": 8.890925032880316e-06,
"loss": 13.6032,
"step": 13650
},
{
"epoch": 5.954088562890519,
"grad_norm": 36.8080940246582,
"learning_rate": 8.886540990793512e-06,
"loss": 13.7407,
"step": 13700
},
{
"epoch": 5.975821787557729,
"grad_norm": 41.102657318115234,
"learning_rate": 8.882156948706709e-06,
"loss": 13.5335,
"step": 13750
},
{
"epoch": 5.997555012224939,
"grad_norm": 31.643165588378906,
"learning_rate": 8.877772906619904e-06,
"loss": 13.6137,
"step": 13800
},
{
"epoch": 6.0191252377071445,
"grad_norm": 35.148006439208984,
"learning_rate": 8.8733888645331e-06,
"loss": 13.1715,
"step": 13850
},
{
"epoch": 6.040858462374355,
"grad_norm": 34.13616943359375,
"learning_rate": 8.869004822446297e-06,
"loss": 13.1644,
"step": 13900
},
{
"epoch": 6.062591687041564,
"grad_norm": 43.90581512451172,
"learning_rate": 8.864620780359492e-06,
"loss": 13.0996,
"step": 13950
},
{
"epoch": 6.084324911708775,
"grad_norm": 36.725379943847656,
"learning_rate": 8.860236738272689e-06,
"loss": 13.1415,
"step": 14000
},
{
"epoch": 6.106058136375985,
"grad_norm": 32.847129821777344,
"learning_rate": 8.855852696185884e-06,
"loss": 13.1429,
"step": 14050
},
{
"epoch": 6.127791361043195,
"grad_norm": 27.32487678527832,
"learning_rate": 8.85146865409908e-06,
"loss": 13.2287,
"step": 14100
},
{
"epoch": 6.149524585710405,
"grad_norm": 38.18893051147461,
"learning_rate": 8.847084612012275e-06,
"loss": 13.1909,
"step": 14150
},
{
"epoch": 6.171257810377615,
"grad_norm": 29.566404342651367,
"learning_rate": 8.842700569925472e-06,
"loss": 13.1921,
"step": 14200
},
{
"epoch": 6.192991035044825,
"grad_norm": 27.988677978515625,
"learning_rate": 8.838316527838668e-06,
"loss": 13.1361,
"step": 14250
},
{
"epoch": 6.2147242597120345,
"grad_norm": 36.260833740234375,
"learning_rate": 8.833932485751863e-06,
"loss": 13.1959,
"step": 14300
},
{
"epoch": 6.236457484379245,
"grad_norm": 37.56959533691406,
"learning_rate": 8.82954844366506e-06,
"loss": 13.0934,
"step": 14350
},
{
"epoch": 6.258190709046454,
"grad_norm": 37.16026306152344,
"learning_rate": 8.825164401578256e-06,
"loss": 13.3774,
"step": 14400
},
{
"epoch": 6.279923933713665,
"grad_norm": 51.96893310546875,
"learning_rate": 8.820780359491453e-06,
"loss": 13.2566,
"step": 14450
},
{
"epoch": 6.301657158380875,
"grad_norm": 31.46018409729004,
"learning_rate": 8.816396317404648e-06,
"loss": 13.242,
"step": 14500
},
{
"epoch": 6.323390383048085,
"grad_norm": 40.38423538208008,
"learning_rate": 8.812012275317843e-06,
"loss": 13.2649,
"step": 14550
},
{
"epoch": 6.345123607715295,
"grad_norm": 33.40611267089844,
"learning_rate": 8.807628233231039e-06,
"loss": 13.2143,
"step": 14600
},
{
"epoch": 6.3668568323825045,
"grad_norm": 32.6546745300293,
"learning_rate": 8.803244191144236e-06,
"loss": 13.2293,
"step": 14650
},
{
"epoch": 6.388590057049715,
"grad_norm": 30.99147605895996,
"learning_rate": 8.798860149057433e-06,
"loss": 13.1595,
"step": 14700
},
{
"epoch": 6.4103232817169244,
"grad_norm": 49.923667907714844,
"learning_rate": 8.794476106970627e-06,
"loss": 13.1624,
"step": 14750
},
{
"epoch": 6.432056506384135,
"grad_norm": 27.526941299438477,
"learning_rate": 8.790092064883824e-06,
"loss": 13.245,
"step": 14800
},
{
"epoch": 6.453789731051344,
"grad_norm": 41.09890365600586,
"learning_rate": 8.785708022797019e-06,
"loss": 13.1945,
"step": 14850
},
{
"epoch": 6.475522955718555,
"grad_norm": 36.0584831237793,
"learning_rate": 8.781323980710216e-06,
"loss": 13.207,
"step": 14900
},
{
"epoch": 6.497256180385765,
"grad_norm": 30.85024642944336,
"learning_rate": 8.776939938623412e-06,
"loss": 13.2022,
"step": 14950
},
{
"epoch": 6.518989405052975,
"grad_norm": 34.92485427856445,
"learning_rate": 8.772555896536607e-06,
"loss": 13.3696,
"step": 15000
},
{
"epoch": 6.540722629720185,
"grad_norm": 33.38056564331055,
"learning_rate": 8.768171854449802e-06,
"loss": 13.2597,
"step": 15050
},
{
"epoch": 6.5624558543873945,
"grad_norm": 29.834815979003906,
"learning_rate": 8.763787812363e-06,
"loss": 13.2281,
"step": 15100
},
{
"epoch": 6.584189079054605,
"grad_norm": 30.077539443969727,
"learning_rate": 8.759403770276197e-06,
"loss": 13.2554,
"step": 15150
},
{
"epoch": 6.605922303721814,
"grad_norm": 43.224586486816406,
"learning_rate": 8.755019728189392e-06,
"loss": 13.1997,
"step": 15200
},
{
"epoch": 6.627655528389025,
"grad_norm": 48.51641082763672,
"learning_rate": 8.750635686102587e-06,
"loss": 13.2243,
"step": 15250
},
{
"epoch": 6.649388753056234,
"grad_norm": 29.839174270629883,
"learning_rate": 8.746251644015783e-06,
"loss": 13.3737,
"step": 15300
},
{
"epoch": 6.671121977723445,
"grad_norm": 44.47172546386719,
"learning_rate": 8.74186760192898e-06,
"loss": 13.1659,
"step": 15350
},
{
"epoch": 6.692855202390655,
"grad_norm": 27.568334579467773,
"learning_rate": 8.737483559842175e-06,
"loss": 13.2285,
"step": 15400
},
{
"epoch": 6.714588427057865,
"grad_norm": 31.159231185913086,
"learning_rate": 8.73309951775537e-06,
"loss": 13.3102,
"step": 15450
},
{
"epoch": 6.736321651725075,
"grad_norm": 30.869430541992188,
"learning_rate": 8.728715475668566e-06,
"loss": 13.2286,
"step": 15500
},
{
"epoch": 6.7580548763922845,
"grad_norm": 51.48735427856445,
"learning_rate": 8.724331433581763e-06,
"loss": 13.3347,
"step": 15550
},
{
"epoch": 6.779788101059495,
"grad_norm": 35.06986999511719,
"learning_rate": 8.71994739149496e-06,
"loss": 13.1973,
"step": 15600
},
{
"epoch": 6.801521325726704,
"grad_norm": 27.670289993286133,
"learning_rate": 8.715563349408155e-06,
"loss": 13.162,
"step": 15650
},
{
"epoch": 6.823254550393915,
"grad_norm": 34.26895523071289,
"learning_rate": 8.711179307321351e-06,
"loss": 13.3011,
"step": 15700
},
{
"epoch": 6.844987775061124,
"grad_norm": 41.056182861328125,
"learning_rate": 8.706795265234546e-06,
"loss": 13.2564,
"step": 15750
},
{
"epoch": 6.866720999728335,
"grad_norm": 47.23772048950195,
"learning_rate": 8.702411223147743e-06,
"loss": 13.3127,
"step": 15800
},
{
"epoch": 6.888454224395545,
"grad_norm": 65.80028533935547,
"learning_rate": 8.698027181060939e-06,
"loss": 13.2797,
"step": 15850
},
{
"epoch": 6.910187449062755,
"grad_norm": 40.93989562988281,
"learning_rate": 8.693643138974134e-06,
"loss": 13.3707,
"step": 15900
},
{
"epoch": 6.931920673729965,
"grad_norm": 83.51680755615234,
"learning_rate": 8.689259096887331e-06,
"loss": 13.217,
"step": 15950
},
{
"epoch": 6.9536538983971745,
"grad_norm": 32.16157150268555,
"learning_rate": 8.684875054800527e-06,
"loss": 13.274,
"step": 16000
},
{
"epoch": 6.975387123064385,
"grad_norm": 31.57478904724121,
"learning_rate": 8.680491012713724e-06,
"loss": 13.2057,
"step": 16050
},
{
"epoch": 6.997120347731594,
"grad_norm": 37.837303161621094,
"learning_rate": 8.676106970626919e-06,
"loss": 13.262,
"step": 16100
},
{
"epoch": 7.0186905732138,
"grad_norm": 24.430326461791992,
"learning_rate": 8.671722928540114e-06,
"loss": 12.9144,
"step": 16150
},
{
"epoch": 7.040423797881011,
"grad_norm": 45.298194885253906,
"learning_rate": 8.66733888645331e-06,
"loss": 12.8617,
"step": 16200
},
{
"epoch": 7.06215702254822,
"grad_norm": 52.39512252807617,
"learning_rate": 8.662954844366507e-06,
"loss": 12.9514,
"step": 16250
},
{
"epoch": 7.083890247215431,
"grad_norm": 35.9492073059082,
"learning_rate": 8.658570802279702e-06,
"loss": 12.9577,
"step": 16300
},
{
"epoch": 7.105623471882641,
"grad_norm": 31.363454818725586,
"learning_rate": 8.6541867601929e-06,
"loss": 12.9849,
"step": 16350
},
{
"epoch": 7.1273566965498505,
"grad_norm": 24.993553161621094,
"learning_rate": 8.649802718106095e-06,
"loss": 12.9269,
"step": 16400
},
{
"epoch": 7.149089921217061,
"grad_norm": 28.327381134033203,
"learning_rate": 8.64541867601929e-06,
"loss": 12.941,
"step": 16450
},
{
"epoch": 7.17082314588427,
"grad_norm": 30.908496856689453,
"learning_rate": 8.641034633932487e-06,
"loss": 13.0525,
"step": 16500
},
{
"epoch": 7.192556370551481,
"grad_norm": 41.53740310668945,
"learning_rate": 8.636650591845683e-06,
"loss": 13.0038,
"step": 16550
},
{
"epoch": 7.21428959521869,
"grad_norm": 34.16611862182617,
"learning_rate": 8.632266549758878e-06,
"loss": 12.9893,
"step": 16600
},
{
"epoch": 7.236022819885901,
"grad_norm": 28.183107376098633,
"learning_rate": 8.627882507672073e-06,
"loss": 13.0103,
"step": 16650
},
{
"epoch": 7.25775604455311,
"grad_norm": 28.345674514770508,
"learning_rate": 8.62349846558527e-06,
"loss": 12.9886,
"step": 16700
},
{
"epoch": 7.279489269220321,
"grad_norm": 36.2637825012207,
"learning_rate": 8.619114423498466e-06,
"loss": 12.9905,
"step": 16750
},
{
"epoch": 7.301222493887531,
"grad_norm": 32.89162826538086,
"learning_rate": 8.614730381411663e-06,
"loss": 12.9033,
"step": 16800
},
{
"epoch": 7.3229557185547405,
"grad_norm": 31.151569366455078,
"learning_rate": 8.610346339324858e-06,
"loss": 13.026,
"step": 16850
},
{
"epoch": 7.344688943221951,
"grad_norm": 32.4716682434082,
"learning_rate": 8.605962297238054e-06,
"loss": 12.9932,
"step": 16900
},
{
"epoch": 7.36642216788916,
"grad_norm": 28.446046829223633,
"learning_rate": 8.60157825515125e-06,
"loss": 13.0201,
"step": 16950
},
{
"epoch": 7.388155392556371,
"grad_norm": 27.000221252441406,
"learning_rate": 8.597194213064446e-06,
"loss": 13.0463,
"step": 17000
},
{
"epoch": 7.40988861722358,
"grad_norm": 35.49698257446289,
"learning_rate": 8.592810170977642e-06,
"loss": 12.9461,
"step": 17050
},
{
"epoch": 7.431621841890791,
"grad_norm": 48.70148849487305,
"learning_rate": 8.588426128890837e-06,
"loss": 13.0921,
"step": 17100
},
{
"epoch": 7.453355066558,
"grad_norm": 28.99524688720703,
"learning_rate": 8.584042086804034e-06,
"loss": 12.9729,
"step": 17150
},
{
"epoch": 7.475088291225211,
"grad_norm": 28.51788902282715,
"learning_rate": 8.57965804471723e-06,
"loss": 13.0311,
"step": 17200
},
{
"epoch": 7.496821515892421,
"grad_norm": 48.5558967590332,
"learning_rate": 8.575274002630427e-06,
"loss": 13.0931,
"step": 17250
},
{
"epoch": 7.5185547405596305,
"grad_norm": 35.883365631103516,
"learning_rate": 8.570889960543622e-06,
"loss": 13.0601,
"step": 17300
},
{
"epoch": 7.540287965226841,
"grad_norm": 30.609474182128906,
"learning_rate": 8.566505918456817e-06,
"loss": 13.0277,
"step": 17350
},
{
"epoch": 7.56202118989405,
"grad_norm": 31.2172794342041,
"learning_rate": 8.562121876370014e-06,
"loss": 12.9501,
"step": 17400
},
{
"epoch": 7.583754414561261,
"grad_norm": 42.7708740234375,
"learning_rate": 8.55773783428321e-06,
"loss": 13.0667,
"step": 17450
},
{
"epoch": 7.60548763922847,
"grad_norm": 30.39897346496582,
"learning_rate": 8.553353792196407e-06,
"loss": 13.0591,
"step": 17500
},
{
"epoch": 7.627220863895681,
"grad_norm": 26.951528549194336,
"learning_rate": 8.548969750109602e-06,
"loss": 12.9949,
"step": 17550
},
{
"epoch": 7.64895408856289,
"grad_norm": 33.658206939697266,
"learning_rate": 8.544585708022798e-06,
"loss": 13.0532,
"step": 17600
},
{
"epoch": 7.670687313230101,
"grad_norm": 34.114768981933594,
"learning_rate": 8.540201665935993e-06,
"loss": 13.1035,
"step": 17650
},
{
"epoch": 7.692420537897311,
"grad_norm": 29.691999435424805,
"learning_rate": 8.53581762384919e-06,
"loss": 13.0645,
"step": 17700
},
{
"epoch": 7.7141537625645205,
"grad_norm": 39.269493103027344,
"learning_rate": 8.531433581762385e-06,
"loss": 13.112,
"step": 17750
},
{
"epoch": 7.735886987231731,
"grad_norm": 37.816837310791016,
"learning_rate": 8.527049539675581e-06,
"loss": 13.0634,
"step": 17800
},
{
"epoch": 7.75762021189894,
"grad_norm": 36.515132904052734,
"learning_rate": 8.522665497588778e-06,
"loss": 13.0395,
"step": 17850
},
{
"epoch": 7.779353436566151,
"grad_norm": 22.76226043701172,
"learning_rate": 8.518281455501973e-06,
"loss": 13.0559,
"step": 17900
},
{
"epoch": 7.80108666123336,
"grad_norm": 28.64872169494629,
"learning_rate": 8.51389741341517e-06,
"loss": 13.0638,
"step": 17950
},
{
"epoch": 7.822819885900571,
"grad_norm": 41.4809684753418,
"learning_rate": 8.509513371328366e-06,
"loss": 13.0299,
"step": 18000
},
{
"epoch": 7.84455311056778,
"grad_norm": 25.84028434753418,
"learning_rate": 8.505129329241561e-06,
"loss": 13.0003,
"step": 18050
},
{
"epoch": 7.8662863352349905,
"grad_norm": 36.24126434326172,
"learning_rate": 8.500745287154757e-06,
"loss": 13.0231,
"step": 18100
},
{
"epoch": 7.888019559902201,
"grad_norm": 19.62076187133789,
"learning_rate": 8.496361245067954e-06,
"loss": 13.006,
"step": 18150
},
{
"epoch": 7.9097527845694104,
"grad_norm": 28.422643661499023,
"learning_rate": 8.491977202981149e-06,
"loss": 12.9981,
"step": 18200
},
{
"epoch": 7.931486009236621,
"grad_norm": 36.77701187133789,
"learning_rate": 8.487593160894344e-06,
"loss": 13.1429,
"step": 18250
},
{
"epoch": 7.95321923390383,
"grad_norm": 36.51480484008789,
"learning_rate": 8.483209118807542e-06,
"loss": 12.9689,
"step": 18300
},
{
"epoch": 7.974952458571041,
"grad_norm": 30.303489685058594,
"learning_rate": 8.478825076720737e-06,
"loss": 13.0392,
"step": 18350
},
{
"epoch": 7.99668568323825,
"grad_norm": 41.148353576660156,
"learning_rate": 8.474441034633934e-06,
"loss": 13.0697,
"step": 18400
},
{
"epoch": 8.018255908720457,
"grad_norm": 30.144062042236328,
"learning_rate": 8.47005699254713e-06,
"loss": 12.7092,
"step": 18450
},
{
"epoch": 8.039989133387666,
"grad_norm": 33.70432662963867,
"learning_rate": 8.465672950460325e-06,
"loss": 12.8033,
"step": 18500
},
{
"epoch": 8.061722358054876,
"grad_norm": 25.66695785522461,
"learning_rate": 8.46128890837352e-06,
"loss": 12.7704,
"step": 18550
},
{
"epoch": 8.083455582722086,
"grad_norm": 38.33973693847656,
"learning_rate": 8.456904866286717e-06,
"loss": 12.8512,
"step": 18600
},
{
"epoch": 8.105188807389297,
"grad_norm": 25.794679641723633,
"learning_rate": 8.452520824199914e-06,
"loss": 12.7138,
"step": 18650
},
{
"epoch": 8.126922032056507,
"grad_norm": 39.2582893371582,
"learning_rate": 8.44813678211311e-06,
"loss": 12.7657,
"step": 18700
},
{
"epoch": 8.148655256723716,
"grad_norm": 30.886682510375977,
"learning_rate": 8.443752740026305e-06,
"loss": 12.7667,
"step": 18750
},
{
"epoch": 8.170388481390926,
"grad_norm": 39.30559158325195,
"learning_rate": 8.4393686979395e-06,
"loss": 12.7548,
"step": 18800
},
{
"epoch": 8.192121706058137,
"grad_norm": 22.945003509521484,
"learning_rate": 8.434984655852698e-06,
"loss": 12.8788,
"step": 18850
},
{
"epoch": 8.213854930725347,
"grad_norm": 30.998369216918945,
"learning_rate": 8.430600613765893e-06,
"loss": 12.8048,
"step": 18900
},
{
"epoch": 8.235588155392556,
"grad_norm": 29.44565773010254,
"learning_rate": 8.426216571679088e-06,
"loss": 12.7907,
"step": 18950
},
{
"epoch": 8.257321380059766,
"grad_norm": 29.368488311767578,
"learning_rate": 8.421832529592284e-06,
"loss": 12.8157,
"step": 19000
},
{
"epoch": 8.279054604726976,
"grad_norm": 28.4185791015625,
"learning_rate": 8.41744848750548e-06,
"loss": 12.8382,
"step": 19050
},
{
"epoch": 8.300787829394187,
"grad_norm": 45.91888427734375,
"learning_rate": 8.413064445418678e-06,
"loss": 12.9013,
"step": 19100
},
{
"epoch": 8.322521054061397,
"grad_norm": 36.90361022949219,
"learning_rate": 8.408680403331873e-06,
"loss": 12.8076,
"step": 19150
},
{
"epoch": 8.344254278728606,
"grad_norm": 54.692935943603516,
"learning_rate": 8.404296361245069e-06,
"loss": 12.8288,
"step": 19200
},
{
"epoch": 8.365987503395816,
"grad_norm": 27.947093963623047,
"learning_rate": 8.399912319158264e-06,
"loss": 12.8577,
"step": 19250
},
{
"epoch": 8.387720728063027,
"grad_norm": 28.992555618286133,
"learning_rate": 8.395528277071461e-06,
"loss": 12.882,
"step": 19300
},
{
"epoch": 8.409453952730237,
"grad_norm": 22.34044647216797,
"learning_rate": 8.391144234984656e-06,
"loss": 12.8171,
"step": 19350
},
{
"epoch": 8.431187177397446,
"grad_norm": 50.96314239501953,
"learning_rate": 8.386760192897852e-06,
"loss": 12.8761,
"step": 19400
},
{
"epoch": 8.452920402064656,
"grad_norm": null,
"learning_rate": 8.382376150811047e-06,
"loss": 12.8613,
"step": 19450
},
{
"epoch": 8.474653626731866,
"grad_norm": 25.97089195251465,
"learning_rate": 8.377992108724244e-06,
"loss": 12.8875,
"step": 19500
},
{
"epoch": 8.496386851399077,
"grad_norm": 30.094532012939453,
"learning_rate": 8.373608066637441e-06,
"loss": 12.8784,
"step": 19550
},
{
"epoch": 8.518120076066285,
"grad_norm": 37.806156158447266,
"learning_rate": 8.369224024550637e-06,
"loss": 12.8339,
"step": 19600
},
{
"epoch": 8.539853300733496,
"grad_norm": 38.92607498168945,
"learning_rate": 8.364839982463832e-06,
"loss": 12.8541,
"step": 19650
},
{
"epoch": 8.561586525400706,
"grad_norm": 31.54934310913086,
"learning_rate": 8.360455940377028e-06,
"loss": 12.8991,
"step": 19700
},
{
"epoch": 8.583319750067917,
"grad_norm": 37.04362869262695,
"learning_rate": 8.356071898290225e-06,
"loss": 12.9116,
"step": 19750
},
{
"epoch": 8.605052974735127,
"grad_norm": 38.93299865722656,
"learning_rate": 8.35168785620342e-06,
"loss": 12.8499,
"step": 19800
},
{
"epoch": 8.626786199402336,
"grad_norm": 28.214290618896484,
"learning_rate": 8.347303814116615e-06,
"loss": 12.8512,
"step": 19850
},
{
"epoch": 8.648519424069546,
"grad_norm": 27.576839447021484,
"learning_rate": 8.34291977202981e-06,
"loss": 12.8824,
"step": 19900
},
{
"epoch": 8.670252648736756,
"grad_norm": 25.321149826049805,
"learning_rate": 8.338535729943008e-06,
"loss": 12.842,
"step": 19950
},
{
"epoch": 8.691985873403967,
"grad_norm": 36.43674087524414,
"learning_rate": 8.334151687856205e-06,
"loss": 12.9156,
"step": 20000
},
{
"epoch": 8.691985873403967,
"eval_cer": 0.07732709565131522,
"eval_loss": 2.3227267265319824,
"eval_runtime": 401.1503,
"eval_samples_per_second": 13.476,
"eval_steps_per_second": 3.37,
"eval_wer": 0.23097817553776104,
"step": 20000
},
{
"epoch": 8.713719098071177,
"grad_norm": 30.650314331054688,
"learning_rate": 8.3297676457694e-06,
"loss": 12.9089,
"step": 20050
},
{
"epoch": 8.735452322738386,
"grad_norm": 27.448633193969727,
"learning_rate": 8.325383603682596e-06,
"loss": 12.8572,
"step": 20100
},
{
"epoch": 8.757185547405596,
"grad_norm": 25.665332794189453,
"learning_rate": 8.320999561595791e-06,
"loss": 12.8087,
"step": 20150
},
{
"epoch": 8.778918772072807,
"grad_norm": 43.74554443359375,
"learning_rate": 8.316615519508988e-06,
"loss": 12.915,
"step": 20200
},
{
"epoch": 8.800651996740017,
"grad_norm": 31.74461555480957,
"learning_rate": 8.312231477422184e-06,
"loss": 12.8676,
"step": 20250
},
{
"epoch": 8.822385221407226,
"grad_norm": 28.51342010498047,
"learning_rate": 8.30784743533538e-06,
"loss": 12.8645,
"step": 20300
},
{
"epoch": 8.844118446074436,
"grad_norm": 27.660497665405273,
"learning_rate": 8.303463393248576e-06,
"loss": 12.9217,
"step": 20350
},
{
"epoch": 8.865851670741646,
"grad_norm": 41.046485900878906,
"learning_rate": 8.299079351161771e-06,
"loss": 12.8472,
"step": 20400
},
{
"epoch": 8.887584895408857,
"grad_norm": 50.21107482910156,
"learning_rate": 8.294695309074969e-06,
"loss": 12.8141,
"step": 20450
},
{
"epoch": 8.909318120076065,
"grad_norm": 42.08512878417969,
"learning_rate": 8.290311266988164e-06,
"loss": 12.9162,
"step": 20500
},
{
"epoch": 8.931051344743276,
"grad_norm": 22.199024200439453,
"learning_rate": 8.28592722490136e-06,
"loss": 12.8649,
"step": 20550
},
{
"epoch": 8.952784569410486,
"grad_norm": 38.15290451049805,
"learning_rate": 8.281543182814555e-06,
"loss": 12.8547,
"step": 20600
},
{
"epoch": 8.974517794077697,
"grad_norm": 35.076698303222656,
"learning_rate": 8.277159140727752e-06,
"loss": 12.8954,
"step": 20650
},
{
"epoch": 8.996251018744907,
"grad_norm": 26.742168426513672,
"learning_rate": 8.272775098640947e-06,
"loss": 12.8845,
"step": 20700
},
{
"epoch": 9.017821244227113,
"grad_norm": 18.43798828125,
"learning_rate": 8.268391056554144e-06,
"loss": 12.6111,
"step": 20750
},
{
"epoch": 9.039554468894321,
"grad_norm": 22.483016967773438,
"learning_rate": 8.26400701446734e-06,
"loss": 12.6938,
"step": 20800
},
{
"epoch": 9.061287693561532,
"grad_norm": 22.414525985717773,
"learning_rate": 8.259622972380535e-06,
"loss": 12.6499,
"step": 20850
},
{
"epoch": 9.083020918228742,
"grad_norm": 33.88186264038086,
"learning_rate": 8.255238930293732e-06,
"loss": 12.5987,
"step": 20900
},
{
"epoch": 9.104754142895953,
"grad_norm": 34.6947021484375,
"learning_rate": 8.250854888206928e-06,
"loss": 12.6804,
"step": 20950
},
{
"epoch": 9.126487367563163,
"grad_norm": 22.22621726989746,
"learning_rate": 8.246470846120123e-06,
"loss": 12.7388,
"step": 21000
},
{
"epoch": 9.148220592230372,
"grad_norm": 30.4085693359375,
"learning_rate": 8.242086804033318e-06,
"loss": 12.7085,
"step": 21050
},
{
"epoch": 9.169953816897582,
"grad_norm": 131.27008056640625,
"learning_rate": 8.237702761946515e-06,
"loss": 12.7142,
"step": 21100
},
{
"epoch": 9.191687041564792,
"grad_norm": 28.05132293701172,
"learning_rate": 8.23331871985971e-06,
"loss": 12.698,
"step": 21150
},
{
"epoch": 9.213420266232003,
"grad_norm": 157.52548217773438,
"learning_rate": 8.228934677772908e-06,
"loss": 12.7275,
"step": 21200
},
{
"epoch": 9.235153490899211,
"grad_norm": 29.362707138061523,
"learning_rate": 8.224550635686103e-06,
"loss": 12.648,
"step": 21250
},
{
"epoch": 9.256886715566422,
"grad_norm": 27.221683502197266,
"learning_rate": 8.220166593599299e-06,
"loss": 12.7306,
"step": 21300
},
{
"epoch": 9.278619940233632,
"grad_norm": 18.6680850982666,
"learning_rate": 8.215782551512496e-06,
"loss": 12.6896,
"step": 21350
},
{
"epoch": 9.300353164900843,
"grad_norm": 35.81766128540039,
"learning_rate": 8.211398509425691e-06,
"loss": 12.6838,
"step": 21400
},
{
"epoch": 9.322086389568053,
"grad_norm": 24.64043426513672,
"learning_rate": 8.207014467338888e-06,
"loss": 12.7201,
"step": 21450
},
{
"epoch": 9.343819614235262,
"grad_norm": 41.39848327636719,
"learning_rate": 8.202630425252084e-06,
"loss": 12.7289,
"step": 21500
},
{
"epoch": 9.365552838902472,
"grad_norm": 23.982431411743164,
"learning_rate": 8.198246383165279e-06,
"loss": 12.7145,
"step": 21550
},
{
"epoch": 9.387286063569682,
"grad_norm": 25.513904571533203,
"learning_rate": 8.193862341078474e-06,
"loss": 12.6646,
"step": 21600
},
{
"epoch": 9.409019288236893,
"grad_norm": 28.16943359375,
"learning_rate": 8.189478298991671e-06,
"loss": 12.7157,
"step": 21650
},
{
"epoch": 9.430752512904101,
"grad_norm": 31.33350944519043,
"learning_rate": 8.185094256904867e-06,
"loss": 12.7245,
"step": 21700
},
{
"epoch": 9.452485737571312,
"grad_norm": 22.30205726623535,
"learning_rate": 8.180710214818062e-06,
"loss": 12.7082,
"step": 21750
},
{
"epoch": 9.474218962238522,
"grad_norm": 31.175230026245117,
"learning_rate": 8.17632617273126e-06,
"loss": 12.7716,
"step": 21800
},
{
"epoch": 9.495952186905733,
"grad_norm": 24.61014747619629,
"learning_rate": 8.171942130644455e-06,
"loss": 12.7153,
"step": 21850
},
{
"epoch": 9.517685411572941,
"grad_norm": 37.26193618774414,
"learning_rate": 8.167558088557652e-06,
"loss": 12.7623,
"step": 21900
},
{
"epoch": 9.539418636240152,
"grad_norm": 29.6248779296875,
"learning_rate": 8.163174046470847e-06,
"loss": 12.7862,
"step": 21950
},
{
"epoch": 9.561151860907362,
"grad_norm": 37.52980422973633,
"learning_rate": 8.158790004384042e-06,
"loss": 12.6912,
"step": 22000
},
{
"epoch": 9.582885085574572,
"grad_norm": 35.345035552978516,
"learning_rate": 8.154405962297238e-06,
"loss": 12.677,
"step": 22050
},
{
"epoch": 9.604618310241783,
"grad_norm": 32.45883560180664,
"learning_rate": 8.150021920210435e-06,
"loss": 12.6662,
"step": 22100
},
{
"epoch": 9.626351534908991,
"grad_norm": 46.35236358642578,
"learning_rate": 8.14563787812363e-06,
"loss": 12.7472,
"step": 22150
},
{
"epoch": 9.648084759576202,
"grad_norm": 26.202049255371094,
"learning_rate": 8.141253836036826e-06,
"loss": 12.7174,
"step": 22200
},
{
"epoch": 9.669817984243412,
"grad_norm": 27.350576400756836,
"learning_rate": 8.136869793950023e-06,
"loss": 12.6917,
"step": 22250
},
{
"epoch": 9.691551208910623,
"grad_norm": 32.96540451049805,
"learning_rate": 8.132485751863218e-06,
"loss": 12.7865,
"step": 22300
},
{
"epoch": 9.713284433577833,
"grad_norm": 33.34325408935547,
"learning_rate": 8.128101709776415e-06,
"loss": 12.8177,
"step": 22350
},
{
"epoch": 9.735017658245042,
"grad_norm": 24.0529727935791,
"learning_rate": 8.12371766768961e-06,
"loss": 12.7816,
"step": 22400
},
{
"epoch": 9.756750882912252,
"grad_norm": 31.504335403442383,
"learning_rate": 8.119333625602806e-06,
"loss": 12.7014,
"step": 22450
},
{
"epoch": 9.778484107579462,
"grad_norm": 37.35165023803711,
"learning_rate": 8.114949583516001e-06,
"loss": 12.6674,
"step": 22500
},
{
"epoch": 9.800217332246673,
"grad_norm": 22.923002243041992,
"learning_rate": 8.110565541429199e-06,
"loss": 12.7619,
"step": 22550
},
{
"epoch": 9.821950556913881,
"grad_norm": 29.871366500854492,
"learning_rate": 8.106181499342396e-06,
"loss": 12.7368,
"step": 22600
},
{
"epoch": 9.843683781581092,
"grad_norm": 40.105369567871094,
"learning_rate": 8.101797457255591e-06,
"loss": 12.6962,
"step": 22650
},
{
"epoch": 9.865417006248302,
"grad_norm": 25.92096710205078,
"learning_rate": 8.097413415168786e-06,
"loss": 12.686,
"step": 22700
},
{
"epoch": 9.887150230915513,
"grad_norm": 42.663368225097656,
"learning_rate": 8.093029373081982e-06,
"loss": 12.7663,
"step": 22750
},
{
"epoch": 9.908883455582721,
"grad_norm": 30.958925247192383,
"learning_rate": 8.088645330995179e-06,
"loss": 12.7574,
"step": 22800
},
{
"epoch": 9.930616680249932,
"grad_norm": 32.973209381103516,
"learning_rate": 8.084261288908374e-06,
"loss": 12.7376,
"step": 22850
},
{
"epoch": 9.952349904917142,
"grad_norm": 24.848648071289062,
"learning_rate": 8.07987724682157e-06,
"loss": 12.7988,
"step": 22900
},
{
"epoch": 9.974083129584352,
"grad_norm": 38.90625762939453,
"learning_rate": 8.075493204734765e-06,
"loss": 12.7848,
"step": 22950
},
{
"epoch": 9.995816354251563,
"grad_norm": 169.55076599121094,
"learning_rate": 8.071109162647962e-06,
"loss": 12.7591,
"step": 23000
},
{
"epoch": 10.017386579733769,
"grad_norm": 25.580976486206055,
"learning_rate": 8.06672512056116e-06,
"loss": 12.4225,
"step": 23050
},
{
"epoch": 10.039119804400977,
"grad_norm": 35.71001434326172,
"learning_rate": 8.062341078474355e-06,
"loss": 12.6339,
"step": 23100
},
{
"epoch": 10.060853029068188,
"grad_norm": 27.853500366210938,
"learning_rate": 8.05795703638755e-06,
"loss": 12.5467,
"step": 23150
},
{
"epoch": 10.082586253735398,
"grad_norm": 25.689022064208984,
"learning_rate": 8.053572994300745e-06,
"loss": 12.6073,
"step": 23200
},
{
"epoch": 10.104319478402608,
"grad_norm": 19.449281692504883,
"learning_rate": 8.049188952213942e-06,
"loss": 12.65,
"step": 23250
},
{
"epoch": 10.126052703069817,
"grad_norm": 50.91756820678711,
"learning_rate": 8.044804910127138e-06,
"loss": 12.6245,
"step": 23300
},
{
"epoch": 10.147785927737027,
"grad_norm": 30.20039939880371,
"learning_rate": 8.040420868040333e-06,
"loss": 12.5309,
"step": 23350
},
{
"epoch": 10.169519152404238,
"grad_norm": 19.78704071044922,
"learning_rate": 8.036036825953529e-06,
"loss": 12.5593,
"step": 23400
},
{
"epoch": 10.191252377071448,
"grad_norm": 19.870885848999023,
"learning_rate": 8.031652783866726e-06,
"loss": 12.5285,
"step": 23450
},
{
"epoch": 10.212985601738659,
"grad_norm": 28.326723098754883,
"learning_rate": 8.027268741779923e-06,
"loss": 12.5193,
"step": 23500
},
{
"epoch": 10.234718826405867,
"grad_norm": 27.501436233520508,
"learning_rate": 8.022884699693118e-06,
"loss": 12.5663,
"step": 23550
},
{
"epoch": 10.256452051073078,
"grad_norm": 28.51038932800293,
"learning_rate": 8.018500657606314e-06,
"loss": 12.6105,
"step": 23600
},
{
"epoch": 10.278185275740288,
"grad_norm": 38.11888885498047,
"learning_rate": 8.014116615519509e-06,
"loss": 12.6369,
"step": 23650
},
{
"epoch": 10.299918500407498,
"grad_norm": 56.63121032714844,
"learning_rate": 8.009732573432706e-06,
"loss": 12.5986,
"step": 23700
},
{
"epoch": 10.321651725074709,
"grad_norm": 30.95232582092285,
"learning_rate": 8.005348531345901e-06,
"loss": 12.6104,
"step": 23750
},
{
"epoch": 10.343384949741917,
"grad_norm": 46.855831146240234,
"learning_rate": 8.000964489259098e-06,
"loss": 12.6277,
"step": 23800
},
{
"epoch": 10.365118174409128,
"grad_norm": 38.9176139831543,
"learning_rate": 7.996580447172292e-06,
"loss": 12.5793,
"step": 23850
},
{
"epoch": 10.386851399076338,
"grad_norm": 20.209339141845703,
"learning_rate": 7.99219640508549e-06,
"loss": 12.5781,
"step": 23900
},
{
"epoch": 10.408584623743549,
"grad_norm": 34.40525817871094,
"learning_rate": 7.987812362998686e-06,
"loss": 12.6018,
"step": 23950
},
{
"epoch": 10.430317848410757,
"grad_norm": 44.757041931152344,
"learning_rate": 7.983428320911882e-06,
"loss": 12.6543,
"step": 24000
},
{
"epoch": 10.452051073077968,
"grad_norm": 40.83699035644531,
"learning_rate": 7.979044278825077e-06,
"loss": 12.6135,
"step": 24050
},
{
"epoch": 10.473784297745178,
"grad_norm": 31.089038848876953,
"learning_rate": 7.974660236738272e-06,
"loss": 12.6269,
"step": 24100
},
{
"epoch": 10.495517522412388,
"grad_norm": 33.82300567626953,
"learning_rate": 7.97027619465147e-06,
"loss": 12.622,
"step": 24150
},
{
"epoch": 10.517250747079597,
"grad_norm": 25.88127899169922,
"learning_rate": 7.965892152564665e-06,
"loss": 12.6332,
"step": 24200
},
{
"epoch": 10.538983971746807,
"grad_norm": 29.95918083190918,
"learning_rate": 7.961508110477862e-06,
"loss": 12.6166,
"step": 24250
},
{
"epoch": 10.560717196414018,
"grad_norm": 34.399444580078125,
"learning_rate": 7.957124068391057e-06,
"loss": 12.5997,
"step": 24300
},
{
"epoch": 10.582450421081228,
"grad_norm": 26.007383346557617,
"learning_rate": 7.952740026304253e-06,
"loss": 12.5829,
"step": 24350
},
{
"epoch": 10.604183645748439,
"grad_norm": 14.864594459533691,
"learning_rate": 7.94835598421745e-06,
"loss": 12.6532,
"step": 24400
},
{
"epoch": 10.625916870415647,
"grad_norm": 31.178630828857422,
"learning_rate": 7.943971942130645e-06,
"loss": 12.5909,
"step": 24450
},
{
"epoch": 10.647650095082858,
"grad_norm": 31.065549850463867,
"learning_rate": 7.93958790004384e-06,
"loss": 12.5674,
"step": 24500
},
{
"epoch": 10.669383319750068,
"grad_norm": 28.21125030517578,
"learning_rate": 7.935203857957036e-06,
"loss": 12.6476,
"step": 24550
},
{
"epoch": 10.691116544417278,
"grad_norm": 31.474586486816406,
"learning_rate": 7.930819815870233e-06,
"loss": 12.5938,
"step": 24600
},
{
"epoch": 10.712849769084489,
"grad_norm": 26.097501754760742,
"learning_rate": 7.926435773783428e-06,
"loss": 12.6483,
"step": 24650
},
{
"epoch": 10.734582993751697,
"grad_norm": 40.45956039428711,
"learning_rate": 7.922051731696626e-06,
"loss": 12.6591,
"step": 24700
},
{
"epoch": 10.756316218418908,
"grad_norm": 23.737592697143555,
"learning_rate": 7.917667689609821e-06,
"loss": 12.5698,
"step": 24750
},
{
"epoch": 10.778049443086118,
"grad_norm": 32.13654708862305,
"learning_rate": 7.913283647523016e-06,
"loss": 12.5617,
"step": 24800
},
{
"epoch": 10.799782667753329,
"grad_norm": 28.451892852783203,
"learning_rate": 7.908899605436213e-06,
"loss": 12.6304,
"step": 24850
},
{
"epoch": 10.821515892420537,
"grad_norm": 37.13362121582031,
"learning_rate": 7.904515563349409e-06,
"loss": 12.6649,
"step": 24900
},
{
"epoch": 10.843249117087748,
"grad_norm": 45.161277770996094,
"learning_rate": 7.900131521262606e-06,
"loss": 12.6335,
"step": 24950
},
{
"epoch": 10.864982341754958,
"grad_norm": 25.36030387878418,
"learning_rate": 7.8957474791758e-06,
"loss": 12.7371,
"step": 25000
},
{
"epoch": 10.886715566422168,
"grad_norm": 38.44227981567383,
"learning_rate": 7.891363437088997e-06,
"loss": 12.6187,
"step": 25050
},
{
"epoch": 10.908448791089377,
"grad_norm": 46.692874908447266,
"learning_rate": 7.886979395002192e-06,
"loss": 12.6517,
"step": 25100
},
{
"epoch": 10.930182015756587,
"grad_norm": 28.845399856567383,
"learning_rate": 7.882595352915389e-06,
"loss": 12.5677,
"step": 25150
},
{
"epoch": 10.951915240423798,
"grad_norm": 31.64191436767578,
"learning_rate": 7.878211310828585e-06,
"loss": 12.6347,
"step": 25200
},
{
"epoch": 10.973648465091008,
"grad_norm": 32.57988357543945,
"learning_rate": 7.87382726874178e-06,
"loss": 12.5652,
"step": 25250
},
{
"epoch": 10.995381689758219,
"grad_norm": 28.151342391967773,
"learning_rate": 7.869443226654977e-06,
"loss": 12.5981,
"step": 25300
},
{
"epoch": 11.016951915240424,
"grad_norm": 29.2868595123291,
"learning_rate": 7.865059184568172e-06,
"loss": 12.4366,
"step": 25350
},
{
"epoch": 11.038685139907633,
"grad_norm": 31.722579956054688,
"learning_rate": 7.86067514248137e-06,
"loss": 12.4879,
"step": 25400
},
{
"epoch": 11.060418364574844,
"grad_norm": 29.232097625732422,
"learning_rate": 7.856291100394565e-06,
"loss": 12.4597,
"step": 25450
},
{
"epoch": 11.082151589242054,
"grad_norm": 18.49676513671875,
"learning_rate": 7.85190705830776e-06,
"loss": 12.4631,
"step": 25500
},
{
"epoch": 11.103884813909264,
"grad_norm": 27.89682388305664,
"learning_rate": 7.847523016220956e-06,
"loss": 12.4507,
"step": 25550
},
{
"epoch": 11.125618038576473,
"grad_norm": 30.45709800720215,
"learning_rate": 7.843138974134153e-06,
"loss": 12.5008,
"step": 25600
},
{
"epoch": 11.147351263243683,
"grad_norm": 62.570823669433594,
"learning_rate": 7.838754932047348e-06,
"loss": 12.5107,
"step": 25650
},
{
"epoch": 11.169084487910894,
"grad_norm": 24.397315979003906,
"learning_rate": 7.834370889960543e-06,
"loss": 12.5059,
"step": 25700
},
{
"epoch": 11.190817712578104,
"grad_norm": 18.074167251586914,
"learning_rate": 7.82998684787374e-06,
"loss": 12.5071,
"step": 25750
},
{
"epoch": 11.212550937245314,
"grad_norm": 20.450908660888672,
"learning_rate": 7.825602805786936e-06,
"loss": 12.5048,
"step": 25800
},
{
"epoch": 11.234284161912523,
"grad_norm": 19.00213623046875,
"learning_rate": 7.821218763700133e-06,
"loss": 12.4887,
"step": 25850
},
{
"epoch": 11.256017386579733,
"grad_norm": 23.276472091674805,
"learning_rate": 7.816834721613328e-06,
"loss": 12.5311,
"step": 25900
},
{
"epoch": 11.277750611246944,
"grad_norm": 33.67416763305664,
"learning_rate": 7.812450679526524e-06,
"loss": 12.5503,
"step": 25950
},
{
"epoch": 11.299483835914154,
"grad_norm": 17.561626434326172,
"learning_rate": 7.80806663743972e-06,
"loss": 12.4831,
"step": 26000
},
{
"epoch": 11.321217060581363,
"grad_norm": 24.35294532775879,
"learning_rate": 7.803682595352916e-06,
"loss": 12.4869,
"step": 26050
},
{
"epoch": 11.342950285248573,
"grad_norm": 16.80247688293457,
"learning_rate": 7.799298553266113e-06,
"loss": 12.5581,
"step": 26100
},
{
"epoch": 11.364683509915784,
"grad_norm": 22.540014266967773,
"learning_rate": 7.794914511179307e-06,
"loss": 12.5552,
"step": 26150
},
{
"epoch": 11.386416734582994,
"grad_norm": 23.270639419555664,
"learning_rate": 7.790530469092504e-06,
"loss": 12.5005,
"step": 26200
},
{
"epoch": 11.408149959250204,
"grad_norm": 27.789560317993164,
"learning_rate": 7.7861464270057e-06,
"loss": 12.5405,
"step": 26250
},
{
"epoch": 11.429883183917413,
"grad_norm": 24.1334285736084,
"learning_rate": 7.781762384918897e-06,
"loss": 12.5056,
"step": 26300
},
{
"epoch": 11.451616408584623,
"grad_norm": 35.342288970947266,
"learning_rate": 7.777378342832092e-06,
"loss": 12.501,
"step": 26350
},
{
"epoch": 11.473349633251834,
"grad_norm": 27.646997451782227,
"learning_rate": 7.772994300745287e-06,
"loss": 12.4571,
"step": 26400
},
{
"epoch": 11.495082857919044,
"grad_norm": 43.06098937988281,
"learning_rate": 7.768610258658483e-06,
"loss": 12.5856,
"step": 26450
},
{
"epoch": 11.516816082586253,
"grad_norm": 21.487150192260742,
"learning_rate": 7.76422621657168e-06,
"loss": 12.4849,
"step": 26500
},
{
"epoch": 11.538549307253463,
"grad_norm": 21.75229835510254,
"learning_rate": 7.759842174484877e-06,
"loss": 12.5192,
"step": 26550
},
{
"epoch": 11.560282531920674,
"grad_norm": 23.02396011352539,
"learning_rate": 7.755458132398072e-06,
"loss": 12.5011,
"step": 26600
},
{
"epoch": 11.582015756587884,
"grad_norm": 21.738445281982422,
"learning_rate": 7.751074090311268e-06,
"loss": 12.5525,
"step": 26650
},
{
"epoch": 11.603748981255094,
"grad_norm": 38.93478775024414,
"learning_rate": 7.746690048224463e-06,
"loss": 12.4925,
"step": 26700
},
{
"epoch": 11.625482205922303,
"grad_norm": 30.070697784423828,
"learning_rate": 7.74230600613766e-06,
"loss": 12.5598,
"step": 26750
},
{
"epoch": 11.647215430589513,
"grad_norm": 44.55253982543945,
"learning_rate": 7.737921964050856e-06,
"loss": 12.4896,
"step": 26800
},
{
"epoch": 11.668948655256724,
"grad_norm": 23.052288055419922,
"learning_rate": 7.733537921964051e-06,
"loss": 12.5198,
"step": 26850
},
{
"epoch": 11.690681879923934,
"grad_norm": 24.383729934692383,
"learning_rate": 7.729153879877246e-06,
"loss": 12.5321,
"step": 26900
},
{
"epoch": 11.712415104591145,
"grad_norm": 23.777788162231445,
"learning_rate": 7.724769837790443e-06,
"loss": 12.5403,
"step": 26950
},
{
"epoch": 11.734148329258353,
"grad_norm": 22.8085994720459,
"learning_rate": 7.72038579570364e-06,
"loss": 12.5297,
"step": 27000
},
{
"epoch": 11.755881553925564,
"grad_norm": 28.690683364868164,
"learning_rate": 7.716001753616836e-06,
"loss": 12.5626,
"step": 27050
},
{
"epoch": 11.777614778592774,
"grad_norm": 23.2988338470459,
"learning_rate": 7.711617711530031e-06,
"loss": 12.4646,
"step": 27100
},
{
"epoch": 11.799348003259984,
"grad_norm": 24.85117530822754,
"learning_rate": 7.707233669443227e-06,
"loss": 12.4886,
"step": 27150
},
{
"epoch": 11.821081227927193,
"grad_norm": 34.84917449951172,
"learning_rate": 7.702849627356424e-06,
"loss": 12.578,
"step": 27200
},
{
"epoch": 11.842814452594403,
"grad_norm": 27.57342529296875,
"learning_rate": 7.698465585269619e-06,
"loss": 12.5423,
"step": 27250
},
{
"epoch": 11.864547677261614,
"grad_norm": 21.665023803710938,
"learning_rate": 7.694081543182815e-06,
"loss": 12.4848,
"step": 27300
},
{
"epoch": 11.886280901928824,
"grad_norm": 20.787555694580078,
"learning_rate": 7.68969750109601e-06,
"loss": 12.4976,
"step": 27350
},
{
"epoch": 11.908014126596033,
"grad_norm": 42.406837463378906,
"learning_rate": 7.685313459009207e-06,
"loss": 12.5549,
"step": 27400
},
{
"epoch": 11.929747351263243,
"grad_norm": 23.60106658935547,
"learning_rate": 7.680929416922404e-06,
"loss": 12.5492,
"step": 27450
},
{
"epoch": 11.951480575930454,
"grad_norm": 21.591079711914062,
"learning_rate": 7.6765453748356e-06,
"loss": 12.5018,
"step": 27500
},
{
"epoch": 11.973213800597664,
"grad_norm": 32.685333251953125,
"learning_rate": 7.672161332748795e-06,
"loss": 12.5378,
"step": 27550
},
{
"epoch": 11.994947025264874,
"grad_norm": 26.88076400756836,
"learning_rate": 7.66777729066199e-06,
"loss": 12.5529,
"step": 27600
},
{
"epoch": 12.01651725074708,
"grad_norm": 19.660898208618164,
"learning_rate": 7.663393248575187e-06,
"loss": 12.3944,
"step": 27650
},
{
"epoch": 12.038250475414289,
"grad_norm": 36.72605514526367,
"learning_rate": 7.659009206488383e-06,
"loss": 12.359,
"step": 27700
},
{
"epoch": 12.0599837000815,
"grad_norm": 27.864477157592773,
"learning_rate": 7.65462516440158e-06,
"loss": 12.3951,
"step": 27750
},
{
"epoch": 12.08171692474871,
"grad_norm": 34.72395324707031,
"learning_rate": 7.650241122314775e-06,
"loss": 12.4259,
"step": 27800
},
{
"epoch": 12.10345014941592,
"grad_norm": 20.68131446838379,
"learning_rate": 7.64585708022797e-06,
"loss": 12.4737,
"step": 27850
},
{
"epoch": 12.125183374083129,
"grad_norm": 27.369903564453125,
"learning_rate": 7.641473038141168e-06,
"loss": 12.4838,
"step": 27900
},
{
"epoch": 12.14691659875034,
"grad_norm": 14.568199157714844,
"learning_rate": 7.637088996054363e-06,
"loss": 12.3812,
"step": 27950
},
{
"epoch": 12.16864982341755,
"grad_norm": 20.099998474121094,
"learning_rate": 7.632704953967558e-06,
"loss": 12.4168,
"step": 28000
},
{
"epoch": 12.19038304808476,
"grad_norm": 21.41561508178711,
"learning_rate": 7.628320911880755e-06,
"loss": 12.3799,
"step": 28050
},
{
"epoch": 12.21211627275197,
"grad_norm": 23.49574851989746,
"learning_rate": 7.623936869793951e-06,
"loss": 12.4527,
"step": 28100
},
{
"epoch": 12.233849497419179,
"grad_norm": 30.164730072021484,
"learning_rate": 7.619552827707146e-06,
"loss": 12.4353,
"step": 28150
},
{
"epoch": 12.25558272208639,
"grad_norm": 32.27763748168945,
"learning_rate": 7.6151687856203425e-06,
"loss": 12.4808,
"step": 28200
},
{
"epoch": 12.2773159467536,
"grad_norm": 36.46564483642578,
"learning_rate": 7.610784743533538e-06,
"loss": 12.3987,
"step": 28250
},
{
"epoch": 12.29904917142081,
"grad_norm": 19.888980865478516,
"learning_rate": 7.606400701446734e-06,
"loss": 12.3889,
"step": 28300
},
{
"epoch": 12.320782396088019,
"grad_norm": 20.877737045288086,
"learning_rate": 7.602016659359931e-06,
"loss": 12.4598,
"step": 28350
},
{
"epoch": 12.34251562075523,
"grad_norm": 20.208404541015625,
"learning_rate": 7.5976326172731266e-06,
"loss": 12.3682,
"step": 28400
},
{
"epoch": 12.36424884542244,
"grad_norm": 48.63652801513672,
"learning_rate": 7.593248575186323e-06,
"loss": 12.3874,
"step": 28450
},
{
"epoch": 12.38598207008965,
"grad_norm": 23.263282775878906,
"learning_rate": 7.588864533099518e-06,
"loss": 12.4161,
"step": 28500
},
{
"epoch": 12.40771529475686,
"grad_norm": 23.76000213623047,
"learning_rate": 7.5844804910127144e-06,
"loss": 12.4247,
"step": 28550
},
{
"epoch": 12.429448519424069,
"grad_norm": 62.45661544799805,
"learning_rate": 7.58009644892591e-06,
"loss": 12.4226,
"step": 28600
},
{
"epoch": 12.45118174409128,
"grad_norm": 33.05659484863281,
"learning_rate": 7.575712406839106e-06,
"loss": 12.493,
"step": 28650
},
{
"epoch": 12.47291496875849,
"grad_norm": 23.853660583496094,
"learning_rate": 7.5713283647523014e-06,
"loss": 12.4388,
"step": 28700
},
{
"epoch": 12.4946481934257,
"grad_norm": 30.970672607421875,
"learning_rate": 7.5669443226654985e-06,
"loss": 12.4721,
"step": 28750
},
{
"epoch": 12.516381418092909,
"grad_norm": 20.660356521606445,
"learning_rate": 7.562560280578695e-06,
"loss": 12.4463,
"step": 28800
},
{
"epoch": 12.538114642760119,
"grad_norm": 27.25446319580078,
"learning_rate": 7.55817623849189e-06,
"loss": 12.4359,
"step": 28850
},
{
"epoch": 12.55984786742733,
"grad_norm": 19.96375274658203,
"learning_rate": 7.553792196405086e-06,
"loss": 12.4274,
"step": 28900
},
{
"epoch": 12.58158109209454,
"grad_norm": 25.133895874023438,
"learning_rate": 7.549408154318282e-06,
"loss": 12.4288,
"step": 28950
},
{
"epoch": 12.60331431676175,
"grad_norm": 55.64627456665039,
"learning_rate": 7.545024112231478e-06,
"loss": 12.4451,
"step": 29000
},
{
"epoch": 12.625047541428959,
"grad_norm": 70.62721252441406,
"learning_rate": 7.540640070144673e-06,
"loss": 12.451,
"step": 29050
},
{
"epoch": 12.64678076609617,
"grad_norm": 22.789186477661133,
"learning_rate": 7.5362560280578705e-06,
"loss": 12.43,
"step": 29100
},
{
"epoch": 12.66851399076338,
"grad_norm": 25.138248443603516,
"learning_rate": 7.531871985971065e-06,
"loss": 12.4,
"step": 29150
},
{
"epoch": 12.69024721543059,
"grad_norm": 18.74398422241211,
"learning_rate": 7.527487943884262e-06,
"loss": 12.4338,
"step": 29200
},
{
"epoch": 12.711980440097799,
"grad_norm": 28.796159744262695,
"learning_rate": 7.523103901797458e-06,
"loss": 12.473,
"step": 29250
},
{
"epoch": 12.733713664765009,
"grad_norm": 28.044872283935547,
"learning_rate": 7.518719859710654e-06,
"loss": 12.4155,
"step": 29300
},
{
"epoch": 12.75544688943222,
"grad_norm": 21.100650787353516,
"learning_rate": 7.51433581762385e-06,
"loss": 12.4329,
"step": 29350
},
{
"epoch": 12.77718011409943,
"grad_norm": 24.12652015686035,
"learning_rate": 7.509951775537045e-06,
"loss": 12.4504,
"step": 29400
},
{
"epoch": 12.79891333876664,
"grad_norm": 18.889480590820312,
"learning_rate": 7.5055677334502416e-06,
"loss": 12.3981,
"step": 29450
},
{
"epoch": 12.820646563433849,
"grad_norm": 20.395387649536133,
"learning_rate": 7.501183691363437e-06,
"loss": 12.4834,
"step": 29500
},
{
"epoch": 12.84237978810106,
"grad_norm": 34.01985168457031,
"learning_rate": 7.496799649276634e-06,
"loss": 12.4485,
"step": 29550
},
{
"epoch": 12.86411301276827,
"grad_norm": 36.57313537597656,
"learning_rate": 7.49241560718983e-06,
"loss": 12.5063,
"step": 29600
},
{
"epoch": 12.88584623743548,
"grad_norm": 21.946285247802734,
"learning_rate": 7.488031565103026e-06,
"loss": 12.501,
"step": 29650
},
{
"epoch": 12.907579462102689,
"grad_norm": 26.948814392089844,
"learning_rate": 7.483647523016222e-06,
"loss": 12.5122,
"step": 29700
},
{
"epoch": 12.929312686769899,
"grad_norm": 31.4482364654541,
"learning_rate": 7.479263480929417e-06,
"loss": 12.4543,
"step": 29750
},
{
"epoch": 12.95104591143711,
"grad_norm": 35.19594192504883,
"learning_rate": 7.4748794388426135e-06,
"loss": 12.4657,
"step": 29800
},
{
"epoch": 12.97277913610432,
"grad_norm": 23.498001098632812,
"learning_rate": 7.470495396755809e-06,
"loss": 12.4462,
"step": 29850
},
{
"epoch": 12.99451236077153,
"grad_norm": 48.50201416015625,
"learning_rate": 7.466111354669006e-06,
"loss": 12.4134,
"step": 29900
},
{
"epoch": 13.016082586253736,
"grad_norm": 25.189435958862305,
"learning_rate": 7.461727312582201e-06,
"loss": 12.3134,
"step": 29950
},
{
"epoch": 13.037815810920945,
"grad_norm": 21.985063552856445,
"learning_rate": 7.457343270495398e-06,
"loss": 12.3299,
"step": 30000
},
{
"epoch": 13.037815810920945,
"eval_cer": 0.0770617061459272,
"eval_loss": 2.334705352783203,
"eval_runtime": 399.4375,
"eval_samples_per_second": 13.534,
"eval_steps_per_second": 3.385,
"eval_wer": 0.23019312293923694,
"step": 30000
},
{
"epoch": 13.059549035588155,
"grad_norm": 15.290221214294434,
"learning_rate": 7.452959228408594e-06,
"loss": 12.3108,
"step": 30050
},
{
"epoch": 13.081282260255366,
"grad_norm": 26.75568389892578,
"learning_rate": 7.448575186321789e-06,
"loss": 12.3347,
"step": 30100
},
{
"epoch": 13.103015484922576,
"grad_norm": 28.02945327758789,
"learning_rate": 7.4441911442349854e-06,
"loss": 12.3172,
"step": 30150
},
{
"epoch": 13.124748709589785,
"grad_norm": 17.39537811279297,
"learning_rate": 7.439807102148181e-06,
"loss": 12.3004,
"step": 30200
},
{
"epoch": 13.146481934256995,
"grad_norm": 21.168519973754883,
"learning_rate": 7.435423060061377e-06,
"loss": 12.3882,
"step": 30250
},
{
"epoch": 13.168215158924205,
"grad_norm": 24.02804946899414,
"learning_rate": 7.4310390179745725e-06,
"loss": 12.3512,
"step": 30300
},
{
"epoch": 13.189948383591416,
"grad_norm": 25.33257484436035,
"learning_rate": 7.4266549758877695e-06,
"loss": 12.3121,
"step": 30350
},
{
"epoch": 13.211681608258626,
"grad_norm": 20.40574073791504,
"learning_rate": 7.422270933800965e-06,
"loss": 12.3737,
"step": 30400
},
{
"epoch": 13.233414832925835,
"grad_norm": 25.527008056640625,
"learning_rate": 7.417886891714161e-06,
"loss": 12.3575,
"step": 30450
},
{
"epoch": 13.255148057593045,
"grad_norm": 23.7490291595459,
"learning_rate": 7.413502849627357e-06,
"loss": 12.3835,
"step": 30500
},
{
"epoch": 13.276881282260256,
"grad_norm": 23.39885139465332,
"learning_rate": 7.409118807540553e-06,
"loss": 12.3056,
"step": 30550
},
{
"epoch": 13.298614506927466,
"grad_norm": 21.89725112915039,
"learning_rate": 7.404734765453749e-06,
"loss": 12.3262,
"step": 30600
},
{
"epoch": 13.320347731594675,
"grad_norm": 20.838117599487305,
"learning_rate": 7.400350723366944e-06,
"loss": 12.3879,
"step": 30650
},
{
"epoch": 13.342080956261885,
"grad_norm": 17.388107299804688,
"learning_rate": 7.3959666812801415e-06,
"loss": 12.3611,
"step": 30700
},
{
"epoch": 13.363814180929095,
"grad_norm": 19.158178329467773,
"learning_rate": 7.391582639193337e-06,
"loss": 12.3782,
"step": 30750
},
{
"epoch": 13.385547405596306,
"grad_norm": 28.794353485107422,
"learning_rate": 7.387198597106533e-06,
"loss": 12.4156,
"step": 30800
},
{
"epoch": 13.407280630263516,
"grad_norm": 24.086498260498047,
"learning_rate": 7.3828145550197285e-06,
"loss": 12.3903,
"step": 30850
},
{
"epoch": 13.429013854930725,
"grad_norm": 24.688875198364258,
"learning_rate": 7.378430512932925e-06,
"loss": 12.3829,
"step": 30900
},
{
"epoch": 13.450747079597935,
"grad_norm": 54.69606018066406,
"learning_rate": 7.374046470846121e-06,
"loss": 12.3398,
"step": 30950
},
{
"epoch": 13.472480304265146,
"grad_norm": 25.10434341430664,
"learning_rate": 7.369662428759316e-06,
"loss": 12.3753,
"step": 31000
},
{
"epoch": 13.494213528932356,
"grad_norm": 35.44208908081055,
"learning_rate": 7.3652783866725134e-06,
"loss": 12.3937,
"step": 31050
},
{
"epoch": 13.515946753599565,
"grad_norm": 19.743236541748047,
"learning_rate": 7.360894344585709e-06,
"loss": 12.439,
"step": 31100
},
{
"epoch": 13.537679978266775,
"grad_norm": 29.914348602294922,
"learning_rate": 7.356510302498905e-06,
"loss": 12.3594,
"step": 31150
},
{
"epoch": 13.559413202933985,
"grad_norm": 105.84856414794922,
"learning_rate": 7.3521262604121004e-06,
"loss": 12.352,
"step": 31200
},
{
"epoch": 13.581146427601196,
"grad_norm": 23.331436157226562,
"learning_rate": 7.347742218325297e-06,
"loss": 12.355,
"step": 31250
},
{
"epoch": 13.602879652268406,
"grad_norm": 18.46331214904785,
"learning_rate": 7.343358176238492e-06,
"loss": 12.3481,
"step": 31300
},
{
"epoch": 13.624612876935615,
"grad_norm": 22.384254455566406,
"learning_rate": 7.338974134151688e-06,
"loss": 12.4047,
"step": 31350
},
{
"epoch": 13.646346101602825,
"grad_norm": 34.16387176513672,
"learning_rate": 7.3345900920648845e-06,
"loss": 12.4131,
"step": 31400
},
{
"epoch": 13.668079326270036,
"grad_norm": 59.95965576171875,
"learning_rate": 7.33020604997808e-06,
"loss": 12.3539,
"step": 31450
},
{
"epoch": 13.689812550937246,
"grad_norm": 21.647342681884766,
"learning_rate": 7.325822007891277e-06,
"loss": 12.3936,
"step": 31500
},
{
"epoch": 13.711545775604455,
"grad_norm": 20.892303466796875,
"learning_rate": 7.321437965804472e-06,
"loss": 12.4042,
"step": 31550
},
{
"epoch": 13.733279000271665,
"grad_norm": 25.085771560668945,
"learning_rate": 7.317053923717669e-06,
"loss": 12.4563,
"step": 31600
},
{
"epoch": 13.755012224938875,
"grad_norm": 29.819766998291016,
"learning_rate": 7.312669881630864e-06,
"loss": 12.3417,
"step": 31650
},
{
"epoch": 13.776745449606086,
"grad_norm": 23.446327209472656,
"learning_rate": 7.30828583954406e-06,
"loss": 12.4085,
"step": 31700
},
{
"epoch": 13.798478674273296,
"grad_norm": 30.441680908203125,
"learning_rate": 7.303901797457256e-06,
"loss": 12.3269,
"step": 31750
},
{
"epoch": 13.820211898940505,
"grad_norm": 46.045162200927734,
"learning_rate": 7.299517755370452e-06,
"loss": 12.3459,
"step": 31800
},
{
"epoch": 13.841945123607715,
"grad_norm": 20.486669540405273,
"learning_rate": 7.295133713283649e-06,
"loss": 12.4457,
"step": 31850
},
{
"epoch": 13.863678348274926,
"grad_norm": 18.060197830200195,
"learning_rate": 7.290749671196844e-06,
"loss": 12.4123,
"step": 31900
},
{
"epoch": 13.885411572942136,
"grad_norm": 29.656959533691406,
"learning_rate": 7.2863656291100406e-06,
"loss": 12.3888,
"step": 31950
},
{
"epoch": 13.907144797609345,
"grad_norm": 16.68509864807129,
"learning_rate": 7.281981587023236e-06,
"loss": 12.3878,
"step": 32000
},
{
"epoch": 13.928878022276555,
"grad_norm": 27.963064193725586,
"learning_rate": 7.277597544936432e-06,
"loss": 12.3907,
"step": 32050
},
{
"epoch": 13.950611246943765,
"grad_norm": 27.46925163269043,
"learning_rate": 7.2732135028496276e-06,
"loss": 12.4483,
"step": 32100
},
{
"epoch": 13.972344471610976,
"grad_norm": 23.631675720214844,
"learning_rate": 7.268829460762824e-06,
"loss": 12.4326,
"step": 32150
},
{
"epoch": 13.994077696278186,
"grad_norm": 44.30888748168945,
"learning_rate": 7.264445418676019e-06,
"loss": 12.4026,
"step": 32200
},
{
"epoch": 14.015647921760392,
"grad_norm": 17.614269256591797,
"learning_rate": 7.260061376589215e-06,
"loss": 12.2351,
"step": 32250
},
{
"epoch": 14.0373811464276,
"grad_norm": 16.82352638244629,
"learning_rate": 7.2556773345024125e-06,
"loss": 12.2887,
"step": 32300
},
{
"epoch": 14.059114371094811,
"grad_norm": 22.863889694213867,
"learning_rate": 7.251293292415608e-06,
"loss": 12.2874,
"step": 32350
},
{
"epoch": 14.080847595762021,
"grad_norm": 22.543703079223633,
"learning_rate": 7.246909250328804e-06,
"loss": 12.2726,
"step": 32400
},
{
"epoch": 14.102580820429232,
"grad_norm": 19.95811653137207,
"learning_rate": 7.2425252082419995e-06,
"loss": 12.2948,
"step": 32450
},
{
"epoch": 14.12431404509644,
"grad_norm": 11.412972450256348,
"learning_rate": 7.238141166155196e-06,
"loss": 12.2971,
"step": 32500
},
{
"epoch": 14.14604726976365,
"grad_norm": 30.869230270385742,
"learning_rate": 7.233757124068391e-06,
"loss": 12.3222,
"step": 32550
},
{
"epoch": 14.167780494430861,
"grad_norm": 37.976741790771484,
"learning_rate": 7.229373081981587e-06,
"loss": 12.3079,
"step": 32600
},
{
"epoch": 14.189513719098072,
"grad_norm": 23.526809692382812,
"learning_rate": 7.224989039894783e-06,
"loss": 12.3085,
"step": 32650
},
{
"epoch": 14.211246943765282,
"grad_norm": 19.888294219970703,
"learning_rate": 7.22060499780798e-06,
"loss": 12.3141,
"step": 32700
},
{
"epoch": 14.23298016843249,
"grad_norm": 16.727022171020508,
"learning_rate": 7.216220955721176e-06,
"loss": 12.275,
"step": 32750
},
{
"epoch": 14.254713393099701,
"grad_norm": 14.18730640411377,
"learning_rate": 7.2118369136343715e-06,
"loss": 12.3144,
"step": 32800
},
{
"epoch": 14.276446617766911,
"grad_norm": 20.451278686523438,
"learning_rate": 7.207452871547568e-06,
"loss": 12.2727,
"step": 32850
},
{
"epoch": 14.298179842434122,
"grad_norm": 16.769447326660156,
"learning_rate": 7.203068829460763e-06,
"loss": 12.3384,
"step": 32900
},
{
"epoch": 14.31991306710133,
"grad_norm": 27.05632781982422,
"learning_rate": 7.198684787373959e-06,
"loss": 12.3492,
"step": 32950
},
{
"epoch": 14.34164629176854,
"grad_norm": 38.1939582824707,
"learning_rate": 7.194300745287155e-06,
"loss": 12.284,
"step": 33000
},
{
"epoch": 14.363379516435751,
"grad_norm": 32.06970977783203,
"learning_rate": 7.189916703200352e-06,
"loss": 12.3158,
"step": 33050
},
{
"epoch": 14.385112741102962,
"grad_norm": 25.079200744628906,
"learning_rate": 7.185532661113547e-06,
"loss": 12.3486,
"step": 33100
},
{
"epoch": 14.406845965770172,
"grad_norm": 21.099042892456055,
"learning_rate": 7.181148619026743e-06,
"loss": 12.2961,
"step": 33150
},
{
"epoch": 14.42857919043738,
"grad_norm": 18.112712860107422,
"learning_rate": 7.17676457693994e-06,
"loss": 12.2852,
"step": 33200
},
{
"epoch": 14.450312415104591,
"grad_norm": 18.887737274169922,
"learning_rate": 7.172380534853135e-06,
"loss": 12.276,
"step": 33250
},
{
"epoch": 14.472045639771801,
"grad_norm": 21.17413902282715,
"learning_rate": 7.167996492766331e-06,
"loss": 12.3109,
"step": 33300
},
{
"epoch": 14.493778864439012,
"grad_norm": 67.79141998291016,
"learning_rate": 7.163612450679527e-06,
"loss": 12.3326,
"step": 33350
},
{
"epoch": 14.51551208910622,
"grad_norm": 18.88022232055664,
"learning_rate": 7.159228408592723e-06,
"loss": 12.355,
"step": 33400
},
{
"epoch": 14.53724531377343,
"grad_norm": 14.09670639038086,
"learning_rate": 7.154844366505918e-06,
"loss": 12.3049,
"step": 33450
},
{
"epoch": 14.558978538440641,
"grad_norm": 22.51435661315918,
"learning_rate": 7.150460324419115e-06,
"loss": 12.3043,
"step": 33500
},
{
"epoch": 14.580711763107852,
"grad_norm": 20.429990768432617,
"learning_rate": 7.146076282332311e-06,
"loss": 12.3029,
"step": 33550
},
{
"epoch": 14.602444987775062,
"grad_norm": 27.559160232543945,
"learning_rate": 7.141692240245507e-06,
"loss": 12.3017,
"step": 33600
},
{
"epoch": 14.62417821244227,
"grad_norm": 26.29608917236328,
"learning_rate": 7.137308198158703e-06,
"loss": 12.347,
"step": 33650
},
{
"epoch": 14.645911437109481,
"grad_norm": 12.279489517211914,
"learning_rate": 7.132924156071899e-06,
"loss": 12.2923,
"step": 33700
},
{
"epoch": 14.667644661776691,
"grad_norm": 19.162981033325195,
"learning_rate": 7.128540113985095e-06,
"loss": 12.3289,
"step": 33750
},
{
"epoch": 14.689377886443902,
"grad_norm": 19.87074089050293,
"learning_rate": 7.12415607189829e-06,
"loss": 12.2862,
"step": 33800
},
{
"epoch": 14.71111111111111,
"grad_norm": 22.431442260742188,
"learning_rate": 7.119772029811487e-06,
"loss": 12.3116,
"step": 33850
},
{
"epoch": 14.73284433577832,
"grad_norm": 16.823158264160156,
"learning_rate": 7.115387987724683e-06,
"loss": 12.3284,
"step": 33900
},
{
"epoch": 14.754577560445531,
"grad_norm": 26.60719108581543,
"learning_rate": 7.111003945637879e-06,
"loss": 12.2962,
"step": 33950
},
{
"epoch": 14.776310785112742,
"grad_norm": 20.54785919189453,
"learning_rate": 7.106619903551074e-06,
"loss": 12.2979,
"step": 34000
},
{
"epoch": 14.79804400977995,
"grad_norm": 26.004840850830078,
"learning_rate": 7.1022358614642705e-06,
"loss": 12.3252,
"step": 34050
},
{
"epoch": 14.81977723444716,
"grad_norm": 21.51180648803711,
"learning_rate": 7.097851819377467e-06,
"loss": 12.2844,
"step": 34100
},
{
"epoch": 14.841510459114371,
"grad_norm": 21.89017105102539,
"learning_rate": 7.093467777290662e-06,
"loss": 12.3084,
"step": 34150
},
{
"epoch": 14.863243683781581,
"grad_norm": 23.298505783081055,
"learning_rate": 7.089083735203859e-06,
"loss": 12.3825,
"step": 34200
},
{
"epoch": 14.884976908448792,
"grad_norm": 21.83428382873535,
"learning_rate": 7.084699693117054e-06,
"loss": 12.2981,
"step": 34250
},
{
"epoch": 14.906710133116,
"grad_norm": 26.309865951538086,
"learning_rate": 7.080315651030251e-06,
"loss": 12.2713,
"step": 34300
},
{
"epoch": 14.92844335778321,
"grad_norm": 28.134078979492188,
"learning_rate": 7.075931608943446e-06,
"loss": 12.311,
"step": 34350
},
{
"epoch": 14.950176582450421,
"grad_norm": 25.938369750976562,
"learning_rate": 7.0715475668566425e-06,
"loss": 12.2782,
"step": 34400
},
{
"epoch": 14.971909807117632,
"grad_norm": 25.179311752319336,
"learning_rate": 7.067163524769839e-06,
"loss": 12.3384,
"step": 34450
},
{
"epoch": 14.993643031784842,
"grad_norm": 18.602447509765625,
"learning_rate": 7.062779482683034e-06,
"loss": 12.3125,
"step": 34500
},
{
"epoch": 15.015213257267048,
"grad_norm": 23.754281997680664,
"learning_rate": 7.05839544059623e-06,
"loss": 12.1766,
"step": 34550
},
{
"epoch": 15.036946481934256,
"grad_norm": 22.74106788635254,
"learning_rate": 7.054011398509426e-06,
"loss": 12.228,
"step": 34600
},
{
"epoch": 15.058679706601467,
"grad_norm": 35.1696662902832,
"learning_rate": 7.049627356422623e-06,
"loss": 12.2026,
"step": 34650
},
{
"epoch": 15.080412931268677,
"grad_norm": 20.032005310058594,
"learning_rate": 7.045243314335818e-06,
"loss": 12.2443,
"step": 34700
},
{
"epoch": 15.102146155935888,
"grad_norm": 30.315168380737305,
"learning_rate": 7.040859272249014e-06,
"loss": 12.221,
"step": 34750
},
{
"epoch": 15.123879380603096,
"grad_norm": 15.685395240783691,
"learning_rate": 7.03647523016221e-06,
"loss": 12.2394,
"step": 34800
},
{
"epoch": 15.145612605270307,
"grad_norm": 24.66408348083496,
"learning_rate": 7.032091188075406e-06,
"loss": 12.1932,
"step": 34850
},
{
"epoch": 15.167345829937517,
"grad_norm": 20.8659725189209,
"learning_rate": 7.027707145988602e-06,
"loss": 12.2888,
"step": 34900
},
{
"epoch": 15.189079054604727,
"grad_norm": 25.82394027709961,
"learning_rate": 7.023323103901798e-06,
"loss": 12.1971,
"step": 34950
},
{
"epoch": 15.210812279271938,
"grad_norm": 17.442481994628906,
"learning_rate": 7.018939061814995e-06,
"loss": 12.2246,
"step": 35000
},
{
"epoch": 15.232545503939146,
"grad_norm": 16.10444450378418,
"learning_rate": 7.01455501972819e-06,
"loss": 12.2378,
"step": 35050
},
{
"epoch": 15.254278728606357,
"grad_norm": 20.300018310546875,
"learning_rate": 7.010170977641386e-06,
"loss": 12.2806,
"step": 35100
},
{
"epoch": 15.276011953273567,
"grad_norm": 30.641281127929688,
"learning_rate": 7.005786935554582e-06,
"loss": 12.2608,
"step": 35150
},
{
"epoch": 15.297745177940778,
"grad_norm": 36.21476745605469,
"learning_rate": 7.001402893467778e-06,
"loss": 12.2502,
"step": 35200
},
{
"epoch": 15.319478402607986,
"grad_norm": 31.640207290649414,
"learning_rate": 6.997018851380973e-06,
"loss": 12.2664,
"step": 35250
},
{
"epoch": 15.341211627275197,
"grad_norm": 14.65031623840332,
"learning_rate": 6.99263480929417e-06,
"loss": 12.2727,
"step": 35300
},
{
"epoch": 15.362944851942407,
"grad_norm": 15.896147727966309,
"learning_rate": 6.988250767207367e-06,
"loss": 12.2335,
"step": 35350
},
{
"epoch": 15.384678076609617,
"grad_norm": 27.60741424560547,
"learning_rate": 6.983866725120561e-06,
"loss": 12.241,
"step": 35400
},
{
"epoch": 15.406411301276828,
"grad_norm": 27.842981338500977,
"learning_rate": 6.979482683033758e-06,
"loss": 12.299,
"step": 35450
},
{
"epoch": 15.428144525944036,
"grad_norm": 14.332504272460938,
"learning_rate": 6.975098640946954e-06,
"loss": 12.2547,
"step": 35500
},
{
"epoch": 15.449877750611247,
"grad_norm": 13.6268310546875,
"learning_rate": 6.97071459886015e-06,
"loss": 12.2764,
"step": 35550
},
{
"epoch": 15.471610975278457,
"grad_norm": 27.122060775756836,
"learning_rate": 6.966330556773345e-06,
"loss": 12.2568,
"step": 35600
},
{
"epoch": 15.493344199945668,
"grad_norm": NaN,
"learning_rate": 6.9619465146865415e-06,
"loss": 12.2716,
"step": 35650
},
{
"epoch": 15.515077424612876,
"grad_norm": 16.30205726623535,
"learning_rate": 6.957562472599737e-06,
"loss": 12.1952,
"step": 35700
},
{
"epoch": 15.536810649280087,
"grad_norm": 17.126123428344727,
"learning_rate": 6.953178430512933e-06,
"loss": 12.2365,
"step": 35750
},
{
"epoch": 15.558543873947297,
"grad_norm": 33.20661163330078,
"learning_rate": 6.94879438842613e-06,
"loss": 12.2776,
"step": 35800
},
{
"epoch": 15.580277098614507,
"grad_norm": 22.688047409057617,
"learning_rate": 6.944410346339326e-06,
"loss": 12.3202,
"step": 35850
},
{
"epoch": 15.602010323281718,
"grad_norm": 19.268665313720703,
"learning_rate": 6.940026304252522e-06,
"loss": 12.2744,
"step": 35900
},
{
"epoch": 15.623743547948926,
"grad_norm": 44.28622817993164,
"learning_rate": 6.935642262165717e-06,
"loss": 12.2619,
"step": 35950
},
{
"epoch": 15.645476772616137,
"grad_norm": 11.47972297668457,
"learning_rate": 6.9312582200789135e-06,
"loss": 12.2281,
"step": 36000
},
{
"epoch": 15.667209997283347,
"grad_norm": 26.456462860107422,
"learning_rate": 6.926874177992109e-06,
"loss": 12.2591,
"step": 36050
},
{
"epoch": 15.688943221950558,
"grad_norm": 18.363269805908203,
"learning_rate": 6.922490135905305e-06,
"loss": 12.2958,
"step": 36100
},
{
"epoch": 15.710676446617766,
"grad_norm": 21.405649185180664,
"learning_rate": 6.9181060938185005e-06,
"loss": 12.2485,
"step": 36150
},
{
"epoch": 15.732409671284977,
"grad_norm": 27.277904510498047,
"learning_rate": 6.913722051731698e-06,
"loss": 12.2324,
"step": 36200
},
{
"epoch": 15.754142895952187,
"grad_norm": 20.303300857543945,
"learning_rate": 6.909338009644894e-06,
"loss": 12.2939,
"step": 36250
},
{
"epoch": 15.775876120619397,
"grad_norm": 14.886679649353027,
"learning_rate": 6.904953967558089e-06,
"loss": 12.2036,
"step": 36300
},
{
"epoch": 15.797609345286606,
"grad_norm": 17.747087478637695,
"learning_rate": 6.9005699254712854e-06,
"loss": 12.2459,
"step": 36350
},
{
"epoch": 15.819342569953816,
"grad_norm": 34.592708587646484,
"learning_rate": 6.896185883384481e-06,
"loss": 12.2688,
"step": 36400
},
{
"epoch": 15.841075794621027,
"grad_norm": 20.060144424438477,
"learning_rate": 6.891801841297677e-06,
"loss": 12.2528,
"step": 36450
},
{
"epoch": 15.862809019288237,
"grad_norm": 13.47815227508545,
"learning_rate": 6.8874177992108724e-06,
"loss": 12.2797,
"step": 36500
},
{
"epoch": 15.884542243955448,
"grad_norm": 20.81302833557129,
"learning_rate": 6.883033757124069e-06,
"loss": 12.269,
"step": 36550
},
{
"epoch": 15.906275468622656,
"grad_norm": 29.326114654541016,
"learning_rate": 6.878649715037264e-06,
"loss": 12.296,
"step": 36600
},
{
"epoch": 15.928008693289867,
"grad_norm": 21.322439193725586,
"learning_rate": 6.874265672950461e-06,
"loss": 12.3283,
"step": 36650
},
{
"epoch": 15.949741917957077,
"grad_norm": 25.019590377807617,
"learning_rate": 6.869881630863657e-06,
"loss": 12.2913,
"step": 36700
},
{
"epoch": 15.971475142624287,
"grad_norm": 16.494823455810547,
"learning_rate": 6.865497588776853e-06,
"loss": 12.2574,
"step": 36750
},
{
"epoch": 15.993208367291498,
"grad_norm": 22.250595092773438,
"learning_rate": 6.861113546690049e-06,
"loss": 12.26,
"step": 36800
},
{
"epoch": 16.014778592773702,
"grad_norm": 15.23131275177002,
"learning_rate": 6.856729504603244e-06,
"loss": 12.1167,
"step": 36850
},
{
"epoch": 16.036511817440914,
"grad_norm": 18.172534942626953,
"learning_rate": 6.852345462516441e-06,
"loss": 12.1946,
"step": 36900
},
{
"epoch": 16.058245042108123,
"grad_norm": 27.63299560546875,
"learning_rate": 6.847961420429636e-06,
"loss": 12.1801,
"step": 36950
},
{
"epoch": 16.07997826677533,
"grad_norm": 22.287805557250977,
"learning_rate": 6.843577378342833e-06,
"loss": 12.2187,
"step": 37000
},
{
"epoch": 16.101711491442543,
"grad_norm": 15.652294158935547,
"learning_rate": 6.8391933362560285e-06,
"loss": 12.1837,
"step": 37050
},
{
"epoch": 16.123444716109752,
"grad_norm": 23.91109848022461,
"learning_rate": 6.834809294169225e-06,
"loss": 12.2074,
"step": 37100
},
{
"epoch": 16.145177940776964,
"grad_norm": 24.845624923706055,
"learning_rate": 6.830425252082421e-06,
"loss": 12.1387,
"step": 37150
},
{
"epoch": 16.166911165444173,
"grad_norm": 19.137048721313477,
"learning_rate": 6.826041209995616e-06,
"loss": 12.2304,
"step": 37200
},
{
"epoch": 16.18864439011138,
"grad_norm": 17.24692153930664,
"learning_rate": 6.8216571679088126e-06,
"loss": 12.208,
"step": 37250
},
{
"epoch": 16.210377614778594,
"grad_norm": 20.503686904907227,
"learning_rate": 6.817273125822008e-06,
"loss": 12.2103,
"step": 37300
},
{
"epoch": 16.232110839445802,
"grad_norm": 27.404552459716797,
"learning_rate": 6.812889083735205e-06,
"loss": 12.2422,
"step": 37350
},
{
"epoch": 16.253844064113014,
"grad_norm": 25.16230010986328,
"learning_rate": 6.8085050416483996e-06,
"loss": 12.2006,
"step": 37400
},
{
"epoch": 16.275577288780223,
"grad_norm": 18.156126022338867,
"learning_rate": 6.804120999561597e-06,
"loss": 12.2023,
"step": 37450
},
{
"epoch": 16.29731051344743,
"grad_norm": 19.68562889099121,
"learning_rate": 6.799736957474792e-06,
"loss": 12.1877,
"step": 37500
},
{
"epoch": 16.319043738114644,
"grad_norm": 17.91988182067871,
"learning_rate": 6.795352915387988e-06,
"loss": 12.215,
"step": 37550
},
{
"epoch": 16.340776962781852,
"grad_norm": 15.31675910949707,
"learning_rate": 6.7909688733011845e-06,
"loss": 12.1548,
"step": 37600
},
{
"epoch": 16.36251018744906,
"grad_norm": 8.975651741027832,
"learning_rate": 6.78658483121438e-06,
"loss": 12.169,
"step": 37650
},
{
"epoch": 16.384243412116273,
"grad_norm": 16.77298927307129,
"learning_rate": 6.782200789127576e-06,
"loss": 12.2139,
"step": 37700
},
{
"epoch": 16.405976636783482,
"grad_norm": 23.03885269165039,
"learning_rate": 6.7778167470407715e-06,
"loss": 12.2093,
"step": 37750
},
{
"epoch": 16.427709861450694,
"grad_norm": 18.47231101989746,
"learning_rate": 6.773432704953969e-06,
"loss": 12.1992,
"step": 37800
},
{
"epoch": 16.449443086117903,
"grad_norm": 28.977338790893555,
"learning_rate": 6.769048662867164e-06,
"loss": 12.1989,
"step": 37850
},
{
"epoch": 16.47117631078511,
"grad_norm": 16.37677574157715,
"learning_rate": 6.76466462078036e-06,
"loss": 12.2296,
"step": 37900
},
{
"epoch": 16.492909535452323,
"grad_norm": 13.731319427490234,
"learning_rate": 6.760280578693556e-06,
"loss": 12.186,
"step": 37950
},
{
"epoch": 16.514642760119532,
"grad_norm": 20.206491470336914,
"learning_rate": 6.755896536606752e-06,
"loss": 12.1552,
"step": 38000
},
{
"epoch": 16.536375984786744,
"grad_norm": 19.88826560974121,
"learning_rate": 6.751512494519948e-06,
"loss": 12.2298,
"step": 38050
},
{
"epoch": 16.558109209453953,
"grad_norm": 31.184532165527344,
"learning_rate": 6.7471284524331435e-06,
"loss": 12.2041,
"step": 38100
},
{
"epoch": 16.57984243412116,
"grad_norm": 37.404266357421875,
"learning_rate": 6.7427444103463405e-06,
"loss": 12.208,
"step": 38150
},
{
"epoch": 16.601575658788374,
"grad_norm": 12.503349304199219,
"learning_rate": 6.738360368259536e-06,
"loss": 12.2289,
"step": 38200
},
{
"epoch": 16.623308883455582,
"grad_norm": 14.80574893951416,
"learning_rate": 6.733976326172732e-06,
"loss": 12.2027,
"step": 38250
},
{
"epoch": 16.645042108122794,
"grad_norm": 18.339298248291016,
"learning_rate": 6.7295922840859276e-06,
"loss": 12.2122,
"step": 38300
},
{
"epoch": 16.666775332790003,
"grad_norm": 9.988556861877441,
"learning_rate": 6.725208241999124e-06,
"loss": 12.2169,
"step": 38350
},
{
"epoch": 16.68850855745721,
"grad_norm": 16.23221778869629,
"learning_rate": 6.720824199912319e-06,
"loss": 12.2432,
"step": 38400
},
{
"epoch": 16.710241782124424,
"grad_norm": 17.93288803100586,
"learning_rate": 6.716440157825515e-06,
"loss": 12.1946,
"step": 38450
},
{
"epoch": 16.731975006791632,
"grad_norm": 23.863719940185547,
"learning_rate": 6.7120561157387125e-06,
"loss": 12.2601,
"step": 38500
},
{
"epoch": 16.75370823145884,
"grad_norm": 32.24260330200195,
"learning_rate": 6.707672073651907e-06,
"loss": 12.2242,
"step": 38550
},
{
"epoch": 16.775441456126053,
"grad_norm": 31.188295364379883,
"learning_rate": 6.703288031565104e-06,
"loss": 12.1977,
"step": 38600
},
{
"epoch": 16.797174680793262,
"grad_norm": 21.935489654541016,
"learning_rate": 6.6989039894782995e-06,
"loss": 12.2153,
"step": 38650
},
{
"epoch": 16.818907905460474,
"grad_norm": 16.820199966430664,
"learning_rate": 6.694519947391496e-06,
"loss": 12.1841,
"step": 38700
},
{
"epoch": 16.840641130127683,
"grad_norm": 27.350257873535156,
"learning_rate": 6.690135905304691e-06,
"loss": 12.2308,
"step": 38750
},
{
"epoch": 16.86237435479489,
"grad_norm": 20.717317581176758,
"learning_rate": 6.685751863217887e-06,
"loss": 12.2139,
"step": 38800
},
{
"epoch": 16.884107579462103,
"grad_norm": 20.515241622924805,
"learning_rate": 6.681367821131084e-06,
"loss": 12.209,
"step": 38850
},
{
"epoch": 16.905840804129312,
"grad_norm": 16.544082641601562,
"learning_rate": 6.676983779044279e-06,
"loss": 12.2183,
"step": 38900
},
{
"epoch": 16.927574028796524,
"grad_norm": 17.54091453552246,
"learning_rate": 6.672599736957476e-06,
"loss": 12.2609,
"step": 38950
},
{
"epoch": 16.949307253463733,
"grad_norm": 21.071598052978516,
"learning_rate": 6.6682156948706714e-06,
"loss": 12.1727,
"step": 39000
},
{
"epoch": 16.97104047813094,
"grad_norm": 17.628015518188477,
"learning_rate": 6.663831652783868e-06,
"loss": 12.1862,
"step": 39050
},
{
"epoch": 16.992773702798154,
"grad_norm": 13.298240661621094,
"learning_rate": 6.659447610697063e-06,
"loss": 12.2279,
"step": 39100
},
{
"epoch": 17.014343928280358,
"grad_norm": 12.616509437561035,
"learning_rate": 6.655063568610259e-06,
"loss": 12.0914,
"step": 39150
},
{
"epoch": 17.03607715294757,
"grad_norm": 21.71387481689453,
"learning_rate": 6.650679526523455e-06,
"loss": 12.1576,
"step": 39200
},
{
"epoch": 17.05781037761478,
"grad_norm": 26.497800827026367,
"learning_rate": 6.646295484436651e-06,
"loss": 12.1522,
"step": 39250
},
{
"epoch": 17.079543602281987,
"grad_norm": 20.276397705078125,
"learning_rate": 6.641911442349848e-06,
"loss": 12.1579,
"step": 39300
},
{
"epoch": 17.1012768269492,
"grad_norm": 18.534727096557617,
"learning_rate": 6.637527400263043e-06,
"loss": 12.178,
"step": 39350
},
{
"epoch": 17.123010051616408,
"grad_norm": 29.980501174926758,
"learning_rate": 6.63314335817624e-06,
"loss": 12.1507,
"step": 39400
},
{
"epoch": 17.14474327628362,
"grad_norm": 25.486083984375,
"learning_rate": 6.628759316089435e-06,
"loss": 12.148,
"step": 39450
},
{
"epoch": 17.16647650095083,
"grad_norm": 24.499359130859375,
"learning_rate": 6.624375274002631e-06,
"loss": 12.1513,
"step": 39500
},
{
"epoch": 17.188209725618037,
"grad_norm": 22.07660484313965,
"learning_rate": 6.619991231915827e-06,
"loss": 12.1477,
"step": 39550
},
{
"epoch": 17.20994295028525,
"grad_norm": 28.42877960205078,
"learning_rate": 6.615607189829023e-06,
"loss": 12.1747,
"step": 39600
},
{
"epoch": 17.231676174952458,
"grad_norm": 22.489025115966797,
"learning_rate": 6.611223147742218e-06,
"loss": 12.1474,
"step": 39650
},
{
"epoch": 17.25340939961967,
"grad_norm": 24.58718490600586,
"learning_rate": 6.6068391056554145e-06,
"loss": 12.1613,
"step": 39700
},
{
"epoch": 17.27514262428688,
"grad_norm": 92.33475494384766,
"learning_rate": 6.6024550635686116e-06,
"loss": 12.1637,
"step": 39750
},
{
"epoch": 17.296875848954087,
"grad_norm": 19.350147247314453,
"learning_rate": 6.598071021481807e-06,
"loss": 12.184,
"step": 39800
},
{
"epoch": 17.3186090736213,
"grad_norm": 14.627690315246582,
"learning_rate": 6.593686979395003e-06,
"loss": 12.1552,
"step": 39850
},
{
"epoch": 17.34034229828851,
"grad_norm": 11.52912425994873,
"learning_rate": 6.5893029373081986e-06,
"loss": 12.1784,
"step": 39900
},
{
"epoch": 17.362075522955717,
"grad_norm": 18.19782829284668,
"learning_rate": 6.584918895221395e-06,
"loss": 12.127,
"step": 39950
},
{
"epoch": 17.38380874762293,
"grad_norm": 24.676179885864258,
"learning_rate": 6.58053485313459e-06,
"loss": 12.2129,
"step": 40000
},
{
"epoch": 17.38380874762293,
"eval_cer": 0.0766327626430326,
"eval_loss": 2.3462953567504883,
"eval_runtime": 399.3051,
"eval_samples_per_second": 13.539,
"eval_steps_per_second": 3.386,
"eval_wer": 0.22991050400376825,
"step": 40000
},
{
"epoch": 17.405541972290138,
"grad_norm": 10.956995010375977,
"learning_rate": 6.5761508110477864e-06,
"loss": 12.1388,
"step": 40050
},
{
"epoch": 17.42727519695735,
"grad_norm": 23.64618682861328,
"learning_rate": 6.571766768960982e-06,
"loss": 12.1628,
"step": 40100
},
{
"epoch": 17.44900842162456,
"grad_norm": 22.68800926208496,
"learning_rate": 6.567382726874179e-06,
"loss": 12.1489,
"step": 40150
},
{
"epoch": 17.470741646291767,
"grad_norm": 17.155860900878906,
"learning_rate": 6.562998684787375e-06,
"loss": 12.1374,
"step": 40200
},
{
"epoch": 17.49247487095898,
"grad_norm": 19.839338302612305,
"learning_rate": 6.5586146427005705e-06,
"loss": 12.1891,
"step": 40250
},
{
"epoch": 17.514208095626188,
"grad_norm": 24.002262115478516,
"learning_rate": 6.554230600613767e-06,
"loss": 12.1819,
"step": 40300
},
{
"epoch": 17.5359413202934,
"grad_norm": 14.681846618652344,
"learning_rate": 6.549846558526962e-06,
"loss": 12.1583,
"step": 40350
},
{
"epoch": 17.55767454496061,
"grad_norm": 28.004215240478516,
"learning_rate": 6.545462516440158e-06,
"loss": 12.1953,
"step": 40400
},
{
"epoch": 17.579407769627817,
"grad_norm": 18.857913970947266,
"learning_rate": 6.541078474353354e-06,
"loss": 12.1922,
"step": 40450
},
{
"epoch": 17.60114099429503,
"grad_norm": 27.09821319580078,
"learning_rate": 6.536694432266551e-06,
"loss": 12.1901,
"step": 40500
},
{
"epoch": 17.622874218962238,
"grad_norm": 15.759273529052734,
"learning_rate": 6.532310390179745e-06,
"loss": 12.1536,
"step": 40550
},
{
"epoch": 17.64460744362945,
"grad_norm": 13.474365234375,
"learning_rate": 6.5279263480929425e-06,
"loss": 12.1806,
"step": 40600
},
{
"epoch": 17.66634066829666,
"grad_norm": 14.334114074707031,
"learning_rate": 6.523542306006139e-06,
"loss": 12.1881,
"step": 40650
},
{
"epoch": 17.688073892963867,
"grad_norm": 28.76114845275879,
"learning_rate": 6.519158263919334e-06,
"loss": 12.2126,
"step": 40700
},
{
"epoch": 17.70980711763108,
"grad_norm": 22.386192321777344,
"learning_rate": 6.51477422183253e-06,
"loss": 12.153,
"step": 40750
},
{
"epoch": 17.73154034229829,
"grad_norm": 12.762558937072754,
"learning_rate": 6.510390179745726e-06,
"loss": 12.1787,
"step": 40800
},
{
"epoch": 17.753273566965497,
"grad_norm": 15.222195625305176,
"learning_rate": 6.506006137658922e-06,
"loss": 12.1646,
"step": 40850
},
{
"epoch": 17.77500679163271,
"grad_norm": 35.954437255859375,
"learning_rate": 6.501622095572117e-06,
"loss": 12.1707,
"step": 40900
},
{
"epoch": 17.796740016299918,
"grad_norm": 11.987882614135742,
"learning_rate": 6.497238053485314e-06,
"loss": 12.1824,
"step": 40950
},
{
"epoch": 17.81847324096713,
"grad_norm": 31.296215057373047,
"learning_rate": 6.49285401139851e-06,
"loss": 12.213,
"step": 41000
},
{
"epoch": 17.84020646563434,
"grad_norm": 16.63829231262207,
"learning_rate": 6.488469969311706e-06,
"loss": 12.1762,
"step": 41050
},
{
"epoch": 17.861939690301547,
"grad_norm": 13.500885963439941,
"learning_rate": 6.484085927224902e-06,
"loss": 12.1677,
"step": 41100
},
{
"epoch": 17.88367291496876,
"grad_norm": 29.857112884521484,
"learning_rate": 6.479701885138098e-06,
"loss": 12.1812,
"step": 41150
},
{
"epoch": 17.905406139635968,
"grad_norm": 14.494293212890625,
"learning_rate": 6.475317843051294e-06,
"loss": 12.1565,
"step": 41200
},
{
"epoch": 17.92713936430318,
"grad_norm": 11.10816478729248,
"learning_rate": 6.470933800964489e-06,
"loss": 12.1799,
"step": 41250
},
{
"epoch": 17.94887258897039,
"grad_norm": 19.924617767333984,
"learning_rate": 6.466549758877686e-06,
"loss": 12.1382,
"step": 41300
},
{
"epoch": 17.970605813637597,
"grad_norm": 21.809062957763672,
"learning_rate": 6.462165716790882e-06,
"loss": 12.1727,
"step": 41350
},
{
"epoch": 17.99233903830481,
"grad_norm": 12.314726829528809,
"learning_rate": 6.457781674704078e-06,
"loss": 12.1355,
"step": 41400
},
{
"epoch": 18.013909263787014,
"grad_norm": 22.337913513183594,
"learning_rate": 6.453397632617273e-06,
"loss": 12.0397,
"step": 41450
},
{
"epoch": 18.035642488454226,
"grad_norm": 14.111494064331055,
"learning_rate": 6.44901359053047e-06,
"loss": 12.0939,
"step": 41500
},
{
"epoch": 18.057375713121434,
"grad_norm": 16.706897735595703,
"learning_rate": 6.444629548443666e-06,
"loss": 12.1425,
"step": 41550
},
{
"epoch": 18.079108937788643,
"grad_norm": 26.20379066467285,
"learning_rate": 6.440245506356861e-06,
"loss": 12.1265,
"step": 41600
},
{
"epoch": 18.100842162455855,
"grad_norm": 14.789849281311035,
"learning_rate": 6.4358614642700574e-06,
"loss": 12.1377,
"step": 41650
},
{
"epoch": 18.122575387123064,
"grad_norm": 34.11836242675781,
"learning_rate": 6.431477422183253e-06,
"loss": 12.1273,
"step": 41700
},
{
"epoch": 18.144308611790276,
"grad_norm": 32.26976013183594,
"learning_rate": 6.42709338009645e-06,
"loss": 12.0853,
"step": 41750
},
{
"epoch": 18.166041836457484,
"grad_norm": 19.59932518005371,
"learning_rate": 6.422709338009645e-06,
"loss": 12.0887,
"step": 41800
},
{
"epoch": 18.187775061124693,
"grad_norm": 16.68062400817871,
"learning_rate": 6.4183252959228415e-06,
"loss": 12.148,
"step": 41850
},
{
"epoch": 18.209508285791905,
"grad_norm": 18.44430923461914,
"learning_rate": 6.413941253836037e-06,
"loss": 12.132,
"step": 41900
},
{
"epoch": 18.231241510459114,
"grad_norm": 23.202688217163086,
"learning_rate": 6.409557211749233e-06,
"loss": 12.1606,
"step": 41950
},
{
"epoch": 18.252974735126326,
"grad_norm": 11.007984161376953,
"learning_rate": 6.405173169662429e-06,
"loss": 12.1526,
"step": 42000
},
{
"epoch": 18.274707959793535,
"grad_norm": 43.34115219116211,
"learning_rate": 6.400789127575625e-06,
"loss": 12.1461,
"step": 42050
},
{
"epoch": 18.296441184460743,
"grad_norm": 21.273698806762695,
"learning_rate": 6.396405085488822e-06,
"loss": 12.116,
"step": 42100
},
{
"epoch": 18.318174409127955,
"grad_norm": 21.992979049682617,
"learning_rate": 6.392021043402017e-06,
"loss": 12.0956,
"step": 42150
},
{
"epoch": 18.339907633795164,
"grad_norm": 10.890033721923828,
"learning_rate": 6.3876370013152135e-06,
"loss": 12.0999,
"step": 42200
},
{
"epoch": 18.361640858462373,
"grad_norm": 10.554021835327148,
"learning_rate": 6.383252959228409e-06,
"loss": 12.1158,
"step": 42250
},
{
"epoch": 18.383374083129585,
"grad_norm": 11.385374069213867,
"learning_rate": 6.378868917141605e-06,
"loss": 12.1541,
"step": 42300
},
{
"epoch": 18.405107307796793,
"grad_norm": 11.36735725402832,
"learning_rate": 6.3744848750548005e-06,
"loss": 12.119,
"step": 42350
},
{
"epoch": 18.426840532464006,
"grad_norm": 19.5784969329834,
"learning_rate": 6.370100832967997e-06,
"loss": 12.1513,
"step": 42400
},
{
"epoch": 18.448573757131214,
"grad_norm": 10.584908485412598,
"learning_rate": 6.365716790881194e-06,
"loss": 12.1572,
"step": 42450
},
{
"epoch": 18.470306981798423,
"grad_norm": 23.416278839111328,
"learning_rate": 6.361332748794389e-06,
"loss": 12.1384,
"step": 42500
},
{
"epoch": 18.492040206465635,
"grad_norm": 22.098583221435547,
"learning_rate": 6.3569487067075854e-06,
"loss": 12.1005,
"step": 42550
},
{
"epoch": 18.513773431132844,
"grad_norm": 27.949371337890625,
"learning_rate": 6.352564664620781e-06,
"loss": 12.1248,
"step": 42600
},
{
"epoch": 18.535506655800056,
"grad_norm": 13.4013090133667,
"learning_rate": 6.348180622533977e-06,
"loss": 12.1335,
"step": 42650
},
{
"epoch": 18.557239880467264,
"grad_norm": 19.233583450317383,
"learning_rate": 6.3437965804471724e-06,
"loss": 12.1416,
"step": 42700
},
{
"epoch": 18.578973105134473,
"grad_norm": 17.514616012573242,
"learning_rate": 6.339412538360369e-06,
"loss": 12.1561,
"step": 42750
},
{
"epoch": 18.600706329801685,
"grad_norm": 17.83085823059082,
"learning_rate": 6.335028496273564e-06,
"loss": 12.1655,
"step": 42800
},
{
"epoch": 18.622439554468894,
"grad_norm": 32.00389099121094,
"learning_rate": 6.33064445418676e-06,
"loss": 12.1489,
"step": 42850
},
{
"epoch": 18.644172779136106,
"grad_norm": 36.5909309387207,
"learning_rate": 6.326260412099957e-06,
"loss": 12.1353,
"step": 42900
},
{
"epoch": 18.665906003803315,
"grad_norm": 19.841901779174805,
"learning_rate": 6.321876370013153e-06,
"loss": 12.1237,
"step": 42950
},
{
"epoch": 18.687639228470523,
"grad_norm": 12.05302619934082,
"learning_rate": 6.317492327926349e-06,
"loss": 12.0931,
"step": 43000
},
{
"epoch": 18.709372453137735,
"grad_norm": 51.42092514038086,
"learning_rate": 6.313108285839544e-06,
"loss": 12.0907,
"step": 43050
},
{
"epoch": 18.731105677804944,
"grad_norm": 21.547746658325195,
"learning_rate": 6.308724243752741e-06,
"loss": 12.1051,
"step": 43100
},
{
"epoch": 18.752838902472153,
"grad_norm": 17.779346466064453,
"learning_rate": 6.304340201665936e-06,
"loss": 12.1208,
"step": 43150
},
{
"epoch": 18.774572127139365,
"grad_norm": 12.786531448364258,
"learning_rate": 6.299956159579132e-06,
"loss": 12.1527,
"step": 43200
},
{
"epoch": 18.796305351806573,
"grad_norm": 15.865018844604492,
"learning_rate": 6.295572117492329e-06,
"loss": 12.1003,
"step": 43250
},
{
"epoch": 18.818038576473786,
"grad_norm": 12.622864723205566,
"learning_rate": 6.291188075405525e-06,
"loss": 12.1439,
"step": 43300
},
{
"epoch": 18.839771801140994,
"grad_norm": 12.189949035644531,
"learning_rate": 6.286804033318721e-06,
"loss": 12.132,
"step": 43350
},
{
"epoch": 18.861505025808203,
"grad_norm": 18.03951072692871,
"learning_rate": 6.282419991231916e-06,
"loss": 12.1327,
"step": 43400
},
{
"epoch": 18.883238250475415,
"grad_norm": 25.907819747924805,
"learning_rate": 6.2780359491451126e-06,
"loss": 12.1319,
"step": 43450
},
{
"epoch": 18.904971475142624,
"grad_norm": 39.924564361572266,
"learning_rate": 6.273651907058308e-06,
"loss": 12.1779,
"step": 43500
},
{
"epoch": 18.926704699809836,
"grad_norm": 10.564095497131348,
"learning_rate": 6.269267864971504e-06,
"loss": 12.1198,
"step": 43550
},
{
"epoch": 18.948437924477044,
"grad_norm": 16.400606155395508,
"learning_rate": 6.2648838228846996e-06,
"loss": 12.1314,
"step": 43600
},
{
"epoch": 18.970171149144253,
"grad_norm": 16.357927322387695,
"learning_rate": 6.260499780797896e-06,
"loss": 12.1305,
"step": 43650
},
{
"epoch": 18.991904373811465,
"grad_norm": 18.073299407958984,
"learning_rate": 6.256115738711093e-06,
"loss": 12.1585,
"step": 43700
},
{
"epoch": 19.01347459929367,
"grad_norm": 14.831045150756836,
"learning_rate": 6.251731696624288e-06,
"loss": 12.023,
"step": 43750
},
{
"epoch": 19.03520782396088,
"grad_norm": 20.606718063354492,
"learning_rate": 6.2473476545374845e-06,
"loss": 12.0678,
"step": 43800
},
{
"epoch": 19.05694104862809,
"grad_norm": 20.03177261352539,
"learning_rate": 6.24296361245068e-06,
"loss": 12.1054,
"step": 43850
},
{
"epoch": 19.0786742732953,
"grad_norm": 16.764787673950195,
"learning_rate": 6.238579570363876e-06,
"loss": 12.076,
"step": 43900
},
{
"epoch": 19.10040749796251,
"grad_norm": 20.074857711791992,
"learning_rate": 6.2341955282770715e-06,
"loss": 12.0784,
"step": 43950
},
{
"epoch": 19.12214072262972,
"grad_norm": 14.84661865234375,
"learning_rate": 6.229811486190268e-06,
"loss": 12.0933,
"step": 44000
},
{
"epoch": 19.14387394729693,
"grad_norm": 17.447168350219727,
"learning_rate": 6.225427444103463e-06,
"loss": 12.0883,
"step": 44050
},
{
"epoch": 19.16560717196414,
"grad_norm": 21.10520362854004,
"learning_rate": 6.22104340201666e-06,
"loss": 12.0839,
"step": 44100
},
{
"epoch": 19.18734039663135,
"grad_norm": 17.273950576782227,
"learning_rate": 6.2166593599298564e-06,
"loss": 12.0517,
"step": 44150
},
{
"epoch": 19.20907362129856,
"grad_norm": 11.963603019714355,
"learning_rate": 6.212275317843052e-06,
"loss": 12.1211,
"step": 44200
},
{
"epoch": 19.23080684596577,
"grad_norm": 28.02683448791504,
"learning_rate": 6.207891275756248e-06,
"loss": 12.1012,
"step": 44250
},
{
"epoch": 19.252540070632982,
"grad_norm": 18.750391006469727,
"learning_rate": 6.2035072336694435e-06,
"loss": 12.1257,
"step": 44300
},
{
"epoch": 19.27427329530019,
"grad_norm": 13.95964241027832,
"learning_rate": 6.19912319158264e-06,
"loss": 12.1096,
"step": 44350
},
{
"epoch": 19.2960065199674,
"grad_norm": 13.954286575317383,
"learning_rate": 6.194739149495835e-06,
"loss": 12.096,
"step": 44400
},
{
"epoch": 19.31773974463461,
"grad_norm": 19.977340698242188,
"learning_rate": 6.190355107409032e-06,
"loss": 12.1119,
"step": 44450
},
{
"epoch": 19.33947296930182,
"grad_norm": 20.84231948852539,
"learning_rate": 6.1859710653222275e-06,
"loss": 12.117,
"step": 44500
},
{
"epoch": 19.36120619396903,
"grad_norm": 15.089071273803711,
"learning_rate": 6.181587023235424e-06,
"loss": 12.0531,
"step": 44550
},
{
"epoch": 19.38293941863624,
"grad_norm": 19.530078887939453,
"learning_rate": 6.17720298114862e-06,
"loss": 12.0606,
"step": 44600
},
{
"epoch": 19.40467264330345,
"grad_norm": 35.273353576660156,
"learning_rate": 6.172818939061815e-06,
"loss": 12.0703,
"step": 44650
},
{
"epoch": 19.42640586797066,
"grad_norm": 27.611345291137695,
"learning_rate": 6.168434896975012e-06,
"loss": 12.0979,
"step": 44700
},
{
"epoch": 19.44813909263787,
"grad_norm": 12.463072776794434,
"learning_rate": 6.164050854888207e-06,
"loss": 12.1186,
"step": 44750
},
{
"epoch": 19.46987231730508,
"grad_norm": 13.169920921325684,
"learning_rate": 6.159666812801403e-06,
"loss": 12.1214,
"step": 44800
},
{
"epoch": 19.49160554197229,
"grad_norm": 19.480173110961914,
"learning_rate": 6.155282770714599e-06,
"loss": 12.0956,
"step": 44850
},
{
"epoch": 19.5133387666395,
"grad_norm": 12.746538162231445,
"learning_rate": 6.150898728627796e-06,
"loss": 12.0653,
"step": 44900
},
{
"epoch": 19.53507199130671,
"grad_norm": 20.619016647338867,
"learning_rate": 6.146514686540991e-06,
"loss": 12.1175,
"step": 44950
},
{
"epoch": 19.55680521597392,
"grad_norm": 19.82939910888672,
"learning_rate": 6.142130644454187e-06,
"loss": 12.126,
"step": 45000
},
{
"epoch": 19.57853844064113,
"grad_norm": 14.061666488647461,
"learning_rate": 6.1377466023673836e-06,
"loss": 12.0754,
"step": 45050
},
{
"epoch": 19.60027166530834,
"grad_norm": 13.00661849975586,
"learning_rate": 6.133362560280579e-06,
"loss": 12.0868,
"step": 45100
},
{
"epoch": 19.62200488997555,
"grad_norm": 14.957731246948242,
"learning_rate": 6.128978518193775e-06,
"loss": 12.0564,
"step": 45150
},
{
"epoch": 19.64373811464276,
"grad_norm": 14.701393127441406,
"learning_rate": 6.124594476106971e-06,
"loss": 12.1071,
"step": 45200
},
{
"epoch": 19.66547133930997,
"grad_norm": 17.358051300048828,
"learning_rate": 6.120210434020168e-06,
"loss": 12.118,
"step": 45250
},
{
"epoch": 19.68720456397718,
"grad_norm": 14.36281967163086,
"learning_rate": 6.115826391933363e-06,
"loss": 12.1246,
"step": 45300
},
{
"epoch": 19.70893778864439,
"grad_norm": 30.517263412475586,
"learning_rate": 6.111442349846559e-06,
"loss": 12.1116,
"step": 45350
},
{
"epoch": 19.7306710133116,
"grad_norm": 16.39494514465332,
"learning_rate": 6.107058307759755e-06,
"loss": 12.1275,
"step": 45400
},
{
"epoch": 19.75240423797881,
"grad_norm": 15.935347557067871,
"learning_rate": 6.102674265672951e-06,
"loss": 12.0961,
"step": 45450
},
{
"epoch": 19.77413746264602,
"grad_norm": 17.69158172607422,
"learning_rate": 6.098290223586147e-06,
"loss": 12.1242,
"step": 45500
},
{
"epoch": 19.79587068731323,
"grad_norm": 17.6668758392334,
"learning_rate": 6.0939061814993425e-06,
"loss": 12.0872,
"step": 45550
},
{
"epoch": 19.81760391198044,
"grad_norm": 16.675373077392578,
"learning_rate": 6.08952213941254e-06,
"loss": 12.0705,
"step": 45600
},
{
"epoch": 19.83933713664765,
"grad_norm": 18.560033798217773,
"learning_rate": 6.085138097325735e-06,
"loss": 12.098,
"step": 45650
},
{
"epoch": 19.86107036131486,
"grad_norm": 18.61153793334961,
"learning_rate": 6.080754055238931e-06,
"loss": 12.1017,
"step": 45700
},
{
"epoch": 19.88280358598207,
"grad_norm": 23.753692626953125,
"learning_rate": 6.076370013152127e-06,
"loss": 12.101,
"step": 45750
},
{
"epoch": 19.90453681064928,
"grad_norm": 12.80927848815918,
"learning_rate": 6.071985971065323e-06,
"loss": 12.116,
"step": 45800
},
{
"epoch": 19.92627003531649,
"grad_norm": 22.449129104614258,
"learning_rate": 6.067601928978518e-06,
"loss": 12.1071,
"step": 45850
},
{
"epoch": 19.9480032599837,
"grad_norm": 53.62459945678711,
"learning_rate": 6.0632178868917145e-06,
"loss": 12.1369,
"step": 45900
},
{
"epoch": 19.96973648465091,
"grad_norm": 18.846603393554688,
"learning_rate": 6.058833844804911e-06,
"loss": 12.099,
"step": 45950
},
{
"epoch": 19.99146970931812,
"grad_norm": 28.6248836517334,
"learning_rate": 6.054449802718106e-06,
"loss": 12.1369,
"step": 46000
},
{
"epoch": 20.013039934800325,
"grad_norm": 17.0070858001709,
"learning_rate": 6.050065760631303e-06,
"loss": 11.9934,
"step": 46050
},
{
"epoch": 20.034773159467537,
"grad_norm": 30.195463180541992,
"learning_rate": 6.0456817185444986e-06,
"loss": 12.0606,
"step": 46100
},
{
"epoch": 20.056506384134746,
"grad_norm": 15.557343482971191,
"learning_rate": 6.041297676457695e-06,
"loss": 12.0555,
"step": 46150
},
{
"epoch": 20.078239608801955,
"grad_norm": 20.677410125732422,
"learning_rate": 6.03691363437089e-06,
"loss": 12.0169,
"step": 46200
},
{
"epoch": 20.099972833469167,
"grad_norm": 8.35476016998291,
"learning_rate": 6.032529592284086e-06,
"loss": 12.1085,
"step": 46250
},
{
"epoch": 20.121706058136375,
"grad_norm": 21.85611915588379,
"learning_rate": 6.028145550197282e-06,
"loss": 12.0724,
"step": 46300
},
{
"epoch": 20.143439282803588,
"grad_norm": 15.336892127990723,
"learning_rate": 6.023761508110478e-06,
"loss": 12.0564,
"step": 46350
},
{
"epoch": 20.165172507470796,
"grad_norm": 14.198942184448242,
"learning_rate": 6.019377466023675e-06,
"loss": 12.0629,
"step": 46400
},
{
"epoch": 20.186905732138005,
"grad_norm": 19.750280380249023,
"learning_rate": 6.0149934239368705e-06,
"loss": 12.0606,
"step": 46450
},
{
"epoch": 20.208638956805217,
"grad_norm": 23.643993377685547,
"learning_rate": 6.010609381850067e-06,
"loss": 12.0624,
"step": 46500
},
{
"epoch": 20.230372181472426,
"grad_norm": 15.32921028137207,
"learning_rate": 6.006225339763262e-06,
"loss": 12.0515,
"step": 46550
},
{
"epoch": 20.252105406139634,
"grad_norm": 18.966848373413086,
"learning_rate": 6.001841297676458e-06,
"loss": 12.0743,
"step": 46600
},
{
"epoch": 20.273838630806846,
"grad_norm": 15.885478973388672,
"learning_rate": 5.997457255589654e-06,
"loss": 12.068,
"step": 46650
},
{
"epoch": 20.295571855474055,
"grad_norm": 25.81429672241211,
"learning_rate": 5.99307321350285e-06,
"loss": 12.1066,
"step": 46700
},
{
"epoch": 20.317305080141267,
"grad_norm": 14.397024154663086,
"learning_rate": 5.988689171416045e-06,
"loss": 12.1048,
"step": 46750
},
{
"epoch": 20.339038304808476,
"grad_norm": 38.001121520996094,
"learning_rate": 5.984305129329242e-06,
"loss": 12.0548,
"step": 46800
},
{
"epoch": 20.360771529475684,
"grad_norm": 19.49797248840332,
"learning_rate": 5.979921087242439e-06,
"loss": 12.0747,
"step": 46850
},
{
"epoch": 20.382504754142897,
"grad_norm": 13.953147888183594,
"learning_rate": 5.975537045155634e-06,
"loss": 12.0704,
"step": 46900
},
{
"epoch": 20.404237978810105,
"grad_norm": 33.00684356689453,
"learning_rate": 5.97115300306883e-06,
"loss": 12.0737,
"step": 46950
},
{
"epoch": 20.425971203477317,
"grad_norm": 14.40523910522461,
"learning_rate": 5.966768960982026e-06,
"loss": 12.0644,
"step": 47000
},
{
"epoch": 20.447704428144526,
"grad_norm": 17.341297149658203,
"learning_rate": 5.962384918895222e-06,
"loss": 12.0375,
"step": 47050
},
{
"epoch": 20.469437652811735,
"grad_norm": 11.500914573669434,
"learning_rate": 5.958000876808417e-06,
"loss": 12.0957,
"step": 47100
},
{
"epoch": 20.491170877478947,
"grad_norm": 14.926876068115234,
"learning_rate": 5.9536168347216135e-06,
"loss": 12.0661,
"step": 47150
},
{
"epoch": 20.512904102146155,
"grad_norm": 33.41230392456055,
"learning_rate": 5.949232792634809e-06,
"loss": 12.0683,
"step": 47200
},
{
"epoch": 20.534637326813368,
"grad_norm": 11.592459678649902,
"learning_rate": 5.944848750548006e-06,
"loss": 12.0852,
"step": 47250
},
{
"epoch": 20.556370551480576,
"grad_norm": 11.893900871276855,
"learning_rate": 5.940464708461202e-06,
"loss": 12.0927,
"step": 47300
},
{
"epoch": 20.578103776147785,
"grad_norm": 19.416147232055664,
"learning_rate": 5.936080666374398e-06,
"loss": 12.0571,
"step": 47350
},
{
"epoch": 20.599837000814997,
"grad_norm": 114.77404022216797,
"learning_rate": 5.931696624287594e-06,
"loss": 12.0694,
"step": 47400
},
{
"epoch": 20.621570225482206,
"grad_norm": 22.660274505615234,
"learning_rate": 5.927312582200789e-06,
"loss": 12.0863,
"step": 47450
},
{
"epoch": 20.643303450149418,
"grad_norm": 27.254777908325195,
"learning_rate": 5.9229285401139855e-06,
"loss": 12.0506,
"step": 47500
},
{
"epoch": 20.665036674816626,
"grad_norm": 18.767820358276367,
"learning_rate": 5.918544498027181e-06,
"loss": 12.0552,
"step": 47550
},
{
"epoch": 20.686769899483835,
"grad_norm": 12.995434761047363,
"learning_rate": 5.914160455940378e-06,
"loss": 12.0879,
"step": 47600
},
{
"epoch": 20.708503124151047,
"grad_norm": 14.814035415649414,
"learning_rate": 5.909776413853573e-06,
"loss": 12.0959,
"step": 47650
},
{
"epoch": 20.730236348818256,
"grad_norm": 25.315176010131836,
"learning_rate": 5.90539237176677e-06,
"loss": 12.0976,
"step": 47700
},
{
"epoch": 20.751969573485464,
"grad_norm": 25.416751861572266,
"learning_rate": 5.901008329679966e-06,
"loss": 12.0528,
"step": 47750
},
{
"epoch": 20.773702798152677,
"grad_norm": 16.93905258178711,
"learning_rate": 5.896624287593161e-06,
"loss": 12.0705,
"step": 47800
},
{
"epoch": 20.795436022819885,
"grad_norm": 30.060588836669922,
"learning_rate": 5.8922402455063574e-06,
"loss": 12.0466,
"step": 47850
},
{
"epoch": 20.817169247487097,
"grad_norm": 13.423187255859375,
"learning_rate": 5.887856203419553e-06,
"loss": 12.0681,
"step": 47900
},
{
"epoch": 20.838902472154306,
"grad_norm": 13.607131004333496,
"learning_rate": 5.883472161332749e-06,
"loss": 12.0687,
"step": 47950
},
{
"epoch": 20.860635696821515,
"grad_norm": 22.271543502807617,
"learning_rate": 5.8790881192459444e-06,
"loss": 12.1039,
"step": 48000
},
{
"epoch": 20.882368921488727,
"grad_norm": 25.268817901611328,
"learning_rate": 5.8747040771591415e-06,
"loss": 12.09,
"step": 48050
},
{
"epoch": 20.904102146155935,
"grad_norm": 15.665398597717285,
"learning_rate": 5.870320035072338e-06,
"loss": 12.0956,
"step": 48100
},
{
"epoch": 20.925835370823147,
"grad_norm": 21.067293167114258,
"learning_rate": 5.865935992985533e-06,
"loss": 12.0499,
"step": 48150
},
{
"epoch": 20.947568595490356,
"grad_norm": 22.776708602905273,
"learning_rate": 5.861551950898729e-06,
"loss": 12.0762,
"step": 48200
},
{
"epoch": 20.969301820157565,
"grad_norm": 8.629790306091309,
"learning_rate": 5.857167908811925e-06,
"loss": 12.0614,
"step": 48250
},
{
"epoch": 20.991035044824777,
"grad_norm": 15.550890922546387,
"learning_rate": 5.852783866725121e-06,
"loss": 12.0792,
"step": 48300
},
{
"epoch": 21.01260527030698,
"grad_norm": 12.225948333740234,
"learning_rate": 5.848399824638316e-06,
"loss": 11.9374,
"step": 48350
},
{
"epoch": 21.034338494974193,
"grad_norm": 14.14416790008545,
"learning_rate": 5.8440157825515135e-06,
"loss": 12.0157,
"step": 48400
},
{
"epoch": 21.056071719641402,
"grad_norm": 17.12042236328125,
"learning_rate": 5.839631740464709e-06,
"loss": 12.0288,
"step": 48450
},
{
"epoch": 21.07780494430861,
"grad_norm": 13.070446968078613,
"learning_rate": 5.835247698377905e-06,
"loss": 12.0528,
"step": 48500
},
{
"epoch": 21.099538168975823,
"grad_norm": 22.833274841308594,
"learning_rate": 5.830863656291101e-06,
"loss": 12.0479,
"step": 48550
},
{
"epoch": 21.12127139364303,
"grad_norm": 19.790773391723633,
"learning_rate": 5.826479614204297e-06,
"loss": 12.044,
"step": 48600
},
{
"epoch": 21.143004618310243,
"grad_norm": 16.40357208251953,
"learning_rate": 5.822095572117493e-06,
"loss": 12.0299,
"step": 48650
},
{
"epoch": 21.164737842977452,
"grad_norm": 13.88508129119873,
"learning_rate": 5.817711530030688e-06,
"loss": 12.0255,
"step": 48700
},
{
"epoch": 21.18647106764466,
"grad_norm": 18.211301803588867,
"learning_rate": 5.813327487943885e-06,
"loss": 12.0283,
"step": 48750
},
{
"epoch": 21.208204292311873,
"grad_norm": 13.291574478149414,
"learning_rate": 5.80894344585708e-06,
"loss": 12.0283,
"step": 48800
},
{
"epoch": 21.22993751697908,
"grad_norm": 15.905344009399414,
"learning_rate": 5.804559403770277e-06,
"loss": 12.0526,
"step": 48850
},
{
"epoch": 21.251670741646294,
"grad_norm": 11.572737693786621,
"learning_rate": 5.800175361683472e-06,
"loss": 12.0322,
"step": 48900
},
{
"epoch": 21.273403966313502,
"grad_norm": 17.022083282470703,
"learning_rate": 5.795791319596669e-06,
"loss": 12.0426,
"step": 48950
},
{
"epoch": 21.29513719098071,
"grad_norm": 23.31209945678711,
"learning_rate": 5.791407277509865e-06,
"loss": 12.0233,
"step": 49000
},
{
"epoch": 21.316870415647923,
"grad_norm": 23.8966007232666,
"learning_rate": 5.78702323542306e-06,
"loss": 12.0526,
"step": 49050
},
{
"epoch": 21.33860364031513,
"grad_norm": 17.35943031311035,
"learning_rate": 5.7826391933362565e-06,
"loss": 12.0379,
"step": 49100
},
{
"epoch": 21.36033686498234,
"grad_norm": 33.082645416259766,
"learning_rate": 5.778255151249452e-06,
"loss": 12.0407,
"step": 49150
},
{
"epoch": 21.382070089649552,
"grad_norm": 13.810714721679688,
"learning_rate": 5.773871109162649e-06,
"loss": 12.0193,
"step": 49200
},
{
"epoch": 21.40380331431676,
"grad_norm": 15.985318183898926,
"learning_rate": 5.769487067075844e-06,
"loss": 12.0437,
"step": 49250
},
{
"epoch": 21.425536538983973,
"grad_norm": 11.185006141662598,
"learning_rate": 5.765103024989041e-06,
"loss": 12.0347,
"step": 49300
},
{
"epoch": 21.44726976365118,
"grad_norm": 13.088438034057617,
"learning_rate": 5.760718982902236e-06,
"loss": 12.036,
"step": 49350
},
{
"epoch": 21.46900298831839,
"grad_norm": 35.933502197265625,
"learning_rate": 5.756334940815432e-06,
"loss": 12.0709,
"step": 49400
},
{
"epoch": 21.490736212985603,
"grad_norm": 13.896368026733398,
"learning_rate": 5.7519508987286285e-06,
"loss": 12.0181,
"step": 49450
},
{
"epoch": 21.51246943765281,
"grad_norm": 15.991681098937988,
"learning_rate": 5.747566856641824e-06,
"loss": 12.0274,
"step": 49500
},
{
"epoch": 21.534202662320023,
"grad_norm": 21.10006332397461,
"learning_rate": 5.743182814555021e-06,
"loss": 12.0587,
"step": 49550
},
{
"epoch": 21.555935886987232,
"grad_norm": 18.29193115234375,
"learning_rate": 5.738798772468216e-06,
"loss": 12.028,
"step": 49600
},
{
"epoch": 21.57766911165444,
"grad_norm": 27.753482818603516,
"learning_rate": 5.7344147303814125e-06,
"loss": 11.9988,
"step": 49650
},
{
"epoch": 21.599402336321653,
"grad_norm": 24.744070053100586,
"learning_rate": 5.730030688294608e-06,
"loss": 12.0743,
"step": 49700
},
{
"epoch": 21.62113556098886,
"grad_norm": 21.145042419433594,
"learning_rate": 5.725646646207804e-06,
"loss": 12.0425,
"step": 49750
},
{
"epoch": 21.64286878565607,
"grad_norm": 13.751763343811035,
"learning_rate": 5.7212626041209996e-06,
"loss": 12.077,
"step": 49800
},
{
"epoch": 21.664602010323282,
"grad_norm": 31.52511978149414,
"learning_rate": 5.716878562034196e-06,
"loss": 12.0228,
"step": 49850
},
{
"epoch": 21.68633523499049,
"grad_norm": 51.40691375732422,
"learning_rate": 5.712494519947393e-06,
"loss": 12.0487,
"step": 49900
},
{
"epoch": 21.708068459657703,
"grad_norm": 12.909490585327148,
"learning_rate": 5.708110477860587e-06,
"loss": 12.0468,
"step": 49950
},
{
"epoch": 21.72980168432491,
"grad_norm": 14.6589937210083,
"learning_rate": 5.7037264357737845e-06,
"loss": 12.0168,
"step": 50000
},
{
"epoch": 21.72980168432491,
"eval_cer": 0.07568846975176824,
"eval_loss": 2.362048864364624,
"eval_runtime": 397.6775,
"eval_samples_per_second": 13.594,
"eval_steps_per_second": 3.4,
"eval_wer": 0.22898414193750982,
"step": 50000
},
{
"epoch": 21.75153490899212,
"grad_norm": 20.892807006835938,
"learning_rate": 5.69934239368698e-06,
"loss": 12.021,
"step": 50050
},
{
"epoch": 21.773268133659332,
"grad_norm": 14.854979515075684,
"learning_rate": 5.694958351600176e-06,
"loss": 12.0355,
"step": 50100
},
{
"epoch": 21.79500135832654,
"grad_norm": 18.140365600585938,
"learning_rate": 5.6905743095133715e-06,
"loss": 12.0173,
"step": 50150
},
{
"epoch": 21.816734582993753,
"grad_norm": 17.70104217529297,
"learning_rate": 5.686190267426568e-06,
"loss": 12.0801,
"step": 50200
},
{
"epoch": 21.83846780766096,
"grad_norm": 18.51262092590332,
"learning_rate": 5.681806225339763e-06,
"loss": 12.0334,
"step": 50250
},
{
"epoch": 21.86020103232817,
"grad_norm": 15.687026023864746,
"learning_rate": 5.677422183252959e-06,
"loss": 12.0553,
"step": 50300
},
{
"epoch": 21.881934256995383,
"grad_norm": 19.184951782226562,
"learning_rate": 5.6730381411661564e-06,
"loss": 12.0409,
"step": 50350
},
{
"epoch": 21.90366748166259,
"grad_norm": 18.097457885742188,
"learning_rate": 5.668654099079352e-06,
"loss": 12.058,
"step": 50400
},
{
"epoch": 21.925400706329803,
"grad_norm": 26.270936965942383,
"learning_rate": 5.664270056992548e-06,
"loss": 12.064,
"step": 50450
},
{
"epoch": 21.947133930997012,
"grad_norm": 26.288280487060547,
"learning_rate": 5.6598860149057434e-06,
"loss": 12.034,
"step": 50500
},
{
"epoch": 21.96886715566422,
"grad_norm": 10.051491737365723,
"learning_rate": 5.65550197281894e-06,
"loss": 12.0676,
"step": 50550
},
{
"epoch": 21.990600380331433,
"grad_norm": 15.91609001159668,
"learning_rate": 5.651117930732135e-06,
"loss": 12.0488,
"step": 50600
},
{
"epoch": 22.012170605813637,
"grad_norm": 16.341890335083008,
"learning_rate": 5.646733888645331e-06,
"loss": 11.9307,
"step": 50650
},
{
"epoch": 22.03390383048085,
"grad_norm": 17.389766693115234,
"learning_rate": 5.642349846558527e-06,
"loss": 11.9959,
"step": 50700
},
{
"epoch": 22.055637055148058,
"grad_norm": 15.45628547668457,
"learning_rate": 5.637965804471724e-06,
"loss": 12.0322,
"step": 50750
},
{
"epoch": 22.077370279815266,
"grad_norm": 14.2662935256958,
"learning_rate": 5.63358176238492e-06,
"loss": 12.0679,
"step": 50800
},
{
"epoch": 22.09910350448248,
"grad_norm": 18.397008895874023,
"learning_rate": 5.629197720298115e-06,
"loss": 12.0007,
"step": 50850
},
{
"epoch": 22.120836729149687,
"grad_norm": 14.498343467712402,
"learning_rate": 5.624813678211312e-06,
"loss": 11.9903,
"step": 50900
},
{
"epoch": 22.1425699538169,
"grad_norm": 26.300201416015625,
"learning_rate": 5.620429636124507e-06,
"loss": 12.0488,
"step": 50950
},
{
"epoch": 22.164303178484108,
"grad_norm": 17.42373275756836,
"learning_rate": 5.616045594037703e-06,
"loss": 12.0156,
"step": 51000
},
{
"epoch": 22.186036403151316,
"grad_norm": 13.430180549621582,
"learning_rate": 5.611661551950899e-06,
"loss": 12.0147,
"step": 51050
},
{
"epoch": 22.20776962781853,
"grad_norm": 8.827760696411133,
"learning_rate": 5.607277509864095e-06,
"loss": 12.0464,
"step": 51100
},
{
"epoch": 22.229502852485737,
"grad_norm": 13.834342002868652,
"learning_rate": 5.60289346777729e-06,
"loss": 11.9739,
"step": 51150
},
{
"epoch": 22.251236077152946,
"grad_norm": 15.042898178100586,
"learning_rate": 5.598509425690487e-06,
"loss": 12.0098,
"step": 51200
},
{
"epoch": 22.272969301820158,
"grad_norm": 19.06934356689453,
"learning_rate": 5.5941253836036836e-06,
"loss": 11.9855,
"step": 51250
},
{
"epoch": 22.294702526487367,
"grad_norm": 11.361977577209473,
"learning_rate": 5.589741341516879e-06,
"loss": 12.0193,
"step": 51300
},
{
"epoch": 22.31643575115458,
"grad_norm": 19.977092742919922,
"learning_rate": 5.585357299430075e-06,
"loss": 12.0072,
"step": 51350
},
{
"epoch": 22.338168975821787,
"grad_norm": 18.312875747680664,
"learning_rate": 5.5809732573432706e-06,
"loss": 12.0161,
"step": 51400
},
{
"epoch": 22.359902200488996,
"grad_norm": 10.536518096923828,
"learning_rate": 5.576589215256467e-06,
"loss": 12.0285,
"step": 51450
},
{
"epoch": 22.381635425156208,
"grad_norm": 15.011421203613281,
"learning_rate": 5.572205173169662e-06,
"loss": 11.9876,
"step": 51500
},
{
"epoch": 22.403368649823417,
"grad_norm": 17.05405616760254,
"learning_rate": 5.567821131082859e-06,
"loss": 12.0425,
"step": 51550
},
{
"epoch": 22.42510187449063,
"grad_norm": 16.87340545654297,
"learning_rate": 5.563437088996055e-06,
"loss": 12.0218,
"step": 51600
},
{
"epoch": 22.446835099157838,
"grad_norm": 19.586755752563477,
"learning_rate": 5.559053046909251e-06,
"loss": 12.032,
"step": 51650
},
{
"epoch": 22.468568323825046,
"grad_norm": 27.009822845458984,
"learning_rate": 5.554669004822447e-06,
"loss": 12.0083,
"step": 51700
},
{
"epoch": 22.49030154849226,
"grad_norm": 11.635884284973145,
"learning_rate": 5.5502849627356425e-06,
"loss": 12.025,
"step": 51750
},
{
"epoch": 22.512034773159467,
"grad_norm": 17.531131744384766,
"learning_rate": 5.545900920648839e-06,
"loss": 12.0123,
"step": 51800
},
{
"epoch": 22.53376799782668,
"grad_norm": 10.203145980834961,
"learning_rate": 5.541516878562034e-06,
"loss": 12.0013,
"step": 51850
},
{
"epoch": 22.555501222493888,
"grad_norm": 19.1767635345459,
"learning_rate": 5.537132836475231e-06,
"loss": 12.0279,
"step": 51900
},
{
"epoch": 22.577234447161096,
"grad_norm": 31.68284034729004,
"learning_rate": 5.532748794388426e-06,
"loss": 12.0053,
"step": 51950
},
{
"epoch": 22.59896767182831,
"grad_norm": 10.772562980651855,
"learning_rate": 5.528364752301623e-06,
"loss": 12.0153,
"step": 52000
},
{
"epoch": 22.620700896495517,
"grad_norm": 99.19184875488281,
"learning_rate": 5.523980710214818e-06,
"loss": 12.0059,
"step": 52050
},
{
"epoch": 22.642434121162726,
"grad_norm": 20.737354278564453,
"learning_rate": 5.5195966681280145e-06,
"loss": 12.0263,
"step": 52100
},
{
"epoch": 22.664167345829938,
"grad_norm": 15.494745254516602,
"learning_rate": 5.515212626041211e-06,
"loss": 12.0129,
"step": 52150
},
{
"epoch": 22.685900570497147,
"grad_norm": 34.782100677490234,
"learning_rate": 5.510828583954406e-06,
"loss": 12.0497,
"step": 52200
},
{
"epoch": 22.70763379516436,
"grad_norm": 18.235090255737305,
"learning_rate": 5.506444541867602e-06,
"loss": 11.9992,
"step": 52250
},
{
"epoch": 22.729367019831567,
"grad_norm": 27.689912796020508,
"learning_rate": 5.502060499780798e-06,
"loss": 12.0023,
"step": 52300
},
{
"epoch": 22.751100244498776,
"grad_norm": 18.36990737915039,
"learning_rate": 5.497676457693995e-06,
"loss": 12.0056,
"step": 52350
},
{
"epoch": 22.772833469165988,
"grad_norm": 18.038314819335938,
"learning_rate": 5.49329241560719e-06,
"loss": 12.0212,
"step": 52400
},
{
"epoch": 22.794566693833197,
"grad_norm": 8.06163501739502,
"learning_rate": 5.488908373520386e-06,
"loss": 12.0274,
"step": 52450
},
{
"epoch": 22.81629991850041,
"grad_norm": 15.676831245422363,
"learning_rate": 5.484524331433583e-06,
"loss": 12.0148,
"step": 52500
},
{
"epoch": 22.838033143167618,
"grad_norm": 24.74848747253418,
"learning_rate": 5.480140289346778e-06,
"loss": 12.0186,
"step": 52550
},
{
"epoch": 22.859766367834826,
"grad_norm": 10.006168365478516,
"learning_rate": 5.475756247259974e-06,
"loss": 12.0071,
"step": 52600
},
{
"epoch": 22.88149959250204,
"grad_norm": 10.135807991027832,
"learning_rate": 5.47137220517317e-06,
"loss": 12.0224,
"step": 52650
},
{
"epoch": 22.903232817169247,
"grad_norm": 16.03304100036621,
"learning_rate": 5.466988163086367e-06,
"loss": 12.0253,
"step": 52700
},
{
"epoch": 22.92496604183646,
"grad_norm": 15.307913780212402,
"learning_rate": 5.462604120999562e-06,
"loss": 12.0234,
"step": 52750
},
{
"epoch": 22.946699266503668,
"grad_norm": 27.5895938873291,
"learning_rate": 5.458220078912758e-06,
"loss": 12.0162,
"step": 52800
},
{
"epoch": 22.968432491170876,
"grad_norm": 14.608256340026855,
"learning_rate": 5.453836036825954e-06,
"loss": 12.0005,
"step": 52850
},
{
"epoch": 22.99016571583809,
"grad_norm": 41.10546112060547,
"learning_rate": 5.44945199473915e-06,
"loss": 12.0735,
"step": 52900
},
{
"epoch": 23.011735941320293,
"grad_norm": 12.675127983093262,
"learning_rate": 5.445067952652346e-06,
"loss": 11.9152,
"step": 52950
},
{
"epoch": 23.033469165987505,
"grad_norm": 16.779767990112305,
"learning_rate": 5.440683910565542e-06,
"loss": 11.9743,
"step": 53000
},
{
"epoch": 23.055202390654713,
"grad_norm": 29.24107551574707,
"learning_rate": 5.436299868478739e-06,
"loss": 11.9844,
"step": 53050
},
{
"epoch": 23.076935615321922,
"grad_norm": 15.517463684082031,
"learning_rate": 5.431915826391933e-06,
"loss": 12.0084,
"step": 53100
},
{
"epoch": 23.098668839989134,
"grad_norm": 14.068320274353027,
"learning_rate": 5.42753178430513e-06,
"loss": 11.982,
"step": 53150
},
{
"epoch": 23.120402064656343,
"grad_norm": 13.296953201293945,
"learning_rate": 5.423147742218326e-06,
"loss": 12.0076,
"step": 53200
},
{
"epoch": 23.142135289323555,
"grad_norm": 11.365141868591309,
"learning_rate": 5.418763700131522e-06,
"loss": 11.9825,
"step": 53250
},
{
"epoch": 23.163868513990764,
"grad_norm": 11.649621963500977,
"learning_rate": 5.414379658044717e-06,
"loss": 11.9874,
"step": 53300
},
{
"epoch": 23.185601738657972,
"grad_norm": 12.506479263305664,
"learning_rate": 5.4099956159579135e-06,
"loss": 12.0203,
"step": 53350
},
{
"epoch": 23.207334963325184,
"grad_norm": 26.387269973754883,
"learning_rate": 5.40561157387111e-06,
"loss": 11.9718,
"step": 53400
},
{
"epoch": 23.229068187992393,
"grad_norm": 30.277488708496094,
"learning_rate": 5.401227531784305e-06,
"loss": 11.9922,
"step": 53450
},
{
"epoch": 23.2508014126596,
"grad_norm": 16.27001953125,
"learning_rate": 5.396843489697502e-06,
"loss": 12.0103,
"step": 53500
},
{
"epoch": 23.272534637326814,
"grad_norm": 10.601898193359375,
"learning_rate": 5.392459447610698e-06,
"loss": 11.982,
"step": 53550
},
{
"epoch": 23.294267861994022,
"grad_norm": 16.928091049194336,
"learning_rate": 5.388075405523894e-06,
"loss": 11.9921,
"step": 53600
},
{
"epoch": 23.316001086661235,
"grad_norm": 17.180408477783203,
"learning_rate": 5.383691363437089e-06,
"loss": 11.9681,
"step": 53650
},
{
"epoch": 23.337734311328443,
"grad_norm": 9.645658493041992,
"learning_rate": 5.3793073213502855e-06,
"loss": 11.9921,
"step": 53700
},
{
"epoch": 23.359467535995652,
"grad_norm": 7.888517379760742,
"learning_rate": 5.374923279263481e-06,
"loss": 11.9957,
"step": 53750
},
{
"epoch": 23.381200760662864,
"grad_norm": 23.52006721496582,
"learning_rate": 5.370539237176677e-06,
"loss": 11.9913,
"step": 53800
},
{
"epoch": 23.402933985330073,
"grad_norm": 17.327842712402344,
"learning_rate": 5.366155195089874e-06,
"loss": 11.985,
"step": 53850
},
{
"epoch": 23.424667209997285,
"grad_norm": 15.461244583129883,
"learning_rate": 5.3617711530030696e-06,
"loss": 11.9856,
"step": 53900
},
{
"epoch": 23.446400434664493,
"grad_norm": 10.2888822555542,
"learning_rate": 5.357387110916266e-06,
"loss": 12.0014,
"step": 53950
},
{
"epoch": 23.468133659331702,
"grad_norm": 16.063997268676758,
"learning_rate": 5.353003068829461e-06,
"loss": 11.997,
"step": 54000
},
{
"epoch": 23.489866883998914,
"grad_norm": 28.185026168823242,
"learning_rate": 5.3486190267426574e-06,
"loss": 11.9855,
"step": 54050
},
{
"epoch": 23.511600108666123,
"grad_norm": 16.92442512512207,
"learning_rate": 5.344234984655853e-06,
"loss": 12.0206,
"step": 54100
},
{
"epoch": 23.533333333333335,
"grad_norm": 6.245467662811279,
"learning_rate": 5.339850942569049e-06,
"loss": 11.9748,
"step": 54150
},
{
"epoch": 23.555066558000544,
"grad_norm": 14.348546981811523,
"learning_rate": 5.3354669004822444e-06,
"loss": 11.9609,
"step": 54200
},
{
"epoch": 23.576799782667752,
"grad_norm": 10.864014625549316,
"learning_rate": 5.331082858395441e-06,
"loss": 11.9947,
"step": 54250
},
{
"epoch": 23.598533007334964,
"grad_norm": 8.79773998260498,
"learning_rate": 5.326698816308638e-06,
"loss": 12.0031,
"step": 54300
},
{
"epoch": 23.620266232002173,
"grad_norm": 19.14083480834961,
"learning_rate": 5.322314774221833e-06,
"loss": 11.9738,
"step": 54350
},
{
"epoch": 23.64199945666938,
"grad_norm": 10.049248695373535,
"learning_rate": 5.317930732135029e-06,
"loss": 11.9514,
"step": 54400
},
{
"epoch": 23.663732681336594,
"grad_norm": 11.119285583496094,
"learning_rate": 5.313546690048225e-06,
"loss": 11.9914,
"step": 54450
},
{
"epoch": 23.685465906003802,
"grad_norm": 8.268950462341309,
"learning_rate": 5.309162647961421e-06,
"loss": 11.994,
"step": 54500
},
{
"epoch": 23.707199130671015,
"grad_norm": 14.429734230041504,
"learning_rate": 5.304778605874616e-06,
"loss": 11.975,
"step": 54550
},
{
"epoch": 23.728932355338223,
"grad_norm": 15.248434066772461,
"learning_rate": 5.300394563787813e-06,
"loss": 11.9967,
"step": 54600
},
{
"epoch": 23.750665580005432,
"grad_norm": 27.12610626220703,
"learning_rate": 5.296010521701008e-06,
"loss": 12.0066,
"step": 54650
},
{
"epoch": 23.772398804672644,
"grad_norm": 11.624201774597168,
"learning_rate": 5.291626479614205e-06,
"loss": 11.9857,
"step": 54700
},
{
"epoch": 23.794132029339853,
"grad_norm": 38.6632194519043,
"learning_rate": 5.287242437527401e-06,
"loss": 12.0068,
"step": 54750
},
{
"epoch": 23.815865254007065,
"grad_norm": 21.433034896850586,
"learning_rate": 5.282858395440597e-06,
"loss": 11.9545,
"step": 54800
},
{
"epoch": 23.837598478674273,
"grad_norm": 12.88279914855957,
"learning_rate": 5.278474353353793e-06,
"loss": 11.9675,
"step": 54850
},
{
"epoch": 23.859331703341482,
"grad_norm": 11.213829040527344,
"learning_rate": 5.274090311266988e-06,
"loss": 11.9907,
"step": 54900
},
{
"epoch": 23.881064928008694,
"grad_norm": 32.87601852416992,
"learning_rate": 5.2697062691801846e-06,
"loss": 12.0041,
"step": 54950
},
{
"epoch": 23.902798152675903,
"grad_norm": 12.214354515075684,
"learning_rate": 5.26532222709338e-06,
"loss": 12.0013,
"step": 55000
},
{
"epoch": 23.924531377343115,
"grad_norm": 18.823352813720703,
"learning_rate": 5.260938185006577e-06,
"loss": 12.0205,
"step": 55050
},
{
"epoch": 23.946264602010324,
"grad_norm": 11.764278411865234,
"learning_rate": 5.2565541429197716e-06,
"loss": 12.0045,
"step": 55100
},
{
"epoch": 23.967997826677532,
"grad_norm": 33.26872253417969,
"learning_rate": 5.252170100832969e-06,
"loss": 11.9852,
"step": 55150
},
{
"epoch": 23.989731051344744,
"grad_norm": 20.137388229370117,
"learning_rate": 5.247786058746165e-06,
"loss": 12.0023,
"step": 55200
},
{
"epoch": 24.01130127682695,
"grad_norm": 13.359118461608887,
"learning_rate": 5.24340201665936e-06,
"loss": 11.8893,
"step": 55250
},
{
"epoch": 24.03303450149416,
"grad_norm": 12.654318809509277,
"learning_rate": 5.2390179745725565e-06,
"loss": 11.9913,
"step": 55300
},
{
"epoch": 24.05476772616137,
"grad_norm": 12.723244667053223,
"learning_rate": 5.234633932485752e-06,
"loss": 11.9835,
"step": 55350
},
{
"epoch": 24.076500950828578,
"grad_norm": 10.007128715515137,
"learning_rate": 5.230249890398948e-06,
"loss": 11.9639,
"step": 55400
},
{
"epoch": 24.09823417549579,
"grad_norm": 24.932937622070312,
"learning_rate": 5.2258658483121435e-06,
"loss": 11.9567,
"step": 55450
},
{
"epoch": 24.119967400163,
"grad_norm": 13.288817405700684,
"learning_rate": 5.221481806225341e-06,
"loss": 11.9896,
"step": 55500
},
{
"epoch": 24.14170062483021,
"grad_norm": 24.153135299682617,
"learning_rate": 5.217097764138536e-06,
"loss": 11.9458,
"step": 55550
},
{
"epoch": 24.16343384949742,
"grad_norm": 21.456832885742188,
"learning_rate": 5.212713722051732e-06,
"loss": 11.9637,
"step": 55600
},
{
"epoch": 24.185167074164628,
"grad_norm": 11.885467529296875,
"learning_rate": 5.2083296799649284e-06,
"loss": 11.9763,
"step": 55650
},
{
"epoch": 24.20690029883184,
"grad_norm": 18.14926528930664,
"learning_rate": 5.203945637878124e-06,
"loss": 11.9793,
"step": 55700
},
{
"epoch": 24.22863352349905,
"grad_norm": 10.626521110534668,
"learning_rate": 5.19956159579132e-06,
"loss": 11.9717,
"step": 55750
},
{
"epoch": 24.250366748166257,
"grad_norm": 18.046018600463867,
"learning_rate": 5.1951775537045155e-06,
"loss": 11.9679,
"step": 55800
},
{
"epoch": 24.27209997283347,
"grad_norm": 19.871051788330078,
"learning_rate": 5.1907935116177125e-06,
"loss": 11.9655,
"step": 55850
},
{
"epoch": 24.29383319750068,
"grad_norm": 26.990354537963867,
"learning_rate": 5.186409469530908e-06,
"loss": 11.9776,
"step": 55900
},
{
"epoch": 24.31556642216789,
"grad_norm": 13.593362808227539,
"learning_rate": 5.182025427444104e-06,
"loss": 11.9765,
"step": 55950
},
{
"epoch": 24.3372996468351,
"grad_norm": 21.99699592590332,
"learning_rate": 5.1776413853572995e-06,
"loss": 11.9698,
"step": 56000
},
{
"epoch": 24.359032871502308,
"grad_norm": 17.28653335571289,
"learning_rate": 5.173257343270496e-06,
"loss": 11.9668,
"step": 56050
},
{
"epoch": 24.38076609616952,
"grad_norm": 46.031005859375,
"learning_rate": 5.168873301183692e-06,
"loss": 11.9729,
"step": 56100
},
{
"epoch": 24.40249932083673,
"grad_norm": 32.24114227294922,
"learning_rate": 5.164489259096887e-06,
"loss": 11.9543,
"step": 56150
},
{
"epoch": 24.42423254550394,
"grad_norm": 32.9847297668457,
"learning_rate": 5.160105217010084e-06,
"loss": 11.9631,
"step": 56200
},
{
"epoch": 24.44596577017115,
"grad_norm": 28.538616180419922,
"learning_rate": 5.155721174923279e-06,
"loss": 11.9914,
"step": 56250
},
{
"epoch": 24.467698994838358,
"grad_norm": 10.636951446533203,
"learning_rate": 5.151337132836476e-06,
"loss": 11.9533,
"step": 56300
},
{
"epoch": 24.48943221950557,
"grad_norm": 18.541378021240234,
"learning_rate": 5.1469530907496715e-06,
"loss": 11.9635,
"step": 56350
},
{
"epoch": 24.51116544417278,
"grad_norm": 15.477215766906738,
"learning_rate": 5.142569048662868e-06,
"loss": 11.973,
"step": 56400
},
{
"epoch": 24.53289866883999,
"grad_norm": 8.257668495178223,
"learning_rate": 5.138185006576063e-06,
"loss": 11.9541,
"step": 56450
},
{
"epoch": 24.5546318935072,
"grad_norm": 12.362825393676758,
"learning_rate": 5.133800964489259e-06,
"loss": 11.9543,
"step": 56500
},
{
"epoch": 24.576365118174408,
"grad_norm": 18.897563934326172,
"learning_rate": 5.1294169224024556e-06,
"loss": 11.9828,
"step": 56550
},
{
"epoch": 24.59809834284162,
"grad_norm": 22.83639907836914,
"learning_rate": 5.125032880315651e-06,
"loss": 11.9907,
"step": 56600
},
{
"epoch": 24.61983156750883,
"grad_norm": 26.016014099121094,
"learning_rate": 5.120648838228848e-06,
"loss": 11.9798,
"step": 56650
},
{
"epoch": 24.641564792176037,
"grad_norm": 7.745444297790527,
"learning_rate": 5.1162647961420434e-06,
"loss": 12.0051,
"step": 56700
},
{
"epoch": 24.66329801684325,
"grad_norm": 14.89815616607666,
"learning_rate": 5.11188075405524e-06,
"loss": 11.9648,
"step": 56750
},
{
"epoch": 24.68503124151046,
"grad_norm": 13.663446426391602,
"learning_rate": 5.107496711968435e-06,
"loss": 11.9961,
"step": 56800
},
{
"epoch": 24.70676446617767,
"grad_norm": 15.474350929260254,
"learning_rate": 5.103112669881631e-06,
"loss": 11.9687,
"step": 56850
},
{
"epoch": 24.72849769084488,
"grad_norm": 32.1036376953125,
"learning_rate": 5.0987286277948275e-06,
"loss": 12.0102,
"step": 56900
},
{
"epoch": 24.750230915512088,
"grad_norm": 21.14737892150879,
"learning_rate": 5.094344585708023e-06,
"loss": 11.9577,
"step": 56950
},
{
"epoch": 24.7719641401793,
"grad_norm": 26.35091781616211,
"learning_rate": 5.08996054362122e-06,
"loss": 11.9979,
"step": 57000
},
{
"epoch": 24.79369736484651,
"grad_norm": 40.08930587768555,
"learning_rate": 5.085576501534415e-06,
"loss": 11.956,
"step": 57050
},
{
"epoch": 24.81543058951372,
"grad_norm": 21.480506896972656,
"learning_rate": 5.081192459447612e-06,
"loss": 11.9701,
"step": 57100
},
{
"epoch": 24.83716381418093,
"grad_norm": 12.940244674682617,
"learning_rate": 5.076808417360807e-06,
"loss": 11.9642,
"step": 57150
},
{
"epoch": 24.858897038848138,
"grad_norm": 14.284876823425293,
"learning_rate": 5.072424375274003e-06,
"loss": 11.9604,
"step": 57200
},
{
"epoch": 24.88063026351535,
"grad_norm": 9.244315147399902,
"learning_rate": 5.068040333187199e-06,
"loss": 11.9762,
"step": 57250
},
{
"epoch": 24.90236348818256,
"grad_norm": 21.19985580444336,
"learning_rate": 5.063656291100395e-06,
"loss": 11.9824,
"step": 57300
},
{
"epoch": 24.92409671284977,
"grad_norm": 20.60128402709961,
"learning_rate": 5.059272249013591e-06,
"loss": 11.9888,
"step": 57350
},
{
"epoch": 24.94582993751698,
"grad_norm": 22.071367263793945,
"learning_rate": 5.0548882069267865e-06,
"loss": 11.9722,
"step": 57400
},
{
"epoch": 24.967563162184188,
"grad_norm": 19.631771087646484,
"learning_rate": 5.0505041648399836e-06,
"loss": 11.9691,
"step": 57450
},
{
"epoch": 24.9892963868514,
"grad_norm": 11.300741195678711,
"learning_rate": 5.046120122753179e-06,
"loss": 11.9764,
"step": 57500
},
{
"epoch": 25.010866612333604,
"grad_norm": 28.297489166259766,
"learning_rate": 5.041736080666375e-06,
"loss": 11.8502,
"step": 57550
},
{
"epoch": 25.032599837000816,
"grad_norm": 19.681974411010742,
"learning_rate": 5.0373520385795706e-06,
"loss": 11.9632,
"step": 57600
},
{
"epoch": 25.054333061668025,
"grad_norm": 9.978123664855957,
"learning_rate": 5.032967996492767e-06,
"loss": 11.9736,
"step": 57650
},
{
"epoch": 25.076066286335234,
"grad_norm": 22.59627342224121,
"learning_rate": 5.028583954405962e-06,
"loss": 11.9202,
"step": 57700
},
{
"epoch": 25.097799511002446,
"grad_norm": 15.177567481994629,
"learning_rate": 5.024199912319158e-06,
"loss": 11.9432,
"step": 57750
},
{
"epoch": 25.119532735669654,
"grad_norm": 11.103377342224121,
"learning_rate": 5.0198158702323555e-06,
"loss": 11.933,
"step": 57800
},
{
"epoch": 25.141265960336867,
"grad_norm": 15.902565956115723,
"learning_rate": 5.015431828145551e-06,
"loss": 11.9292,
"step": 57850
},
{
"epoch": 25.162999185004075,
"grad_norm": 21.157047271728516,
"learning_rate": 5.011047786058747e-06,
"loss": 11.9355,
"step": 57900
},
{
"epoch": 25.184732409671284,
"grad_norm": 15.4396333694458,
"learning_rate": 5.0066637439719425e-06,
"loss": 11.9417,
"step": 57950
},
{
"epoch": 25.206465634338496,
"grad_norm": 17.689163208007812,
"learning_rate": 5.002279701885139e-06,
"loss": 11.9411,
"step": 58000
},
{
"epoch": 25.228198859005705,
"grad_norm": 20.323307037353516,
"learning_rate": 4.997895659798335e-06,
"loss": 11.9537,
"step": 58050
},
{
"epoch": 25.249932083672913,
"grad_norm": 11.056938171386719,
"learning_rate": 4.99351161771153e-06,
"loss": 11.9289,
"step": 58100
},
{
"epoch": 25.271665308340125,
"grad_norm": 13.280766487121582,
"learning_rate": 4.989127575624727e-06,
"loss": 11.9525,
"step": 58150
},
{
"epoch": 25.293398533007334,
"grad_norm": 19.21057891845703,
"learning_rate": 4.984743533537922e-06,
"loss": 11.9329,
"step": 58200
},
{
"epoch": 25.315131757674546,
"grad_norm": 11.26260757446289,
"learning_rate": 4.980359491451118e-06,
"loss": 11.9324,
"step": 58250
},
{
"epoch": 25.336864982341755,
"grad_norm": 23.691085815429688,
"learning_rate": 4.9759754493643145e-06,
"loss": 11.9377,
"step": 58300
},
{
"epoch": 25.358598207008963,
"grad_norm": 14.544368743896484,
"learning_rate": 4.97159140727751e-06,
"loss": 11.9555,
"step": 58350
},
{
"epoch": 25.380331431676176,
"grad_norm": 30.192901611328125,
"learning_rate": 4.967207365190706e-06,
"loss": 11.9389,
"step": 58400
},
{
"epoch": 25.402064656343384,
"grad_norm": 13.255487442016602,
"learning_rate": 4.962823323103902e-06,
"loss": 11.9655,
"step": 58450
},
{
"epoch": 25.423797881010596,
"grad_norm": 21.28059959411621,
"learning_rate": 4.9584392810170985e-06,
"loss": 11.9339,
"step": 58500
},
{
"epoch": 25.445531105677805,
"grad_norm": 19.402381896972656,
"learning_rate": 4.954055238930294e-06,
"loss": 11.9438,
"step": 58550
},
{
"epoch": 25.467264330345014,
"grad_norm": 23.586254119873047,
"learning_rate": 4.94967119684349e-06,
"loss": 11.9565,
"step": 58600
},
{
"epoch": 25.488997555012226,
"grad_norm": 12.589113235473633,
"learning_rate": 4.945287154756686e-06,
"loss": 11.9255,
"step": 58650
},
{
"epoch": 25.510730779679434,
"grad_norm": 13.459474563598633,
"learning_rate": 4.940903112669882e-06,
"loss": 11.9577,
"step": 58700
},
{
"epoch": 25.532464004346647,
"grad_norm": 44.4463005065918,
"learning_rate": 4.936519070583078e-06,
"loss": 11.9748,
"step": 58750
},
{
"epoch": 25.554197229013855,
"grad_norm": 17.335121154785156,
"learning_rate": 4.932135028496273e-06,
"loss": 11.9485,
"step": 58800
},
{
"epoch": 25.575930453681064,
"grad_norm": 13.910146713256836,
"learning_rate": 4.92775098640947e-06,
"loss": 11.9251,
"step": 58850
},
{
"epoch": 25.597663678348276,
"grad_norm": 12.966668128967285,
"learning_rate": 4.923366944322666e-06,
"loss": 11.9461,
"step": 58900
},
{
"epoch": 25.619396903015485,
"grad_norm": 11.38027572631836,
"learning_rate": 4.918982902235862e-06,
"loss": 11.9485,
"step": 58950
},
{
"epoch": 25.641130127682693,
"grad_norm": 19.2831974029541,
"learning_rate": 4.914598860149058e-06,
"loss": 11.9539,
"step": 59000
},
{
"epoch": 25.662863352349905,
"grad_norm": 14.93049144744873,
"learning_rate": 4.910214818062254e-06,
"loss": 11.9358,
"step": 59050
},
{
"epoch": 25.684596577017114,
"grad_norm": 20.345487594604492,
"learning_rate": 4.90583077597545e-06,
"loss": 11.963,
"step": 59100
},
{
"epoch": 25.706329801684326,
"grad_norm": 22.333740234375,
"learning_rate": 4.901446733888645e-06,
"loss": 11.9593,
"step": 59150
},
{
"epoch": 25.728063026351535,
"grad_norm": 15.723165512084961,
"learning_rate": 4.897062691801842e-06,
"loss": 11.9462,
"step": 59200
},
{
"epoch": 25.749796251018743,
"grad_norm": 23.927995681762695,
"learning_rate": 4.892678649715038e-06,
"loss": 11.9395,
"step": 59250
},
{
"epoch": 25.771529475685956,
"grad_norm": 9.985795974731445,
"learning_rate": 4.888294607628233e-06,
"loss": 11.9668,
"step": 59300
},
{
"epoch": 25.793262700353164,
"grad_norm": 13.037304878234863,
"learning_rate": 4.8839105655414294e-06,
"loss": 11.9362,
"step": 59350
},
{
"epoch": 25.814995925020376,
"grad_norm": 14.396384239196777,
"learning_rate": 4.879526523454626e-06,
"loss": 11.9613,
"step": 59400
},
{
"epoch": 25.836729149687585,
"grad_norm": 12.580947875976562,
"learning_rate": 4.875142481367822e-06,
"loss": 11.947,
"step": 59450
},
{
"epoch": 25.858462374354794,
"grad_norm": 9.566840171813965,
"learning_rate": 4.870758439281017e-06,
"loss": 11.9588,
"step": 59500
},
{
"epoch": 25.880195599022006,
"grad_norm": 14.287603378295898,
"learning_rate": 4.8663743971942135e-06,
"loss": 11.9367,
"step": 59550
},
{
"epoch": 25.901928823689214,
"grad_norm": 22.067798614501953,
"learning_rate": 4.86199035510741e-06,
"loss": 11.9608,
"step": 59600
},
{
"epoch": 25.923662048356427,
"grad_norm": 18.1433162689209,
"learning_rate": 4.857606313020605e-06,
"loss": 11.9365,
"step": 59650
},
{
"epoch": 25.945395273023635,
"grad_norm": 19.52138900756836,
"learning_rate": 4.853222270933801e-06,
"loss": 11.9533,
"step": 59700
},
{
"epoch": 25.967128497690844,
"grad_norm": 8.619915008544922,
"learning_rate": 4.848838228846997e-06,
"loss": 11.9343,
"step": 59750
},
{
"epoch": 25.988861722358056,
"grad_norm": 23.551292419433594,
"learning_rate": 4.844454186760194e-06,
"loss": 11.9369,
"step": 59800
},
{
"epoch": 26.01043194784026,
"grad_norm": 11.714635848999023,
"learning_rate": 4.840070144673389e-06,
"loss": 11.8362,
"step": 59850
},
{
"epoch": 26.032165172507472,
"grad_norm": 12.336874961853027,
"learning_rate": 4.8356861025865855e-06,
"loss": 11.8948,
"step": 59900
},
{
"epoch": 26.05389839717468,
"grad_norm": 20.45733642578125,
"learning_rate": 4.831302060499781e-06,
"loss": 11.9095,
"step": 59950
},
{
"epoch": 26.07563162184189,
"grad_norm": 19.363704681396484,
"learning_rate": 4.826918018412977e-06,
"loss": 11.9093,
"step": 60000
},
{
"epoch": 26.07563162184189,
"eval_cer": 0.07668522335921395,
"eval_loss": 2.3816096782684326,
"eval_runtime": 394.5974,
"eval_samples_per_second": 13.7,
"eval_steps_per_second": 3.426,
"eval_wer": 0.2290783482493327,
"step": 60000
},
{
"epoch": 26.0973648465091,
"grad_norm": 10.920241355895996,
"learning_rate": 4.822533976326173e-06,
"loss": 11.8988,
"step": 60050
},
{
"epoch": 26.11909807117631,
"grad_norm": 8.169657707214355,
"learning_rate": 4.818149934239369e-06,
"loss": 11.9302,
"step": 60100
},
{
"epoch": 26.140831295843522,
"grad_norm": 21.631534576416016,
"learning_rate": 4.813765892152565e-06,
"loss": 11.9295,
"step": 60150
},
{
"epoch": 26.16256452051073,
"grad_norm": 15.736180305480957,
"learning_rate": 4.80938185006576e-06,
"loss": 11.9402,
"step": 60200
},
{
"epoch": 26.18429774517794,
"grad_norm": 8.994476318359375,
"learning_rate": 4.804997807978957e-06,
"loss": 11.9254,
"step": 60250
},
{
"epoch": 26.206030969845152,
"grad_norm": 15.551674842834473,
"learning_rate": 4.800613765892153e-06,
"loss": 11.9274,
"step": 60300
},
{
"epoch": 26.22776419451236,
"grad_norm": 8.010394096374512,
"learning_rate": 4.796229723805349e-06,
"loss": 11.9204,
"step": 60350
},
{
"epoch": 26.24949741917957,
"grad_norm": 8.433065414428711,
"learning_rate": 4.791845681718545e-06,
"loss": 11.9123,
"step": 60400
},
{
"epoch": 26.27123064384678,
"grad_norm": 11.69290542602539,
"learning_rate": 4.787461639631741e-06,
"loss": 11.9423,
"step": 60450
},
{
"epoch": 26.29296386851399,
"grad_norm": 11.806631088256836,
"learning_rate": 4.783077597544937e-06,
"loss": 11.9229,
"step": 60500
},
{
"epoch": 26.314697093181202,
"grad_norm": 9.421358108520508,
"learning_rate": 4.778693555458132e-06,
"loss": 11.9254,
"step": 60550
},
{
"epoch": 26.33643031784841,
"grad_norm": 15.151471138000488,
"learning_rate": 4.7743095133713285e-06,
"loss": 11.9324,
"step": 60600
},
{
"epoch": 26.35816354251562,
"grad_norm": 7.523982524871826,
"learning_rate": 4.769925471284525e-06,
"loss": 11.9094,
"step": 60650
},
{
"epoch": 26.37989676718283,
"grad_norm": 7.315085411071777,
"learning_rate": 4.765541429197721e-06,
"loss": 11.9156,
"step": 60700
},
{
"epoch": 26.40162999185004,
"grad_norm": 37.69257354736328,
"learning_rate": 4.761157387110917e-06,
"loss": 11.9491,
"step": 60750
},
{
"epoch": 26.423363216517252,
"grad_norm": 12.536825180053711,
"learning_rate": 4.756773345024113e-06,
"loss": 11.937,
"step": 60800
},
{
"epoch": 26.44509644118446,
"grad_norm": 19.952590942382812,
"learning_rate": 4.752389302937309e-06,
"loss": 11.9031,
"step": 60850
},
{
"epoch": 26.46682966585167,
"grad_norm": 9.468097686767578,
"learning_rate": 4.748005260850504e-06,
"loss": 11.93,
"step": 60900
},
{
"epoch": 26.48856289051888,
"grad_norm": 9.063526153564453,
"learning_rate": 4.7436212187637005e-06,
"loss": 11.9282,
"step": 60950
},
{
"epoch": 26.51029611518609,
"grad_norm": 23.76058006286621,
"learning_rate": 4.739237176676897e-06,
"loss": 11.9431,
"step": 61000
},
{
"epoch": 26.532029339853302,
"grad_norm": 16.783021926879883,
"learning_rate": 4.734853134590092e-06,
"loss": 11.9221,
"step": 61050
},
{
"epoch": 26.55376256452051,
"grad_norm": 20.15511131286621,
"learning_rate": 4.730469092503288e-06,
"loss": 11.9392,
"step": 61100
},
{
"epoch": 26.57549578918772,
"grad_norm": 14.874903678894043,
"learning_rate": 4.7260850504164845e-06,
"loss": 11.953,
"step": 61150
},
{
"epoch": 26.597229013854932,
"grad_norm": 7.126718044281006,
"learning_rate": 4.721701008329681e-06,
"loss": 11.9446,
"step": 61200
},
{
"epoch": 26.61896223852214,
"grad_norm": 9.697017669677734,
"learning_rate": 4.717316966242876e-06,
"loss": 11.9047,
"step": 61250
},
{
"epoch": 26.64069546318935,
"grad_norm": 16.13836097717285,
"learning_rate": 4.712932924156072e-06,
"loss": 11.9156,
"step": 61300
},
{
"epoch": 26.66242868785656,
"grad_norm": 10.770340919494629,
"learning_rate": 4.708548882069268e-06,
"loss": 11.9602,
"step": 61350
},
{
"epoch": 26.68416191252377,
"grad_norm": 20.800886154174805,
"learning_rate": 4.704164839982464e-06,
"loss": 11.9496,
"step": 61400
},
{
"epoch": 26.705895137190982,
"grad_norm": 14.415149688720703,
"learning_rate": 4.69978079789566e-06,
"loss": 11.8953,
"step": 61450
},
{
"epoch": 26.72762836185819,
"grad_norm": 16.533891677856445,
"learning_rate": 4.695396755808856e-06,
"loss": 11.9318,
"step": 61500
},
{
"epoch": 26.7493615865254,
"grad_norm": 12.036311149597168,
"learning_rate": 4.691012713722053e-06,
"loss": 11.9219,
"step": 61550
},
{
"epoch": 26.77109481119261,
"grad_norm": 9.894879341125488,
"learning_rate": 4.686628671635248e-06,
"loss": 11.925,
"step": 61600
},
{
"epoch": 26.79282803585982,
"grad_norm": 13.89318561553955,
"learning_rate": 4.682244629548444e-06,
"loss": 11.9642,
"step": 61650
},
{
"epoch": 26.814561260527032,
"grad_norm": 7.87830114364624,
"learning_rate": 4.67786058746164e-06,
"loss": 11.9357,
"step": 61700
},
{
"epoch": 26.83629448519424,
"grad_norm": 12.9856595993042,
"learning_rate": 4.673476545374836e-06,
"loss": 11.9305,
"step": 61750
},
{
"epoch": 26.85802770986145,
"grad_norm": 9.654988288879395,
"learning_rate": 4.669092503288032e-06,
"loss": 11.9184,
"step": 61800
},
{
"epoch": 26.87976093452866,
"grad_norm": 58.50657653808594,
"learning_rate": 4.664708461201228e-06,
"loss": 11.9274,
"step": 61850
},
{
"epoch": 26.90149415919587,
"grad_norm": 9.662385940551758,
"learning_rate": 4.660324419114424e-06,
"loss": 11.9326,
"step": 61900
},
{
"epoch": 26.92322738386308,
"grad_norm": 11.249975204467773,
"learning_rate": 4.655940377027619e-06,
"loss": 11.9301,
"step": 61950
},
{
"epoch": 26.94496060853029,
"grad_norm": 14.355755805969238,
"learning_rate": 4.651556334940816e-06,
"loss": 11.925,
"step": 62000
},
{
"epoch": 26.9666938331975,
"grad_norm": 55.27675247192383,
"learning_rate": 4.647172292854012e-06,
"loss": 11.9072,
"step": 62050
},
{
"epoch": 26.988427057864712,
"grad_norm": 9.871424674987793,
"learning_rate": 4.642788250767208e-06,
"loss": 11.9371,
"step": 62100
},
{
"epoch": 27.009997283346916,
"grad_norm": 11.582411766052246,
"learning_rate": 4.638404208680404e-06,
"loss": 11.8118,
"step": 62150
},
{
"epoch": 27.031730508014128,
"grad_norm": 7.072801113128662,
"learning_rate": 4.6340201665935995e-06,
"loss": 11.8716,
"step": 62200
},
{
"epoch": 27.053463732681337,
"grad_norm": 12.715493202209473,
"learning_rate": 4.629636124506796e-06,
"loss": 11.9066,
"step": 62250
},
{
"epoch": 27.075196957348545,
"grad_norm": 13.285543441772461,
"learning_rate": 4.625252082419991e-06,
"loss": 11.9065,
"step": 62300
},
{
"epoch": 27.096930182015758,
"grad_norm": 14.948770523071289,
"learning_rate": 4.620868040333187e-06,
"loss": 11.8932,
"step": 62350
},
{
"epoch": 27.118663406682966,
"grad_norm": 7.0187296867370605,
"learning_rate": 4.616483998246384e-06,
"loss": 11.9063,
"step": 62400
},
{
"epoch": 27.14039663135018,
"grad_norm": 11.898140907287598,
"learning_rate": 4.61209995615958e-06,
"loss": 11.8861,
"step": 62450
},
{
"epoch": 27.162129856017387,
"grad_norm": 7.729825496673584,
"learning_rate": 4.607715914072775e-06,
"loss": 11.9102,
"step": 62500
},
{
"epoch": 27.183863080684596,
"grad_norm": 9.05493450164795,
"learning_rate": 4.6033318719859715e-06,
"loss": 11.901,
"step": 62550
},
{
"epoch": 27.205596305351808,
"grad_norm": 48.41245651245117,
"learning_rate": 4.598947829899168e-06,
"loss": 11.9217,
"step": 62600
},
{
"epoch": 27.227329530019016,
"grad_norm": 8.19921875,
"learning_rate": 4.594563787812363e-06,
"loss": 11.9103,
"step": 62650
},
{
"epoch": 27.249062754686225,
"grad_norm": 7.067399024963379,
"learning_rate": 4.590179745725559e-06,
"loss": 11.9102,
"step": 62700
},
{
"epoch": 27.270795979353437,
"grad_norm": 10.219547271728516,
"learning_rate": 4.5857957036387556e-06,
"loss": 11.9086,
"step": 62750
},
{
"epoch": 27.292529204020646,
"grad_norm": 11.2730073928833,
"learning_rate": 4.581411661551951e-06,
"loss": 11.8907,
"step": 62800
},
{
"epoch": 27.314262428687858,
"grad_norm": 23.644775390625,
"learning_rate": 4.577027619465147e-06,
"loss": 11.9194,
"step": 62850
},
{
"epoch": 27.335995653355067,
"grad_norm": 13.088956832885742,
"learning_rate": 4.572643577378343e-06,
"loss": 11.9178,
"step": 62900
},
{
"epoch": 27.357728878022275,
"grad_norm": 12.945446968078613,
"learning_rate": 4.56825953529154e-06,
"loss": 11.9127,
"step": 62950
},
{
"epoch": 27.379462102689487,
"grad_norm": 7.951735019683838,
"learning_rate": 4.563875493204735e-06,
"loss": 11.9237,
"step": 63000
},
{
"epoch": 27.401195327356696,
"grad_norm": 13.66278076171875,
"learning_rate": 4.559491451117931e-06,
"loss": 11.8985,
"step": 63050
},
{
"epoch": 27.422928552023908,
"grad_norm": 6.567673683166504,
"learning_rate": 4.555107409031127e-06,
"loss": 11.9311,
"step": 63100
},
{
"epoch": 27.444661776691117,
"grad_norm": 11.139328956604004,
"learning_rate": 4.550723366944323e-06,
"loss": 11.9207,
"step": 63150
},
{
"epoch": 27.466395001358325,
"grad_norm": 18.506877899169922,
"learning_rate": 4.546339324857519e-06,
"loss": 11.9053,
"step": 63200
},
{
"epoch": 27.488128226025538,
"grad_norm": 16.45941925048828,
"learning_rate": 4.5419552827707145e-06,
"loss": 11.9132,
"step": 63250
},
{
"epoch": 27.509861450692746,
"grad_norm": 13.74703311920166,
"learning_rate": 4.537571240683912e-06,
"loss": 11.9032,
"step": 63300
},
{
"epoch": 27.531594675359955,
"grad_norm": 5.686723232269287,
"learning_rate": 4.533187198597107e-06,
"loss": 11.9135,
"step": 63350
},
{
"epoch": 27.553327900027167,
"grad_norm": 47.760013580322266,
"learning_rate": 4.528803156510303e-06,
"loss": 11.9263,
"step": 63400
},
{
"epoch": 27.575061124694376,
"grad_norm": 13.832674026489258,
"learning_rate": 4.524419114423499e-06,
"loss": 11.9114,
"step": 63450
},
{
"epoch": 27.596794349361588,
"grad_norm": 22.621736526489258,
"learning_rate": 4.520035072336695e-06,
"loss": 11.9134,
"step": 63500
},
{
"epoch": 27.618527574028796,
"grad_norm": 13.379792213439941,
"learning_rate": 4.515651030249891e-06,
"loss": 11.9058,
"step": 63550
},
{
"epoch": 27.640260798696005,
"grad_norm": 12.987919807434082,
"learning_rate": 4.5112669881630865e-06,
"loss": 11.9083,
"step": 63600
},
{
"epoch": 27.661994023363217,
"grad_norm": 16.87094497680664,
"learning_rate": 4.506882946076283e-06,
"loss": 11.908,
"step": 63650
},
{
"epoch": 27.683727248030426,
"grad_norm": 9.978212356567383,
"learning_rate": 4.502498903989478e-06,
"loss": 11.8919,
"step": 63700
},
{
"epoch": 27.705460472697638,
"grad_norm": 13.08248519897461,
"learning_rate": 4.498114861902675e-06,
"loss": 11.9,
"step": 63750
},
{
"epoch": 27.727193697364847,
"grad_norm": 14.08407974243164,
"learning_rate": 4.4937308198158706e-06,
"loss": 11.9366,
"step": 63800
},
{
"epoch": 27.748926922032055,
"grad_norm": 11.779139518737793,
"learning_rate": 4.489346777729067e-06,
"loss": 11.9216,
"step": 63850
},
{
"epoch": 27.770660146699267,
"grad_norm": 7.019837856292725,
"learning_rate": 4.484962735642262e-06,
"loss": 11.9144,
"step": 63900
},
{
"epoch": 27.792393371366476,
"grad_norm": 8.715902328491211,
"learning_rate": 4.480578693555458e-06,
"loss": 11.9335,
"step": 63950
},
{
"epoch": 27.814126596033688,
"grad_norm": 17.31736183166504,
"learning_rate": 4.476194651468655e-06,
"loss": 11.9164,
"step": 64000
},
{
"epoch": 27.835859820700897,
"grad_norm": 7.397292613983154,
"learning_rate": 4.47181060938185e-06,
"loss": 11.8935,
"step": 64050
},
{
"epoch": 27.857593045368105,
"grad_norm": 15.1404447555542,
"learning_rate": 4.467426567295046e-06,
"loss": 11.9156,
"step": 64100
},
{
"epoch": 27.879326270035317,
"grad_norm": 16.563631057739258,
"learning_rate": 4.4630425252082425e-06,
"loss": 11.8858,
"step": 64150
},
{
"epoch": 27.901059494702526,
"grad_norm": 10.400628089904785,
"learning_rate": 4.458658483121439e-06,
"loss": 11.9083,
"step": 64200
},
{
"epoch": 27.92279271936974,
"grad_norm": 8.129082679748535,
"learning_rate": 4.454274441034634e-06,
"loss": 11.9361,
"step": 64250
},
{
"epoch": 27.944525944036947,
"grad_norm": 22.946596145629883,
"learning_rate": 4.44989039894783e-06,
"loss": 11.8997,
"step": 64300
},
{
"epoch": 27.966259168704156,
"grad_norm": 17.139440536499023,
"learning_rate": 4.445506356861027e-06,
"loss": 11.9067,
"step": 64350
},
{
"epoch": 27.987992393371368,
"grad_norm": 8.700691223144531,
"learning_rate": 4.441122314774222e-06,
"loss": 11.8941,
"step": 64400
},
{
"epoch": 28.009562618853572,
"grad_norm": 9.634552001953125,
"learning_rate": 4.436738272687418e-06,
"loss": 11.7983,
"step": 64450
},
{
"epoch": 28.031295843520784,
"grad_norm": 12.564841270446777,
"learning_rate": 4.432354230600614e-06,
"loss": 11.8731,
"step": 64500
},
{
"epoch": 28.053029068187993,
"grad_norm": 10.420557022094727,
"learning_rate": 4.42797018851381e-06,
"loss": 11.896,
"step": 64550
},
{
"epoch": 28.0747622928552,
"grad_norm": 13.071510314941406,
"learning_rate": 4.423586146427006e-06,
"loss": 11.8855,
"step": 64600
},
{
"epoch": 28.096495517522413,
"grad_norm": 11.409537315368652,
"learning_rate": 4.419202104340202e-06,
"loss": 11.8987,
"step": 64650
},
{
"epoch": 28.118228742189622,
"grad_norm": 17.64859390258789,
"learning_rate": 4.4148180622533985e-06,
"loss": 11.8742,
"step": 64700
},
{
"epoch": 28.13996196685683,
"grad_norm": 8.101343154907227,
"learning_rate": 4.410434020166594e-06,
"loss": 11.8781,
"step": 64750
},
{
"epoch": 28.161695191524043,
"grad_norm": 11.35251522064209,
"learning_rate": 4.40604997807979e-06,
"loss": 11.8891,
"step": 64800
},
{
"epoch": 28.18342841619125,
"grad_norm": 19.521108627319336,
"learning_rate": 4.4016659359929855e-06,
"loss": 11.882,
"step": 64850
},
{
"epoch": 28.205161640858464,
"grad_norm": 14.904671669006348,
"learning_rate": 4.397281893906182e-06,
"loss": 11.8987,
"step": 64900
},
{
"epoch": 28.226894865525672,
"grad_norm": 11.82111644744873,
"learning_rate": 4.392897851819378e-06,
"loss": 11.8718,
"step": 64950
},
{
"epoch": 28.24862809019288,
"grad_norm": 7.986074924468994,
"learning_rate": 4.388513809732573e-06,
"loss": 11.8931,
"step": 65000
},
{
"epoch": 28.270361314860093,
"grad_norm": 10.135086059570312,
"learning_rate": 4.38412976764577e-06,
"loss": 11.8845,
"step": 65050
},
{
"epoch": 28.2920945395273,
"grad_norm": 9.275798797607422,
"learning_rate": 4.379745725558966e-06,
"loss": 11.8647,
"step": 65100
},
{
"epoch": 28.313827764194514,
"grad_norm": 7.864231586456299,
"learning_rate": 4.375361683472162e-06,
"loss": 11.9019,
"step": 65150
},
{
"epoch": 28.335560988861722,
"grad_norm": 37.51991653442383,
"learning_rate": 4.3709776413853575e-06,
"loss": 11.8779,
"step": 65200
},
{
"epoch": 28.35729421352893,
"grad_norm": 7.752624034881592,
"learning_rate": 4.366593599298554e-06,
"loss": 11.9023,
"step": 65250
},
{
"epoch": 28.379027438196143,
"grad_norm": 12.627674102783203,
"learning_rate": 4.36220955721175e-06,
"loss": 11.8921,
"step": 65300
},
{
"epoch": 28.40076066286335,
"grad_norm": 26.206846237182617,
"learning_rate": 4.357825515124945e-06,
"loss": 11.891,
"step": 65350
},
{
"epoch": 28.422493887530564,
"grad_norm": 18.58912467956543,
"learning_rate": 4.3534414730381416e-06,
"loss": 11.908,
"step": 65400
},
{
"epoch": 28.444227112197773,
"grad_norm": 16.89732551574707,
"learning_rate": 4.349057430951337e-06,
"loss": 11.8899,
"step": 65450
},
{
"epoch": 28.46596033686498,
"grad_norm": 7.8719964027404785,
"learning_rate": 4.344673388864534e-06,
"loss": 11.8946,
"step": 65500
},
{
"epoch": 28.487693561532193,
"grad_norm": 11.639144897460938,
"learning_rate": 4.3402893467777294e-06,
"loss": 11.9214,
"step": 65550
},
{
"epoch": 28.509426786199402,
"grad_norm": 29.2702579498291,
"learning_rate": 4.335905304690926e-06,
"loss": 11.9217,
"step": 65600
},
{
"epoch": 28.531160010866614,
"grad_norm": 48.321807861328125,
"learning_rate": 4.331521262604121e-06,
"loss": 11.889,
"step": 65650
},
{
"epoch": 28.552893235533823,
"grad_norm": 12.334220886230469,
"learning_rate": 4.327137220517317e-06,
"loss": 11.8814,
"step": 65700
},
{
"epoch": 28.57462646020103,
"grad_norm": 13.60355281829834,
"learning_rate": 4.3227531784305135e-06,
"loss": 11.9137,
"step": 65750
},
{
"epoch": 28.596359684868244,
"grad_norm": 12.374007225036621,
"learning_rate": 4.318369136343709e-06,
"loss": 11.9145,
"step": 65800
},
{
"epoch": 28.618092909535452,
"grad_norm": 15.23318862915039,
"learning_rate": 4.313985094256905e-06,
"loss": 11.8971,
"step": 65850
},
{
"epoch": 28.63982613420266,
"grad_norm": 8.697155952453613,
"learning_rate": 4.3096010521701005e-06,
"loss": 11.8899,
"step": 65900
},
{
"epoch": 28.661559358869873,
"grad_norm": 6.101230621337891,
"learning_rate": 4.305217010083298e-06,
"loss": 11.8835,
"step": 65950
},
{
"epoch": 28.68329258353708,
"grad_norm": 30.645008087158203,
"learning_rate": 4.300832967996493e-06,
"loss": 11.9118,
"step": 66000
},
{
"epoch": 28.705025808204294,
"grad_norm": 10.432790756225586,
"learning_rate": 4.296448925909689e-06,
"loss": 11.898,
"step": 66050
},
{
"epoch": 28.726759032871502,
"grad_norm": 22.726320266723633,
"learning_rate": 4.2920648838228855e-06,
"loss": 11.8829,
"step": 66100
},
{
"epoch": 28.74849225753871,
"grad_norm": 15.002222061157227,
"learning_rate": 4.287680841736081e-06,
"loss": 11.8922,
"step": 66150
},
{
"epoch": 28.770225482205923,
"grad_norm": 16.7822208404541,
"learning_rate": 4.283296799649277e-06,
"loss": 11.9015,
"step": 66200
},
{
"epoch": 28.79195870687313,
"grad_norm": 10.86782455444336,
"learning_rate": 4.2789127575624725e-06,
"loss": 11.9095,
"step": 66250
},
{
"epoch": 28.813691931540344,
"grad_norm": 14.24905776977539,
"learning_rate": 4.274528715475669e-06,
"loss": 11.8791,
"step": 66300
},
{
"epoch": 28.835425156207553,
"grad_norm": 8.511114120483398,
"learning_rate": 4.270144673388865e-06,
"loss": 11.8846,
"step": 66350
},
{
"epoch": 28.85715838087476,
"grad_norm": 10.261749267578125,
"learning_rate": 4.265760631302061e-06,
"loss": 11.9029,
"step": 66400
},
{
"epoch": 28.878891605541973,
"grad_norm": 48.72242736816406,
"learning_rate": 4.261376589215257e-06,
"loss": 11.9049,
"step": 66450
},
{
"epoch": 28.900624830209182,
"grad_norm": 10.668495178222656,
"learning_rate": 4.256992547128453e-06,
"loss": 11.8856,
"step": 66500
},
{
"epoch": 28.92235805487639,
"grad_norm": 7.709607124328613,
"learning_rate": 4.252608505041649e-06,
"loss": 11.8888,
"step": 66550
},
{
"epoch": 28.944091279543603,
"grad_norm": 30.70176124572754,
"learning_rate": 4.248224462954844e-06,
"loss": 11.9187,
"step": 66600
},
{
"epoch": 28.96582450421081,
"grad_norm": 13.879278182983398,
"learning_rate": 4.243840420868041e-06,
"loss": 11.8825,
"step": 66650
},
{
"epoch": 28.987557728878024,
"grad_norm": 7.8939714431762695,
"learning_rate": 4.239456378781237e-06,
"loss": 11.9042,
"step": 66700
},
{
"epoch": 29.009127954360228,
"grad_norm": 16.196550369262695,
"learning_rate": 4.235072336694432e-06,
"loss": 11.8091,
"step": 66750
},
{
"epoch": 29.03086117902744,
"grad_norm": 10.502305030822754,
"learning_rate": 4.2306882946076285e-06,
"loss": 11.8647,
"step": 66800
},
{
"epoch": 29.05259440369465,
"grad_norm": 28.054792404174805,
"learning_rate": 4.226304252520825e-06,
"loss": 11.8775,
"step": 66850
},
{
"epoch": 29.074327628361857,
"grad_norm": 5.852464199066162,
"learning_rate": 4.221920210434021e-06,
"loss": 11.8717,
"step": 66900
},
{
"epoch": 29.09606085302907,
"grad_norm": 10.438371658325195,
"learning_rate": 4.217536168347216e-06,
"loss": 11.8783,
"step": 66950
},
{
"epoch": 29.117794077696278,
"grad_norm": 5.391887664794922,
"learning_rate": 4.213152126260413e-06,
"loss": 11.8597,
"step": 67000
},
{
"epoch": 29.139527302363486,
"grad_norm": 15.71295166015625,
"learning_rate": 4.208768084173608e-06,
"loss": 11.8726,
"step": 67050
},
{
"epoch": 29.1612605270307,
"grad_norm": 15.637112617492676,
"learning_rate": 4.204384042086804e-06,
"loss": 11.8549,
"step": 67100
},
{
"epoch": 29.182993751697907,
"grad_norm": 16.201160430908203,
"learning_rate": 4.2000000000000004e-06,
"loss": 11.8751,
"step": 67150
},
{
"epoch": 29.20472697636512,
"grad_norm": 18.363697052001953,
"learning_rate": 4.195615957913196e-06,
"loss": 11.9021,
"step": 67200
},
{
"epoch": 29.226460201032328,
"grad_norm": 15.013435363769531,
"learning_rate": 4.191231915826393e-06,
"loss": 11.8747,
"step": 67250
},
{
"epoch": 29.248193425699537,
"grad_norm": 14.785465240478516,
"learning_rate": 4.186847873739588e-06,
"loss": 11.8775,
"step": 67300
},
{
"epoch": 29.26992665036675,
"grad_norm": 13.100189208984375,
"learning_rate": 4.1824638316527845e-06,
"loss": 11.8587,
"step": 67350
},
{
"epoch": 29.291659875033957,
"grad_norm": 9.864031791687012,
"learning_rate": 4.17807978956598e-06,
"loss": 11.9118,
"step": 67400
},
{
"epoch": 29.31339309970117,
"grad_norm": 19.341495513916016,
"learning_rate": 4.173695747479176e-06,
"loss": 11.8819,
"step": 67450
},
{
"epoch": 29.335126324368378,
"grad_norm": 7.35308837890625,
"learning_rate": 4.169311705392372e-06,
"loss": 11.8731,
"step": 67500
},
{
"epoch": 29.356859549035587,
"grad_norm": 8.811240196228027,
"learning_rate": 4.164927663305568e-06,
"loss": 11.8819,
"step": 67550
},
{
"epoch": 29.3785927737028,
"grad_norm": 9.851766586303711,
"learning_rate": 4.160543621218764e-06,
"loss": 11.8942,
"step": 67600
},
{
"epoch": 29.400325998370008,
"grad_norm": 14.708338737487793,
"learning_rate": 4.156159579131959e-06,
"loss": 11.8899,
"step": 67650
},
{
"epoch": 29.42205922303722,
"grad_norm": 11.063777923583984,
"learning_rate": 4.1517755370451565e-06,
"loss": 11.8602,
"step": 67700
},
{
"epoch": 29.44379244770443,
"grad_norm": 11.282812118530273,
"learning_rate": 4.147391494958352e-06,
"loss": 11.8651,
"step": 67750
},
{
"epoch": 29.465525672371637,
"grad_norm": 258.5189514160156,
"learning_rate": 4.143007452871548e-06,
"loss": 11.8813,
"step": 67800
},
{
"epoch": 29.48725889703885,
"grad_norm": 17.533771514892578,
"learning_rate": 4.138623410784744e-06,
"loss": 11.8777,
"step": 67850
},
{
"epoch": 29.508992121706058,
"grad_norm": 9.061328887939453,
"learning_rate": 4.13423936869794e-06,
"loss": 11.863,
"step": 67900
},
{
"epoch": 29.530725346373266,
"grad_norm": 14.129364013671875,
"learning_rate": 4.129855326611136e-06,
"loss": 11.8837,
"step": 67950
},
{
"epoch": 29.55245857104048,
"grad_norm": 21.77886390686035,
"learning_rate": 4.125471284524331e-06,
"loss": 11.8897,
"step": 68000
},
{
"epoch": 29.574191795707687,
"grad_norm": 8.441765785217285,
"learning_rate": 4.1210872424375276e-06,
"loss": 11.9048,
"step": 68050
},
{
"epoch": 29.5959250203749,
"grad_norm": 11.595650672912598,
"learning_rate": 4.116703200350724e-06,
"loss": 11.8899,
"step": 68100
},
{
"epoch": 29.617658245042108,
"grad_norm": 16.048147201538086,
"learning_rate": 4.11231915826392e-06,
"loss": 11.8787,
"step": 68150
},
{
"epoch": 29.639391469709317,
"grad_norm": 9.9227294921875,
"learning_rate": 4.1079351161771154e-06,
"loss": 11.8748,
"step": 68200
},
{
"epoch": 29.66112469437653,
"grad_norm": 9.97187614440918,
"learning_rate": 4.103551074090312e-06,
"loss": 11.8759,
"step": 68250
},
{
"epoch": 29.682857919043737,
"grad_norm": 18.43181610107422,
"learning_rate": 4.099167032003508e-06,
"loss": 11.8683,
"step": 68300
},
{
"epoch": 29.70459114371095,
"grad_norm": 18.20121192932129,
"learning_rate": 4.094782989916703e-06,
"loss": 11.8865,
"step": 68350
},
{
"epoch": 29.726324368378158,
"grad_norm": 6.934305667877197,
"learning_rate": 4.0903989478298995e-06,
"loss": 11.8502,
"step": 68400
},
{
"epoch": 29.748057593045367,
"grad_norm": 12.715697288513184,
"learning_rate": 4.086014905743096e-06,
"loss": 11.8733,
"step": 68450
},
{
"epoch": 29.76979081771258,
"grad_norm": 9.016664505004883,
"learning_rate": 4.081630863656291e-06,
"loss": 11.8775,
"step": 68500
},
{
"epoch": 29.791524042379788,
"grad_norm": 7.763296127319336,
"learning_rate": 4.077246821569487e-06,
"loss": 11.8733,
"step": 68550
},
{
"epoch": 29.813257267047,
"grad_norm": 10.350701332092285,
"learning_rate": 4.072862779482684e-06,
"loss": 11.8632,
"step": 68600
},
{
"epoch": 29.83499049171421,
"grad_norm": 6.480827331542969,
"learning_rate": 4.06847873739588e-06,
"loss": 11.8788,
"step": 68650
},
{
"epoch": 29.856723716381417,
"grad_norm": 20.947677612304688,
"learning_rate": 4.064094695309075e-06,
"loss": 11.8773,
"step": 68700
},
{
"epoch": 29.87845694104863,
"grad_norm": 10.931136131286621,
"learning_rate": 4.0597106532222715e-06,
"loss": 11.8612,
"step": 68750
},
{
"epoch": 29.900190165715838,
"grad_norm": 10.79286003112793,
"learning_rate": 4.055326611135467e-06,
"loss": 11.8702,
"step": 68800
},
{
"epoch": 29.921923390383046,
"grad_norm": 45.66188049316406,
"learning_rate": 4.050942569048663e-06,
"loss": 11.8613,
"step": 68850
},
{
"epoch": 29.94365661505026,
"grad_norm": 6.688445091247559,
"learning_rate": 4.046558526961859e-06,
"loss": 11.8739,
"step": 68900
},
{
"epoch": 29.965389839717467,
"grad_norm": 14.173410415649414,
"learning_rate": 4.042174484875055e-06,
"loss": 11.8595,
"step": 68950
},
{
"epoch": 29.98712306438468,
"grad_norm": 13.653775215148926,
"learning_rate": 4.037790442788251e-06,
"loss": 11.8927,
"step": 69000
},
{
"epoch": 30.008693289866883,
"grad_norm": 9.138008117675781,
"learning_rate": 4.033406400701447e-06,
"loss": 11.7683,
"step": 69050
},
{
"epoch": 30.030426514534096,
"grad_norm": 16.62093162536621,
"learning_rate": 4.029022358614643e-06,
"loss": 11.8934,
"step": 69100
},
{
"epoch": 30.052159739201304,
"grad_norm": 7.672760486602783,
"learning_rate": 4.024638316527839e-06,
"loss": 11.8631,
"step": 69150
},
{
"epoch": 30.073892963868513,
"grad_norm": 8.038310050964355,
"learning_rate": 4.020254274441035e-06,
"loss": 11.8599,
"step": 69200
},
{
"epoch": 30.095626188535725,
"grad_norm": 10.817283630371094,
"learning_rate": 4.015870232354231e-06,
"loss": 11.8627,
"step": 69250
},
{
"epoch": 30.117359413202934,
"grad_norm": 6.556225299835205,
"learning_rate": 4.011486190267427e-06,
"loss": 11.8671,
"step": 69300
},
{
"epoch": 30.139092637870142,
"grad_norm": 16.242650985717773,
"learning_rate": 4.007102148180623e-06,
"loss": 11.875,
"step": 69350
},
{
"epoch": 30.160825862537354,
"grad_norm": 5.174230575561523,
"learning_rate": 4.002718106093818e-06,
"loss": 11.8433,
"step": 69400
},
{
"epoch": 30.182559087204563,
"grad_norm": 7.197856426239014,
"learning_rate": 3.998334064007015e-06,
"loss": 11.8622,
"step": 69450
},
{
"epoch": 30.204292311871775,
"grad_norm": 21.63473892211914,
"learning_rate": 3.993950021920211e-06,
"loss": 11.8656,
"step": 69500
},
{
"epoch": 30.226025536538984,
"grad_norm": 17.78504753112793,
"learning_rate": 3.989565979833407e-06,
"loss": 11.8521,
"step": 69550
},
{
"epoch": 30.247758761206192,
"grad_norm": 18.68705940246582,
"learning_rate": 3.985181937746602e-06,
"loss": 11.8453,
"step": 69600
},
{
"epoch": 30.269491985873405,
"grad_norm": 7.354127407073975,
"learning_rate": 3.980797895659799e-06,
"loss": 11.8665,
"step": 69650
},
{
"epoch": 30.291225210540613,
"grad_norm": 7.651024341583252,
"learning_rate": 3.976413853572995e-06,
"loss": 11.853,
"step": 69700
},
{
"epoch": 30.312958435207825,
"grad_norm": 8.84490966796875,
"learning_rate": 3.97202981148619e-06,
"loss": 11.8501,
"step": 69750
},
{
"epoch": 30.334691659875034,
"grad_norm": 8.941247940063477,
"learning_rate": 3.9676457693993865e-06,
"loss": 11.8615,
"step": 69800
},
{
"epoch": 30.356424884542243,
"grad_norm": 13.154361724853516,
"learning_rate": 3.963261727312583e-06,
"loss": 11.8584,
"step": 69850
},
{
"epoch": 30.378158109209455,
"grad_norm": 13.795583724975586,
"learning_rate": 3.958877685225779e-06,
"loss": 11.8604,
"step": 69900
},
{
"epoch": 30.399891333876663,
"grad_norm": 8.268631935119629,
"learning_rate": 3.954493643138974e-06,
"loss": 11.859,
"step": 69950
},
{
"epoch": 30.421624558543876,
"grad_norm": 28.959548950195312,
"learning_rate": 3.9501096010521705e-06,
"loss": 11.8702,
"step": 70000
},
{
"epoch": 30.421624558543876,
"eval_cer": 0.0757471023169121,
"eval_loss": 2.39117431640625,
"eval_runtime": 396.909,
"eval_samples_per_second": 13.62,
"eval_steps_per_second": 3.406,
"eval_wer": 0.22849740932642487,
"step": 70000
},
{
"epoch": 30.443357783211084,
"grad_norm": 15.879110336303711,
"learning_rate": 3.945725558965367e-06,
"loss": 11.8687,
"step": 70050
},
{
"epoch": 30.465091007878293,
"grad_norm": 14.088164329528809,
"learning_rate": 3.941341516878562e-06,
"loss": 11.85,
"step": 70100
},
{
"epoch": 30.486824232545505,
"grad_norm": 5.0238752365112305,
"learning_rate": 3.936957474791758e-06,
"loss": 11.8533,
"step": 70150
},
{
"epoch": 30.508557457212714,
"grad_norm": 11.336899757385254,
"learning_rate": 3.932573432704954e-06,
"loss": 11.8699,
"step": 70200
},
{
"epoch": 30.530290681879922,
"grad_norm": 17.313730239868164,
"learning_rate": 3.92818939061815e-06,
"loss": 11.8562,
"step": 70250
},
{
"epoch": 30.552023906547134,
"grad_norm": 28.565584182739258,
"learning_rate": 3.923805348531346e-06,
"loss": 11.8547,
"step": 70300
},
{
"epoch": 30.573757131214343,
"grad_norm": 6.773772239685059,
"learning_rate": 3.9194213064445425e-06,
"loss": 11.8538,
"step": 70350
},
{
"epoch": 30.595490355881555,
"grad_norm": 15.116411209106445,
"learning_rate": 3.915037264357739e-06,
"loss": 11.8638,
"step": 70400
},
{
"epoch": 30.617223580548764,
"grad_norm": 9.379572868347168,
"learning_rate": 3.910653222270934e-06,
"loss": 11.8757,
"step": 70450
},
{
"epoch": 30.638956805215972,
"grad_norm": 12.259918212890625,
"learning_rate": 3.90626918018413e-06,
"loss": 11.876,
"step": 70500
},
{
"epoch": 30.660690029883185,
"grad_norm": 12.57608699798584,
"learning_rate": 3.901885138097326e-06,
"loss": 11.8507,
"step": 70550
},
{
"epoch": 30.682423254550393,
"grad_norm": 8.661283493041992,
"learning_rate": 3.897501096010522e-06,
"loss": 11.8491,
"step": 70600
},
{
"epoch": 30.704156479217605,
"grad_norm": 9.84383773803711,
"learning_rate": 3.893117053923718e-06,
"loss": 11.8489,
"step": 70650
},
{
"epoch": 30.725889703884814,
"grad_norm": 9.917572975158691,
"learning_rate": 3.888733011836914e-06,
"loss": 11.8785,
"step": 70700
},
{
"epoch": 30.747622928552023,
"grad_norm": 7.059745788574219,
"learning_rate": 3.88434896975011e-06,
"loss": 11.8737,
"step": 70750
},
{
"epoch": 30.769356153219235,
"grad_norm": 20.44463348388672,
"learning_rate": 3.879964927663306e-06,
"loss": 11.8699,
"step": 70800
},
{
"epoch": 30.791089377886443,
"grad_norm": 6.311903476715088,
"learning_rate": 3.875580885576502e-06,
"loss": 11.8542,
"step": 70850
},
{
"epoch": 30.812822602553656,
"grad_norm": 6.262167930603027,
"learning_rate": 3.871196843489698e-06,
"loss": 11.8626,
"step": 70900
},
{
"epoch": 30.834555827220864,
"grad_norm": 8.859283447265625,
"learning_rate": 3.866812801402894e-06,
"loss": 11.8909,
"step": 70950
},
{
"epoch": 30.856289051888073,
"grad_norm": 6.593499660491943,
"learning_rate": 3.86242875931609e-06,
"loss": 11.8474,
"step": 71000
},
{
"epoch": 30.878022276555285,
"grad_norm": 16.074264526367188,
"learning_rate": 3.8580447172292855e-06,
"loss": 11.8634,
"step": 71050
},
{
"epoch": 30.899755501222494,
"grad_norm": 16.934633255004883,
"learning_rate": 3.853660675142482e-06,
"loss": 11.8481,
"step": 71100
},
{
"epoch": 30.921488725889702,
"grad_norm": 11.176169395446777,
"learning_rate": 3.849276633055677e-06,
"loss": 11.8678,
"step": 71150
},
{
"epoch": 30.943221950556914,
"grad_norm": 13.823466300964355,
"learning_rate": 3.844892590968873e-06,
"loss": 11.8525,
"step": 71200
},
{
"epoch": 30.964955175224123,
"grad_norm": 12.757974624633789,
"learning_rate": 3.84050854888207e-06,
"loss": 11.8596,
"step": 71250
},
{
"epoch": 30.986688399891335,
"grad_norm": 6.2555108070373535,
"learning_rate": 3.836124506795266e-06,
"loss": 11.8729,
"step": 71300
},
{
"epoch": 31.00825862537354,
"grad_norm": 7.998335361480713,
"learning_rate": 3.831740464708461e-06,
"loss": 11.7557,
"step": 71350
},
{
"epoch": 31.02999185004075,
"grad_norm": 7.063460826873779,
"learning_rate": 3.8273564226216575e-06,
"loss": 11.8673,
"step": 71400
},
{
"epoch": 31.05172507470796,
"grad_norm": 7.559152126312256,
"learning_rate": 3.822972380534854e-06,
"loss": 11.8614,
"step": 71450
},
{
"epoch": 31.07345829937517,
"grad_norm": 9.765264511108398,
"learning_rate": 3.818588338448049e-06,
"loss": 11.8243,
"step": 71500
},
{
"epoch": 31.09519152404238,
"grad_norm": 8.741211891174316,
"learning_rate": 3.8142042963612453e-06,
"loss": 11.8631,
"step": 71550
},
{
"epoch": 31.11692474870959,
"grad_norm": 10.110342025756836,
"learning_rate": 3.809820254274441e-06,
"loss": 11.8624,
"step": 71600
},
{
"epoch": 31.138657973376798,
"grad_norm": 7.525726318359375,
"learning_rate": 3.805436212187637e-06,
"loss": 11.8511,
"step": 71650
},
{
"epoch": 31.16039119804401,
"grad_norm": 6.264368057250977,
"learning_rate": 3.8010521701008336e-06,
"loss": 11.8447,
"step": 71700
},
{
"epoch": 31.18212442271122,
"grad_norm": 6.522670745849609,
"learning_rate": 3.7966681280140294e-06,
"loss": 11.8619,
"step": 71750
},
{
"epoch": 31.20385764737843,
"grad_norm": 17.985116958618164,
"learning_rate": 3.7922840859272252e-06,
"loss": 11.8723,
"step": 71800
},
{
"epoch": 31.22559087204564,
"grad_norm": 10.094488143920898,
"learning_rate": 3.787900043840421e-06,
"loss": 11.8429,
"step": 71850
},
{
"epoch": 31.24732409671285,
"grad_norm": 12.937264442443848,
"learning_rate": 3.7835160017536173e-06,
"loss": 11.8569,
"step": 71900
},
{
"epoch": 31.26905732138006,
"grad_norm": 20.594358444213867,
"learning_rate": 3.779131959666813e-06,
"loss": 11.8438,
"step": 71950
},
{
"epoch": 31.29079054604727,
"grad_norm": 10.052034378051758,
"learning_rate": 3.774747917580009e-06,
"loss": 11.8419,
"step": 72000
},
{
"epoch": 31.31252377071448,
"grad_norm": 8.929048538208008,
"learning_rate": 3.7703638754932047e-06,
"loss": 11.8299,
"step": 72050
},
{
"epoch": 31.33425699538169,
"grad_norm": 9.807400703430176,
"learning_rate": 3.7659798334064014e-06,
"loss": 11.8243,
"step": 72100
},
{
"epoch": 31.3559902200489,
"grad_norm": 17.955623626708984,
"learning_rate": 3.761595791319597e-06,
"loss": 11.8255,
"step": 72150
},
{
"epoch": 31.37772344471611,
"grad_norm": 19.642745971679688,
"learning_rate": 3.757211749232793e-06,
"loss": 11.8498,
"step": 72200
},
{
"epoch": 31.39945666938332,
"grad_norm": 8.74807357788086,
"learning_rate": 3.752827707145989e-06,
"loss": 11.8492,
"step": 72250
},
{
"epoch": 31.42118989405053,
"grad_norm": 8.516878128051758,
"learning_rate": 3.748443665059185e-06,
"loss": 11.8481,
"step": 72300
},
{
"epoch": 31.44292311871774,
"grad_norm": 26.898788452148438,
"learning_rate": 3.744059622972381e-06,
"loss": 11.8371,
"step": 72350
},
{
"epoch": 31.46465634338495,
"grad_norm": 6.748674392700195,
"learning_rate": 3.7396755808855766e-06,
"loss": 11.8486,
"step": 72400
},
{
"epoch": 31.48638956805216,
"grad_norm": 10.551872253417969,
"learning_rate": 3.7352915387987725e-06,
"loss": 11.8533,
"step": 72450
},
{
"epoch": 31.50812279271937,
"grad_norm": 14.1845703125,
"learning_rate": 3.7309074967119687e-06,
"loss": 11.8414,
"step": 72500
},
{
"epoch": 31.529856017386578,
"grad_norm": 16.51775360107422,
"learning_rate": 3.726523454625165e-06,
"loss": 11.8535,
"step": 72550
},
{
"epoch": 31.55158924205379,
"grad_norm": 24.120222091674805,
"learning_rate": 3.7221394125383607e-06,
"loss": 11.8557,
"step": 72600
},
{
"epoch": 31.573322466721,
"grad_norm": 6.063103199005127,
"learning_rate": 3.717755370451557e-06,
"loss": 11.8375,
"step": 72650
},
{
"epoch": 31.59505569138821,
"grad_norm": 11.34897232055664,
"learning_rate": 3.7133713283647528e-06,
"loss": 11.8438,
"step": 72700
},
{
"epoch": 31.61678891605542,
"grad_norm": 9.746992111206055,
"learning_rate": 3.7089872862779486e-06,
"loss": 11.8471,
"step": 72750
},
{
"epoch": 31.63852214072263,
"grad_norm": 8.114310264587402,
"learning_rate": 3.7046032441911444e-06,
"loss": 11.8412,
"step": 72800
},
{
"epoch": 31.66025536538984,
"grad_norm": 8.393730163574219,
"learning_rate": 3.70021920210434e-06,
"loss": 11.837,
"step": 72850
},
{
"epoch": 31.68198859005705,
"grad_norm": 8.245162963867188,
"learning_rate": 3.6958351600175364e-06,
"loss": 11.8553,
"step": 72900
},
{
"epoch": 31.70372181472426,
"grad_norm": 7.575582981109619,
"learning_rate": 3.6914511179307323e-06,
"loss": 11.845,
"step": 72950
},
{
"epoch": 31.72545503939147,
"grad_norm": 7.178465366363525,
"learning_rate": 3.6870670758439285e-06,
"loss": 11.8327,
"step": 73000
},
{
"epoch": 31.74718826405868,
"grad_norm": 8.260749816894531,
"learning_rate": 3.6826830337571247e-06,
"loss": 11.8478,
"step": 73050
},
{
"epoch": 31.76892148872589,
"grad_norm": 45.3736457824707,
"learning_rate": 3.6782989916703205e-06,
"loss": 11.8445,
"step": 73100
},
{
"epoch": 31.7906547133931,
"grad_norm": 15.68336296081543,
"learning_rate": 3.6739149495835163e-06,
"loss": 11.8551,
"step": 73150
},
{
"epoch": 31.81238793806031,
"grad_norm": 5.821103572845459,
"learning_rate": 3.669530907496712e-06,
"loss": 11.8517,
"step": 73200
},
{
"epoch": 31.83412116272752,
"grad_norm": 12.418885231018066,
"learning_rate": 3.665146865409908e-06,
"loss": 11.8517,
"step": 73250
},
{
"epoch": 31.85585438739473,
"grad_norm": 8.705698013305664,
"learning_rate": 3.660762823323104e-06,
"loss": 11.8424,
"step": 73300
},
{
"epoch": 31.87758761206194,
"grad_norm": 9.667759895324707,
"learning_rate": 3.6563787812363e-06,
"loss": 11.8561,
"step": 73350
},
{
"epoch": 31.89932083672915,
"grad_norm": 14.76951789855957,
"learning_rate": 3.651994739149496e-06,
"loss": 11.8605,
"step": 73400
},
{
"epoch": 31.921054061396358,
"grad_norm": 14.691853523254395,
"learning_rate": 3.6476106970626925e-06,
"loss": 11.8595,
"step": 73450
},
{
"epoch": 31.94278728606357,
"grad_norm": 7.9246721267700195,
"learning_rate": 3.6432266549758883e-06,
"loss": 11.8473,
"step": 73500
},
{
"epoch": 31.96452051073078,
"grad_norm": 5.882972240447998,
"learning_rate": 3.638842612889084e-06,
"loss": 11.8418,
"step": 73550
},
{
"epoch": 31.98625373539799,
"grad_norm": 6.664644718170166,
"learning_rate": 3.63445857080228e-06,
"loss": 11.8388,
"step": 73600
},
{
"epoch": 32.007823960880195,
"grad_norm": 7.938138961791992,
"learning_rate": 3.630074528715476e-06,
"loss": 11.7538,
"step": 73650
},
{
"epoch": 32.029557185547404,
"grad_norm": 8.011933326721191,
"learning_rate": 3.625690486628672e-06,
"loss": 11.8182,
"step": 73700
},
{
"epoch": 32.05129041021461,
"grad_norm": 11.604764938354492,
"learning_rate": 3.6213064445418678e-06,
"loss": 11.8178,
"step": 73750
},
{
"epoch": 32.07302363488183,
"grad_norm": 13.241369247436523,
"learning_rate": 3.6169224024550636e-06,
"loss": 11.8383,
"step": 73800
},
{
"epoch": 32.09475685954904,
"grad_norm": 5.11595344543457,
"learning_rate": 3.6125383603682594e-06,
"loss": 11.8519,
"step": 73850
},
{
"epoch": 32.116490084216245,
"grad_norm": 17.570140838623047,
"learning_rate": 3.608154318281456e-06,
"loss": 11.8329,
"step": 73900
},
{
"epoch": 32.138223308883454,
"grad_norm": 10.764384269714355,
"learning_rate": 3.603770276194652e-06,
"loss": 11.8312,
"step": 73950
},
{
"epoch": 32.15995653355066,
"grad_norm": 21.758943557739258,
"learning_rate": 3.5993862341078477e-06,
"loss": 11.8513,
"step": 74000
},
{
"epoch": 32.18168975821788,
"grad_norm": 6.227720260620117,
"learning_rate": 3.595002192021044e-06,
"loss": 11.8468,
"step": 74050
},
{
"epoch": 32.20342298288509,
"grad_norm": 6.502994537353516,
"learning_rate": 3.5906181499342397e-06,
"loss": 11.8287,
"step": 74100
},
{
"epoch": 32.225156207552295,
"grad_norm": 8.124176025390625,
"learning_rate": 3.5862341078474355e-06,
"loss": 11.8244,
"step": 74150
},
{
"epoch": 32.246889432219504,
"grad_norm": 17.224422454833984,
"learning_rate": 3.5818500657606313e-06,
"loss": 11.8529,
"step": 74200
},
{
"epoch": 32.26862265688671,
"grad_norm": 16.075273513793945,
"learning_rate": 3.577466023673827e-06,
"loss": 11.8337,
"step": 74250
},
{
"epoch": 32.29035588155393,
"grad_norm": 10.724888801574707,
"learning_rate": 3.573081981587024e-06,
"loss": 11.8234,
"step": 74300
},
{
"epoch": 32.31208910622114,
"grad_norm": 13.913077354431152,
"learning_rate": 3.5686979395002196e-06,
"loss": 11.824,
"step": 74350
},
{
"epoch": 32.333822330888346,
"grad_norm": 5.98539924621582,
"learning_rate": 3.5643138974134154e-06,
"loss": 11.8469,
"step": 74400
},
{
"epoch": 32.355555555555554,
"grad_norm": 16.95889663696289,
"learning_rate": 3.5599298553266117e-06,
"loss": 11.8407,
"step": 74450
},
{
"epoch": 32.37728878022276,
"grad_norm": 11.7858304977417,
"learning_rate": 3.5555458132398075e-06,
"loss": 11.8458,
"step": 74500
},
{
"epoch": 32.39902200488998,
"grad_norm": 12.35476303100586,
"learning_rate": 3.5511617711530033e-06,
"loss": 11.8322,
"step": 74550
},
{
"epoch": 32.42075522955719,
"grad_norm": 8.592928886413574,
"learning_rate": 3.546777729066199e-06,
"loss": 11.8464,
"step": 74600
},
{
"epoch": 32.442488454224396,
"grad_norm": 15.99875259399414,
"learning_rate": 3.5423936869793953e-06,
"loss": 11.8412,
"step": 74650
},
{
"epoch": 32.464221678891604,
"grad_norm": 6.029876232147217,
"learning_rate": 3.538009644892591e-06,
"loss": 11.8529,
"step": 74700
},
{
"epoch": 32.48595490355881,
"grad_norm": 25.144210815429688,
"learning_rate": 3.5336256028057874e-06,
"loss": 11.8538,
"step": 74750
},
{
"epoch": 32.50768812822603,
"grad_norm": 4.607775688171387,
"learning_rate": 3.529241560718983e-06,
"loss": 11.8652,
"step": 74800
},
{
"epoch": 32.52942135289324,
"grad_norm": 9.385605812072754,
"learning_rate": 3.5248575186321794e-06,
"loss": 11.8256,
"step": 74850
},
{
"epoch": 32.551154577560446,
"grad_norm": 8.230783462524414,
"learning_rate": 3.5204734765453752e-06,
"loss": 11.8065,
"step": 74900
},
{
"epoch": 32.572887802227655,
"grad_norm": 38.624691009521484,
"learning_rate": 3.516089434458571e-06,
"loss": 11.8167,
"step": 74950
},
{
"epoch": 32.59462102689486,
"grad_norm": 17.61267852783203,
"learning_rate": 3.511705392371767e-06,
"loss": 11.8357,
"step": 75000
},
{
"epoch": 32.61635425156208,
"grad_norm": 6.209005355834961,
"learning_rate": 3.507321350284963e-06,
"loss": 11.8375,
"step": 75050
},
{
"epoch": 32.63808747622929,
"grad_norm": 14.121482849121094,
"learning_rate": 3.502937308198159e-06,
"loss": 11.8476,
"step": 75100
},
{
"epoch": 32.659820700896496,
"grad_norm": 28.74132537841797,
"learning_rate": 3.4985532661113547e-06,
"loss": 11.8317,
"step": 75150
},
{
"epoch": 32.681553925563705,
"grad_norm": 6.806987762451172,
"learning_rate": 3.4941692240245514e-06,
"loss": 11.8527,
"step": 75200
},
{
"epoch": 32.70328715023091,
"grad_norm": 7.174561500549316,
"learning_rate": 3.489785181937747e-06,
"loss": 11.8188,
"step": 75250
},
{
"epoch": 32.72502037489812,
"grad_norm": 13.119464874267578,
"learning_rate": 3.485401139850943e-06,
"loss": 11.8392,
"step": 75300
},
{
"epoch": 32.74675359956534,
"grad_norm": 8.41006851196289,
"learning_rate": 3.4810170977641388e-06,
"loss": 11.8283,
"step": 75350
},
{
"epoch": 32.768486824232546,
"grad_norm": 10.47354507446289,
"learning_rate": 3.4766330556773346e-06,
"loss": 11.8295,
"step": 75400
},
{
"epoch": 32.790220048899755,
"grad_norm": 7.730106353759766,
"learning_rate": 3.472249013590531e-06,
"loss": 11.8672,
"step": 75450
},
{
"epoch": 32.811953273566964,
"grad_norm": 6.337311744689941,
"learning_rate": 3.4678649715037266e-06,
"loss": 11.8289,
"step": 75500
},
{
"epoch": 32.83368649823417,
"grad_norm": 9.5441255569458,
"learning_rate": 3.4634809294169225e-06,
"loss": 11.8215,
"step": 75550
},
{
"epoch": 32.85541972290139,
"grad_norm": 8.01675796508789,
"learning_rate": 3.4590968873301183e-06,
"loss": 11.8303,
"step": 75600
},
{
"epoch": 32.8771529475686,
"grad_norm": 10.308701515197754,
"learning_rate": 3.454712845243315e-06,
"loss": 11.8337,
"step": 75650
},
{
"epoch": 32.898886172235805,
"grad_norm": 8.78437614440918,
"learning_rate": 3.4503288031565107e-06,
"loss": 11.8283,
"step": 75700
},
{
"epoch": 32.920619396903014,
"grad_norm": 12.1674222946167,
"learning_rate": 3.4459447610697065e-06,
"loss": 11.8378,
"step": 75750
},
{
"epoch": 32.94235262157022,
"grad_norm": 11.723808288574219,
"learning_rate": 3.4415607189829024e-06,
"loss": 11.8242,
"step": 75800
},
{
"epoch": 32.96408584623744,
"grad_norm": 18.23768424987793,
"learning_rate": 3.4371766768960986e-06,
"loss": 11.8577,
"step": 75850
},
{
"epoch": 32.98581907090465,
"grad_norm": 23.5877742767334,
"learning_rate": 3.4327926348092944e-06,
"loss": 11.8222,
"step": 75900
},
{
"epoch": 33.007389296386854,
"grad_norm": 6.948608875274658,
"learning_rate": 3.42840859272249e-06,
"loss": 11.7388,
"step": 75950
},
{
"epoch": 33.02912252105406,
"grad_norm": 4.4768476486206055,
"learning_rate": 3.424024550635686e-06,
"loss": 11.8317,
"step": 76000
},
{
"epoch": 33.05085574572127,
"grad_norm": 7.8470282554626465,
"learning_rate": 3.4196405085488823e-06,
"loss": 11.8416,
"step": 76050
},
{
"epoch": 33.07258897038848,
"grad_norm": 7.3259053230285645,
"learning_rate": 3.4152564664620785e-06,
"loss": 11.8322,
"step": 76100
},
{
"epoch": 33.09432219505569,
"grad_norm": 16.797231674194336,
"learning_rate": 3.4108724243752743e-06,
"loss": 11.8436,
"step": 76150
},
{
"epoch": 33.116055419722905,
"grad_norm": 4.982487201690674,
"learning_rate": 3.4064883822884705e-06,
"loss": 11.799,
"step": 76200
},
{
"epoch": 33.13778864439011,
"grad_norm": 8.252666473388672,
"learning_rate": 3.4021043402016663e-06,
"loss": 11.8154,
"step": 76250
},
{
"epoch": 33.15952186905732,
"grad_norm": 9.021413803100586,
"learning_rate": 3.397720298114862e-06,
"loss": 11.8364,
"step": 76300
},
{
"epoch": 33.18125509372453,
"grad_norm": 4.675612926483154,
"learning_rate": 3.393336256028058e-06,
"loss": 11.8131,
"step": 76350
},
{
"epoch": 33.20298831839174,
"grad_norm": 8.468708992004395,
"learning_rate": 3.3889522139412538e-06,
"loss": 11.8201,
"step": 76400
},
{
"epoch": 33.224721543058955,
"grad_norm": 21.99992561340332,
"learning_rate": 3.38456817185445e-06,
"loss": 11.8318,
"step": 76450
},
{
"epoch": 33.24645476772616,
"grad_norm": 5.2964301109313965,
"learning_rate": 3.3801841297676462e-06,
"loss": 11.8196,
"step": 76500
},
{
"epoch": 33.26818799239337,
"grad_norm": 12.34626579284668,
"learning_rate": 3.375800087680842e-06,
"loss": 11.8333,
"step": 76550
},
{
"epoch": 33.28992121706058,
"grad_norm": 12.113372802734375,
"learning_rate": 3.3714160455940383e-06,
"loss": 11.8187,
"step": 76600
},
{
"epoch": 33.31165444172779,
"grad_norm": 15.364481925964355,
"learning_rate": 3.367032003507234e-06,
"loss": 11.8173,
"step": 76650
},
{
"epoch": 33.333387666395,
"grad_norm": 4.8235063552856445,
"learning_rate": 3.36264796142043e-06,
"loss": 11.8306,
"step": 76700
},
{
"epoch": 33.355120891062214,
"grad_norm": 23.78803253173828,
"learning_rate": 3.3582639193336257e-06,
"loss": 11.8027,
"step": 76750
},
{
"epoch": 33.37685411572942,
"grad_norm": 9.344151496887207,
"learning_rate": 3.3538798772468215e-06,
"loss": 11.8113,
"step": 76800
},
{
"epoch": 33.39858734039663,
"grad_norm": 8.895915985107422,
"learning_rate": 3.3494958351600178e-06,
"loss": 11.8214,
"step": 76850
},
{
"epoch": 33.42032056506384,
"grad_norm": 33.99968719482422,
"learning_rate": 3.3451117930732136e-06,
"loss": 11.8217,
"step": 76900
},
{
"epoch": 33.44205378973105,
"grad_norm": 9.92707633972168,
"learning_rate": 3.34072775098641e-06,
"loss": 11.8012,
"step": 76950
},
{
"epoch": 33.463787014398264,
"grad_norm": 12.355010986328125,
"learning_rate": 3.336343708899606e-06,
"loss": 11.8093,
"step": 77000
},
{
"epoch": 33.48552023906547,
"grad_norm": 12.512097358703613,
"learning_rate": 3.331959666812802e-06,
"loss": 11.8357,
"step": 77050
},
{
"epoch": 33.50725346373268,
"grad_norm": 4.472128391265869,
"learning_rate": 3.3275756247259977e-06,
"loss": 11.8175,
"step": 77100
},
{
"epoch": 33.52898668839989,
"grad_norm": 12.460317611694336,
"learning_rate": 3.3231915826391935e-06,
"loss": 11.8219,
"step": 77150
},
{
"epoch": 33.5507199130671,
"grad_norm": 10.255359649658203,
"learning_rate": 3.3188075405523897e-06,
"loss": 11.8135,
"step": 77200
},
{
"epoch": 33.572453137734314,
"grad_norm": 9.60875415802002,
"learning_rate": 3.3144234984655855e-06,
"loss": 11.8244,
"step": 77250
},
{
"epoch": 33.59418636240152,
"grad_norm": 7.315709590911865,
"learning_rate": 3.3100394563787813e-06,
"loss": 11.8137,
"step": 77300
},
{
"epoch": 33.61591958706873,
"grad_norm": 16.642723083496094,
"learning_rate": 3.305655414291977e-06,
"loss": 11.8368,
"step": 77350
},
{
"epoch": 33.63765281173594,
"grad_norm": 4.400660991668701,
"learning_rate": 3.301271372205174e-06,
"loss": 11.8195,
"step": 77400
},
{
"epoch": 33.65938603640315,
"grad_norm": 8.862713813781738,
"learning_rate": 3.2968873301183696e-06,
"loss": 11.8168,
"step": 77450
},
{
"epoch": 33.681119261070364,
"grad_norm": 10.427742004394531,
"learning_rate": 3.2925032880315654e-06,
"loss": 11.8033,
"step": 77500
},
{
"epoch": 33.70285248573757,
"grad_norm": 6.926135540008545,
"learning_rate": 3.2881192459447612e-06,
"loss": 11.828,
"step": 77550
},
{
"epoch": 33.72458571040478,
"grad_norm": 5.068178176879883,
"learning_rate": 3.2837352038579575e-06,
"loss": 11.8273,
"step": 77600
},
{
"epoch": 33.74631893507199,
"grad_norm": 6.944793224334717,
"learning_rate": 3.2793511617711533e-06,
"loss": 11.813,
"step": 77650
},
{
"epoch": 33.7680521597392,
"grad_norm": 36.322383880615234,
"learning_rate": 3.274967119684349e-06,
"loss": 11.8141,
"step": 77700
},
{
"epoch": 33.789785384406414,
"grad_norm": 6.488020420074463,
"learning_rate": 3.270583077597545e-06,
"loss": 11.8322,
"step": 77750
},
{
"epoch": 33.81151860907362,
"grad_norm": 9.435515403747559,
"learning_rate": 3.2661990355107407e-06,
"loss": 11.8242,
"step": 77800
},
{
"epoch": 33.83325183374083,
"grad_norm": 4.060996055603027,
"learning_rate": 3.2618149934239374e-06,
"loss": 11.816,
"step": 77850
},
{
"epoch": 33.85498505840804,
"grad_norm": 13.589747428894043,
"learning_rate": 3.257430951337133e-06,
"loss": 11.8091,
"step": 77900
},
{
"epoch": 33.87671828307525,
"grad_norm": 11.052616119384766,
"learning_rate": 3.253046909250329e-06,
"loss": 11.8391,
"step": 77950
},
{
"epoch": 33.898451507742465,
"grad_norm": 10.746622085571289,
"learning_rate": 3.2486628671635252e-06,
"loss": 11.8432,
"step": 78000
},
{
"epoch": 33.92018473240967,
"grad_norm": 10.50125503540039,
"learning_rate": 3.244278825076721e-06,
"loss": 11.8408,
"step": 78050
},
{
"epoch": 33.94191795707688,
"grad_norm": 6.73277473449707,
"learning_rate": 3.239894782989917e-06,
"loss": 11.8296,
"step": 78100
},
{
"epoch": 33.96365118174409,
"grad_norm": 10.480985641479492,
"learning_rate": 3.2355107409031126e-06,
"loss": 11.8362,
"step": 78150
},
{
"epoch": 33.9853844064113,
"grad_norm": 7.480873107910156,
"learning_rate": 3.231126698816309e-06,
"loss": 11.8262,
"step": 78200
},
{
"epoch": 34.00695463189351,
"grad_norm": 9.988080024719238,
"learning_rate": 3.2267426567295047e-06,
"loss": 11.735,
"step": 78250
},
{
"epoch": 34.028687856560715,
"grad_norm": 10.169675827026367,
"learning_rate": 3.222358614642701e-06,
"loss": 11.8251,
"step": 78300
},
{
"epoch": 34.050421081227924,
"grad_norm": 13.815673828125,
"learning_rate": 3.217974572555897e-06,
"loss": 11.8091,
"step": 78350
},
{
"epoch": 34.07215430589514,
"grad_norm": 10.704404830932617,
"learning_rate": 3.213590530469093e-06,
"loss": 11.8009,
"step": 78400
},
{
"epoch": 34.09388753056235,
"grad_norm": 9.71978759765625,
"learning_rate": 3.2092064883822888e-06,
"loss": 11.8105,
"step": 78450
},
{
"epoch": 34.11562075522956,
"grad_norm": 18.075393676757812,
"learning_rate": 3.2048224462954846e-06,
"loss": 11.8083,
"step": 78500
},
{
"epoch": 34.137353979896766,
"grad_norm": 10.046432495117188,
"learning_rate": 3.2004384042086804e-06,
"loss": 11.7901,
"step": 78550
},
{
"epoch": 34.159087204563974,
"grad_norm": 11.01378345489502,
"learning_rate": 3.1960543621218766e-06,
"loss": 11.7937,
"step": 78600
},
{
"epoch": 34.18082042923119,
"grad_norm": 20.022729873657227,
"learning_rate": 3.1916703200350724e-06,
"loss": 11.8135,
"step": 78650
},
{
"epoch": 34.2025536538984,
"grad_norm": 6.636748790740967,
"learning_rate": 3.1872862779482687e-06,
"loss": 11.8014,
"step": 78700
},
{
"epoch": 34.22428687856561,
"grad_norm": 13.776731491088867,
"learning_rate": 3.182902235861465e-06,
"loss": 11.8353,
"step": 78750
},
{
"epoch": 34.246020103232816,
"grad_norm": 4.75822114944458,
"learning_rate": 3.1785181937746607e-06,
"loss": 11.7999,
"step": 78800
},
{
"epoch": 34.267753327900024,
"grad_norm": 11.153389930725098,
"learning_rate": 3.1741341516878565e-06,
"loss": 11.8159,
"step": 78850
},
{
"epoch": 34.28948655256724,
"grad_norm": 13.353851318359375,
"learning_rate": 3.1697501096010523e-06,
"loss": 11.807,
"step": 78900
},
{
"epoch": 34.31121977723445,
"grad_norm": 5.565258026123047,
"learning_rate": 3.165366067514248e-06,
"loss": 11.8092,
"step": 78950
},
{
"epoch": 34.33295300190166,
"grad_norm": 16.32341194152832,
"learning_rate": 3.1609820254274444e-06,
"loss": 11.8032,
"step": 79000
},
{
"epoch": 34.354686226568866,
"grad_norm": 8.501863479614258,
"learning_rate": 3.15659798334064e-06,
"loss": 11.8121,
"step": 79050
},
{
"epoch": 34.376419451236075,
"grad_norm": 5.864038467407227,
"learning_rate": 3.152213941253836e-06,
"loss": 11.8086,
"step": 79100
},
{
"epoch": 34.39815267590329,
"grad_norm": 12.040675163269043,
"learning_rate": 3.1478298991670327e-06,
"loss": 11.8134,
"step": 79150
},
{
"epoch": 34.4198859005705,
"grad_norm": 24.84530258178711,
"learning_rate": 3.1434458570802285e-06,
"loss": 11.8149,
"step": 79200
},
{
"epoch": 34.44161912523771,
"grad_norm": 11.11407470703125,
"learning_rate": 3.1390618149934243e-06,
"loss": 11.8105,
"step": 79250
},
{
"epoch": 34.463352349904916,
"grad_norm": 15.913960456848145,
"learning_rate": 3.13467777290662e-06,
"loss": 11.8004,
"step": 79300
},
{
"epoch": 34.485085574572125,
"grad_norm": 7.755058288574219,
"learning_rate": 3.1302937308198163e-06,
"loss": 11.8044,
"step": 79350
},
{
"epoch": 34.50681879923934,
"grad_norm": 5.433537006378174,
"learning_rate": 3.125909688733012e-06,
"loss": 11.8161,
"step": 79400
},
{
"epoch": 34.52855202390655,
"grad_norm": 6.0616912841796875,
"learning_rate": 3.121525646646208e-06,
"loss": 11.806,
"step": 79450
},
{
"epoch": 34.55028524857376,
"grad_norm": 8.498095512390137,
"learning_rate": 3.1171416045594038e-06,
"loss": 11.8407,
"step": 79500
},
{
"epoch": 34.572018473240966,
"grad_norm": 24.549198150634766,
"learning_rate": 3.1127575624725996e-06,
"loss": 11.8168,
"step": 79550
},
{
"epoch": 34.593751697908175,
"grad_norm": 11.136092185974121,
"learning_rate": 3.1083735203857962e-06,
"loss": 11.8184,
"step": 79600
},
{
"epoch": 34.61548492257539,
"grad_norm": 8.212324142456055,
"learning_rate": 3.103989478298992e-06,
"loss": 11.802,
"step": 79650
},
{
"epoch": 34.6372181472426,
"grad_norm": 4.912588596343994,
"learning_rate": 3.099605436212188e-06,
"loss": 11.8164,
"step": 79700
},
{
"epoch": 34.65895137190981,
"grad_norm": 5.911812782287598,
"learning_rate": 3.095221394125384e-06,
"loss": 11.8035,
"step": 79750
},
{
"epoch": 34.68068459657702,
"grad_norm": 13.612801551818848,
"learning_rate": 3.09083735203858e-06,
"loss": 11.8221,
"step": 79800
},
{
"epoch": 34.702417821244225,
"grad_norm": 7.984292030334473,
"learning_rate": 3.0864533099517757e-06,
"loss": 11.8073,
"step": 79850
},
{
"epoch": 34.724151045911434,
"grad_norm": 12.358894348144531,
"learning_rate": 3.0820692678649715e-06,
"loss": 11.793,
"step": 79900
},
{
"epoch": 34.74588427057865,
"grad_norm": 9.695011138916016,
"learning_rate": 3.0776852257781673e-06,
"loss": 11.8138,
"step": 79950
},
{
"epoch": 34.76761749524586,
"grad_norm": 13.982564926147461,
"learning_rate": 3.0733011836913636e-06,
"loss": 11.8171,
"step": 80000
},
{
"epoch": 34.76761749524586,
"eval_cer": 0.0757471023169121,
"eval_loss": 2.4033260345458984,
"eval_runtime": 399.2668,
"eval_samples_per_second": 13.54,
"eval_steps_per_second": 3.386,
"eval_wer": 0.22807348092322186,
"step": 80000
},
{
"epoch": 34.78935071991307,
"grad_norm": 12.179760932922363,
"learning_rate": 3.06891714160456e-06,
"loss": 11.8053,
"step": 80050
},
{
"epoch": 34.811083944580275,
"grad_norm": 11.413451194763184,
"learning_rate": 3.0645330995177556e-06,
"loss": 11.8123,
"step": 80100
},
{
"epoch": 34.832817169247484,
"grad_norm": 4.437108993530273,
"learning_rate": 3.060149057430952e-06,
"loss": 11.8026,
"step": 80150
},
{
"epoch": 34.8545503939147,
"grad_norm": 73.1333236694336,
"learning_rate": 3.0557650153441477e-06,
"loss": 11.8005,
"step": 80200
},
{
"epoch": 34.87628361858191,
"grad_norm": 8.468038558959961,
"learning_rate": 3.0513809732573435e-06,
"loss": 11.8035,
"step": 80250
},
{
"epoch": 34.89801684324912,
"grad_norm": 6.311350345611572,
"learning_rate": 3.0469969311705393e-06,
"loss": 11.8177,
"step": 80300
},
{
"epoch": 34.919750067916326,
"grad_norm": 6.435243606567383,
"learning_rate": 3.0426128890837355e-06,
"loss": 11.8085,
"step": 80350
},
{
"epoch": 34.941483292583534,
"grad_norm": 23.506589889526367,
"learning_rate": 3.0382288469969313e-06,
"loss": 11.8198,
"step": 80400
},
{
"epoch": 34.96321651725075,
"grad_norm": 14.792353630065918,
"learning_rate": 3.033844804910127e-06,
"loss": 11.8236,
"step": 80450
},
{
"epoch": 34.98494974191796,
"grad_norm": 9.948033332824707,
"learning_rate": 3.0294607628233234e-06,
"loss": 11.7999,
"step": 80500
},
{
"epoch": 35.006519967400166,
"grad_norm": 10.65179443359375,
"learning_rate": 3.0250767207365196e-06,
"loss": 11.7357,
"step": 80550
},
{
"epoch": 35.028253192067375,
"grad_norm": 17.818878173828125,
"learning_rate": 3.0206926786497154e-06,
"loss": 11.8027,
"step": 80600
},
{
"epoch": 35.04998641673458,
"grad_norm": 12.1105318069458,
"learning_rate": 3.0163086365629112e-06,
"loss": 11.8061,
"step": 80650
},
{
"epoch": 35.07171964140179,
"grad_norm": 9.265717506408691,
"learning_rate": 3.011924594476107e-06,
"loss": 11.8093,
"step": 80700
},
{
"epoch": 35.093452866069,
"grad_norm": 7.630993366241455,
"learning_rate": 3.0075405523893033e-06,
"loss": 11.8209,
"step": 80750
},
{
"epoch": 35.115186090736216,
"grad_norm": 9.022866249084473,
"learning_rate": 3.003156510302499e-06,
"loss": 11.8025,
"step": 80800
},
{
"epoch": 35.136919315403425,
"grad_norm": 7.761841297149658,
"learning_rate": 2.998772468215695e-06,
"loss": 11.8045,
"step": 80850
},
{
"epoch": 35.158652540070634,
"grad_norm": 5.690446853637695,
"learning_rate": 2.9943884261288915e-06,
"loss": 11.7887,
"step": 80900
},
{
"epoch": 35.18038576473784,
"grad_norm": 8.062559127807617,
"learning_rate": 2.9900043840420874e-06,
"loss": 11.8076,
"step": 80950
},
{
"epoch": 35.20211898940505,
"grad_norm": 11.13079833984375,
"learning_rate": 2.985620341955283e-06,
"loss": 11.791,
"step": 81000
},
{
"epoch": 35.22385221407227,
"grad_norm": 9.493050575256348,
"learning_rate": 2.981236299868479e-06,
"loss": 11.7975,
"step": 81050
},
{
"epoch": 35.245585438739475,
"grad_norm": 5.601952075958252,
"learning_rate": 2.9768522577816748e-06,
"loss": 11.8132,
"step": 81100
},
{
"epoch": 35.267318663406684,
"grad_norm": 16.74399757385254,
"learning_rate": 2.972468215694871e-06,
"loss": 11.8072,
"step": 81150
},
{
"epoch": 35.28905188807389,
"grad_norm": 14.785292625427246,
"learning_rate": 2.968084173608067e-06,
"loss": 11.7861,
"step": 81200
},
{
"epoch": 35.3107851127411,
"grad_norm": 4.938207149505615,
"learning_rate": 2.9637001315212626e-06,
"loss": 11.7976,
"step": 81250
},
{
"epoch": 35.33251833740831,
"grad_norm": 7.194094181060791,
"learning_rate": 2.9593160894344585e-06,
"loss": 11.8092,
"step": 81300
},
{
"epoch": 35.354251562075525,
"grad_norm": 8.084842681884766,
"learning_rate": 2.954932047347655e-06,
"loss": 11.8066,
"step": 81350
},
{
"epoch": 35.375984786742734,
"grad_norm": 13.50389289855957,
"learning_rate": 2.950548005260851e-06,
"loss": 11.8166,
"step": 81400
},
{
"epoch": 35.39771801140994,
"grad_norm": 34.29204559326172,
"learning_rate": 2.9461639631740467e-06,
"loss": 11.7957,
"step": 81450
},
{
"epoch": 35.41945123607715,
"grad_norm": 115.18916320800781,
"learning_rate": 2.9417799210872425e-06,
"loss": 11.7966,
"step": 81500
},
{
"epoch": 35.44118446074436,
"grad_norm": 6.231071949005127,
"learning_rate": 2.9373958790004388e-06,
"loss": 11.8096,
"step": 81550
},
{
"epoch": 35.462917685411576,
"grad_norm": 12.070874214172363,
"learning_rate": 2.9330118369136346e-06,
"loss": 11.7912,
"step": 81600
},
{
"epoch": 35.484650910078784,
"grad_norm": 11.014456748962402,
"learning_rate": 2.9286277948268304e-06,
"loss": 11.7999,
"step": 81650
},
{
"epoch": 35.50638413474599,
"grad_norm": 7.878298759460449,
"learning_rate": 2.924243752740026e-06,
"loss": 11.7924,
"step": 81700
},
{
"epoch": 35.5281173594132,
"grad_norm": 9.946538925170898,
"learning_rate": 2.9198597106532224e-06,
"loss": 11.7876,
"step": 81750
},
{
"epoch": 35.54985058408041,
"grad_norm": 9.082895278930664,
"learning_rate": 2.9154756685664187e-06,
"loss": 11.7947,
"step": 81800
},
{
"epoch": 35.571583808747626,
"grad_norm": 8.261942863464355,
"learning_rate": 2.9110916264796145e-06,
"loss": 11.8,
"step": 81850
},
{
"epoch": 35.593317033414834,
"grad_norm": 8.274785041809082,
"learning_rate": 2.9067075843928107e-06,
"loss": 11.7947,
"step": 81900
},
{
"epoch": 35.61505025808204,
"grad_norm": 12.555307388305664,
"learning_rate": 2.9023235423060065e-06,
"loss": 11.8046,
"step": 81950
},
{
"epoch": 35.63678348274925,
"grad_norm": 16.864561080932617,
"learning_rate": 2.8979395002192023e-06,
"loss": 11.8078,
"step": 82000
},
{
"epoch": 35.65851670741646,
"grad_norm": 7.3884782791137695,
"learning_rate": 2.893555458132398e-06,
"loss": 11.7997,
"step": 82050
},
{
"epoch": 35.680249932083676,
"grad_norm": 5.330382823944092,
"learning_rate": 2.889171416045594e-06,
"loss": 11.7931,
"step": 82100
},
{
"epoch": 35.701983156750885,
"grad_norm": 135.67330932617188,
"learning_rate": 2.88478737395879e-06,
"loss": 11.8205,
"step": 82150
},
{
"epoch": 35.72371638141809,
"grad_norm": 11.841288566589355,
"learning_rate": 2.880403331871986e-06,
"loss": 11.8093,
"step": 82200
},
{
"epoch": 35.7454496060853,
"grad_norm": 7.48586368560791,
"learning_rate": 2.8760192897851822e-06,
"loss": 11.7827,
"step": 82250
},
{
"epoch": 35.76718283075251,
"grad_norm": 8.122782707214355,
"learning_rate": 2.8716352476983785e-06,
"loss": 11.8002,
"step": 82300
},
{
"epoch": 35.788916055419726,
"grad_norm": 11.976639747619629,
"learning_rate": 2.8672512056115743e-06,
"loss": 11.7877,
"step": 82350
},
{
"epoch": 35.810649280086935,
"grad_norm": 4.260416507720947,
"learning_rate": 2.86286716352477e-06,
"loss": 11.8016,
"step": 82400
},
{
"epoch": 35.83238250475414,
"grad_norm": 6.422642230987549,
"learning_rate": 2.858483121437966e-06,
"loss": 11.7963,
"step": 82450
},
{
"epoch": 35.85411572942135,
"grad_norm": 17.52088165283203,
"learning_rate": 2.8540990793511617e-06,
"loss": 11.8099,
"step": 82500
},
{
"epoch": 35.87584895408856,
"grad_norm": 18.16527557373047,
"learning_rate": 2.849715037264358e-06,
"loss": 11.8045,
"step": 82550
},
{
"epoch": 35.897582178755776,
"grad_norm": 21.54142189025879,
"learning_rate": 2.8453309951775538e-06,
"loss": 11.8147,
"step": 82600
},
{
"epoch": 35.919315403422985,
"grad_norm": 10.738289833068848,
"learning_rate": 2.8409469530907496e-06,
"loss": 11.7977,
"step": 82650
},
{
"epoch": 35.94104862809019,
"grad_norm": 7.5517144203186035,
"learning_rate": 2.8365629110039462e-06,
"loss": 11.8223,
"step": 82700
},
{
"epoch": 35.9627818527574,
"grad_norm": 17.005064010620117,
"learning_rate": 2.832178868917142e-06,
"loss": 11.7941,
"step": 82750
},
{
"epoch": 35.98451507742461,
"grad_norm": 20.802410125732422,
"learning_rate": 2.827794826830338e-06,
"loss": 11.8144,
"step": 82800
},
{
"epoch": 36.00608530290682,
"grad_norm": 4.643016815185547,
"learning_rate": 2.8234107847435337e-06,
"loss": 11.7041,
"step": 82850
},
{
"epoch": 36.02781852757403,
"grad_norm": 5.0188398361206055,
"learning_rate": 2.81902674265673e-06,
"loss": 11.7857,
"step": 82900
},
{
"epoch": 36.049551752241236,
"grad_norm": 43.052833557128906,
"learning_rate": 2.8146427005699257e-06,
"loss": 11.7965,
"step": 82950
},
{
"epoch": 36.07128497690845,
"grad_norm": 5.6486382484436035,
"learning_rate": 2.8102586584831215e-06,
"loss": 11.7801,
"step": 83000
},
{
"epoch": 36.09301820157566,
"grad_norm": 9.257708549499512,
"learning_rate": 2.8058746163963173e-06,
"loss": 11.7884,
"step": 83050
},
{
"epoch": 36.11475142624287,
"grad_norm": 9.969672203063965,
"learning_rate": 2.801490574309514e-06,
"loss": 11.7893,
"step": 83100
},
{
"epoch": 36.13648465091008,
"grad_norm": 4.864919185638428,
"learning_rate": 2.79710653222271e-06,
"loss": 11.7891,
"step": 83150
},
{
"epoch": 36.158217875577286,
"grad_norm": 15.945795059204102,
"learning_rate": 2.7927224901359056e-06,
"loss": 11.8076,
"step": 83200
},
{
"epoch": 36.1799511002445,
"grad_norm": 8.471965789794922,
"learning_rate": 2.7883384480491014e-06,
"loss": 11.8079,
"step": 83250
},
{
"epoch": 36.20168432491171,
"grad_norm": 3.7765846252441406,
"learning_rate": 2.7839544059622976e-06,
"loss": 11.7893,
"step": 83300
},
{
"epoch": 36.22341754957892,
"grad_norm": 32.80738067626953,
"learning_rate": 2.7795703638754935e-06,
"loss": 11.8117,
"step": 83350
},
{
"epoch": 36.24515077424613,
"grad_norm": 11.759632110595703,
"learning_rate": 2.7751863217886893e-06,
"loss": 11.7887,
"step": 83400
},
{
"epoch": 36.266883998913336,
"grad_norm": 9.582806587219238,
"learning_rate": 2.770802279701885e-06,
"loss": 11.7988,
"step": 83450
},
{
"epoch": 36.28861722358055,
"grad_norm": 13.065892219543457,
"learning_rate": 2.7664182376150813e-06,
"loss": 11.7974,
"step": 83500
},
{
"epoch": 36.31035044824776,
"grad_norm": 25.009721755981445,
"learning_rate": 2.7620341955282775e-06,
"loss": 11.7938,
"step": 83550
},
{
"epoch": 36.33208367291497,
"grad_norm": 7.72334098815918,
"learning_rate": 2.7576501534414734e-06,
"loss": 11.7859,
"step": 83600
},
{
"epoch": 36.35381689758218,
"grad_norm": 8.665655136108398,
"learning_rate": 2.753266111354669e-06,
"loss": 11.7859,
"step": 83650
},
{
"epoch": 36.375550122249386,
"grad_norm": 19.630573272705078,
"learning_rate": 2.7488820692678654e-06,
"loss": 11.7842,
"step": 83700
},
{
"epoch": 36.3972833469166,
"grad_norm": 13.641834259033203,
"learning_rate": 2.7444980271810612e-06,
"loss": 11.8031,
"step": 83750
},
{
"epoch": 36.41901657158381,
"grad_norm": 5.9598917961120605,
"learning_rate": 2.740113985094257e-06,
"loss": 11.7813,
"step": 83800
},
{
"epoch": 36.44074979625102,
"grad_norm": 8.549332618713379,
"learning_rate": 2.735729943007453e-06,
"loss": 11.7959,
"step": 83850
},
{
"epoch": 36.46248302091823,
"grad_norm": 4.3795857429504395,
"learning_rate": 2.731345900920649e-06,
"loss": 11.7963,
"step": 83900
},
{
"epoch": 36.484216245585436,
"grad_norm": 7.300856113433838,
"learning_rate": 2.726961858833845e-06,
"loss": 11.7902,
"step": 83950
},
{
"epoch": 36.50594947025265,
"grad_norm": 7.026275157928467,
"learning_rate": 2.722577816747041e-06,
"loss": 11.7988,
"step": 84000
},
{
"epoch": 36.52768269491986,
"grad_norm": 12.537973403930664,
"learning_rate": 2.7181937746602373e-06,
"loss": 11.7891,
"step": 84050
},
{
"epoch": 36.54941591958707,
"grad_norm": 6.903670787811279,
"learning_rate": 2.713809732573433e-06,
"loss": 11.7984,
"step": 84100
},
{
"epoch": 36.57114914425428,
"grad_norm": 11.342251777648926,
"learning_rate": 2.709425690486629e-06,
"loss": 11.7915,
"step": 84150
},
{
"epoch": 36.59288236892149,
"grad_norm": 10.707886695861816,
"learning_rate": 2.7050416483998248e-06,
"loss": 11.7955,
"step": 84200
},
{
"epoch": 36.614615593588695,
"grad_norm": 7.921166896820068,
"learning_rate": 2.7006576063130206e-06,
"loss": 11.7868,
"step": 84250
},
{
"epoch": 36.63634881825591,
"grad_norm": 9.0649995803833,
"learning_rate": 2.696273564226217e-06,
"loss": 11.7949,
"step": 84300
},
{
"epoch": 36.65808204292312,
"grad_norm": 13.355379104614258,
"learning_rate": 2.6918895221394126e-06,
"loss": 11.7763,
"step": 84350
},
{
"epoch": 36.67981526759033,
"grad_norm": 12.289958953857422,
"learning_rate": 2.6875054800526084e-06,
"loss": 11.7861,
"step": 84400
},
{
"epoch": 36.70154849225754,
"grad_norm": 4.684927940368652,
"learning_rate": 2.683121437965805e-06,
"loss": 11.7803,
"step": 84450
},
{
"epoch": 36.723281716924745,
"grad_norm": 7.917582035064697,
"learning_rate": 2.678737395879001e-06,
"loss": 11.799,
"step": 84500
},
{
"epoch": 36.74501494159196,
"grad_norm": 5.413401126861572,
"learning_rate": 2.6743533537921967e-06,
"loss": 11.7873,
"step": 84550
},
{
"epoch": 36.76674816625917,
"grad_norm": 14.283327102661133,
"learning_rate": 2.6699693117053925e-06,
"loss": 11.7867,
"step": 84600
},
{
"epoch": 36.78848139092638,
"grad_norm": 8.163966178894043,
"learning_rate": 2.6655852696185883e-06,
"loss": 11.8112,
"step": 84650
},
{
"epoch": 36.81021461559359,
"grad_norm": 7.235820770263672,
"learning_rate": 2.6612012275317846e-06,
"loss": 11.781,
"step": 84700
},
{
"epoch": 36.831947840260796,
"grad_norm": 4.746747016906738,
"learning_rate": 2.6568171854449804e-06,
"loss": 11.7941,
"step": 84750
},
{
"epoch": 36.85368106492801,
"grad_norm": 4.514492511749268,
"learning_rate": 2.652433143358176e-06,
"loss": 11.7947,
"step": 84800
},
{
"epoch": 36.87541428959522,
"grad_norm": 10.46290111541748,
"learning_rate": 2.648049101271372e-06,
"loss": 11.7826,
"step": 84850
},
{
"epoch": 36.89714751426243,
"grad_norm": 8.848064422607422,
"learning_rate": 2.6436650591845687e-06,
"loss": 11.7802,
"step": 84900
},
{
"epoch": 36.91888073892964,
"grad_norm": 6.194151401519775,
"learning_rate": 2.6392810170977645e-06,
"loss": 11.7857,
"step": 84950
},
{
"epoch": 36.940613963596846,
"grad_norm": 8.114167213439941,
"learning_rate": 2.6348969750109603e-06,
"loss": 11.7934,
"step": 85000
},
{
"epoch": 36.96234718826406,
"grad_norm": 11.507193565368652,
"learning_rate": 2.6305129329241565e-06,
"loss": 11.7928,
"step": 85050
},
{
"epoch": 36.98408041293127,
"grad_norm": 12.524467468261719,
"learning_rate": 2.6261288908373523e-06,
"loss": 11.7915,
"step": 85100
},
{
"epoch": 37.00565063841348,
"grad_norm": 12.674412727355957,
"learning_rate": 2.621744848750548e-06,
"loss": 11.6996,
"step": 85150
},
{
"epoch": 37.027383863080686,
"grad_norm": 4.092529773712158,
"learning_rate": 2.617360806663744e-06,
"loss": 11.784,
"step": 85200
},
{
"epoch": 37.049117087747895,
"grad_norm": 4.772137641906738,
"learning_rate": 2.6129767645769398e-06,
"loss": 11.7883,
"step": 85250
},
{
"epoch": 37.070850312415104,
"grad_norm": 9.263222694396973,
"learning_rate": 2.608592722490136e-06,
"loss": 11.7655,
"step": 85300
},
{
"epoch": 37.09258353708231,
"grad_norm": 11.615614891052246,
"learning_rate": 2.6042086804033322e-06,
"loss": 11.7987,
"step": 85350
},
{
"epoch": 37.11431676174953,
"grad_norm": 3.8077142238616943,
"learning_rate": 2.599824638316528e-06,
"loss": 11.7786,
"step": 85400
},
{
"epoch": 37.13604998641674,
"grad_norm": 5.6897993087768555,
"learning_rate": 2.5954405962297243e-06,
"loss": 11.797,
"step": 85450
},
{
"epoch": 37.157783211083945,
"grad_norm": 5.3308305740356445,
"learning_rate": 2.59105655414292e-06,
"loss": 11.7697,
"step": 85500
},
{
"epoch": 37.179516435751154,
"grad_norm": 8.419775009155273,
"learning_rate": 2.586672512056116e-06,
"loss": 11.7771,
"step": 85550
},
{
"epoch": 37.20124966041836,
"grad_norm": 4.629072189331055,
"learning_rate": 2.5822884699693117e-06,
"loss": 11.7899,
"step": 85600
},
{
"epoch": 37.22298288508557,
"grad_norm": 16.38741683959961,
"learning_rate": 2.5779044278825075e-06,
"loss": 11.7799,
"step": 85650
},
{
"epoch": 37.24471610975279,
"grad_norm": 39.32244110107422,
"learning_rate": 2.5735203857957038e-06,
"loss": 11.791,
"step": 85700
},
{
"epoch": 37.266449334419995,
"grad_norm": 14.12863826751709,
"learning_rate": 2.5691363437089e-06,
"loss": 11.7906,
"step": 85750
},
{
"epoch": 37.288182559087204,
"grad_norm": 7.304044723510742,
"learning_rate": 2.564752301622096e-06,
"loss": 11.7865,
"step": 85800
},
{
"epoch": 37.30991578375441,
"grad_norm": 11.347620964050293,
"learning_rate": 2.560368259535292e-06,
"loss": 11.8021,
"step": 85850
},
{
"epoch": 37.33164900842162,
"grad_norm": 9.358373641967773,
"learning_rate": 2.555984217448488e-06,
"loss": 11.7979,
"step": 85900
},
{
"epoch": 37.35338223308884,
"grad_norm": 3.5885915756225586,
"learning_rate": 2.5516001753616837e-06,
"loss": 11.7782,
"step": 85950
},
{
"epoch": 37.375115457756046,
"grad_norm": 9.129725456237793,
"learning_rate": 2.5472161332748795e-06,
"loss": 11.783,
"step": 86000
},
{
"epoch": 37.396848682423254,
"grad_norm": 8.22261905670166,
"learning_rate": 2.5428320911880757e-06,
"loss": 11.7652,
"step": 86050
},
{
"epoch": 37.41858190709046,
"grad_norm": 6.796608924865723,
"learning_rate": 2.5384480491012715e-06,
"loss": 11.7897,
"step": 86100
},
{
"epoch": 37.44031513175767,
"grad_norm": 3.8288304805755615,
"learning_rate": 2.5340640070144673e-06,
"loss": 11.7803,
"step": 86150
},
{
"epoch": 37.46204835642489,
"grad_norm": 4.8984599113464355,
"learning_rate": 2.529679964927664e-06,
"loss": 11.7998,
"step": 86200
},
{
"epoch": 37.483781581092096,
"grad_norm": 11.828535079956055,
"learning_rate": 2.52529592284086e-06,
"loss": 11.7756,
"step": 86250
},
{
"epoch": 37.505514805759304,
"grad_norm": 7.632526397705078,
"learning_rate": 2.5209118807540556e-06,
"loss": 11.7816,
"step": 86300
},
{
"epoch": 37.52724803042651,
"grad_norm": 13.461671829223633,
"learning_rate": 2.5165278386672514e-06,
"loss": 11.7924,
"step": 86350
},
{
"epoch": 37.54898125509372,
"grad_norm": 5.831872940063477,
"learning_rate": 2.5121437965804472e-06,
"loss": 11.767,
"step": 86400
},
{
"epoch": 37.57071447976094,
"grad_norm": 15.29990005493164,
"learning_rate": 2.5077597544936435e-06,
"loss": 11.7853,
"step": 86450
},
{
"epoch": 37.592447704428146,
"grad_norm": 18.313222885131836,
"learning_rate": 2.5033757124068393e-06,
"loss": 11.7759,
"step": 86500
},
{
"epoch": 37.614180929095355,
"grad_norm": 7.100087642669678,
"learning_rate": 2.4989916703200355e-06,
"loss": 11.7861,
"step": 86550
},
{
"epoch": 37.63591415376256,
"grad_norm": 10.970837593078613,
"learning_rate": 2.4946076282332313e-06,
"loss": 11.7859,
"step": 86600
},
{
"epoch": 37.65764737842977,
"grad_norm": 17.918962478637695,
"learning_rate": 2.490223586146427e-06,
"loss": 11.781,
"step": 86650
},
{
"epoch": 37.67938060309699,
"grad_norm": 23.407426834106445,
"learning_rate": 2.485839544059623e-06,
"loss": 11.7763,
"step": 86700
},
{
"epoch": 37.701113827764196,
"grad_norm": 6.149535655975342,
"learning_rate": 2.481455501972819e-06,
"loss": 11.7819,
"step": 86750
},
{
"epoch": 37.722847052431405,
"grad_norm": 5.372469425201416,
"learning_rate": 2.477071459886015e-06,
"loss": 11.7762,
"step": 86800
},
{
"epoch": 37.74458027709861,
"grad_norm": 21.797292709350586,
"learning_rate": 2.472687417799211e-06,
"loss": 11.7822,
"step": 86850
},
{
"epoch": 37.76631350176582,
"grad_norm": 17.68259048461914,
"learning_rate": 2.468303375712407e-06,
"loss": 11.7753,
"step": 86900
},
{
"epoch": 37.78804672643304,
"grad_norm": 10.163092613220215,
"learning_rate": 2.463919333625603e-06,
"loss": 11.7662,
"step": 86950
},
{
"epoch": 37.809779951100246,
"grad_norm": 6.926383972167969,
"learning_rate": 2.459535291538799e-06,
"loss": 11.7897,
"step": 87000
},
{
"epoch": 37.831513175767455,
"grad_norm": 8.474647521972656,
"learning_rate": 2.455151249451995e-06,
"loss": 11.78,
"step": 87050
},
{
"epoch": 37.853246400434664,
"grad_norm": 16.252666473388672,
"learning_rate": 2.4507672073651907e-06,
"loss": 11.7683,
"step": 87100
},
{
"epoch": 37.87497962510187,
"grad_norm": 9.422881126403809,
"learning_rate": 2.446383165278387e-06,
"loss": 11.78,
"step": 87150
},
{
"epoch": 37.89671284976909,
"grad_norm": 13.145493507385254,
"learning_rate": 2.441999123191583e-06,
"loss": 11.7806,
"step": 87200
},
{
"epoch": 37.9184460744363,
"grad_norm": 9.204483985900879,
"learning_rate": 2.437615081104779e-06,
"loss": 11.7722,
"step": 87250
},
{
"epoch": 37.940179299103505,
"grad_norm": 9.611700057983398,
"learning_rate": 2.4332310390179748e-06,
"loss": 11.7957,
"step": 87300
},
{
"epoch": 37.961912523770714,
"grad_norm": 11.884017944335938,
"learning_rate": 2.4288469969311706e-06,
"loss": 11.7755,
"step": 87350
},
{
"epoch": 37.98364574843792,
"grad_norm": 5.692808151245117,
"learning_rate": 2.424462954844367e-06,
"loss": 11.7766,
"step": 87400
},
{
"epoch": 38.00521597392013,
"grad_norm": 66.74461364746094,
"learning_rate": 2.4200789127575626e-06,
"loss": 11.7051,
"step": 87450
},
{
"epoch": 38.02694919858734,
"grad_norm": 5.268041610717773,
"learning_rate": 2.4156948706707584e-06,
"loss": 11.7609,
"step": 87500
},
{
"epoch": 38.04868242325455,
"grad_norm": 8.159131050109863,
"learning_rate": 2.4113108285839547e-06,
"loss": 11.773,
"step": 87550
},
{
"epoch": 38.07041564792176,
"grad_norm": 8.749338150024414,
"learning_rate": 2.4069267864971505e-06,
"loss": 11.7782,
"step": 87600
},
{
"epoch": 38.09214887258897,
"grad_norm": 9.121374130249023,
"learning_rate": 2.4025427444103467e-06,
"loss": 11.7703,
"step": 87650
},
{
"epoch": 38.11388209725618,
"grad_norm": 10.743656158447266,
"learning_rate": 2.3981587023235425e-06,
"loss": 11.7798,
"step": 87700
},
{
"epoch": 38.13561532192339,
"grad_norm": 5.2683000564575195,
"learning_rate": 2.3937746602367383e-06,
"loss": 11.7791,
"step": 87750
},
{
"epoch": 38.1573485465906,
"grad_norm": 9.600702285766602,
"learning_rate": 2.389390618149934e-06,
"loss": 11.7658,
"step": 87800
},
{
"epoch": 38.17908177125781,
"grad_norm": 10.094902992248535,
"learning_rate": 2.3850065760631304e-06,
"loss": 11.7891,
"step": 87850
},
{
"epoch": 38.20081499592502,
"grad_norm": 8.227887153625488,
"learning_rate": 2.3806225339763266e-06,
"loss": 11.7706,
"step": 87900
},
{
"epoch": 38.22254822059223,
"grad_norm": 7.997677803039551,
"learning_rate": 2.3762384918895224e-06,
"loss": 11.772,
"step": 87950
},
{
"epoch": 38.24428144525944,
"grad_norm": 6.51764440536499,
"learning_rate": 2.3718544498027182e-06,
"loss": 11.7739,
"step": 88000
},
{
"epoch": 38.26601466992665,
"grad_norm": 4.949069499969482,
"learning_rate": 2.367470407715914e-06,
"loss": 11.778,
"step": 88050
},
{
"epoch": 38.28774789459386,
"grad_norm": 4.438246250152588,
"learning_rate": 2.3630863656291103e-06,
"loss": 11.7742,
"step": 88100
},
{
"epoch": 38.30948111926107,
"grad_norm": 11.066926956176758,
"learning_rate": 2.358702323542306e-06,
"loss": 11.7817,
"step": 88150
},
{
"epoch": 38.33121434392828,
"grad_norm": 6.765926837921143,
"learning_rate": 2.3543182814555023e-06,
"loss": 11.7767,
"step": 88200
},
{
"epoch": 38.35294756859549,
"grad_norm": 6.704973220825195,
"learning_rate": 2.349934239368698e-06,
"loss": 11.7671,
"step": 88250
},
{
"epoch": 38.3746807932627,
"grad_norm": 10.575370788574219,
"learning_rate": 2.3455501972818944e-06,
"loss": 11.771,
"step": 88300
},
{
"epoch": 38.396414017929914,
"grad_norm": 3.860527992248535,
"learning_rate": 2.34116615519509e-06,
"loss": 11.7685,
"step": 88350
},
{
"epoch": 38.41814724259712,
"grad_norm": 10.35341739654541,
"learning_rate": 2.336782113108286e-06,
"loss": 11.7715,
"step": 88400
},
{
"epoch": 38.43988046726433,
"grad_norm": 7.268162727355957,
"learning_rate": 2.332398071021482e-06,
"loss": 11.7878,
"step": 88450
},
{
"epoch": 38.46161369193154,
"grad_norm": 6.3647871017456055,
"learning_rate": 2.328014028934678e-06,
"loss": 11.7675,
"step": 88500
},
{
"epoch": 38.48334691659875,
"grad_norm": 27.453014373779297,
"learning_rate": 2.323629986847874e-06,
"loss": 11.7635,
"step": 88550
},
{
"epoch": 38.505080141265964,
"grad_norm": 12.105439186096191,
"learning_rate": 2.31924594476107e-06,
"loss": 11.7825,
"step": 88600
},
{
"epoch": 38.52681336593317,
"grad_norm": 12.992817878723145,
"learning_rate": 2.314861902674266e-06,
"loss": 11.76,
"step": 88650
},
{
"epoch": 38.54854659060038,
"grad_norm": 8.603561401367188,
"learning_rate": 2.3104778605874617e-06,
"loss": 11.7655,
"step": 88700
},
{
"epoch": 38.57027981526759,
"grad_norm": 4.036582946777344,
"learning_rate": 2.306093818500658e-06,
"loss": 11.7882,
"step": 88750
},
{
"epoch": 38.5920130399348,
"grad_norm": 5.422863483428955,
"learning_rate": 2.3017097764138538e-06,
"loss": 11.7683,
"step": 88800
},
{
"epoch": 38.61374626460201,
"grad_norm": 5.842752933502197,
"learning_rate": 2.2973257343270496e-06,
"loss": 11.7695,
"step": 88850
},
{
"epoch": 38.63547948926922,
"grad_norm": 9.653190612792969,
"learning_rate": 2.292941692240246e-06,
"loss": 11.7873,
"step": 88900
},
{
"epoch": 38.65721271393643,
"grad_norm": 5.730354309082031,
"learning_rate": 2.2885576501534416e-06,
"loss": 11.768,
"step": 88950
},
{
"epoch": 38.67894593860364,
"grad_norm": 17.826345443725586,
"learning_rate": 2.284173608066638e-06,
"loss": 11.776,
"step": 89000
},
{
"epoch": 38.70067916327085,
"grad_norm": 9.027566909790039,
"learning_rate": 2.2797895659798336e-06,
"loss": 11.755,
"step": 89050
},
{
"epoch": 38.72241238793806,
"grad_norm": 4.53999662399292,
"learning_rate": 2.2754055238930295e-06,
"loss": 11.765,
"step": 89100
},
{
"epoch": 38.74414561260527,
"grad_norm": 8.569631576538086,
"learning_rate": 2.2710214818062253e-06,
"loss": 11.7777,
"step": 89150
},
{
"epoch": 38.76587883727248,
"grad_norm": 24.965681076049805,
"learning_rate": 2.2666374397194215e-06,
"loss": 11.7738,
"step": 89200
},
{
"epoch": 38.78761206193969,
"grad_norm": 7.554117202758789,
"learning_rate": 2.2622533976326173e-06,
"loss": 11.7801,
"step": 89250
},
{
"epoch": 38.8093452866069,
"grad_norm": 16.02465057373047,
"learning_rate": 2.2578693555458135e-06,
"loss": 11.785,
"step": 89300
},
{
"epoch": 38.83107851127411,
"grad_norm": 9.892585754394531,
"learning_rate": 2.2534853134590094e-06,
"loss": 11.7812,
"step": 89350
},
{
"epoch": 38.85281173594132,
"grad_norm": 20.471792221069336,
"learning_rate": 2.2491012713722056e-06,
"loss": 11.774,
"step": 89400
},
{
"epoch": 38.87454496060853,
"grad_norm": 15.924908638000488,
"learning_rate": 2.2447172292854014e-06,
"loss": 11.7735,
"step": 89450
},
{
"epoch": 38.89627818527574,
"grad_norm": 9.257697105407715,
"learning_rate": 2.2403331871985972e-06,
"loss": 11.784,
"step": 89500
},
{
"epoch": 38.91801140994295,
"grad_norm": 8.59609317779541,
"learning_rate": 2.235949145111793e-06,
"loss": 11.7924,
"step": 89550
},
{
"epoch": 38.93974463461016,
"grad_norm": 5.643759727478027,
"learning_rate": 2.2315651030249893e-06,
"loss": 11.7727,
"step": 89600
},
{
"epoch": 38.96147785927737,
"grad_norm": 3.887133836746216,
"learning_rate": 2.227181060938185e-06,
"loss": 11.7728,
"step": 89650
},
{
"epoch": 38.98321108394458,
"grad_norm": 15.085949897766113,
"learning_rate": 2.2227970188513813e-06,
"loss": 11.7837,
"step": 89700
},
{
"epoch": 39.00478130942679,
"grad_norm": 4.734740257263184,
"learning_rate": 2.218412976764577e-06,
"loss": 11.6785,
"step": 89750
},
{
"epoch": 39.026514534094,
"grad_norm": 7.277717590332031,
"learning_rate": 2.214028934677773e-06,
"loss": 11.7636,
"step": 89800
},
{
"epoch": 39.04824775876121,
"grad_norm": 12.622576713562012,
"learning_rate": 2.209644892590969e-06,
"loss": 11.7632,
"step": 89850
},
{
"epoch": 39.069980983428415,
"grad_norm": 8.72470760345459,
"learning_rate": 2.205260850504165e-06,
"loss": 11.7806,
"step": 89900
},
{
"epoch": 39.091714208095624,
"grad_norm": 8.548195838928223,
"learning_rate": 2.2008768084173608e-06,
"loss": 11.7642,
"step": 89950
},
{
"epoch": 39.11344743276284,
"grad_norm": 5.267911911010742,
"learning_rate": 2.196492766330557e-06,
"loss": 11.7606,
"step": 90000
},
{
"epoch": 39.11344743276284,
"eval_cer": 0.07434609260242184,
"eval_loss": 2.4127438068389893,
"eval_runtime": 397.8168,
"eval_samples_per_second": 13.589,
"eval_steps_per_second": 3.399,
"eval_wer": 0.22556131260794474,
"step": 90000
},
{
"epoch": 39.13518065743005,
"grad_norm": 13.38624095916748,
"learning_rate": 2.1921087242437532e-06,
"loss": 11.7632,
"step": 90050
},
{
"epoch": 39.15691388209726,
"grad_norm": 13.824868202209473,
"learning_rate": 2.187724682156949e-06,
"loss": 11.7722,
"step": 90100
},
{
"epoch": 39.178647106764465,
"grad_norm": 10.600975036621094,
"learning_rate": 2.183340640070145e-06,
"loss": 11.7692,
"step": 90150
},
{
"epoch": 39.200380331431674,
"grad_norm": 10.806214332580566,
"learning_rate": 2.1789565979833407e-06,
"loss": 11.753,
"step": 90200
},
{
"epoch": 39.22211355609888,
"grad_norm": 7.800635814666748,
"learning_rate": 2.1745725558965365e-06,
"loss": 11.7616,
"step": 90250
},
{
"epoch": 39.2438467807661,
"grad_norm": 7.492881774902344,
"learning_rate": 2.1701885138097327e-06,
"loss": 11.7439,
"step": 90300
},
{
"epoch": 39.26558000543331,
"grad_norm": 12.061040878295898,
"learning_rate": 2.1658044717229285e-06,
"loss": 11.7567,
"step": 90350
},
{
"epoch": 39.287313230100516,
"grad_norm": 6.541141510009766,
"learning_rate": 2.1614204296361248e-06,
"loss": 11.775,
"step": 90400
},
{
"epoch": 39.309046454767724,
"grad_norm": 5.959283828735352,
"learning_rate": 2.1570363875493206e-06,
"loss": 11.7633,
"step": 90450
},
{
"epoch": 39.33077967943493,
"grad_norm": 9.226263046264648,
"learning_rate": 2.152652345462517e-06,
"loss": 11.7746,
"step": 90500
},
{
"epoch": 39.35251290410215,
"grad_norm": 15.486861228942871,
"learning_rate": 2.1482683033757126e-06,
"loss": 11.775,
"step": 90550
},
{
"epoch": 39.37424612876936,
"grad_norm": 6.420067310333252,
"learning_rate": 2.1438842612889084e-06,
"loss": 11.7812,
"step": 90600
},
{
"epoch": 39.395979353436566,
"grad_norm": 7.908544540405273,
"learning_rate": 2.1395002192021042e-06,
"loss": 11.7573,
"step": 90650
},
{
"epoch": 39.417712578103774,
"grad_norm": 7.737216472625732,
"learning_rate": 2.1351161771153005e-06,
"loss": 11.7621,
"step": 90700
},
{
"epoch": 39.43944580277098,
"grad_norm": 5.630201816558838,
"learning_rate": 2.1307321350284967e-06,
"loss": 11.7629,
"step": 90750
},
{
"epoch": 39.4611790274382,
"grad_norm": 5.563817024230957,
"learning_rate": 2.1263480929416925e-06,
"loss": 11.7663,
"step": 90800
},
{
"epoch": 39.48291225210541,
"grad_norm": 6.785152912139893,
"learning_rate": 2.1219640508548883e-06,
"loss": 11.7575,
"step": 90850
},
{
"epoch": 39.504645476772616,
"grad_norm": 5.261542797088623,
"learning_rate": 2.117580008768084e-06,
"loss": 11.7676,
"step": 90900
},
{
"epoch": 39.526378701439825,
"grad_norm": 8.951974868774414,
"learning_rate": 2.1131959666812804e-06,
"loss": 11.7626,
"step": 90950
},
{
"epoch": 39.54811192610703,
"grad_norm": 5.925467491149902,
"learning_rate": 2.108811924594476e-06,
"loss": 11.7652,
"step": 91000
},
{
"epoch": 39.56984515077425,
"grad_norm": 6.1618194580078125,
"learning_rate": 2.1044278825076724e-06,
"loss": 11.7526,
"step": 91050
},
{
"epoch": 39.59157837544146,
"grad_norm": 5.625064373016357,
"learning_rate": 2.1000438404208682e-06,
"loss": 11.7605,
"step": 91100
},
{
"epoch": 39.613311600108666,
"grad_norm": 12.690841674804688,
"learning_rate": 2.0956597983340645e-06,
"loss": 11.7649,
"step": 91150
},
{
"epoch": 39.635044824775875,
"grad_norm": 12.550384521484375,
"learning_rate": 2.0912757562472603e-06,
"loss": 11.792,
"step": 91200
},
{
"epoch": 39.65677804944308,
"grad_norm": 9.650748252868652,
"learning_rate": 2.086891714160456e-06,
"loss": 11.7733,
"step": 91250
},
{
"epoch": 39.6785112741103,
"grad_norm": 8.173276901245117,
"learning_rate": 2.082507672073652e-06,
"loss": 11.7835,
"step": 91300
},
{
"epoch": 39.70024449877751,
"grad_norm": 8.434061050415039,
"learning_rate": 2.0781236299868477e-06,
"loss": 11.7474,
"step": 91350
},
{
"epoch": 39.721977723444716,
"grad_norm": 9.518670082092285,
"learning_rate": 2.073739587900044e-06,
"loss": 11.7687,
"step": 91400
},
{
"epoch": 39.743710948111925,
"grad_norm": 6.331693649291992,
"learning_rate": 2.06935554581324e-06,
"loss": 11.7578,
"step": 91450
},
{
"epoch": 39.765444172779134,
"grad_norm": 9.190653800964355,
"learning_rate": 2.064971503726436e-06,
"loss": 11.7756,
"step": 91500
},
{
"epoch": 39.78717739744635,
"grad_norm": 13.398709297180176,
"learning_rate": 2.060587461639632e-06,
"loss": 11.7704,
"step": 91550
},
{
"epoch": 39.80891062211356,
"grad_norm": 11.104494094848633,
"learning_rate": 2.056203419552828e-06,
"loss": 11.7623,
"step": 91600
},
{
"epoch": 39.83064384678077,
"grad_norm": 5.477586269378662,
"learning_rate": 2.051819377466024e-06,
"loss": 11.7647,
"step": 91650
},
{
"epoch": 39.852377071447975,
"grad_norm": 3.5749564170837402,
"learning_rate": 2.0474353353792197e-06,
"loss": 11.7695,
"step": 91700
},
{
"epoch": 39.874110296115184,
"grad_norm": 5.978471755981445,
"learning_rate": 2.043051293292416e-06,
"loss": 11.7648,
"step": 91750
},
{
"epoch": 39.89584352078239,
"grad_norm": 25.19099235534668,
"learning_rate": 2.0386672512056117e-06,
"loss": 11.7612,
"step": 91800
},
{
"epoch": 39.91757674544961,
"grad_norm": 5.882903099060059,
"learning_rate": 2.034283209118808e-06,
"loss": 11.7799,
"step": 91850
},
{
"epoch": 39.93930997011682,
"grad_norm": 27.048709869384766,
"learning_rate": 2.0298991670320037e-06,
"loss": 11.7737,
"step": 91900
},
{
"epoch": 39.961043194784025,
"grad_norm": 9.073412895202637,
"learning_rate": 2.0255151249451996e-06,
"loss": 11.7612,
"step": 91950
},
{
"epoch": 39.982776419451234,
"grad_norm": 13.002010345458984,
"learning_rate": 2.0211310828583954e-06,
"loss": 11.7782,
"step": 92000
},
{
"epoch": 40.00434664493344,
"grad_norm": 9.383039474487305,
"learning_rate": 2.0167470407715916e-06,
"loss": 11.6788,
"step": 92050
},
{
"epoch": 40.02607986960065,
"grad_norm": 9.476020812988281,
"learning_rate": 2.0123629986847874e-06,
"loss": 11.7597,
"step": 92100
},
{
"epoch": 40.04781309426786,
"grad_norm": 8.409124374389648,
"learning_rate": 2.0079789565979836e-06,
"loss": 11.7492,
"step": 92150
},
{
"epoch": 40.069546318935075,
"grad_norm": 6.030084609985352,
"learning_rate": 2.0035949145111795e-06,
"loss": 11.7751,
"step": 92200
},
{
"epoch": 40.09127954360228,
"grad_norm": 4.96845006942749,
"learning_rate": 1.9992108724243757e-06,
"loss": 11.7443,
"step": 92250
},
{
"epoch": 40.11301276826949,
"grad_norm": 11.096965789794922,
"learning_rate": 1.9948268303375715e-06,
"loss": 11.7718,
"step": 92300
},
{
"epoch": 40.1347459929367,
"grad_norm": 6.45682430267334,
"learning_rate": 1.9904427882507673e-06,
"loss": 11.7592,
"step": 92350
},
{
"epoch": 40.15647921760391,
"grad_norm": 7.665036678314209,
"learning_rate": 1.986058746163963e-06,
"loss": 11.7715,
"step": 92400
},
{
"epoch": 40.178212442271125,
"grad_norm": 4.477396011352539,
"learning_rate": 1.9816747040771594e-06,
"loss": 11.7572,
"step": 92450
},
{
"epoch": 40.19994566693833,
"grad_norm": 13.868246078491211,
"learning_rate": 1.977290661990355e-06,
"loss": 11.7656,
"step": 92500
},
{
"epoch": 40.22167889160554,
"grad_norm": 7.29514217376709,
"learning_rate": 1.9729066199035514e-06,
"loss": 11.7525,
"step": 92550
},
{
"epoch": 40.24341211627275,
"grad_norm": 5.126781463623047,
"learning_rate": 1.968522577816747e-06,
"loss": 11.7623,
"step": 92600
},
{
"epoch": 40.26514534093996,
"grad_norm": 13.333416938781738,
"learning_rate": 1.964138535729943e-06,
"loss": 11.7645,
"step": 92650
},
{
"epoch": 40.286878565607175,
"grad_norm": 4.38609504699707,
"learning_rate": 1.9597544936431393e-06,
"loss": 11.7471,
"step": 92700
},
{
"epoch": 40.308611790274384,
"grad_norm": 18.64853858947754,
"learning_rate": 1.955370451556335e-06,
"loss": 11.7614,
"step": 92750
},
{
"epoch": 40.33034501494159,
"grad_norm": 10.51586627960205,
"learning_rate": 1.950986409469531e-06,
"loss": 11.7572,
"step": 92800
},
{
"epoch": 40.3520782396088,
"grad_norm": 10.547462463378906,
"learning_rate": 1.946602367382727e-06,
"loss": 11.7609,
"step": 92850
},
{
"epoch": 40.37381146427601,
"grad_norm": 5.690642356872559,
"learning_rate": 1.9422183252959233e-06,
"loss": 11.7481,
"step": 92900
},
{
"epoch": 40.395544688943225,
"grad_norm": 3.9211175441741943,
"learning_rate": 1.937834283209119e-06,
"loss": 11.7461,
"step": 92950
},
{
"epoch": 40.417277913610434,
"grad_norm": 4.40631103515625,
"learning_rate": 1.933450241122315e-06,
"loss": 11.7587,
"step": 93000
},
{
"epoch": 40.43901113827764,
"grad_norm": 4.007054328918457,
"learning_rate": 1.9290661990355108e-06,
"loss": 11.7649,
"step": 93050
},
{
"epoch": 40.46074436294485,
"grad_norm": 9.994613647460938,
"learning_rate": 1.9246821569487066e-06,
"loss": 11.7632,
"step": 93100
},
{
"epoch": 40.48247758761206,
"grad_norm": 7.985722541809082,
"learning_rate": 1.920298114861903e-06,
"loss": 11.7661,
"step": 93150
},
{
"epoch": 40.50421081227927,
"grad_norm": 17.07448387145996,
"learning_rate": 1.9159140727750986e-06,
"loss": 11.7407,
"step": 93200
},
{
"epoch": 40.525944036946484,
"grad_norm": 5.9471211433410645,
"learning_rate": 1.911530030688295e-06,
"loss": 11.7688,
"step": 93250
},
{
"epoch": 40.54767726161369,
"grad_norm": 5.977828502655029,
"learning_rate": 1.9071459886014907e-06,
"loss": 11.7515,
"step": 93300
},
{
"epoch": 40.5694104862809,
"grad_norm": 37.1627082824707,
"learning_rate": 1.902761946514687e-06,
"loss": 11.7671,
"step": 93350
},
{
"epoch": 40.59114371094811,
"grad_norm": 6.538187503814697,
"learning_rate": 1.8983779044278827e-06,
"loss": 11.7546,
"step": 93400
},
{
"epoch": 40.61287693561532,
"grad_norm": 7.786463260650635,
"learning_rate": 1.8939938623410785e-06,
"loss": 11.7539,
"step": 93450
},
{
"epoch": 40.634610160282534,
"grad_norm": 9.64076042175293,
"learning_rate": 1.8896098202542746e-06,
"loss": 11.7615,
"step": 93500
},
{
"epoch": 40.65634338494974,
"grad_norm": 6.382667064666748,
"learning_rate": 1.8852257781674704e-06,
"loss": 11.749,
"step": 93550
},
{
"epoch": 40.67807660961695,
"grad_norm": 9.91306209564209,
"learning_rate": 1.8808417360806666e-06,
"loss": 11.7645,
"step": 93600
},
{
"epoch": 40.69980983428416,
"grad_norm": 5.685323715209961,
"learning_rate": 1.8764576939938626e-06,
"loss": 11.7511,
"step": 93650
},
{
"epoch": 40.72154305895137,
"grad_norm": 6.233023166656494,
"learning_rate": 1.8720736519070584e-06,
"loss": 11.7425,
"step": 93700
},
{
"epoch": 40.743276283618584,
"grad_norm": 7.542703628540039,
"learning_rate": 1.8676896098202542e-06,
"loss": 11.7679,
"step": 93750
},
{
"epoch": 40.76500950828579,
"grad_norm": 20.795351028442383,
"learning_rate": 1.8633055677334505e-06,
"loss": 11.7475,
"step": 93800
},
{
"epoch": 40.786742732953,
"grad_norm": 8.967228889465332,
"learning_rate": 1.8589215256466465e-06,
"loss": 11.7556,
"step": 93850
},
{
"epoch": 40.80847595762021,
"grad_norm": 9.180179595947266,
"learning_rate": 1.8545374835598423e-06,
"loss": 11.7549,
"step": 93900
},
{
"epoch": 40.83020918228742,
"grad_norm": 4.092516899108887,
"learning_rate": 1.8501534414730381e-06,
"loss": 11.7597,
"step": 93950
},
{
"epoch": 40.851942406954635,
"grad_norm": 4.368731498718262,
"learning_rate": 1.8457693993862344e-06,
"loss": 11.7655,
"step": 94000
},
{
"epoch": 40.87367563162184,
"grad_norm": 3.8829784393310547,
"learning_rate": 1.8413853572994304e-06,
"loss": 11.7653,
"step": 94050
},
{
"epoch": 40.89540885628905,
"grad_norm": 30.0755615234375,
"learning_rate": 1.8370013152126262e-06,
"loss": 11.7594,
"step": 94100
},
{
"epoch": 40.91714208095626,
"grad_norm": 5.881009578704834,
"learning_rate": 1.8326172731258222e-06,
"loss": 11.7536,
"step": 94150
},
{
"epoch": 40.93887530562347,
"grad_norm": 11.916665077209473,
"learning_rate": 1.828233231039018e-06,
"loss": 11.746,
"step": 94200
},
{
"epoch": 40.960608530290685,
"grad_norm": 4.125995635986328,
"learning_rate": 1.8238491889522143e-06,
"loss": 11.7549,
"step": 94250
},
{
"epoch": 40.98234175495789,
"grad_norm": 9.298914909362793,
"learning_rate": 1.81946514686541e-06,
"loss": 11.746,
"step": 94300
},
{
"epoch": 41.0039119804401,
"grad_norm": 6.543211460113525,
"learning_rate": 1.815081104778606e-06,
"loss": 11.6774,
"step": 94350
},
{
"epoch": 41.02564520510731,
"grad_norm": 10.013541221618652,
"learning_rate": 1.810697062691802e-06,
"loss": 11.747,
"step": 94400
},
{
"epoch": 41.04737842977452,
"grad_norm": 26.30128288269043,
"learning_rate": 1.8063130206049981e-06,
"loss": 11.7447,
"step": 94450
},
{
"epoch": 41.06911165444173,
"grad_norm": 6.381859302520752,
"learning_rate": 1.801928978518194e-06,
"loss": 11.7397,
"step": 94500
},
{
"epoch": 41.090844879108936,
"grad_norm": 6.652487754821777,
"learning_rate": 1.79754493643139e-06,
"loss": 11.7431,
"step": 94550
},
{
"epoch": 41.11257810377615,
"grad_norm": 5.449718952178955,
"learning_rate": 1.7931608943445858e-06,
"loss": 11.7549,
"step": 94600
},
{
"epoch": 41.13431132844336,
"grad_norm": 3.2079617977142334,
"learning_rate": 1.7887768522577818e-06,
"loss": 11.7596,
"step": 94650
},
{
"epoch": 41.15604455311057,
"grad_norm": 9.676004409790039,
"learning_rate": 1.7843928101709778e-06,
"loss": 11.7529,
"step": 94700
},
{
"epoch": 41.17777777777778,
"grad_norm": 9.563153266906738,
"learning_rate": 1.7800087680841738e-06,
"loss": 11.7469,
"step": 94750
},
{
"epoch": 41.199511002444986,
"grad_norm": 8.786322593688965,
"learning_rate": 1.7756247259973697e-06,
"loss": 11.7364,
"step": 94800
},
{
"epoch": 41.221244227112194,
"grad_norm": 8.319864273071289,
"learning_rate": 1.7712406839105657e-06,
"loss": 11.7545,
"step": 94850
},
{
"epoch": 41.24297745177941,
"grad_norm": 11.104187965393066,
"learning_rate": 1.7668566418237617e-06,
"loss": 11.7425,
"step": 94900
},
{
"epoch": 41.26471067644662,
"grad_norm": 5.2200026512146,
"learning_rate": 1.7624725997369577e-06,
"loss": 11.7511,
"step": 94950
},
{
"epoch": 41.28644390111383,
"grad_norm": 5.0673933029174805,
"learning_rate": 1.7580885576501535e-06,
"loss": 11.7597,
"step": 95000
},
{
"epoch": 41.308177125781036,
"grad_norm": 6.790633678436279,
"learning_rate": 1.7537045155633495e-06,
"loss": 11.7566,
"step": 95050
},
{
"epoch": 41.329910350448245,
"grad_norm": 5.625386714935303,
"learning_rate": 1.7493204734765456e-06,
"loss": 11.7483,
"step": 95100
},
{
"epoch": 41.35164357511546,
"grad_norm": 5.591269493103027,
"learning_rate": 1.7449364313897416e-06,
"loss": 11.7497,
"step": 95150
},
{
"epoch": 41.37337679978267,
"grad_norm": 5.52367639541626,
"learning_rate": 1.7405523893029374e-06,
"loss": 11.7637,
"step": 95200
},
{
"epoch": 41.39511002444988,
"grad_norm": 3.352064371109009,
"learning_rate": 1.7361683472161334e-06,
"loss": 11.7539,
"step": 95250
},
{
"epoch": 41.416843249117086,
"grad_norm": 10.966846466064453,
"learning_rate": 1.7317843051293292e-06,
"loss": 11.7587,
"step": 95300
},
{
"epoch": 41.438576473784295,
"grad_norm": 4.151219844818115,
"learning_rate": 1.7274002630425255e-06,
"loss": 11.7389,
"step": 95350
},
{
"epoch": 41.46030969845151,
"grad_norm": 7.952151775360107,
"learning_rate": 1.7230162209557213e-06,
"loss": 11.7477,
"step": 95400
},
{
"epoch": 41.48204292311872,
"grad_norm": 7.96391487121582,
"learning_rate": 1.7186321788689173e-06,
"loss": 11.7551,
"step": 95450
},
{
"epoch": 41.50377614778593,
"grad_norm": 5.120693206787109,
"learning_rate": 1.7142481367821131e-06,
"loss": 11.7426,
"step": 95500
},
{
"epoch": 41.525509372453136,
"grad_norm": 4.3286027908325195,
"learning_rate": 1.7098640946953093e-06,
"loss": 11.7461,
"step": 95550
},
{
"epoch": 41.547242597120345,
"grad_norm": 7.304409027099609,
"learning_rate": 1.7054800526085052e-06,
"loss": 11.7565,
"step": 95600
},
{
"epoch": 41.56897582178756,
"grad_norm": 5.609922885894775,
"learning_rate": 1.7010960105217012e-06,
"loss": 11.7654,
"step": 95650
},
{
"epoch": 41.59070904645477,
"grad_norm": 7.889837265014648,
"learning_rate": 1.696711968434897e-06,
"loss": 11.7447,
"step": 95700
},
{
"epoch": 41.61244227112198,
"grad_norm": 6.620103359222412,
"learning_rate": 1.692327926348093e-06,
"loss": 11.7406,
"step": 95750
},
{
"epoch": 41.63417549578919,
"grad_norm": 5.3006463050842285,
"learning_rate": 1.687943884261289e-06,
"loss": 11.7421,
"step": 95800
},
{
"epoch": 41.655908720456395,
"grad_norm": 6.590972423553467,
"learning_rate": 1.683559842174485e-06,
"loss": 11.7446,
"step": 95850
},
{
"epoch": 41.67764194512361,
"grad_norm": 9.739943504333496,
"learning_rate": 1.6791758000876809e-06,
"loss": 11.7506,
"step": 95900
},
{
"epoch": 41.69937516979082,
"grad_norm": 5.318305969238281,
"learning_rate": 1.6747917580008769e-06,
"loss": 11.7503,
"step": 95950
},
{
"epoch": 41.72110839445803,
"grad_norm": 4.048628807067871,
"learning_rate": 1.6704077159140731e-06,
"loss": 11.7579,
"step": 96000
},
{
"epoch": 41.74284161912524,
"grad_norm": 6.817996025085449,
"learning_rate": 1.666023673827269e-06,
"loss": 11.7427,
"step": 96050
},
{
"epoch": 41.764574843792445,
"grad_norm": 10.243191719055176,
"learning_rate": 1.6616396317404647e-06,
"loss": 11.7662,
"step": 96100
},
{
"epoch": 41.78630806845966,
"grad_norm": 17.077388763427734,
"learning_rate": 1.6572555896536608e-06,
"loss": 11.7471,
"step": 96150
},
{
"epoch": 41.80804129312687,
"grad_norm": 10.154135704040527,
"learning_rate": 1.652871547566857e-06,
"loss": 11.7526,
"step": 96200
},
{
"epoch": 41.82977451779408,
"grad_norm": 15.618302345275879,
"learning_rate": 1.6484875054800528e-06,
"loss": 11.7451,
"step": 96250
},
{
"epoch": 41.85150774246129,
"grad_norm": 5.547327041625977,
"learning_rate": 1.6441034633932486e-06,
"loss": 11.759,
"step": 96300
},
{
"epoch": 41.873240967128496,
"grad_norm": 4.5382304191589355,
"learning_rate": 1.6397194213064446e-06,
"loss": 11.7377,
"step": 96350
},
{
"epoch": 41.89497419179571,
"grad_norm": 4.469516277313232,
"learning_rate": 1.6353353792196405e-06,
"loss": 11.7555,
"step": 96400
},
{
"epoch": 41.91670741646292,
"grad_norm": 4.71604061126709,
"learning_rate": 1.6309513371328367e-06,
"loss": 11.745,
"step": 96450
},
{
"epoch": 41.93844064113013,
"grad_norm": 4.897347450256348,
"learning_rate": 1.6265672950460327e-06,
"loss": 11.7572,
"step": 96500
},
{
"epoch": 41.96017386579734,
"grad_norm": 19.85100746154785,
"learning_rate": 1.6221832529592285e-06,
"loss": 11.7499,
"step": 96550
},
{
"epoch": 41.981907090464546,
"grad_norm": 5.2028117179870605,
"learning_rate": 1.6177992108724243e-06,
"loss": 11.7622,
"step": 96600
},
{
"epoch": 42.00347731594675,
"grad_norm": 12.004639625549316,
"learning_rate": 1.6134151687856206e-06,
"loss": 11.6536,
"step": 96650
},
{
"epoch": 42.02521054061396,
"grad_norm": 5.1376848220825195,
"learning_rate": 1.6090311266988166e-06,
"loss": 11.7351,
"step": 96700
},
{
"epoch": 42.04694376528117,
"grad_norm": 5.914182662963867,
"learning_rate": 1.6046470846120124e-06,
"loss": 11.7339,
"step": 96750
},
{
"epoch": 42.068676989948386,
"grad_norm": 7.787753105163574,
"learning_rate": 1.6002630425252082e-06,
"loss": 11.7402,
"step": 96800
},
{
"epoch": 42.090410214615595,
"grad_norm": 6.375885009765625,
"learning_rate": 1.5958790004384042e-06,
"loss": 11.7447,
"step": 96850
},
{
"epoch": 42.112143439282804,
"grad_norm": 24.318653106689453,
"learning_rate": 1.5914949583516005e-06,
"loss": 11.7561,
"step": 96900
},
{
"epoch": 42.13387666395001,
"grad_norm": 3.9973437786102295,
"learning_rate": 1.5871109162647963e-06,
"loss": 11.7402,
"step": 96950
},
{
"epoch": 42.15560988861722,
"grad_norm": 7.005822658538818,
"learning_rate": 1.5827268741779923e-06,
"loss": 11.739,
"step": 97000
},
{
"epoch": 42.17734311328444,
"grad_norm": 7.374983787536621,
"learning_rate": 1.5783428320911881e-06,
"loss": 11.7416,
"step": 97050
},
{
"epoch": 42.199076337951645,
"grad_norm": 7.227538108825684,
"learning_rate": 1.5739587900043843e-06,
"loss": 11.7344,
"step": 97100
},
{
"epoch": 42.220809562618854,
"grad_norm": 16.485198974609375,
"learning_rate": 1.5695747479175802e-06,
"loss": 11.7412,
"step": 97150
},
{
"epoch": 42.24254278728606,
"grad_norm": 8.366280555725098,
"learning_rate": 1.5651907058307762e-06,
"loss": 11.7441,
"step": 97200
},
{
"epoch": 42.26427601195327,
"grad_norm": 4.64595365524292,
"learning_rate": 1.560806663743972e-06,
"loss": 11.7412,
"step": 97250
},
{
"epoch": 42.28600923662049,
"grad_norm": 8.422093391418457,
"learning_rate": 1.5564226216571678e-06,
"loss": 11.7501,
"step": 97300
},
{
"epoch": 42.307742461287695,
"grad_norm": 7.655717849731445,
"learning_rate": 1.552038579570364e-06,
"loss": 11.7379,
"step": 97350
},
{
"epoch": 42.329475685954904,
"grad_norm": 5.5334272384643555,
"learning_rate": 1.54765453748356e-06,
"loss": 11.7498,
"step": 97400
},
{
"epoch": 42.35120891062211,
"grad_norm": 23.848054885864258,
"learning_rate": 1.5432704953967559e-06,
"loss": 11.7518,
"step": 97450
},
{
"epoch": 42.37294213528932,
"grad_norm": 12.059696197509766,
"learning_rate": 1.5388864533099519e-06,
"loss": 11.7456,
"step": 97500
},
{
"epoch": 42.39467535995654,
"grad_norm": 5.756223678588867,
"learning_rate": 1.534502411223148e-06,
"loss": 11.7399,
"step": 97550
},
{
"epoch": 42.416408584623746,
"grad_norm": 3.3854639530181885,
"learning_rate": 1.530118369136344e-06,
"loss": 11.7399,
"step": 97600
},
{
"epoch": 42.438141809290954,
"grad_norm": 33.99950408935547,
"learning_rate": 1.5257343270495397e-06,
"loss": 11.7448,
"step": 97650
},
{
"epoch": 42.45987503395816,
"grad_norm": 12.245558738708496,
"learning_rate": 1.5213502849627358e-06,
"loss": 11.7508,
"step": 97700
},
{
"epoch": 42.48160825862537,
"grad_norm": 13.875435829162598,
"learning_rate": 1.5169662428759318e-06,
"loss": 11.7354,
"step": 97750
},
{
"epoch": 42.50334148329259,
"grad_norm": 4.234302997589111,
"learning_rate": 1.5125822007891278e-06,
"loss": 11.7334,
"step": 97800
},
{
"epoch": 42.525074707959796,
"grad_norm": 6.025676727294922,
"learning_rate": 1.5081981587023236e-06,
"loss": 11.7598,
"step": 97850
},
{
"epoch": 42.546807932627004,
"grad_norm": 6.693691253662109,
"learning_rate": 1.5038141166155196e-06,
"loss": 11.7353,
"step": 97900
},
{
"epoch": 42.56854115729421,
"grad_norm": 15.162924766540527,
"learning_rate": 1.4994300745287155e-06,
"loss": 11.7358,
"step": 97950
},
{
"epoch": 42.59027438196142,
"grad_norm": 14.031683921813965,
"learning_rate": 1.4950460324419117e-06,
"loss": 11.7534,
"step": 98000
},
{
"epoch": 42.61200760662863,
"grad_norm": 5.764448642730713,
"learning_rate": 1.4906619903551075e-06,
"loss": 11.7335,
"step": 98050
},
{
"epoch": 42.633740831295846,
"grad_norm": 4.945387840270996,
"learning_rate": 1.4862779482683035e-06,
"loss": 11.7459,
"step": 98100
},
{
"epoch": 42.655474055963055,
"grad_norm": 5.07288932800293,
"learning_rate": 1.4818939061814993e-06,
"loss": 11.7434,
"step": 98150
},
{
"epoch": 42.67720728063026,
"grad_norm": 5.957432270050049,
"learning_rate": 1.4775098640946956e-06,
"loss": 11.7353,
"step": 98200
},
{
"epoch": 42.69894050529747,
"grad_norm": 11.405596733093262,
"learning_rate": 1.4731258220078914e-06,
"loss": 11.7419,
"step": 98250
},
{
"epoch": 42.72067372996468,
"grad_norm": 5.61083984375,
"learning_rate": 1.4687417799210874e-06,
"loss": 11.7435,
"step": 98300
},
{
"epoch": 42.742406954631896,
"grad_norm": 5.795932292938232,
"learning_rate": 1.4643577378342832e-06,
"loss": 11.7536,
"step": 98350
},
{
"epoch": 42.764140179299105,
"grad_norm": 14.968274116516113,
"learning_rate": 1.4599736957474792e-06,
"loss": 11.7532,
"step": 98400
},
{
"epoch": 42.78587340396631,
"grad_norm": 4.567286491394043,
"learning_rate": 1.4555896536606753e-06,
"loss": 11.7412,
"step": 98450
},
{
"epoch": 42.80760662863352,
"grad_norm": 3.640240430831909,
"learning_rate": 1.4512056115738713e-06,
"loss": 11.7526,
"step": 98500
},
{
"epoch": 42.82933985330073,
"grad_norm": 7.847878456115723,
"learning_rate": 1.446821569487067e-06,
"loss": 11.7427,
"step": 98550
},
{
"epoch": 42.851073077967946,
"grad_norm": 9.818808555603027,
"learning_rate": 1.4424375274002631e-06,
"loss": 11.7491,
"step": 98600
},
{
"epoch": 42.872806302635155,
"grad_norm": 6.466832637786865,
"learning_rate": 1.4380534853134591e-06,
"loss": 11.7487,
"step": 98650
},
{
"epoch": 42.89453952730236,
"grad_norm": 12.53541374206543,
"learning_rate": 1.4336694432266552e-06,
"loss": 11.7451,
"step": 98700
},
{
"epoch": 42.91627275196957,
"grad_norm": 9.651721000671387,
"learning_rate": 1.429285401139851e-06,
"loss": 11.7455,
"step": 98750
},
{
"epoch": 42.93800597663678,
"grad_norm": 7.363193511962891,
"learning_rate": 1.424901359053047e-06,
"loss": 11.7442,
"step": 98800
},
{
"epoch": 42.959739201304,
"grad_norm": 6.456315040588379,
"learning_rate": 1.4205173169662432e-06,
"loss": 11.7378,
"step": 98850
},
{
"epoch": 42.981472425971205,
"grad_norm": 4.924785137176514,
"learning_rate": 1.416133274879439e-06,
"loss": 11.7338,
"step": 98900
},
{
"epoch": 43.00304265145341,
"grad_norm": 12.623988151550293,
"learning_rate": 1.4117492327926348e-06,
"loss": 11.6662,
"step": 98950
},
{
"epoch": 43.02477587612062,
"grad_norm": 9.521653175354004,
"learning_rate": 1.4073651907058309e-06,
"loss": 11.7329,
"step": 99000
},
{
"epoch": 43.04650910078783,
"grad_norm": 4.056196212768555,
"learning_rate": 1.4029811486190267e-06,
"loss": 11.7467,
"step": 99050
},
{
"epoch": 43.06824232545504,
"grad_norm": 3.722097396850586,
"learning_rate": 1.398597106532223e-06,
"loss": 11.735,
"step": 99100
},
{
"epoch": 43.08997555012225,
"grad_norm": 3.6029582023620605,
"learning_rate": 1.3942130644454187e-06,
"loss": 11.7395,
"step": 99150
},
{
"epoch": 43.111708774789456,
"grad_norm": 4.464113712310791,
"learning_rate": 1.3898290223586147e-06,
"loss": 11.7417,
"step": 99200
},
{
"epoch": 43.13344199945667,
"grad_norm": 7.3181986808776855,
"learning_rate": 1.3854449802718106e-06,
"loss": 11.7556,
"step": 99250
},
{
"epoch": 43.15517522412388,
"grad_norm": 12.537137031555176,
"learning_rate": 1.3810609381850068e-06,
"loss": 11.7485,
"step": 99300
},
{
"epoch": 43.17690844879109,
"grad_norm": 14.284486770629883,
"learning_rate": 1.3766768960982028e-06,
"loss": 11.7374,
"step": 99350
},
{
"epoch": 43.1986416734583,
"grad_norm": 3.9522204399108887,
"learning_rate": 1.3722928540113986e-06,
"loss": 11.7394,
"step": 99400
},
{
"epoch": 43.220374898125506,
"grad_norm": 3.287987232208252,
"learning_rate": 1.3679088119245944e-06,
"loss": 11.7358,
"step": 99450
},
{
"epoch": 43.24210812279272,
"grad_norm": 5.351846694946289,
"learning_rate": 1.3635247698377905e-06,
"loss": 11.7402,
"step": 99500
},
{
"epoch": 43.26384134745993,
"grad_norm": 13.18363094329834,
"learning_rate": 1.3591407277509867e-06,
"loss": 11.7343,
"step": 99550
},
{
"epoch": 43.28557457212714,
"grad_norm": 2.8352560997009277,
"learning_rate": 1.3547566856641825e-06,
"loss": 11.7408,
"step": 99600
},
{
"epoch": 43.30730779679435,
"grad_norm": 4.404722690582275,
"learning_rate": 1.3503726435773783e-06,
"loss": 11.7488,
"step": 99650
},
{
"epoch": 43.329041021461556,
"grad_norm": 5.89795446395874,
"learning_rate": 1.3459886014905743e-06,
"loss": 11.7417,
"step": 99700
},
{
"epoch": 43.35077424612877,
"grad_norm": 3.792579412460327,
"learning_rate": 1.3416045594037706e-06,
"loss": 11.742,
"step": 99750
},
{
"epoch": 43.37250747079598,
"grad_norm": 6.295860290527344,
"learning_rate": 1.3372205173169664e-06,
"loss": 11.7419,
"step": 99800
},
{
"epoch": 43.39424069546319,
"grad_norm": 4.617618560791016,
"learning_rate": 1.3328364752301624e-06,
"loss": 11.7388,
"step": 99850
},
{
"epoch": 43.4159739201304,
"grad_norm": 3.46701717376709,
"learning_rate": 1.3284524331433582e-06,
"loss": 11.739,
"step": 99900
},
{
"epoch": 43.437707144797606,
"grad_norm": 5.266394138336182,
"learning_rate": 1.3240683910565544e-06,
"loss": 11.7336,
"step": 99950
},
{
"epoch": 43.45944036946482,
"grad_norm": 4.314075946807861,
"learning_rate": 1.3196843489697503e-06,
"loss": 11.7505,
"step": 100000
},
{
"epoch": 43.45944036946482,
"eval_cer": 0.07396960981570859,
"eval_loss": 2.4215219020843506,
"eval_runtime": 398.4535,
"eval_samples_per_second": 13.567,
"eval_steps_per_second": 3.393,
"eval_wer": 0.22507457999685979,
"step": 100000
},
{
"epoch": 43.48117359413203,
"grad_norm": 4.137760639190674,
"learning_rate": 1.3153003068829463e-06,
"loss": 11.7369,
"step": 100050
},
{
"epoch": 43.50290681879924,
"grad_norm": 16.71613311767578,
"learning_rate": 1.310916264796142e-06,
"loss": 11.7308,
"step": 100100
},
{
"epoch": 43.52464004346645,
"grad_norm": 3.2521612644195557,
"learning_rate": 1.306532222709338e-06,
"loss": 11.7266,
"step": 100150
},
{
"epoch": 43.54637326813366,
"grad_norm": 5.732780456542969,
"learning_rate": 1.3021481806225341e-06,
"loss": 11.7389,
"step": 100200
},
{
"epoch": 43.56810649280087,
"grad_norm": 6.189255714416504,
"learning_rate": 1.2977641385357302e-06,
"loss": 11.7341,
"step": 100250
},
{
"epoch": 43.58983971746808,
"grad_norm": 4.808958053588867,
"learning_rate": 1.293380096448926e-06,
"loss": 11.739,
"step": 100300
},
{
"epoch": 43.61157294213529,
"grad_norm": 13.771133422851562,
"learning_rate": 1.288996054362122e-06,
"loss": 11.734,
"step": 100350
},
{
"epoch": 43.6333061668025,
"grad_norm": 5.9765400886535645,
"learning_rate": 1.284612012275318e-06,
"loss": 11.7398,
"step": 100400
},
{
"epoch": 43.65503939146971,
"grad_norm": 4.513848304748535,
"learning_rate": 1.280227970188514e-06,
"loss": 11.7321,
"step": 100450
},
{
"epoch": 43.67677261613692,
"grad_norm": 3.8837485313415527,
"learning_rate": 1.2758439281017098e-06,
"loss": 11.7257,
"step": 100500
},
{
"epoch": 43.69850584080413,
"grad_norm": 5.340435028076172,
"learning_rate": 1.2714598860149059e-06,
"loss": 11.7429,
"step": 100550
},
{
"epoch": 43.72023906547134,
"grad_norm": 5.877197742462158,
"learning_rate": 1.2670758439281017e-06,
"loss": 11.728,
"step": 100600
},
{
"epoch": 43.74197229013855,
"grad_norm": 11.274967193603516,
"learning_rate": 1.262691801841298e-06,
"loss": 11.7314,
"step": 100650
},
{
"epoch": 43.76370551480576,
"grad_norm": 7.218222141265869,
"learning_rate": 1.2583077597544937e-06,
"loss": 11.726,
"step": 100700
},
{
"epoch": 43.78543873947297,
"grad_norm": 3.588624954223633,
"learning_rate": 1.2539237176676897e-06,
"loss": 11.7394,
"step": 100750
},
{
"epoch": 43.80717196414018,
"grad_norm": 10.603730201721191,
"learning_rate": 1.2495396755808858e-06,
"loss": 11.7442,
"step": 100800
},
{
"epoch": 43.82890518880739,
"grad_norm": 4.9444379806518555,
"learning_rate": 1.2451556334940816e-06,
"loss": 11.7318,
"step": 100850
},
{
"epoch": 43.8506384134746,
"grad_norm": 16.76546287536621,
"learning_rate": 1.2407715914072776e-06,
"loss": 11.731,
"step": 100900
},
{
"epoch": 43.87237163814181,
"grad_norm": 3.7398102283477783,
"learning_rate": 1.2363875493204736e-06,
"loss": 11.7348,
"step": 100950
},
{
"epoch": 43.894104862809016,
"grad_norm": 4.954889297485352,
"learning_rate": 1.2320035072336696e-06,
"loss": 11.7333,
"step": 101000
},
{
"epoch": 43.91583808747623,
"grad_norm": 3.635148286819458,
"learning_rate": 1.2276194651468654e-06,
"loss": 11.7388,
"step": 101050
},
{
"epoch": 43.93757131214344,
"grad_norm": 9.620790481567383,
"learning_rate": 1.2232354230600615e-06,
"loss": 11.7254,
"step": 101100
},
{
"epoch": 43.95930453681065,
"grad_norm": 4.13824462890625,
"learning_rate": 1.2188513809732575e-06,
"loss": 11.7366,
"step": 101150
},
{
"epoch": 43.98103776147786,
"grad_norm": 6.983582973480225,
"learning_rate": 1.2144673388864533e-06,
"loss": 11.7287,
"step": 101200
},
{
"epoch": 44.002607986960065,
"grad_norm": 4.2465691566467285,
"learning_rate": 1.2100832967996493e-06,
"loss": 11.649,
"step": 101250
},
{
"epoch": 44.024341211627274,
"grad_norm": 17.737117767333984,
"learning_rate": 1.2056992547128453e-06,
"loss": 11.7302,
"step": 101300
},
{
"epoch": 44.04607443629448,
"grad_norm": 4.739099502563477,
"learning_rate": 1.2013152126260414e-06,
"loss": 11.75,
"step": 101350
},
{
"epoch": 44.0678076609617,
"grad_norm": 3.695364236831665,
"learning_rate": 1.1969311705392372e-06,
"loss": 11.7399,
"step": 101400
},
{
"epoch": 44.08954088562891,
"grad_norm": 6.371554851531982,
"learning_rate": 1.1925471284524332e-06,
"loss": 11.7314,
"step": 101450
},
{
"epoch": 44.111274110296115,
"grad_norm": 24.45339584350586,
"learning_rate": 1.1881630863656292e-06,
"loss": 11.7215,
"step": 101500
},
{
"epoch": 44.133007334963324,
"grad_norm": 5.760914325714111,
"learning_rate": 1.1837790442788252e-06,
"loss": 11.7376,
"step": 101550
},
{
"epoch": 44.15474055963053,
"grad_norm": 4.051183700561523,
"learning_rate": 1.179395002192021e-06,
"loss": 11.7265,
"step": 101600
},
{
"epoch": 44.17647378429775,
"grad_norm": 5.363418102264404,
"learning_rate": 1.175010960105217e-06,
"loss": 11.7367,
"step": 101650
},
{
"epoch": 44.19820700896496,
"grad_norm": 3.471618890762329,
"learning_rate": 1.170626918018413e-06,
"loss": 11.7384,
"step": 101700
},
{
"epoch": 44.219940233632165,
"grad_norm": 7.958500385284424,
"learning_rate": 1.166242875931609e-06,
"loss": 11.7332,
"step": 101750
},
{
"epoch": 44.241673458299374,
"grad_norm": 25.589679718017578,
"learning_rate": 1.161858833844805e-06,
"loss": 11.7298,
"step": 101800
},
{
"epoch": 44.26340668296658,
"grad_norm": 11.511533737182617,
"learning_rate": 1.157474791758001e-06,
"loss": 11.7272,
"step": 101850
},
{
"epoch": 44.2851399076338,
"grad_norm": 4.467309474945068,
"learning_rate": 1.153090749671197e-06,
"loss": 11.7184,
"step": 101900
},
{
"epoch": 44.30687313230101,
"grad_norm": 4.863615989685059,
"learning_rate": 1.1487067075843928e-06,
"loss": 11.7395,
"step": 101950
},
{
"epoch": 44.328606356968216,
"grad_norm": 6.230271816253662,
"learning_rate": 1.1443226654975888e-06,
"loss": 11.7373,
"step": 102000
},
{
"epoch": 44.350339581635424,
"grad_norm": 4.7530517578125,
"learning_rate": 1.1399386234107848e-06,
"loss": 11.7472,
"step": 102050
},
{
"epoch": 44.37207280630263,
"grad_norm": 22.007238388061523,
"learning_rate": 1.1355545813239809e-06,
"loss": 11.7212,
"step": 102100
},
{
"epoch": 44.39380603096985,
"grad_norm": 6.660682678222656,
"learning_rate": 1.1311705392371767e-06,
"loss": 11.7316,
"step": 102150
},
{
"epoch": 44.41553925563706,
"grad_norm": 5.25778865814209,
"learning_rate": 1.126786497150373e-06,
"loss": 11.7305,
"step": 102200
},
{
"epoch": 44.437272480304266,
"grad_norm": 5.097360134124756,
"learning_rate": 1.1224024550635687e-06,
"loss": 11.7314,
"step": 102250
},
{
"epoch": 44.459005704971474,
"grad_norm": 4.272281169891357,
"learning_rate": 1.1180184129767645e-06,
"loss": 11.7419,
"step": 102300
},
{
"epoch": 44.48073892963868,
"grad_norm": 6.060675144195557,
"learning_rate": 1.1136343708899605e-06,
"loss": 11.7367,
"step": 102350
},
{
"epoch": 44.50247215430589,
"grad_norm": 5.883248329162598,
"learning_rate": 1.1092503288031566e-06,
"loss": 11.7368,
"step": 102400
},
{
"epoch": 44.52420537897311,
"grad_norm": 6.329914093017578,
"learning_rate": 1.1048662867163526e-06,
"loss": 11.7303,
"step": 102450
},
{
"epoch": 44.545938603640316,
"grad_norm": 6.62354850769043,
"learning_rate": 1.1004822446295484e-06,
"loss": 11.7375,
"step": 102500
},
{
"epoch": 44.567671828307525,
"grad_norm": 10.634700775146484,
"learning_rate": 1.0960982025427446e-06,
"loss": 11.7295,
"step": 102550
},
{
"epoch": 44.58940505297473,
"grad_norm": 2.787297487258911,
"learning_rate": 1.0917141604559404e-06,
"loss": 11.7311,
"step": 102600
},
{
"epoch": 44.61113827764194,
"grad_norm": 4.915313720703125,
"learning_rate": 1.0873301183691365e-06,
"loss": 11.7368,
"step": 102650
},
{
"epoch": 44.63287150230916,
"grad_norm": 13.359769821166992,
"learning_rate": 1.0829460762823325e-06,
"loss": 11.7417,
"step": 102700
},
{
"epoch": 44.654604726976366,
"grad_norm": 3.9991888999938965,
"learning_rate": 1.0785620341955285e-06,
"loss": 11.7392,
"step": 102750
},
{
"epoch": 44.676337951643575,
"grad_norm": 3.797086238861084,
"learning_rate": 1.0741779921087243e-06,
"loss": 11.7242,
"step": 102800
},
{
"epoch": 44.69807117631078,
"grad_norm": 6.608884811401367,
"learning_rate": 1.0697939500219203e-06,
"loss": 11.7231,
"step": 102850
},
{
"epoch": 44.71980440097799,
"grad_norm": 10.230695724487305,
"learning_rate": 1.0654099079351164e-06,
"loss": 11.7325,
"step": 102900
},
{
"epoch": 44.74153762564521,
"grad_norm": 4.2929301261901855,
"learning_rate": 1.0610258658483122e-06,
"loss": 11.7303,
"step": 102950
},
{
"epoch": 44.763270850312416,
"grad_norm": 5.952197074890137,
"learning_rate": 1.0566418237615082e-06,
"loss": 11.7291,
"step": 103000
},
{
"epoch": 44.785004074979625,
"grad_norm": 10.304634094238281,
"learning_rate": 1.0522577816747042e-06,
"loss": 11.7452,
"step": 103050
},
{
"epoch": 44.806737299646834,
"grad_norm": 4.195600509643555,
"learning_rate": 1.0478737395879002e-06,
"loss": 11.744,
"step": 103100
},
{
"epoch": 44.82847052431404,
"grad_norm": 3.8358092308044434,
"learning_rate": 1.043489697501096e-06,
"loss": 11.7301,
"step": 103150
},
{
"epoch": 44.85020374898126,
"grad_norm": 13.731127738952637,
"learning_rate": 1.039105655414292e-06,
"loss": 11.7318,
"step": 103200
},
{
"epoch": 44.87193697364847,
"grad_norm": 4.197743892669678,
"learning_rate": 1.034721613327488e-06,
"loss": 11.7318,
"step": 103250
},
{
"epoch": 44.893670198315675,
"grad_norm": 8.494996070861816,
"learning_rate": 1.030337571240684e-06,
"loss": 11.7357,
"step": 103300
},
{
"epoch": 44.915403422982884,
"grad_norm": 3.6425983905792236,
"learning_rate": 1.02595352915388e-06,
"loss": 11.729,
"step": 103350
},
{
"epoch": 44.93713664765009,
"grad_norm": 4.333567142486572,
"learning_rate": 1.021569487067076e-06,
"loss": 11.7339,
"step": 103400
},
{
"epoch": 44.95886987231731,
"grad_norm": 4.8357930183410645,
"learning_rate": 1.017185444980272e-06,
"loss": 11.735,
"step": 103450
},
{
"epoch": 44.98060309698452,
"grad_norm": 8.409868240356445,
"learning_rate": 1.0128014028934678e-06,
"loss": 11.7336,
"step": 103500
},
{
"epoch": 45.002173322466724,
"grad_norm": 3.9856626987457275,
"learning_rate": 1.0084173608066638e-06,
"loss": 11.653,
"step": 103550
},
{
"epoch": 45.02390654713393,
"grad_norm": 4.529376029968262,
"learning_rate": 1.0040333187198598e-06,
"loss": 11.7279,
"step": 103600
},
{
"epoch": 45.04563977180114,
"grad_norm": 4.276280403137207,
"learning_rate": 9.996492766330559e-07,
"loss": 11.7362,
"step": 103650
},
{
"epoch": 45.06737299646835,
"grad_norm": 4.415678024291992,
"learning_rate": 9.952652345462517e-07,
"loss": 11.7194,
"step": 103700
},
{
"epoch": 45.08910622113556,
"grad_norm": 4.513889789581299,
"learning_rate": 9.908811924594477e-07,
"loss": 11.7178,
"step": 103750
},
{
"epoch": 45.11083944580277,
"grad_norm": 5.300011157989502,
"learning_rate": 9.864971503726437e-07,
"loss": 11.727,
"step": 103800
},
{
"epoch": 45.13257267046998,
"grad_norm": 3.8258767127990723,
"learning_rate": 9.821131082858395e-07,
"loss": 11.7328,
"step": 103850
},
{
"epoch": 45.15430589513719,
"grad_norm": 7.767271995544434,
"learning_rate": 9.777290661990355e-07,
"loss": 11.7202,
"step": 103900
},
{
"epoch": 45.1760391198044,
"grad_norm": 3.230754852294922,
"learning_rate": 9.733450241122316e-07,
"loss": 11.7268,
"step": 103950
},
{
"epoch": 45.19777234447161,
"grad_norm": 3.8269119262695312,
"learning_rate": 9.689609820254276e-07,
"loss": 11.7249,
"step": 104000
},
{
"epoch": 45.21950556913882,
"grad_norm": 5.030121803283691,
"learning_rate": 9.645769399386234e-07,
"loss": 11.7181,
"step": 104050
},
{
"epoch": 45.24123879380603,
"grad_norm": 4.850019931793213,
"learning_rate": 9.601928978518194e-07,
"loss": 11.7272,
"step": 104100
},
{
"epoch": 45.26297201847324,
"grad_norm": 6.58116340637207,
"learning_rate": 9.558088557650154e-07,
"loss": 11.7237,
"step": 104150
},
{
"epoch": 45.28470524314045,
"grad_norm": 20.67346954345703,
"learning_rate": 9.514248136782115e-07,
"loss": 11.7302,
"step": 104200
},
{
"epoch": 45.30643846780766,
"grad_norm": 3.362128973007202,
"learning_rate": 9.470407715914074e-07,
"loss": 11.7209,
"step": 104250
},
{
"epoch": 45.32817169247487,
"grad_norm": 7.51302433013916,
"learning_rate": 9.426567295046034e-07,
"loss": 11.7303,
"step": 104300
},
{
"epoch": 45.349904917142084,
"grad_norm": 4.610814094543457,
"learning_rate": 9.382726874177993e-07,
"loss": 11.7177,
"step": 104350
},
{
"epoch": 45.37163814180929,
"grad_norm": 13.158862113952637,
"learning_rate": 9.338886453309952e-07,
"loss": 11.7227,
"step": 104400
},
{
"epoch": 45.3933713664765,
"grad_norm": 4.248621940612793,
"learning_rate": 9.295046032441913e-07,
"loss": 11.7428,
"step": 104450
},
{
"epoch": 45.41510459114371,
"grad_norm": 3.553060531616211,
"learning_rate": 9.251205611573872e-07,
"loss": 11.7294,
"step": 104500
},
{
"epoch": 45.43683781581092,
"grad_norm": 5.807036399841309,
"learning_rate": 9.207365190705832e-07,
"loss": 11.7284,
"step": 104550
},
{
"epoch": 45.458571040478134,
"grad_norm": 13.629132270812988,
"learning_rate": 9.163524769837791e-07,
"loss": 11.7301,
"step": 104600
},
{
"epoch": 45.48030426514534,
"grad_norm": 4.1011857986450195,
"learning_rate": 9.119684348969751e-07,
"loss": 11.7319,
"step": 104650
},
{
"epoch": 45.50203748981255,
"grad_norm": 10.649341583251953,
"learning_rate": 9.075843928101711e-07,
"loss": 11.7333,
"step": 104700
},
{
"epoch": 45.52377071447976,
"grad_norm": 20.217660903930664,
"learning_rate": 9.032003507233671e-07,
"loss": 11.7319,
"step": 104750
},
{
"epoch": 45.54550393914697,
"grad_norm": 7.371703624725342,
"learning_rate": 8.98816308636563e-07,
"loss": 11.7223,
"step": 104800
},
{
"epoch": 45.567237163814184,
"grad_norm": 6.1061110496521,
"learning_rate": 8.94432266549759e-07,
"loss": 11.7134,
"step": 104850
},
{
"epoch": 45.58897038848139,
"grad_norm": 3.3697314262390137,
"learning_rate": 8.900482244629549e-07,
"loss": 11.7206,
"step": 104900
},
{
"epoch": 45.6107036131486,
"grad_norm": 5.704832077026367,
"learning_rate": 8.856641823761508e-07,
"loss": 11.7343,
"step": 104950
},
{
"epoch": 45.63243683781581,
"grad_norm": 5.612690448760986,
"learning_rate": 8.812801402893469e-07,
"loss": 11.7228,
"step": 105000
},
{
"epoch": 45.65417006248302,
"grad_norm": 4.661070823669434,
"learning_rate": 8.768960982025428e-07,
"loss": 11.7344,
"step": 105050
},
{
"epoch": 45.675903287150234,
"grad_norm": 4.922998905181885,
"learning_rate": 8.725120561157388e-07,
"loss": 11.7161,
"step": 105100
},
{
"epoch": 45.69763651181744,
"grad_norm": 6.320181369781494,
"learning_rate": 8.681280140289347e-07,
"loss": 11.7451,
"step": 105150
},
{
"epoch": 45.71936973648465,
"grad_norm": 6.543067455291748,
"learning_rate": 8.637439719421307e-07,
"loss": 11.7187,
"step": 105200
},
{
"epoch": 45.74110296115186,
"grad_norm": 7.506560802459717,
"learning_rate": 8.593599298553267e-07,
"loss": 11.7368,
"step": 105250
},
{
"epoch": 45.76283618581907,
"grad_norm": 6.871926307678223,
"learning_rate": 8.549758877685227e-07,
"loss": 11.7347,
"step": 105300
},
{
"epoch": 45.784569410486284,
"grad_norm": 4.491659641265869,
"learning_rate": 8.505918456817186e-07,
"loss": 11.7379,
"step": 105350
},
{
"epoch": 45.80630263515349,
"grad_norm": 21.81031036376953,
"learning_rate": 8.462078035949146e-07,
"loss": 11.7307,
"step": 105400
},
{
"epoch": 45.8280358598207,
"grad_norm": 20.492307662963867,
"learning_rate": 8.418237615081105e-07,
"loss": 11.7268,
"step": 105450
},
{
"epoch": 45.84976908448791,
"grad_norm": 7.620596408843994,
"learning_rate": 8.374397194213065e-07,
"loss": 11.7384,
"step": 105500
},
{
"epoch": 45.87150230915512,
"grad_norm": 4.937099456787109,
"learning_rate": 8.330556773345025e-07,
"loss": 11.7266,
"step": 105550
},
{
"epoch": 45.89323553382233,
"grad_norm": 3.815049409866333,
"learning_rate": 8.286716352476984e-07,
"loss": 11.7281,
"step": 105600
},
{
"epoch": 45.91496875848954,
"grad_norm": 9.32738208770752,
"learning_rate": 8.242875931608944e-07,
"loss": 11.7424,
"step": 105650
},
{
"epoch": 45.93670198315675,
"grad_norm": 12.112308502197266,
"learning_rate": 8.199035510740903e-07,
"loss": 11.729,
"step": 105700
},
{
"epoch": 45.95843520782396,
"grad_norm": 4.76987361907959,
"learning_rate": 8.155195089872864e-07,
"loss": 11.7292,
"step": 105750
},
{
"epoch": 45.98016843249117,
"grad_norm": 11.38598346710205,
"learning_rate": 8.111354669004823e-07,
"loss": 11.7356,
"step": 105800
},
{
"epoch": 46.00173865797338,
"grad_norm": 18.5734806060791,
"learning_rate": 8.067514248136783e-07,
"loss": 11.6526,
"step": 105850
},
{
"epoch": 46.023471882640585,
"grad_norm": 3.3094968795776367,
"learning_rate": 8.023673827268742e-07,
"loss": 11.7246,
"step": 105900
},
{
"epoch": 46.045205107307794,
"grad_norm": 10.625943183898926,
"learning_rate": 7.979833406400702e-07,
"loss": 11.7197,
"step": 105950
},
{
"epoch": 46.06693833197501,
"grad_norm": 11.11587142944336,
"learning_rate": 7.935992985532662e-07,
"loss": 11.712,
"step": 106000
},
{
"epoch": 46.08867155664222,
"grad_norm": 5.9816083908081055,
"learning_rate": 7.892152564664621e-07,
"loss": 11.7291,
"step": 106050
},
{
"epoch": 46.11040478130943,
"grad_norm": 5.810311317443848,
"learning_rate": 7.848312143796581e-07,
"loss": 11.72,
"step": 106100
},
{
"epoch": 46.132138005976635,
"grad_norm": 9.10987377166748,
"learning_rate": 7.80447172292854e-07,
"loss": 11.7341,
"step": 106150
},
{
"epoch": 46.153871230643844,
"grad_norm": 3.9713680744171143,
"learning_rate": 7.7606313020605e-07,
"loss": 11.728,
"step": 106200
},
{
"epoch": 46.17560445531106,
"grad_norm": 10.883817672729492,
"learning_rate": 7.716790881192459e-07,
"loss": 11.7321,
"step": 106250
},
{
"epoch": 46.19733767997827,
"grad_norm": 3.261399745941162,
"learning_rate": 7.67295046032442e-07,
"loss": 11.7197,
"step": 106300
},
{
"epoch": 46.21907090464548,
"grad_norm": 3.867229461669922,
"learning_rate": 7.629110039456379e-07,
"loss": 11.7311,
"step": 106350
},
{
"epoch": 46.240804129312686,
"grad_norm": 5.125184059143066,
"learning_rate": 7.585269618588339e-07,
"loss": 11.7357,
"step": 106400
},
{
"epoch": 46.262537353979894,
"grad_norm": 3.271857500076294,
"learning_rate": 7.541429197720298e-07,
"loss": 11.7182,
"step": 106450
},
{
"epoch": 46.28427057864711,
"grad_norm": 2.972466230392456,
"learning_rate": 7.49758877685226e-07,
"loss": 11.7244,
"step": 106500
},
{
"epoch": 46.30600380331432,
"grad_norm": 10.320878028869629,
"learning_rate": 7.453748355984218e-07,
"loss": 11.7296,
"step": 106550
},
{
"epoch": 46.32773702798153,
"grad_norm": 7.7540483474731445,
"learning_rate": 7.409907935116177e-07,
"loss": 11.7175,
"step": 106600
},
{
"epoch": 46.349470252648736,
"grad_norm": 5.142116546630859,
"learning_rate": 7.366067514248137e-07,
"loss": 11.7324,
"step": 106650
},
{
"epoch": 46.371203477315944,
"grad_norm": 3.158510446548462,
"learning_rate": 7.322227093380096e-07,
"loss": 11.7292,
"step": 106700
},
{
"epoch": 46.39293670198316,
"grad_norm": 3.982985258102417,
"learning_rate": 7.278386672512057e-07,
"loss": 11.7214,
"step": 106750
},
{
"epoch": 46.41466992665037,
"grad_norm": 3.4331562519073486,
"learning_rate": 7.234546251644016e-07,
"loss": 11.7263,
"step": 106800
},
{
"epoch": 46.43640315131758,
"grad_norm": 5.8017401695251465,
"learning_rate": 7.190705830775977e-07,
"loss": 11.7349,
"step": 106850
},
{
"epoch": 46.458136375984786,
"grad_norm": 4.63163948059082,
"learning_rate": 7.146865409907935e-07,
"loss": 11.7241,
"step": 106900
},
{
"epoch": 46.479869600651995,
"grad_norm": 4.267096996307373,
"learning_rate": 7.103024989039896e-07,
"loss": 11.7189,
"step": 106950
},
{
"epoch": 46.5016028253192,
"grad_norm": 4.0522871017456055,
"learning_rate": 7.059184568171855e-07,
"loss": 11.7209,
"step": 107000
},
{
"epoch": 46.52333604998642,
"grad_norm": 5.3363142013549805,
"learning_rate": 7.015344147303816e-07,
"loss": 11.7084,
"step": 107050
},
{
"epoch": 46.54506927465363,
"grad_norm": 4.059858322143555,
"learning_rate": 6.971503726435775e-07,
"loss": 11.7125,
"step": 107100
},
{
"epoch": 46.566802499320836,
"grad_norm": 3.252812623977661,
"learning_rate": 6.927663305567733e-07,
"loss": 11.7268,
"step": 107150
},
{
"epoch": 46.588535723988045,
"grad_norm": 4.82172966003418,
"learning_rate": 6.883822884699694e-07,
"loss": 11.7139,
"step": 107200
},
{
"epoch": 46.61026894865525,
"grad_norm": 8.201459884643555,
"learning_rate": 6.839982463831653e-07,
"loss": 11.7298,
"step": 107250
},
{
"epoch": 46.63200217332247,
"grad_norm": 3.159785747528076,
"learning_rate": 6.796142042963614e-07,
"loss": 11.7209,
"step": 107300
},
{
"epoch": 46.65373539798968,
"grad_norm": 11.2830171585083,
"learning_rate": 6.752301622095573e-07,
"loss": 11.7288,
"step": 107350
},
{
"epoch": 46.67546862265689,
"grad_norm": 4.074632167816162,
"learning_rate": 6.708461201227533e-07,
"loss": 11.7174,
"step": 107400
},
{
"epoch": 46.697201847324095,
"grad_norm": 12.465502738952637,
"learning_rate": 6.664620780359492e-07,
"loss": 11.7186,
"step": 107450
},
{
"epoch": 46.718935071991304,
"grad_norm": 4.864065647125244,
"learning_rate": 6.620780359491452e-07,
"loss": 11.7249,
"step": 107500
},
{
"epoch": 46.74066829665852,
"grad_norm": 7.3475341796875,
"learning_rate": 6.576939938623411e-07,
"loss": 11.7212,
"step": 107550
},
{
"epoch": 46.76240152132573,
"grad_norm": 6.0634565353393555,
"learning_rate": 6.533099517755372e-07,
"loss": 11.7342,
"step": 107600
},
{
"epoch": 46.78413474599294,
"grad_norm": 6.251104831695557,
"learning_rate": 6.489259096887331e-07,
"loss": 11.7225,
"step": 107650
},
{
"epoch": 46.805867970660145,
"grad_norm": 5.822422027587891,
"learning_rate": 6.44541867601929e-07,
"loss": 11.7272,
"step": 107700
},
{
"epoch": 46.827601195327354,
"grad_norm": 8.700297355651855,
"learning_rate": 6.40157825515125e-07,
"loss": 11.7297,
"step": 107750
},
{
"epoch": 46.84933441999457,
"grad_norm": 5.136385917663574,
"learning_rate": 6.357737834283209e-07,
"loss": 11.726,
"step": 107800
},
{
"epoch": 46.87106764466178,
"grad_norm": 5.658658981323242,
"learning_rate": 6.31389741341517e-07,
"loss": 11.7237,
"step": 107850
},
{
"epoch": 46.89280086932899,
"grad_norm": 6.1630353927612305,
"learning_rate": 6.270056992547129e-07,
"loss": 11.728,
"step": 107900
},
{
"epoch": 46.914534093996195,
"grad_norm": 11.851746559143066,
"learning_rate": 6.226216571679088e-07,
"loss": 11.7218,
"step": 107950
},
{
"epoch": 46.936267318663404,
"grad_norm": 3.989478588104248,
"learning_rate": 6.182376150811048e-07,
"loss": 11.7246,
"step": 108000
},
{
"epoch": 46.95800054333062,
"grad_norm": 10.78637981414795,
"learning_rate": 6.138535729943007e-07,
"loss": 11.7238,
"step": 108050
},
{
"epoch": 46.97973376799783,
"grad_norm": 11.566459655761719,
"learning_rate": 6.094695309074968e-07,
"loss": 11.7193,
"step": 108100
},
{
"epoch": 47.001303993480036,
"grad_norm": 9.378003120422363,
"learning_rate": 6.050854888206927e-07,
"loss": 11.6363,
"step": 108150
},
{
"epoch": 47.023037218147245,
"grad_norm": 5.3512091636657715,
"learning_rate": 6.007014467338887e-07,
"loss": 11.7135,
"step": 108200
},
{
"epoch": 47.04477044281445,
"grad_norm": 10.854682922363281,
"learning_rate": 5.963174046470846e-07,
"loss": 11.7218,
"step": 108250
},
{
"epoch": 47.06650366748166,
"grad_norm": 3.557173728942871,
"learning_rate": 5.919333625602806e-07,
"loss": 11.7216,
"step": 108300
},
{
"epoch": 47.08823689214887,
"grad_norm": 4.374483585357666,
"learning_rate": 5.875493204734767e-07,
"loss": 11.722,
"step": 108350
},
{
"epoch": 47.10997011681608,
"grad_norm": 4.371666431427002,
"learning_rate": 5.831652783866726e-07,
"loss": 11.7127,
"step": 108400
},
{
"epoch": 47.131703341483295,
"grad_norm": 6.458693504333496,
"learning_rate": 5.787812362998686e-07,
"loss": 11.7278,
"step": 108450
},
{
"epoch": 47.1534365661505,
"grad_norm": 20.349096298217773,
"learning_rate": 5.743971942130644e-07,
"loss": 11.7243,
"step": 108500
},
{
"epoch": 47.17516979081771,
"grad_norm": 24.32076072692871,
"learning_rate": 5.700131521262604e-07,
"loss": 11.7229,
"step": 108550
},
{
"epoch": 47.19690301548492,
"grad_norm": 3.82700252532959,
"learning_rate": 5.656291100394565e-07,
"loss": 11.7084,
"step": 108600
},
{
"epoch": 47.21863624015213,
"grad_norm": 3.007939338684082,
"learning_rate": 5.612450679526524e-07,
"loss": 11.7192,
"step": 108650
},
{
"epoch": 47.240369464819345,
"grad_norm": 4.3855299949646,
"learning_rate": 5.568610258658484e-07,
"loss": 11.7216,
"step": 108700
},
{
"epoch": 47.262102689486554,
"grad_norm": 3.7610890865325928,
"learning_rate": 5.524769837790443e-07,
"loss": 11.7107,
"step": 108750
},
{
"epoch": 47.28383591415376,
"grad_norm": 5.21887731552124,
"learning_rate": 5.480929416922403e-07,
"loss": 11.7165,
"step": 108800
},
{
"epoch": 47.30556913882097,
"grad_norm": 4.387558460235596,
"learning_rate": 5.437088996054362e-07,
"loss": 11.7145,
"step": 108850
},
{
"epoch": 47.32730236348818,
"grad_norm": 2.78105092048645,
"learning_rate": 5.393248575186323e-07,
"loss": 11.7132,
"step": 108900
},
{
"epoch": 47.349035588155395,
"grad_norm": 6.782215595245361,
"learning_rate": 5.349408154318282e-07,
"loss": 11.7313,
"step": 108950
},
{
"epoch": 47.370768812822604,
"grad_norm": 4.510542392730713,
"learning_rate": 5.305567733450242e-07,
"loss": 11.7191,
"step": 109000
},
{
"epoch": 47.39250203748981,
"grad_norm": 5.735984802246094,
"learning_rate": 5.261727312582201e-07,
"loss": 11.7178,
"step": 109050
},
{
"epoch": 47.41423526215702,
"grad_norm": 9.908329963684082,
"learning_rate": 5.21788689171416e-07,
"loss": 11.7219,
"step": 109100
},
{
"epoch": 47.43596848682423,
"grad_norm": 5.137279987335205,
"learning_rate": 5.174046470846121e-07,
"loss": 11.7231,
"step": 109150
},
{
"epoch": 47.457701711491445,
"grad_norm": 15.530988693237305,
"learning_rate": 5.13020604997808e-07,
"loss": 11.7219,
"step": 109200
},
{
"epoch": 47.479434936158654,
"grad_norm": 5.565670490264893,
"learning_rate": 5.08636562911004e-07,
"loss": 11.7231,
"step": 109250
},
{
"epoch": 47.50116816082586,
"grad_norm": 3.390558958053589,
"learning_rate": 5.042525208241999e-07,
"loss": 11.7143,
"step": 109300
},
{
"epoch": 47.52290138549307,
"grad_norm": 3.168869972229004,
"learning_rate": 4.998684787373959e-07,
"loss": 11.7107,
"step": 109350
},
{
"epoch": 47.54463461016028,
"grad_norm": 4.391485691070557,
"learning_rate": 4.954844366505919e-07,
"loss": 11.7171,
"step": 109400
},
{
"epoch": 47.566367834827496,
"grad_norm": 10.428187370300293,
"learning_rate": 4.911003945637879e-07,
"loss": 11.7214,
"step": 109450
},
{
"epoch": 47.588101059494704,
"grad_norm": 8.480759620666504,
"learning_rate": 4.867163524769838e-07,
"loss": 11.7284,
"step": 109500
},
{
"epoch": 47.60983428416191,
"grad_norm": 8.282448768615723,
"learning_rate": 4.823323103901798e-07,
"loss": 11.723,
"step": 109550
},
{
"epoch": 47.63156750882912,
"grad_norm": 3.495969295501709,
"learning_rate": 4.779482683033757e-07,
"loss": 11.7213,
"step": 109600
},
{
"epoch": 47.65330073349633,
"grad_norm": 4.484890937805176,
"learning_rate": 4.735642262165717e-07,
"loss": 11.7149,
"step": 109650
},
{
"epoch": 47.675033958163546,
"grad_norm": 11.390790939331055,
"learning_rate": 4.6918018412976767e-07,
"loss": 11.7138,
"step": 109700
},
{
"epoch": 47.696767182830754,
"grad_norm": 3.9627888202667236,
"learning_rate": 4.6479614204296364e-07,
"loss": 11.722,
"step": 109750
},
{
"epoch": 47.71850040749796,
"grad_norm": 5.796283721923828,
"learning_rate": 4.604120999561596e-07,
"loss": 11.7182,
"step": 109800
},
{
"epoch": 47.74023363216517,
"grad_norm": 8.347150802612305,
"learning_rate": 4.560280578693556e-07,
"loss": 11.7257,
"step": 109850
},
{
"epoch": 47.76196685683238,
"grad_norm": 18.176475524902344,
"learning_rate": 4.5164401578255155e-07,
"loss": 11.7289,
"step": 109900
},
{
"epoch": 47.78370008149959,
"grad_norm": 4.672854900360107,
"learning_rate": 4.472599736957475e-07,
"loss": 11.7134,
"step": 109950
},
{
"epoch": 47.805433306166805,
"grad_norm": 4.16023588180542,
"learning_rate": 4.428759316089435e-07,
"loss": 11.7233,
"step": 110000
},
{
"epoch": 47.805433306166805,
"eval_cer": 0.07368261883895177,
"eval_loss": 2.426945209503174,
"eval_runtime": 398.6555,
"eval_samples_per_second": 13.561,
"eval_steps_per_second": 3.391,
"eval_wer": 0.224462238970011,
"step": 110000
},
{
"epoch": 47.82716653083401,
"grad_norm": 6.881287097930908,
"learning_rate": 4.3849188952213946e-07,
"loss": 11.7274,
"step": 110050
},
{
"epoch": 47.84889975550122,
"grad_norm": 22.432987213134766,
"learning_rate": 4.3410784743533543e-07,
"loss": 11.7251,
"step": 110100
},
{
"epoch": 47.87063298016843,
"grad_norm": 5.372289180755615,
"learning_rate": 4.2972380534853134e-07,
"loss": 11.7208,
"step": 110150
},
{
"epoch": 47.89236620483564,
"grad_norm": 7.969860553741455,
"learning_rate": 4.253397632617273e-07,
"loss": 11.714,
"step": 110200
},
{
"epoch": 47.914099429502855,
"grad_norm": 4.329230308532715,
"learning_rate": 4.209557211749233e-07,
"loss": 11.7203,
"step": 110250
},
{
"epoch": 47.93583265417006,
"grad_norm": 5.077062606811523,
"learning_rate": 4.1657167908811925e-07,
"loss": 11.7147,
"step": 110300
},
{
"epoch": 47.95756587883727,
"grad_norm": 10.139983177185059,
"learning_rate": 4.121876370013152e-07,
"loss": 11.7224,
"step": 110350
},
{
"epoch": 47.97929910350448,
"grad_norm": 7.735840320587158,
"learning_rate": 4.078035949145112e-07,
"loss": 11.7244,
"step": 110400
},
{
"epoch": 48.00086932898669,
"grad_norm": 3.7973625659942627,
"learning_rate": 4.0341955282770716e-07,
"loss": 11.6379,
"step": 110450
},
{
"epoch": 48.0226025536539,
"grad_norm": 30.568002700805664,
"learning_rate": 3.990355107409032e-07,
"loss": 11.7251,
"step": 110500
},
{
"epoch": 48.044335778321106,
"grad_norm": 4.356334209442139,
"learning_rate": 3.9465146865409915e-07,
"loss": 11.7165,
"step": 110550
},
{
"epoch": 48.06606900298832,
"grad_norm": 2.87776780128479,
"learning_rate": 3.902674265672951e-07,
"loss": 11.7094,
"step": 110600
},
{
"epoch": 48.08780222765553,
"grad_norm": 7.523682117462158,
"learning_rate": 3.858833844804911e-07,
"loss": 11.7341,
"step": 110650
},
{
"epoch": 48.10953545232274,
"grad_norm": 4.565247535705566,
"learning_rate": 3.8149934239368695e-07,
"loss": 11.7107,
"step": 110700
},
{
"epoch": 48.13126867698995,
"grad_norm": 8.14168643951416,
"learning_rate": 3.77115300306883e-07,
"loss": 11.708,
"step": 110750
},
{
"epoch": 48.153001901657156,
"grad_norm": 9.361733436584473,
"learning_rate": 3.7273125822007895e-07,
"loss": 11.7357,
"step": 110800
},
{
"epoch": 48.17473512632437,
"grad_norm": 3.413947105407715,
"learning_rate": 3.683472161332749e-07,
"loss": 11.7106,
"step": 110850
},
{
"epoch": 48.19646835099158,
"grad_norm": 3.5155258178710938,
"learning_rate": 3.639631740464709e-07,
"loss": 11.7122,
"step": 110900
},
{
"epoch": 48.21820157565879,
"grad_norm": 3.1602470874786377,
"learning_rate": 3.5957913195966685e-07,
"loss": 11.7176,
"step": 110950
},
{
"epoch": 48.239934800326,
"grad_norm": 9.386332511901855,
"learning_rate": 3.551950898728628e-07,
"loss": 11.724,
"step": 111000
},
{
"epoch": 48.261668024993206,
"grad_norm": 4.479788780212402,
"learning_rate": 3.508110477860588e-07,
"loss": 11.7121,
"step": 111050
},
{
"epoch": 48.28340124966042,
"grad_norm": 5.899557590484619,
"learning_rate": 3.4642700569925476e-07,
"loss": 11.7212,
"step": 111100
},
{
"epoch": 48.30513447432763,
"grad_norm": 11.247237205505371,
"learning_rate": 3.4204296361245073e-07,
"loss": 11.7196,
"step": 111150
},
{
"epoch": 48.32686769899484,
"grad_norm": 5.190598487854004,
"learning_rate": 3.376589215256467e-07,
"loss": 11.7078,
"step": 111200
},
{
"epoch": 48.34860092366205,
"grad_norm": 4.109508037567139,
"learning_rate": 3.332748794388426e-07,
"loss": 11.7155,
"step": 111250
},
{
"epoch": 48.370334148329256,
"grad_norm": 3.0162370204925537,
"learning_rate": 3.288908373520386e-07,
"loss": 11.7168,
"step": 111300
},
{
"epoch": 48.39206737299647,
"grad_norm": 11.546656608581543,
"learning_rate": 3.2450679526523456e-07,
"loss": 11.7252,
"step": 111350
},
{
"epoch": 48.41380059766368,
"grad_norm": 3.388889789581299,
"learning_rate": 3.201227531784305e-07,
"loss": 11.7268,
"step": 111400
},
{
"epoch": 48.43553382233089,
"grad_norm": 6.033073902130127,
"learning_rate": 3.157387110916265e-07,
"loss": 11.7127,
"step": 111450
},
{
"epoch": 48.4572670469981,
"grad_norm": 4.543982028961182,
"learning_rate": 3.1135466900482246e-07,
"loss": 11.7272,
"step": 111500
},
{
"epoch": 48.479000271665306,
"grad_norm": 9.906218528747559,
"learning_rate": 3.0697062691801843e-07,
"loss": 11.7268,
"step": 111550
},
{
"epoch": 48.500733496332515,
"grad_norm": 7.095948696136475,
"learning_rate": 3.025865848312144e-07,
"loss": 11.712,
"step": 111600
},
{
"epoch": 48.52246672099973,
"grad_norm": 9.97701644897461,
"learning_rate": 2.9820254274441037e-07,
"loss": 11.722,
"step": 111650
},
{
"epoch": 48.54419994566694,
"grad_norm": 4.398223876953125,
"learning_rate": 2.9381850065760634e-07,
"loss": 11.7101,
"step": 111700
},
{
"epoch": 48.56593317033415,
"grad_norm": 3.77424693107605,
"learning_rate": 2.894344585708023e-07,
"loss": 11.7272,
"step": 111750
},
{
"epoch": 48.58766639500136,
"grad_norm": 17.39592933654785,
"learning_rate": 2.850504164839983e-07,
"loss": 11.7154,
"step": 111800
},
{
"epoch": 48.609399619668565,
"grad_norm": 3.8219528198242188,
"learning_rate": 2.8066637439719425e-07,
"loss": 11.7156,
"step": 111850
},
{
"epoch": 48.63113284433578,
"grad_norm": 9.067111015319824,
"learning_rate": 2.7628233231039017e-07,
"loss": 11.7133,
"step": 111900
},
{
"epoch": 48.65286606900299,
"grad_norm": 15.224953651428223,
"learning_rate": 2.7189829022358614e-07,
"loss": 11.7166,
"step": 111950
},
{
"epoch": 48.6745992936702,
"grad_norm": 4.944436073303223,
"learning_rate": 2.675142481367821e-07,
"loss": 11.7152,
"step": 112000
},
{
"epoch": 48.69633251833741,
"grad_norm": 11.312178611755371,
"learning_rate": 2.6313020604997813e-07,
"loss": 11.7157,
"step": 112050
},
{
"epoch": 48.718065743004615,
"grad_norm": 5.6469011306762695,
"learning_rate": 2.587461639631741e-07,
"loss": 11.7188,
"step": 112100
},
{
"epoch": 48.73979896767183,
"grad_norm": 3.34533429145813,
"learning_rate": 2.5436212187637007e-07,
"loss": 11.7275,
"step": 112150
},
{
"epoch": 48.76153219233904,
"grad_norm": 9.967689514160156,
"learning_rate": 2.49978079789566e-07,
"loss": 11.717,
"step": 112200
},
{
"epoch": 48.78326541700625,
"grad_norm": 5.482551574707031,
"learning_rate": 2.4559403770276195e-07,
"loss": 11.7111,
"step": 112250
},
{
"epoch": 48.80499864167346,
"grad_norm": 4.191429615020752,
"learning_rate": 2.412099956159579e-07,
"loss": 11.7112,
"step": 112300
},
{
"epoch": 48.826731866340666,
"grad_norm": 4.112410068511963,
"learning_rate": 2.3682595352915392e-07,
"loss": 11.7252,
"step": 112350
},
{
"epoch": 48.84846509100788,
"grad_norm": 4.255959510803223,
"learning_rate": 2.3244191144234989e-07,
"loss": 11.7149,
"step": 112400
},
{
"epoch": 48.87019831567509,
"grad_norm": 8.20151424407959,
"learning_rate": 2.2805786935554583e-07,
"loss": 11.7073,
"step": 112450
},
{
"epoch": 48.8919315403423,
"grad_norm": 4.13128137588501,
"learning_rate": 2.236738272687418e-07,
"loss": 11.72,
"step": 112500
},
{
"epoch": 48.91366476500951,
"grad_norm": 6.540150165557861,
"learning_rate": 2.1928978518193777e-07,
"loss": 11.707,
"step": 112550
},
{
"epoch": 48.935397989676716,
"grad_norm": 10.835039138793945,
"learning_rate": 2.1490574309513374e-07,
"loss": 11.7185,
"step": 112600
},
{
"epoch": 48.95713121434393,
"grad_norm": 11.767996788024902,
"learning_rate": 2.105217010083297e-07,
"loss": 11.7273,
"step": 112650
},
{
"epoch": 48.97886443901114,
"grad_norm": 7.164200305938721,
"learning_rate": 2.0613765892152568e-07,
"loss": 11.7255,
"step": 112700
},
{
"epoch": 49.00043466449335,
"grad_norm": 3.889307737350464,
"learning_rate": 2.0175361683472162e-07,
"loss": 11.6305,
"step": 112750
},
{
"epoch": 49.022167889160556,
"grad_norm": 3.3905019760131836,
"learning_rate": 1.973695747479176e-07,
"loss": 11.7146,
"step": 112800
},
{
"epoch": 49.043901113827765,
"grad_norm": 10.843219757080078,
"learning_rate": 1.9298553266111356e-07,
"loss": 11.7074,
"step": 112850
},
{
"epoch": 49.065634338494974,
"grad_norm": 6.1026082038879395,
"learning_rate": 1.8860149057430953e-07,
"loss": 11.7187,
"step": 112900
},
{
"epoch": 49.08736756316218,
"grad_norm": 12.958758354187012,
"learning_rate": 1.842174484875055e-07,
"loss": 11.7198,
"step": 112950
},
{
"epoch": 49.10910078782939,
"grad_norm": 7.045960426330566,
"learning_rate": 1.7983340640070144e-07,
"loss": 11.7171,
"step": 113000
},
{
"epoch": 49.13083401249661,
"grad_norm": 8.215546607971191,
"learning_rate": 1.754493643138974e-07,
"loss": 11.7161,
"step": 113050
},
{
"epoch": 49.152567237163815,
"grad_norm": 3.4392971992492676,
"learning_rate": 1.710653222270934e-07,
"loss": 11.7033,
"step": 113100
},
{
"epoch": 49.174300461831024,
"grad_norm": 4.333184242248535,
"learning_rate": 1.6668128014028937e-07,
"loss": 11.7221,
"step": 113150
},
{
"epoch": 49.19603368649823,
"grad_norm": 8.148516654968262,
"learning_rate": 1.6229723805348534e-07,
"loss": 11.7099,
"step": 113200
},
{
"epoch": 49.21776691116544,
"grad_norm": 10.723722457885742,
"learning_rate": 1.579131959666813e-07,
"loss": 11.7253,
"step": 113250
},
{
"epoch": 49.23950013583266,
"grad_norm": 5.778897285461426,
"learning_rate": 1.5352915387987726e-07,
"loss": 11.7058,
"step": 113300
},
{
"epoch": 49.261233360499865,
"grad_norm": 5.398443698883057,
"learning_rate": 1.4914511179307322e-07,
"loss": 11.7219,
"step": 113350
},
{
"epoch": 49.282966585167074,
"grad_norm": 3.614530324935913,
"learning_rate": 1.447610697062692e-07,
"loss": 11.7105,
"step": 113400
},
{
"epoch": 49.30469980983428,
"grad_norm": 4.205718040466309,
"learning_rate": 1.4037702761946516e-07,
"loss": 11.7128,
"step": 113450
},
{
"epoch": 49.32643303450149,
"grad_norm": 5.203486442565918,
"learning_rate": 1.3599298553266113e-07,
"loss": 11.7145,
"step": 113500
},
{
"epoch": 49.34816625916871,
"grad_norm": 3.4985852241516113,
"learning_rate": 1.316089434458571e-07,
"loss": 11.7212,
"step": 113550
},
{
"epoch": 49.369899483835916,
"grad_norm": 9.43883991241455,
"learning_rate": 1.2722490135905305e-07,
"loss": 11.7124,
"step": 113600
},
{
"epoch": 49.391632708503124,
"grad_norm": 7.489180088043213,
"learning_rate": 1.2284085927224901e-07,
"loss": 11.7207,
"step": 113650
},
{
"epoch": 49.41336593317033,
"grad_norm": 9.499123573303223,
"learning_rate": 1.18456817185445e-07,
"loss": 11.7147,
"step": 113700
},
{
"epoch": 49.43509915783754,
"grad_norm": 7.789849758148193,
"learning_rate": 1.1407277509864095e-07,
"loss": 11.7171,
"step": 113750
},
{
"epoch": 49.45683238250476,
"grad_norm": 9.22687816619873,
"learning_rate": 1.0968873301183692e-07,
"loss": 11.7071,
"step": 113800
},
{
"epoch": 49.478565607171966,
"grad_norm": 5.999274253845215,
"learning_rate": 1.0530469092503289e-07,
"loss": 11.7137,
"step": 113850
},
{
"epoch": 49.500298831839174,
"grad_norm": 9.73884391784668,
"learning_rate": 1.0092064883822885e-07,
"loss": 11.7201,
"step": 113900
},
{
"epoch": 49.52203205650638,
"grad_norm": 9.630657196044922,
"learning_rate": 9.653660675142482e-08,
"loss": 11.7212,
"step": 113950
},
{
"epoch": 49.54376528117359,
"grad_norm": 4.612308979034424,
"learning_rate": 9.21525646646208e-08,
"loss": 11.7247,
"step": 114000
},
{
"epoch": 49.56549850584081,
"grad_norm": 2.9876091480255127,
"learning_rate": 8.776852257781676e-08,
"loss": 11.7146,
"step": 114050
},
{
"epoch": 49.587231730508016,
"grad_norm": 4.555498123168945,
"learning_rate": 8.338448049101273e-08,
"loss": 11.7073,
"step": 114100
},
{
"epoch": 49.608964955175225,
"grad_norm": 5.015764236450195,
"learning_rate": 7.900043840420868e-08,
"loss": 11.7119,
"step": 114150
},
{
"epoch": 49.63069817984243,
"grad_norm": 5.208141326904297,
"learning_rate": 7.461639631740465e-08,
"loss": 11.7178,
"step": 114200
},
{
"epoch": 49.65243140450964,
"grad_norm": 5.420982837677002,
"learning_rate": 7.023235423060062e-08,
"loss": 11.7187,
"step": 114250
},
{
"epoch": 49.67416462917686,
"grad_norm": 6.694780349731445,
"learning_rate": 6.584831214379659e-08,
"loss": 11.7102,
"step": 114300
},
{
"epoch": 49.695897853844066,
"grad_norm": 4.203577995300293,
"learning_rate": 6.146427005699255e-08,
"loss": 11.7142,
"step": 114350
},
{
"epoch": 49.717631078511275,
"grad_norm": 3.1716277599334717,
"learning_rate": 5.7080227970188515e-08,
"loss": 11.7139,
"step": 114400
},
{
"epoch": 49.73936430317848,
"grad_norm": 3.897326946258545,
"learning_rate": 5.269618588338448e-08,
"loss": 11.712,
"step": 114450
},
{
"epoch": 49.76109752784569,
"grad_norm": 13.347712516784668,
"learning_rate": 4.8312143796580454e-08,
"loss": 11.7155,
"step": 114500
},
{
"epoch": 49.78283075251291,
"grad_norm": 6.420513153076172,
"learning_rate": 4.392810170977642e-08,
"loss": 11.7286,
"step": 114550
},
{
"epoch": 49.804563977180116,
"grad_norm": 5.6966447830200195,
"learning_rate": 3.954405962297238e-08,
"loss": 11.7078,
"step": 114600
},
{
"epoch": 49.826297201847325,
"grad_norm": 5.481497287750244,
"learning_rate": 3.516001753616835e-08,
"loss": 11.7061,
"step": 114650
},
{
"epoch": 49.84803042651453,
"grad_norm": 6.5728840827941895,
"learning_rate": 3.077597544936432e-08,
"loss": 11.7121,
"step": 114700
},
{
"epoch": 49.86976365118174,
"grad_norm": 7.013606071472168,
"learning_rate": 2.639193336256028e-08,
"loss": 11.7165,
"step": 114750
},
{
"epoch": 49.89149687584895,
"grad_norm": 8.17546272277832,
"learning_rate": 2.200789127575625e-08,
"loss": 11.7226,
"step": 114800
},
{
"epoch": 49.91323010051617,
"grad_norm": 11.53906536102295,
"learning_rate": 1.7623849188952217e-08,
"loss": 11.7159,
"step": 114850
},
{
"epoch": 49.934963325183375,
"grad_norm": 3.259451389312744,
"learning_rate": 1.3239807102148183e-08,
"loss": 11.7256,
"step": 114900
},
{
"epoch": 49.956696549850584,
"grad_norm": 9.84170913696289,
"learning_rate": 8.855765015344147e-09,
"loss": 11.7094,
"step": 114950
},
{
"epoch": 49.97842977451779,
"grad_norm": 3.992011070251465,
"learning_rate": 4.471722928540115e-09,
"loss": 11.7083,
"step": 115000
},
{
"epoch": 50.0,
"grad_norm": 7.805567264556885,
"learning_rate": 8.768084173608068e-11,
"loss": 11.6432,
"step": 115050
}
],
"logging_steps": 50,
"max_steps": 115050,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}