mHubert-basque-ASR / trainer_state.json
Ansu's picture
Upload folder using huggingface_hub
97a764f verified
{
"best_metric": 0.10333551466464996,
"best_model_checkpoint": "checkpoints/mHubert-basque-ASR-30ep/checkpoint-144000",
"epoch": 24.116286752560292,
"eval_steps": 1000,
"global_step": 146000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016518004625041296,
"grad_norm": 11.923916816711426,
"learning_rate": 5.506001541680432e-08,
"loss": 24.4951,
"step": 100
},
{
"epoch": 0.03303600925008259,
"grad_norm": 14.201896667480469,
"learning_rate": 1.1012003083360864e-07,
"loss": 24.611,
"step": 200
},
{
"epoch": 0.049554013875123884,
"grad_norm": 16.669321060180664,
"learning_rate": 1.6518004625041296e-07,
"loss": 24.4459,
"step": 300
},
{
"epoch": 0.06607201850016518,
"grad_norm": 15.999144554138184,
"learning_rate": 2.2024006166721728e-07,
"loss": 23.9418,
"step": 400
},
{
"epoch": 0.08259002312520647,
"grad_norm": 15.337122917175293,
"learning_rate": 2.753000770840216e-07,
"loss": 23.6429,
"step": 500
},
{
"epoch": 0.09910802775024777,
"grad_norm": 20.767488479614258,
"learning_rate": 3.3036009250082593e-07,
"loss": 22.9927,
"step": 600
},
{
"epoch": 0.11562603237528907,
"grad_norm": 25.313589096069336,
"learning_rate": 3.8542010791763027e-07,
"loss": 22.5284,
"step": 700
},
{
"epoch": 0.13214403700033037,
"grad_norm": 24.19838523864746,
"learning_rate": 4.4048012333443456e-07,
"loss": 21.2884,
"step": 800
},
{
"epoch": 0.14866204162537167,
"grad_norm": 27.155614852905273,
"learning_rate": 4.955401387512389e-07,
"loss": 20.3421,
"step": 900
},
{
"epoch": 0.16518004625041294,
"grad_norm": 30.608129501342773,
"learning_rate": 5.506001541680432e-07,
"loss": 19.4542,
"step": 1000
},
{
"epoch": 0.16518004625041294,
"eval_cer": 0.9883050441448223,
"eval_loss": 16.246232986450195,
"eval_runtime": 48.061,
"eval_samples_per_second": 35.184,
"eval_steps_per_second": 8.801,
"eval_wer": 0.9999375507400238,
"step": 1000
},
{
"epoch": 0.18169805087545424,
"grad_norm": 27.388935089111328,
"learning_rate": 6.056601695848476e-07,
"loss": 18.666,
"step": 1100
},
{
"epoch": 0.19821605550049554,
"grad_norm": 35.63807678222656,
"learning_rate": 6.607201850016519e-07,
"loss": 17.1664,
"step": 1200
},
{
"epoch": 0.21473406012553684,
"grad_norm": 46.33199691772461,
"learning_rate": 7.157802004184563e-07,
"loss": 15.5691,
"step": 1300
},
{
"epoch": 0.23125206475057813,
"grad_norm": 37.326534271240234,
"learning_rate": 7.708402158352605e-07,
"loss": 14.6982,
"step": 1400
},
{
"epoch": 0.24777006937561943,
"grad_norm": 32.61579895019531,
"learning_rate": 8.259002312520647e-07,
"loss": 14.1957,
"step": 1500
},
{
"epoch": 0.26428807400066073,
"grad_norm": 32.68245315551758,
"learning_rate": 8.809602466688691e-07,
"loss": 13.7794,
"step": 1600
},
{
"epoch": 0.280806078625702,
"grad_norm": 36.71652603149414,
"learning_rate": 9.360202620856734e-07,
"loss": 13.1926,
"step": 1700
},
{
"epoch": 0.29732408325074333,
"grad_norm": 33.95559310913086,
"learning_rate": 9.910802775024778e-07,
"loss": 12.549,
"step": 1800
},
{
"epoch": 0.3138420878757846,
"grad_norm": 34.27357864379883,
"learning_rate": 1.0461402929192822e-06,
"loss": 12.9556,
"step": 1900
},
{
"epoch": 0.3303600925008259,
"grad_norm": 29.778766632080078,
"learning_rate": 1.1012003083360864e-06,
"loss": 11.7038,
"step": 2000
},
{
"epoch": 0.3303600925008259,
"eval_cer": 0.9883050441448223,
"eval_loss": 10.252079963684082,
"eval_runtime": 47.4771,
"eval_samples_per_second": 35.617,
"eval_steps_per_second": 8.91,
"eval_wer": 0.9999375507400238,
"step": 2000
},
{
"epoch": 0.3468780971258672,
"grad_norm": 34.33523941040039,
"learning_rate": 1.1562603237528908e-06,
"loss": 12.5611,
"step": 2100
},
{
"epoch": 0.3633961017509085,
"grad_norm": 34.92203140258789,
"learning_rate": 1.2113203391696951e-06,
"loss": 11.634,
"step": 2200
},
{
"epoch": 0.3799141063759498,
"grad_norm": 35.76356887817383,
"learning_rate": 1.2663803545864995e-06,
"loss": 12.158,
"step": 2300
},
{
"epoch": 0.39643211100099107,
"grad_norm": 33.04209518432617,
"learning_rate": 1.3214403700033037e-06,
"loss": 11.245,
"step": 2400
},
{
"epoch": 0.4129501156260324,
"grad_norm": 36.18589782714844,
"learning_rate": 1.3765003854201081e-06,
"loss": 11.5037,
"step": 2500
},
{
"epoch": 0.42946812025107367,
"grad_norm": 30.83759307861328,
"learning_rate": 1.4315604008369125e-06,
"loss": 11.6204,
"step": 2600
},
{
"epoch": 0.44598612487611494,
"grad_norm": 31.8559627532959,
"learning_rate": 1.4866204162537167e-06,
"loss": 10.6165,
"step": 2700
},
{
"epoch": 0.46250412950115627,
"grad_norm": 2.7403502464294434,
"learning_rate": 1.541680431670521e-06,
"loss": 10.7605,
"step": 2800
},
{
"epoch": 0.47902213412619754,
"grad_norm": 3.411168098449707,
"learning_rate": 1.5967404470873255e-06,
"loss": 10.6777,
"step": 2900
},
{
"epoch": 0.49554013875123887,
"grad_norm": 31.218460083007812,
"learning_rate": 1.6518004625041294e-06,
"loss": 10.5214,
"step": 3000
},
{
"epoch": 0.49554013875123887,
"eval_cer": 0.9883050441448223,
"eval_loss": 8.999906539916992,
"eval_runtime": 47.8317,
"eval_samples_per_second": 35.353,
"eval_steps_per_second": 8.844,
"eval_wer": 0.9999375507400238,
"step": 3000
},
{
"epoch": 0.5120581433762802,
"grad_norm": 33.284549713134766,
"learning_rate": 1.7068604779209338e-06,
"loss": 10.5589,
"step": 3100
},
{
"epoch": 0.5285761480013215,
"grad_norm": 29.90840721130371,
"learning_rate": 1.7619204933377382e-06,
"loss": 10.1695,
"step": 3200
},
{
"epoch": 0.5450941526263627,
"grad_norm": 37.552734375,
"learning_rate": 1.8169805087545426e-06,
"loss": 10.1221,
"step": 3300
},
{
"epoch": 0.561612157251404,
"grad_norm": 33.36090850830078,
"learning_rate": 1.8720405241713468e-06,
"loss": 9.6691,
"step": 3400
},
{
"epoch": 0.5781301618764453,
"grad_norm": 28.224496841430664,
"learning_rate": 1.9271005395881514e-06,
"loss": 9.6393,
"step": 3500
},
{
"epoch": 0.5946481665014867,
"grad_norm": 29.11280059814453,
"learning_rate": 1.9821605550049556e-06,
"loss": 9.6595,
"step": 3600
},
{
"epoch": 0.6111661711265279,
"grad_norm": 30.484336853027344,
"learning_rate": 2.0372205704217598e-06,
"loss": 9.4354,
"step": 3700
},
{
"epoch": 0.6276841757515692,
"grad_norm": 28.713830947875977,
"learning_rate": 2.0922805858385644e-06,
"loss": 9.107,
"step": 3800
},
{
"epoch": 0.6442021803766105,
"grad_norm": 28.442527770996094,
"learning_rate": 2.1473406012553685e-06,
"loss": 9.1297,
"step": 3900
},
{
"epoch": 0.6607201850016517,
"grad_norm": 30.577152252197266,
"learning_rate": 2.2024006166721727e-06,
"loss": 8.8754,
"step": 4000
},
{
"epoch": 0.6607201850016517,
"eval_cer": 0.9883050441448223,
"eval_loss": 7.591804504394531,
"eval_runtime": 48.3109,
"eval_samples_per_second": 35.002,
"eval_steps_per_second": 8.756,
"eval_wer": 0.9999375507400238,
"step": 4000
},
{
"epoch": 0.6772381896266931,
"grad_norm": 33.752967834472656,
"learning_rate": 2.2574606320889773e-06,
"loss": 8.5952,
"step": 4100
},
{
"epoch": 0.6937561942517344,
"grad_norm": 29.171703338623047,
"learning_rate": 2.3125206475057815e-06,
"loss": 8.363,
"step": 4200
},
{
"epoch": 0.7102741988767757,
"grad_norm": 29.746253967285156,
"learning_rate": 2.3675806629225857e-06,
"loss": 8.3281,
"step": 4300
},
{
"epoch": 0.726792203501817,
"grad_norm": 31.43389129638672,
"learning_rate": 2.4226406783393903e-06,
"loss": 8.0488,
"step": 4400
},
{
"epoch": 0.7433102081268583,
"grad_norm": 26.350412368774414,
"learning_rate": 2.4777006937561945e-06,
"loss": 8.0053,
"step": 4500
},
{
"epoch": 0.7598282127518996,
"grad_norm": 24.809051513671875,
"learning_rate": 2.532760709172999e-06,
"loss": 7.8292,
"step": 4600
},
{
"epoch": 0.7763462173769409,
"grad_norm": 5.660928726196289,
"learning_rate": 2.587820724589803e-06,
"loss": 7.2419,
"step": 4700
},
{
"epoch": 0.7928642220019821,
"grad_norm": 25.451852798461914,
"learning_rate": 2.6428807400066074e-06,
"loss": 7.3732,
"step": 4800
},
{
"epoch": 0.8093822266270234,
"grad_norm": 26.097301483154297,
"learning_rate": 2.6979407554234116e-06,
"loss": 7.1173,
"step": 4900
},
{
"epoch": 0.8259002312520648,
"grad_norm": 22.98796844482422,
"learning_rate": 2.7530007708402162e-06,
"loss": 6.9653,
"step": 5000
},
{
"epoch": 0.8259002312520648,
"eval_cer": 0.9883050441448223,
"eval_loss": 5.927879810333252,
"eval_runtime": 47.9693,
"eval_samples_per_second": 35.252,
"eval_steps_per_second": 8.818,
"eval_wer": 0.9999375507400238,
"step": 5000
},
{
"epoch": 0.8424182358771061,
"grad_norm": 22.172231674194336,
"learning_rate": 2.8080607862570204e-06,
"loss": 6.5446,
"step": 5100
},
{
"epoch": 0.8589362405021473,
"grad_norm": 20.156213760375977,
"learning_rate": 2.863120801673825e-06,
"loss": 6.5427,
"step": 5200
},
{
"epoch": 0.8754542451271886,
"grad_norm": 23.265291213989258,
"learning_rate": 2.9181808170906288e-06,
"loss": 6.1793,
"step": 5300
},
{
"epoch": 0.8919722497522299,
"grad_norm": 19.185626983642578,
"learning_rate": 2.9732408325074334e-06,
"loss": 6.0592,
"step": 5400
},
{
"epoch": 0.9084902543772713,
"grad_norm": 2.092716693878174,
"learning_rate": 3.0283008479242375e-06,
"loss": 5.7659,
"step": 5500
},
{
"epoch": 0.9250082590023125,
"grad_norm": 21.02352523803711,
"learning_rate": 3.083360863341042e-06,
"loss": 5.6754,
"step": 5600
},
{
"epoch": 0.9415262636273538,
"grad_norm": 18.27204132080078,
"learning_rate": 3.1384208787578463e-06,
"loss": 5.5381,
"step": 5700
},
{
"epoch": 0.9580442682523951,
"grad_norm": 18.369115829467773,
"learning_rate": 3.193480894174651e-06,
"loss": 5.4687,
"step": 5800
},
{
"epoch": 0.9745622728774364,
"grad_norm": 19.799074172973633,
"learning_rate": 3.248540909591455e-06,
"loss": 5.2652,
"step": 5900
},
{
"epoch": 0.9910802775024777,
"grad_norm": 16.94482421875,
"learning_rate": 3.303600925008259e-06,
"loss": 5.0711,
"step": 6000
},
{
"epoch": 0.9910802775024777,
"eval_cer": 0.9883050441448223,
"eval_loss": 4.399995803833008,
"eval_runtime": 48.2208,
"eval_samples_per_second": 35.068,
"eval_steps_per_second": 8.772,
"eval_wer": 0.9999375507400238,
"step": 6000
},
{
"epoch": 1.007598282127519,
"grad_norm": 13.568734169006348,
"learning_rate": 3.3586609404250635e-06,
"loss": 4.7523,
"step": 6100
},
{
"epoch": 1.0241162867525604,
"grad_norm": 13.136434555053711,
"learning_rate": 3.4137209558418677e-06,
"loss": 4.5941,
"step": 6200
},
{
"epoch": 1.0406342913776017,
"grad_norm": 12.08462905883789,
"learning_rate": 3.4687809712586723e-06,
"loss": 4.431,
"step": 6300
},
{
"epoch": 1.057152296002643,
"grad_norm": 11.658442497253418,
"learning_rate": 3.5238409866754764e-06,
"loss": 4.4636,
"step": 6400
},
{
"epoch": 1.0736703006276842,
"grad_norm": 11.084771156311035,
"learning_rate": 3.578901002092281e-06,
"loss": 4.2883,
"step": 6500
},
{
"epoch": 1.0901883052527255,
"grad_norm": 9.543913841247559,
"learning_rate": 3.6339610175090852e-06,
"loss": 4.1244,
"step": 6600
},
{
"epoch": 1.1067063098777667,
"grad_norm": 9.388916015625,
"learning_rate": 3.68902103292589e-06,
"loss": 3.9917,
"step": 6700
},
{
"epoch": 1.123224314502808,
"grad_norm": 6.666196346282959,
"learning_rate": 3.7440810483426936e-06,
"loss": 3.8466,
"step": 6800
},
{
"epoch": 1.1397423191278493,
"grad_norm": 16.961889266967773,
"learning_rate": 3.799141063759498e-06,
"loss": 3.7877,
"step": 6900
},
{
"epoch": 1.1562603237528906,
"grad_norm": 5.992101669311523,
"learning_rate": 3.854201079176303e-06,
"loss": 3.6487,
"step": 7000
},
{
"epoch": 1.1562603237528906,
"eval_cer": 0.9883050441448223,
"eval_loss": 3.413706064224243,
"eval_runtime": 47.9091,
"eval_samples_per_second": 35.296,
"eval_steps_per_second": 8.829,
"eval_wer": 0.9999375507400238,
"step": 7000
},
{
"epoch": 1.172778328377932,
"grad_norm": 6.866231441497803,
"learning_rate": 3.909261094593107e-06,
"loss": 3.5613,
"step": 7100
},
{
"epoch": 1.1892963330029733,
"grad_norm": 2.480027675628662,
"learning_rate": 3.964321110009911e-06,
"loss": 3.4725,
"step": 7200
},
{
"epoch": 1.2058143376280146,
"grad_norm": 4.010397911071777,
"learning_rate": 4.019381125426716e-06,
"loss": 3.4202,
"step": 7300
},
{
"epoch": 1.2223323422530559,
"grad_norm": 5.230499267578125,
"learning_rate": 4.0744411408435195e-06,
"loss": 3.3595,
"step": 7400
},
{
"epoch": 1.2388503468780971,
"grad_norm": 3.2052972316741943,
"learning_rate": 4.129501156260324e-06,
"loss": 3.3002,
"step": 7500
},
{
"epoch": 1.2553683515031384,
"grad_norm": 1.894927978515625,
"learning_rate": 4.184561171677129e-06,
"loss": 3.1986,
"step": 7600
},
{
"epoch": 1.2718863561281797,
"grad_norm": 2.197263240814209,
"learning_rate": 4.239621187093933e-06,
"loss": 3.1351,
"step": 7700
},
{
"epoch": 1.288404360753221,
"grad_norm": 2.1114447116851807,
"learning_rate": 4.294681202510737e-06,
"loss": 3.126,
"step": 7800
},
{
"epoch": 1.3049223653782622,
"grad_norm": 1.9289065599441528,
"learning_rate": 4.349741217927541e-06,
"loss": 3.0814,
"step": 7900
},
{
"epoch": 1.3214403700033035,
"grad_norm": 1.314790964126587,
"learning_rate": 4.4048012333443454e-06,
"loss": 3.061,
"step": 8000
},
{
"epoch": 1.3214403700033035,
"eval_cer": 0.9883050441448223,
"eval_loss": 2.9990100860595703,
"eval_runtime": 47.905,
"eval_samples_per_second": 35.299,
"eval_steps_per_second": 8.83,
"eval_wer": 0.9999375507400238,
"step": 8000
},
{
"epoch": 1.3379583746283448,
"grad_norm": 1.4136910438537598,
"learning_rate": 4.45986124876115e-06,
"loss": 3.0138,
"step": 8100
},
{
"epoch": 1.3544763792533863,
"grad_norm": 1.2747600078582764,
"learning_rate": 4.514921264177955e-06,
"loss": 2.9893,
"step": 8200
},
{
"epoch": 1.3709943838784275,
"grad_norm": 1.3073844909667969,
"learning_rate": 4.569981279594758e-06,
"loss": 2.9698,
"step": 8300
},
{
"epoch": 1.3875123885034688,
"grad_norm": 1.0685036182403564,
"learning_rate": 4.625041295011563e-06,
"loss": 2.9459,
"step": 8400
},
{
"epoch": 1.40403039312851,
"grad_norm": 1.4280977249145508,
"learning_rate": 4.680101310428367e-06,
"loss": 2.9276,
"step": 8500
},
{
"epoch": 1.4205483977535514,
"grad_norm": 0.5592168569564819,
"learning_rate": 4.735161325845171e-06,
"loss": 2.9128,
"step": 8600
},
{
"epoch": 1.4370664023785926,
"grad_norm": 0.6144903302192688,
"learning_rate": 4.790221341261976e-06,
"loss": 2.89,
"step": 8700
},
{
"epoch": 1.453584407003634,
"grad_norm": 0.5364680886268616,
"learning_rate": 4.845281356678781e-06,
"loss": 2.884,
"step": 8800
},
{
"epoch": 1.4701024116286754,
"grad_norm": 0.42214369773864746,
"learning_rate": 4.900341372095584e-06,
"loss": 2.8763,
"step": 8900
},
{
"epoch": 1.4866204162537167,
"grad_norm": 1.1128541231155396,
"learning_rate": 4.955401387512389e-06,
"loss": 2.8674,
"step": 9000
},
{
"epoch": 1.4866204162537167,
"eval_cer": 0.9883050441448223,
"eval_loss": 2.8672590255737305,
"eval_runtime": 47.8727,
"eval_samples_per_second": 35.323,
"eval_steps_per_second": 8.836,
"eval_wer": 0.9999375507400238,
"step": 9000
},
{
"epoch": 1.503138420878758,
"grad_norm": 1.4287844896316528,
"learning_rate": 5.0104614029291935e-06,
"loss": 2.8587,
"step": 9100
},
{
"epoch": 1.5196564255037992,
"grad_norm": 0.7117812037467957,
"learning_rate": 5.065521418345998e-06,
"loss": 2.8496,
"step": 9200
},
{
"epoch": 1.5361744301288405,
"grad_norm": 0.346927285194397,
"learning_rate": 5.120581433762803e-06,
"loss": 2.8843,
"step": 9300
},
{
"epoch": 1.5526924347538817,
"grad_norm": 0.28466975688934326,
"learning_rate": 5.175641449179606e-06,
"loss": 2.8361,
"step": 9400
},
{
"epoch": 1.569210439378923,
"grad_norm": 0.9709968566894531,
"learning_rate": 5.23070146459641e-06,
"loss": 2.8347,
"step": 9500
},
{
"epoch": 1.5857284440039643,
"grad_norm": 0.41504138708114624,
"learning_rate": 5.285761480013215e-06,
"loss": 2.8281,
"step": 9600
},
{
"epoch": 1.6022464486290056,
"grad_norm": 0.7209063172340393,
"learning_rate": 5.3408214954300195e-06,
"loss": 2.8216,
"step": 9700
},
{
"epoch": 1.6187644532540468,
"grad_norm": 0.17556777596473694,
"learning_rate": 5.395881510846823e-06,
"loss": 2.8191,
"step": 9800
},
{
"epoch": 1.635282457879088,
"grad_norm": 0.24542377889156342,
"learning_rate": 5.450941526263628e-06,
"loss": 2.8171,
"step": 9900
},
{
"epoch": 1.6518004625041294,
"grad_norm": 0.5793740153312683,
"learning_rate": 5.5060015416804324e-06,
"loss": 2.8139,
"step": 10000
},
{
"epoch": 1.6518004625041294,
"eval_cer": 0.9883050441448223,
"eval_loss": 2.8268349170684814,
"eval_runtime": 47.7716,
"eval_samples_per_second": 35.398,
"eval_steps_per_second": 8.855,
"eval_wer": 0.9999375507400238,
"step": 10000
},
{
"epoch": 1.6683184671291706,
"grad_norm": 0.5430779457092285,
"learning_rate": 5.561061557097236e-06,
"loss": 2.8118,
"step": 10100
},
{
"epoch": 1.6848364717542121,
"grad_norm": 0.16136805713176727,
"learning_rate": 5.616121572514041e-06,
"loss": 2.8084,
"step": 10200
},
{
"epoch": 1.7013544763792534,
"grad_norm": 0.180739626288414,
"learning_rate": 5.671181587930845e-06,
"loss": 2.8075,
"step": 10300
},
{
"epoch": 1.7178724810042947,
"grad_norm": 0.33964207768440247,
"learning_rate": 5.72624160334765e-06,
"loss": 2.8029,
"step": 10400
},
{
"epoch": 1.734390485629336,
"grad_norm": 0.3545405864715576,
"learning_rate": 5.781301618764453e-06,
"loss": 2.8026,
"step": 10500
},
{
"epoch": 1.7509084902543772,
"grad_norm": 0.6962557435035706,
"learning_rate": 5.8363616341812575e-06,
"loss": 2.8008,
"step": 10600
},
{
"epoch": 1.7674264948794187,
"grad_norm": 0.49546900391578674,
"learning_rate": 5.891421649598062e-06,
"loss": 2.798,
"step": 10700
},
{
"epoch": 1.78394449950446,
"grad_norm": 0.10469625890254974,
"learning_rate": 5.946481665014867e-06,
"loss": 2.803,
"step": 10800
},
{
"epoch": 1.8004625041295013,
"grad_norm": 0.48037204146385193,
"learning_rate": 6.0015416804316705e-06,
"loss": 2.7972,
"step": 10900
},
{
"epoch": 1.8169805087545425,
"grad_norm": 1.4760863780975342,
"learning_rate": 6.056601695848475e-06,
"loss": 2.7957,
"step": 11000
},
{
"epoch": 1.8169805087545425,
"eval_cer": 0.9883050441448223,
"eval_loss": 2.815251588821411,
"eval_runtime": 47.7131,
"eval_samples_per_second": 35.441,
"eval_steps_per_second": 8.865,
"eval_wer": 0.9999375507400238,
"step": 11000
},
{
"epoch": 1.8334985133795838,
"grad_norm": 0.6480938196182251,
"learning_rate": 6.11166171126528e-06,
"loss": 2.7959,
"step": 11100
},
{
"epoch": 1.850016518004625,
"grad_norm": 0.09613073617219925,
"learning_rate": 6.166721726682084e-06,
"loss": 2.795,
"step": 11200
},
{
"epoch": 1.8665345226296663,
"grad_norm": 0.7749711275100708,
"learning_rate": 6.221781742098888e-06,
"loss": 2.7929,
"step": 11300
},
{
"epoch": 1.8830525272547076,
"grad_norm": 0.3546479046344757,
"learning_rate": 6.276841757515693e-06,
"loss": 2.7918,
"step": 11400
},
{
"epoch": 1.899570531879749,
"grad_norm": 1.12019681930542,
"learning_rate": 6.331901772932497e-06,
"loss": 2.7918,
"step": 11500
},
{
"epoch": 1.9160885365047902,
"grad_norm": 0.8891560435295105,
"learning_rate": 6.386961788349302e-06,
"loss": 2.7891,
"step": 11600
},
{
"epoch": 1.9326065411298314,
"grad_norm": 0.17968548834323883,
"learning_rate": 6.442021803766106e-06,
"loss": 2.7893,
"step": 11700
},
{
"epoch": 1.9491245457548727,
"grad_norm": 0.7718554139137268,
"learning_rate": 6.49708181918291e-06,
"loss": 2.7906,
"step": 11800
},
{
"epoch": 1.965642550379914,
"grad_norm": 0.20580369234085083,
"learning_rate": 6.552141834599715e-06,
"loss": 2.7855,
"step": 11900
},
{
"epoch": 1.9821605550049552,
"grad_norm": 0.12557658553123474,
"learning_rate": 6.607201850016518e-06,
"loss": 2.7821,
"step": 12000
},
{
"epoch": 1.9821605550049552,
"eval_cer": 0.9883050441448223,
"eval_loss": 2.801440477371216,
"eval_runtime": 48.1673,
"eval_samples_per_second": 35.107,
"eval_steps_per_second": 8.782,
"eval_wer": 0.9999375507400238,
"step": 12000
},
{
"epoch": 1.9986785596299967,
"grad_norm": 0.29796159267425537,
"learning_rate": 6.662261865433322e-06,
"loss": 2.8061,
"step": 12100
},
{
"epoch": 2.015196564255038,
"grad_norm": 0.43523645401000977,
"learning_rate": 6.717321880850127e-06,
"loss": 2.7722,
"step": 12200
},
{
"epoch": 2.031714568880079,
"grad_norm": 0.8194575905799866,
"learning_rate": 6.7723818962669316e-06,
"loss": 2.7428,
"step": 12300
},
{
"epoch": 2.048232573505121,
"grad_norm": 0.5913192629814148,
"learning_rate": 6.827441911683735e-06,
"loss": 2.6796,
"step": 12400
},
{
"epoch": 2.064750578130162,
"grad_norm": 1.2272390127182007,
"learning_rate": 6.88250192710054e-06,
"loss": 2.5647,
"step": 12500
},
{
"epoch": 2.0812685827552033,
"grad_norm": 0.7809718251228333,
"learning_rate": 6.9375619425173445e-06,
"loss": 2.4445,
"step": 12600
},
{
"epoch": 2.0977865873802446,
"grad_norm": 1.4848648309707642,
"learning_rate": 6.992621957934149e-06,
"loss": 2.3472,
"step": 12700
},
{
"epoch": 2.114304592005286,
"grad_norm": 0.9019191265106201,
"learning_rate": 7.047681973350953e-06,
"loss": 2.2088,
"step": 12800
},
{
"epoch": 2.130822596630327,
"grad_norm": 1.6210988759994507,
"learning_rate": 7.1027419887677575e-06,
"loss": 2.0813,
"step": 12900
},
{
"epoch": 2.1473406012553684,
"grad_norm": 0.9672953486442566,
"learning_rate": 7.157802004184562e-06,
"loss": 1.9636,
"step": 13000
},
{
"epoch": 2.1473406012553684,
"eval_cer": 0.42876942029977416,
"eval_loss": 1.6961537599563599,
"eval_runtime": 48.2183,
"eval_samples_per_second": 35.07,
"eval_steps_per_second": 8.773,
"eval_wer": 0.9999375507400238,
"step": 13000
},
{
"epoch": 2.1638586058804097,
"grad_norm": 1.5448602437973022,
"learning_rate": 7.212862019601367e-06,
"loss": 1.8347,
"step": 13100
},
{
"epoch": 2.180376610505451,
"grad_norm": 1.3155843019485474,
"learning_rate": 7.2679220350181704e-06,
"loss": 1.6876,
"step": 13200
},
{
"epoch": 2.1968946151304922,
"grad_norm": 1.4173344373703003,
"learning_rate": 7.322982050434975e-06,
"loss": 1.6086,
"step": 13300
},
{
"epoch": 2.2134126197555335,
"grad_norm": 1.2968000173568726,
"learning_rate": 7.37804206585178e-06,
"loss": 1.4922,
"step": 13400
},
{
"epoch": 2.2299306243805748,
"grad_norm": 1.3589733839035034,
"learning_rate": 7.433102081268584e-06,
"loss": 1.4186,
"step": 13500
},
{
"epoch": 2.246448629005616,
"grad_norm": 1.5690885782241821,
"learning_rate": 7.488162096685387e-06,
"loss": 1.3541,
"step": 13600
},
{
"epoch": 2.2629666336306573,
"grad_norm": 1.1378659009933472,
"learning_rate": 7.543222112102192e-06,
"loss": 1.2945,
"step": 13700
},
{
"epoch": 2.2794846382556986,
"grad_norm": 1.2513694763183594,
"learning_rate": 7.598282127518996e-06,
"loss": 1.1947,
"step": 13800
},
{
"epoch": 2.29600264288074,
"grad_norm": 1.4649907350540161,
"learning_rate": 7.6533421429358e-06,
"loss": 1.1518,
"step": 13900
},
{
"epoch": 2.312520647505781,
"grad_norm": 1.2642732858657837,
"learning_rate": 7.708402158352606e-06,
"loss": 1.117,
"step": 14000
},
{
"epoch": 2.312520647505781,
"eval_cer": 0.13544589692697284,
"eval_loss": 0.8784080147743225,
"eval_runtime": 48.3911,
"eval_samples_per_second": 34.944,
"eval_steps_per_second": 8.741,
"eval_wer": 0.7124836070692562,
"step": 14000
},
{
"epoch": 2.329038652130823,
"grad_norm": 1.14898681640625,
"learning_rate": 7.76346217376941e-06,
"loss": 1.0267,
"step": 14100
},
{
"epoch": 2.345556656755864,
"grad_norm": 1.1903839111328125,
"learning_rate": 7.818522189186215e-06,
"loss": 0.994,
"step": 14200
},
{
"epoch": 2.3620746613809054,
"grad_norm": 1.2429312467575073,
"learning_rate": 7.873582204603017e-06,
"loss": 0.9589,
"step": 14300
},
{
"epoch": 2.3785926660059467,
"grad_norm": 1.2845401763916016,
"learning_rate": 7.928642220019822e-06,
"loss": 0.9027,
"step": 14400
},
{
"epoch": 2.395110670630988,
"grad_norm": 1.204010009765625,
"learning_rate": 7.983702235436626e-06,
"loss": 0.8773,
"step": 14500
},
{
"epoch": 2.411628675256029,
"grad_norm": 1.1621061563491821,
"learning_rate": 8.038762250853432e-06,
"loss": 0.8351,
"step": 14600
},
{
"epoch": 2.4281466798810705,
"grad_norm": 1.153045654296875,
"learning_rate": 8.093822266270235e-06,
"loss": 0.8008,
"step": 14700
},
{
"epoch": 2.4446646845061117,
"grad_norm": 1.1481854915618896,
"learning_rate": 8.148882281687039e-06,
"loss": 0.7573,
"step": 14800
},
{
"epoch": 2.461182689131153,
"grad_norm": 1.1236218214035034,
"learning_rate": 8.203942297103844e-06,
"loss": 0.7381,
"step": 14900
},
{
"epoch": 2.4777006937561943,
"grad_norm": 0.9569886326789856,
"learning_rate": 8.259002312520648e-06,
"loss": 0.7118,
"step": 15000
},
{
"epoch": 2.4777006937561943,
"eval_cer": 0.1136900280610499,
"eval_loss": 0.5557882189750671,
"eval_runtime": 48.3382,
"eval_samples_per_second": 34.983,
"eval_steps_per_second": 8.751,
"eval_wer": 0.6227440204833573,
"step": 15000
},
{
"epoch": 2.4942186983812356,
"grad_norm": 1.3618088960647583,
"learning_rate": 8.314062327937452e-06,
"loss": 0.7074,
"step": 15100
},
{
"epoch": 2.510736703006277,
"grad_norm": 0.933047354221344,
"learning_rate": 8.369122343354257e-06,
"loss": 0.6549,
"step": 15200
},
{
"epoch": 2.527254707631318,
"grad_norm": 1.266646146774292,
"learning_rate": 8.424182358771061e-06,
"loss": 0.6476,
"step": 15300
},
{
"epoch": 2.5437727122563594,
"grad_norm": 1.2776057720184326,
"learning_rate": 8.479242374187867e-06,
"loss": 0.6121,
"step": 15400
},
{
"epoch": 2.5602907168814006,
"grad_norm": 0.9074415564537048,
"learning_rate": 8.534302389604669e-06,
"loss": 0.5804,
"step": 15500
},
{
"epoch": 2.576808721506442,
"grad_norm": 0.9598638415336609,
"learning_rate": 8.589362405021474e-06,
"loss": 0.5695,
"step": 15600
},
{
"epoch": 2.593326726131483,
"grad_norm": 1.142428994178772,
"learning_rate": 8.644422420438278e-06,
"loss": 0.5548,
"step": 15700
},
{
"epoch": 2.6098447307565245,
"grad_norm": 1.1081598997116089,
"learning_rate": 8.699482435855082e-06,
"loss": 0.5482,
"step": 15800
},
{
"epoch": 2.6263627353815657,
"grad_norm": 1.1400047540664673,
"learning_rate": 8.754542451271887e-06,
"loss": 0.5071,
"step": 15900
},
{
"epoch": 2.642880740006607,
"grad_norm": 1.0958024263381958,
"learning_rate": 8.809602466688691e-06,
"loss": 0.4936,
"step": 16000
},
{
"epoch": 2.642880740006607,
"eval_cer": 0.09633153103825885,
"eval_loss": 0.38487282395362854,
"eval_runtime": 48.7501,
"eval_samples_per_second": 34.687,
"eval_steps_per_second": 8.677,
"eval_wer": 0.5314432023980515,
"step": 16000
},
{
"epoch": 2.6593987446316483,
"grad_norm": 1.1065205335617065,
"learning_rate": 8.864662482105496e-06,
"loss": 0.4899,
"step": 16100
},
{
"epoch": 2.6759167492566895,
"grad_norm": 1.138617992401123,
"learning_rate": 8.9197224975223e-06,
"loss": 0.4721,
"step": 16200
},
{
"epoch": 2.692434753881731,
"grad_norm": 1.2217905521392822,
"learning_rate": 8.974782512939104e-06,
"loss": 0.4723,
"step": 16300
},
{
"epoch": 2.7089527585067725,
"grad_norm": 1.0747772455215454,
"learning_rate": 9.02984252835591e-06,
"loss": 0.4861,
"step": 16400
},
{
"epoch": 2.725470763131814,
"grad_norm": 1.0680921077728271,
"learning_rate": 9.084902543772713e-06,
"loss": 0.4374,
"step": 16500
},
{
"epoch": 2.741988767756855,
"grad_norm": 1.0042054653167725,
"learning_rate": 9.139962559189517e-06,
"loss": 0.4247,
"step": 16600
},
{
"epoch": 2.7585067723818963,
"grad_norm": 0.9904269576072693,
"learning_rate": 9.195022574606322e-06,
"loss": 0.4356,
"step": 16700
},
{
"epoch": 2.7750247770069376,
"grad_norm": 1.1831291913986206,
"learning_rate": 9.250082590023126e-06,
"loss": 0.41,
"step": 16800
},
{
"epoch": 2.791542781631979,
"grad_norm": 0.973407506942749,
"learning_rate": 9.305142605439931e-06,
"loss": 0.4053,
"step": 16900
},
{
"epoch": 2.80806078625702,
"grad_norm": 1.0600470304489136,
"learning_rate": 9.360202620856734e-06,
"loss": 0.4109,
"step": 17000
},
{
"epoch": 2.80806078625702,
"eval_cer": 0.08471357196632674,
"eval_loss": 0.3102828562259674,
"eval_runtime": 50.4826,
"eval_samples_per_second": 33.497,
"eval_steps_per_second": 8.379,
"eval_wer": 0.46568413164304,
"step": 17000
},
{
"epoch": 2.8245787908820614,
"grad_norm": 1.3490804433822632,
"learning_rate": 9.415262636273539e-06,
"loss": 0.4253,
"step": 17100
},
{
"epoch": 2.8410967955071027,
"grad_norm": 0.9531931281089783,
"learning_rate": 9.470322651690343e-06,
"loss": 0.4057,
"step": 17200
},
{
"epoch": 2.857614800132144,
"grad_norm": 1.31855046749115,
"learning_rate": 9.525382667107148e-06,
"loss": 0.3935,
"step": 17300
},
{
"epoch": 2.8741328047571852,
"grad_norm": 0.9209637641906738,
"learning_rate": 9.580442682523952e-06,
"loss": 0.3822,
"step": 17400
},
{
"epoch": 2.8906508093822265,
"grad_norm": 1.0796180963516235,
"learning_rate": 9.635502697940756e-06,
"loss": 0.3829,
"step": 17500
},
{
"epoch": 2.907168814007268,
"grad_norm": 0.9043625593185425,
"learning_rate": 9.690562713357561e-06,
"loss": 0.358,
"step": 17600
},
{
"epoch": 2.923686818632309,
"grad_norm": 0.956969678401947,
"learning_rate": 9.745622728774365e-06,
"loss": 0.3563,
"step": 17700
},
{
"epoch": 2.9402048232573508,
"grad_norm": 0.9611093997955322,
"learning_rate": 9.800682744191169e-06,
"loss": 0.3886,
"step": 17800
},
{
"epoch": 2.956722827882392,
"grad_norm": 0.9433591365814209,
"learning_rate": 9.855742759607974e-06,
"loss": 0.3646,
"step": 17900
},
{
"epoch": 2.9732408325074333,
"grad_norm": 1.0778321027755737,
"learning_rate": 9.910802775024778e-06,
"loss": 0.3928,
"step": 18000
},
{
"epoch": 2.9732408325074333,
"eval_cer": 0.07708233522688386,
"eval_loss": 0.2756275534629822,
"eval_runtime": 48.2974,
"eval_samples_per_second": 35.012,
"eval_steps_per_second": 8.758,
"eval_wer": 0.42696559045775306,
"step": 18000
},
{
"epoch": 2.9897588371324746,
"grad_norm": 0.880342423915863,
"learning_rate": 9.965862790441582e-06,
"loss": 0.3485,
"step": 18100
},
{
"epoch": 3.006276841757516,
"grad_norm": 0.955066442489624,
"learning_rate": 9.997675243793513e-06,
"loss": 0.3558,
"step": 18200
},
{
"epoch": 3.022794846382557,
"grad_norm": 0.9584730863571167,
"learning_rate": 9.991557464302758e-06,
"loss": 0.3374,
"step": 18300
},
{
"epoch": 3.0393128510075984,
"grad_norm": 0.9742453694343567,
"learning_rate": 9.985439684812002e-06,
"loss": 0.3238,
"step": 18400
},
{
"epoch": 3.0558308556326397,
"grad_norm": 0.9343051910400391,
"learning_rate": 9.979321905321245e-06,
"loss": 0.325,
"step": 18500
},
{
"epoch": 3.072348860257681,
"grad_norm": 0.9533226490020752,
"learning_rate": 9.973204125830489e-06,
"loss": 0.3221,
"step": 18600
},
{
"epoch": 3.088866864882722,
"grad_norm": 1.0769504308700562,
"learning_rate": 9.967086346339734e-06,
"loss": 0.3212,
"step": 18700
},
{
"epoch": 3.1053848695077635,
"grad_norm": 1.0930256843566895,
"learning_rate": 9.960968566848977e-06,
"loss": 0.323,
"step": 18800
},
{
"epoch": 3.1219028741328048,
"grad_norm": 1.0789791345596313,
"learning_rate": 9.954850787358221e-06,
"loss": 0.3082,
"step": 18900
},
{
"epoch": 3.138420878757846,
"grad_norm": 0.8638594150543213,
"learning_rate": 9.948733007867464e-06,
"loss": 0.3282,
"step": 19000
},
{
"epoch": 3.138420878757846,
"eval_cer": 0.07125624529464103,
"eval_loss": 0.25253963470458984,
"eval_runtime": 48.5825,
"eval_samples_per_second": 34.807,
"eval_steps_per_second": 8.707,
"eval_wer": 0.3938050334103541,
"step": 19000
},
{
"epoch": 3.1549388833828873,
"grad_norm": 1.095402479171753,
"learning_rate": 9.94261522837671e-06,
"loss": 0.3278,
"step": 19100
},
{
"epoch": 3.1714568880079286,
"grad_norm": 0.7922815680503845,
"learning_rate": 9.936497448885953e-06,
"loss": 0.3157,
"step": 19200
},
{
"epoch": 3.18797489263297,
"grad_norm": 0.9539555907249451,
"learning_rate": 9.930379669395196e-06,
"loss": 0.3025,
"step": 19300
},
{
"epoch": 3.204492897258011,
"grad_norm": 0.8902342915534973,
"learning_rate": 9.92426188990444e-06,
"loss": 0.3065,
"step": 19400
},
{
"epoch": 3.2210109018830524,
"grad_norm": 0.8279675841331482,
"learning_rate": 9.918144110413685e-06,
"loss": 0.2993,
"step": 19500
},
{
"epoch": 3.2375289065080937,
"grad_norm": 0.8788127899169922,
"learning_rate": 9.912026330922929e-06,
"loss": 0.3052,
"step": 19600
},
{
"epoch": 3.254046911133135,
"grad_norm": 1.043555736541748,
"learning_rate": 9.905908551432174e-06,
"loss": 0.3041,
"step": 19700
},
{
"epoch": 3.270564915758176,
"grad_norm": 1.0483660697937012,
"learning_rate": 9.899790771941417e-06,
"loss": 0.3075,
"step": 19800
},
{
"epoch": 3.2870829203832175,
"grad_norm": 1.1182364225387573,
"learning_rate": 9.89367299245066e-06,
"loss": 0.2896,
"step": 19900
},
{
"epoch": 3.303600925008259,
"grad_norm": 0.951245903968811,
"learning_rate": 9.887555212959906e-06,
"loss": 0.2924,
"step": 20000
},
{
"epoch": 3.303600925008259,
"eval_cer": 0.06822770515365136,
"eval_loss": 0.23321548104286194,
"eval_runtime": 47.9978,
"eval_samples_per_second": 35.231,
"eval_steps_per_second": 8.813,
"eval_wer": 0.3738837194779242,
"step": 20000
},
{
"epoch": 3.3201189296333005,
"grad_norm": 0.8600097894668579,
"learning_rate": 9.88143743346915e-06,
"loss": 0.2907,
"step": 20100
},
{
"epoch": 3.3366369342583417,
"grad_norm": 1.1580179929733276,
"learning_rate": 9.875319653978393e-06,
"loss": 0.2864,
"step": 20200
},
{
"epoch": 3.353154938883383,
"grad_norm": 0.8637755513191223,
"learning_rate": 9.869201874487638e-06,
"loss": 0.2851,
"step": 20300
},
{
"epoch": 3.3696729435084243,
"grad_norm": 0.8564406633377075,
"learning_rate": 9.863084094996881e-06,
"loss": 0.2808,
"step": 20400
},
{
"epoch": 3.3861909481334656,
"grad_norm": 0.8136361241340637,
"learning_rate": 9.856966315506125e-06,
"loss": 0.3224,
"step": 20500
},
{
"epoch": 3.402708952758507,
"grad_norm": 1.0240442752838135,
"learning_rate": 9.850848536015368e-06,
"loss": 0.2779,
"step": 20600
},
{
"epoch": 3.419226957383548,
"grad_norm": 1.09860360622406,
"learning_rate": 9.844730756524613e-06,
"loss": 0.2817,
"step": 20700
},
{
"epoch": 3.4357449620085894,
"grad_norm": 1.0666810274124146,
"learning_rate": 9.838612977033857e-06,
"loss": 0.2917,
"step": 20800
},
{
"epoch": 3.4522629666336306,
"grad_norm": 0.9965100288391113,
"learning_rate": 9.8324951975431e-06,
"loss": 0.3204,
"step": 20900
},
{
"epoch": 3.468780971258672,
"grad_norm": 0.9994562864303589,
"learning_rate": 9.826377418052344e-06,
"loss": 0.2647,
"step": 21000
},
{
"epoch": 3.468780971258672,
"eval_cer": 0.06489973307781809,
"eval_loss": 0.22496198117733002,
"eval_runtime": 48.3102,
"eval_samples_per_second": 35.003,
"eval_steps_per_second": 8.756,
"eval_wer": 0.3570224192843315,
"step": 21000
},
{
"epoch": 3.485298975883713,
"grad_norm": 0.7928122878074646,
"learning_rate": 9.820259638561589e-06,
"loss": 0.3177,
"step": 21100
},
{
"epoch": 3.5018169805087545,
"grad_norm": 0.8977111577987671,
"learning_rate": 9.814141859070832e-06,
"loss": 0.2634,
"step": 21200
},
{
"epoch": 3.5183349851337957,
"grad_norm": 0.8508104085922241,
"learning_rate": 9.808024079580076e-06,
"loss": 0.2672,
"step": 21300
},
{
"epoch": 3.534852989758837,
"grad_norm": 0.8211712837219238,
"learning_rate": 9.80190630008932e-06,
"loss": 0.2742,
"step": 21400
},
{
"epoch": 3.5513709943838783,
"grad_norm": 0.739600658416748,
"learning_rate": 9.795788520598564e-06,
"loss": 0.2756,
"step": 21500
},
{
"epoch": 3.56788899900892,
"grad_norm": 1.0395748615264893,
"learning_rate": 9.789670741107808e-06,
"loss": 0.277,
"step": 21600
},
{
"epoch": 3.5844070036339613,
"grad_norm": 0.8592670559883118,
"learning_rate": 9.783552961617051e-06,
"loss": 0.2576,
"step": 21700
},
{
"epoch": 3.6009250082590025,
"grad_norm": 2.0866379737854004,
"learning_rate": 9.777435182126297e-06,
"loss": 0.2561,
"step": 21800
},
{
"epoch": 3.617443012884044,
"grad_norm": 0.7784512042999268,
"learning_rate": 9.77131740263554e-06,
"loss": 0.2649,
"step": 21900
},
{
"epoch": 3.633961017509085,
"grad_norm": 0.8441452383995056,
"learning_rate": 9.765199623144783e-06,
"loss": 0.2516,
"step": 22000
},
{
"epoch": 3.633961017509085,
"eval_cer": 0.06247861200465403,
"eval_loss": 0.20949821174144745,
"eval_runtime": 48.003,
"eval_samples_per_second": 35.227,
"eval_steps_per_second": 8.812,
"eval_wer": 0.3444701180291014,
"step": 22000
},
{
"epoch": 3.6504790221341263,
"grad_norm": 1.5408446788787842,
"learning_rate": 9.759081843654029e-06,
"loss": 0.2725,
"step": 22100
},
{
"epoch": 3.6669970267591676,
"grad_norm": 0.9491544961929321,
"learning_rate": 9.752964064163272e-06,
"loss": 0.2484,
"step": 22200
},
{
"epoch": 3.683515031384209,
"grad_norm": 0.9039120674133301,
"learning_rate": 9.746846284672517e-06,
"loss": 0.2673,
"step": 22300
},
{
"epoch": 3.70003303600925,
"grad_norm": 2.336216926574707,
"learning_rate": 9.74072850518176e-06,
"loss": 0.2588,
"step": 22400
},
{
"epoch": 3.7165510406342914,
"grad_norm": 0.889430046081543,
"learning_rate": 9.734610725691004e-06,
"loss": 0.2418,
"step": 22500
},
{
"epoch": 3.7330690452593327,
"grad_norm": 0.9641227722167969,
"learning_rate": 9.728492946200248e-06,
"loss": 0.2447,
"step": 22600
},
{
"epoch": 3.749587049884374,
"grad_norm": 1.2751914262771606,
"learning_rate": 9.722375166709493e-06,
"loss": 0.2481,
"step": 22700
},
{
"epoch": 3.7661050545094152,
"grad_norm": 0.8244801759719849,
"learning_rate": 9.716257387218736e-06,
"loss": 0.281,
"step": 22800
},
{
"epoch": 3.7826230591344565,
"grad_norm": 0.8751392364501953,
"learning_rate": 9.71013960772798e-06,
"loss": 0.2613,
"step": 22900
},
{
"epoch": 3.799141063759498,
"grad_norm": 0.8482999801635742,
"learning_rate": 9.704021828237223e-06,
"loss": 0.2372,
"step": 23000
},
{
"epoch": 3.799141063759498,
"eval_cer": 0.06080179316952981,
"eval_loss": 0.20372046530246735,
"eval_runtime": 48.3892,
"eval_samples_per_second": 34.946,
"eval_steps_per_second": 8.742,
"eval_wer": 0.33447823643289826,
"step": 23000
},
{
"epoch": 3.815659068384539,
"grad_norm": 0.7899025082588196,
"learning_rate": 9.697904048746468e-06,
"loss": 0.239,
"step": 23100
},
{
"epoch": 3.8321770730095803,
"grad_norm": 1.0239996910095215,
"learning_rate": 9.691786269255712e-06,
"loss": 0.2355,
"step": 23200
},
{
"epoch": 3.8486950776346216,
"grad_norm": 1.0043885707855225,
"learning_rate": 9.685668489764955e-06,
"loss": 0.2517,
"step": 23300
},
{
"epoch": 3.865213082259663,
"grad_norm": 0.7398520708084106,
"learning_rate": 9.679550710274199e-06,
"loss": 0.2349,
"step": 23400
},
{
"epoch": 3.881731086884704,
"grad_norm": 0.8725094199180603,
"learning_rate": 9.673432930783444e-06,
"loss": 0.2357,
"step": 23500
},
{
"epoch": 3.8982490915097454,
"grad_norm": 0.9465588927268982,
"learning_rate": 9.667315151292687e-06,
"loss": 0.249,
"step": 23600
},
{
"epoch": 3.9147670961347867,
"grad_norm": 0.814136266708374,
"learning_rate": 9.66119737180193e-06,
"loss": 0.229,
"step": 23700
},
{
"epoch": 3.931285100759828,
"grad_norm": 0.7686489820480347,
"learning_rate": 9.655079592311174e-06,
"loss": 0.2287,
"step": 23800
},
{
"epoch": 3.9478031053848697,
"grad_norm": 1.0001749992370605,
"learning_rate": 9.64896181282042e-06,
"loss": 0.2334,
"step": 23900
},
{
"epoch": 3.964321110009911,
"grad_norm": 0.9546142220497131,
"learning_rate": 9.642844033329663e-06,
"loss": 0.2447,
"step": 24000
},
{
"epoch": 3.964321110009911,
"eval_cer": 0.058791321607008416,
"eval_loss": 0.1985878199338913,
"eval_runtime": 48.7285,
"eval_samples_per_second": 34.702,
"eval_steps_per_second": 8.681,
"eval_wer": 0.32417410853681383,
"step": 24000
},
{
"epoch": 3.980839114634952,
"grad_norm": 0.7841982841491699,
"learning_rate": 9.636726253838908e-06,
"loss": 0.2366,
"step": 24100
},
{
"epoch": 3.9973571192599935,
"grad_norm": 0.7850095629692078,
"learning_rate": 9.630608474348151e-06,
"loss": 0.2393,
"step": 24200
},
{
"epoch": 4.013875123885034,
"grad_norm": 0.8924582004547119,
"learning_rate": 9.624490694857395e-06,
"loss": 0.2382,
"step": 24300
},
{
"epoch": 4.030393128510076,
"grad_norm": 0.7775335907936096,
"learning_rate": 9.61837291536664e-06,
"loss": 0.2317,
"step": 24400
},
{
"epoch": 4.046911133135117,
"grad_norm": 0.8482388257980347,
"learning_rate": 9.612255135875884e-06,
"loss": 0.2673,
"step": 24500
},
{
"epoch": 4.063429137760158,
"grad_norm": 0.8175519704818726,
"learning_rate": 9.606137356385127e-06,
"loss": 0.2271,
"step": 24600
},
{
"epoch": 4.0799471423852,
"grad_norm": 0.6910988688468933,
"learning_rate": 9.600019576894372e-06,
"loss": 0.2321,
"step": 24700
},
{
"epoch": 4.096465147010242,
"grad_norm": 0.8976187109947205,
"learning_rate": 9.593901797403616e-06,
"loss": 0.2348,
"step": 24800
},
{
"epoch": 4.112983151635283,
"grad_norm": 0.8644862174987793,
"learning_rate": 9.587784017912859e-06,
"loss": 0.2219,
"step": 24900
},
{
"epoch": 4.129501156260324,
"grad_norm": 0.7493522763252258,
"learning_rate": 9.581666238422103e-06,
"loss": 0.2304,
"step": 25000
},
{
"epoch": 4.129501156260324,
"eval_cer": 0.05679796044076381,
"eval_loss": 0.18797007203102112,
"eval_runtime": 48.2897,
"eval_samples_per_second": 35.018,
"eval_steps_per_second": 8.76,
"eval_wer": 0.3129956910010616,
"step": 25000
},
{
"epoch": 4.146019160885365,
"grad_norm": 0.7841621041297913,
"learning_rate": 9.575548458931348e-06,
"loss": 0.2297,
"step": 25100
},
{
"epoch": 4.162537165510407,
"grad_norm": 0.8792735934257507,
"learning_rate": 9.569430679440591e-06,
"loss": 0.2305,
"step": 25200
},
{
"epoch": 4.179055170135448,
"grad_norm": 1.1668968200683594,
"learning_rate": 9.563312899949835e-06,
"loss": 0.2595,
"step": 25300
},
{
"epoch": 4.195573174760489,
"grad_norm": 0.859511137008667,
"learning_rate": 9.557195120459078e-06,
"loss": 0.217,
"step": 25400
},
{
"epoch": 4.2120911793855305,
"grad_norm": 0.9139505624771118,
"learning_rate": 9.551077340968323e-06,
"loss": 0.2263,
"step": 25500
},
{
"epoch": 4.228609184010572,
"grad_norm": 0.776094377040863,
"learning_rate": 9.544959561477567e-06,
"loss": 0.2208,
"step": 25600
},
{
"epoch": 4.245127188635613,
"grad_norm": 0.8811630606651306,
"learning_rate": 9.53884178198681e-06,
"loss": 0.2229,
"step": 25700
},
{
"epoch": 4.261645193260654,
"grad_norm": 0.9367398619651794,
"learning_rate": 9.532724002496054e-06,
"loss": 0.2221,
"step": 25800
},
{
"epoch": 4.2781631978856955,
"grad_norm": 0.8413158655166626,
"learning_rate": 9.526606223005299e-06,
"loss": 0.2616,
"step": 25900
},
{
"epoch": 4.294681202510737,
"grad_norm": 0.9497280120849609,
"learning_rate": 9.520488443514542e-06,
"loss": 0.2828,
"step": 26000
},
{
"epoch": 4.294681202510737,
"eval_cer": 0.05571145027718842,
"eval_loss": 0.18053312599658966,
"eval_runtime": 48.2767,
"eval_samples_per_second": 35.027,
"eval_steps_per_second": 8.762,
"eval_wer": 0.30774995316305503,
"step": 26000
},
{
"epoch": 4.311199207135778,
"grad_norm": 0.7471584677696228,
"learning_rate": 9.514370664023786e-06,
"loss": 0.217,
"step": 26100
},
{
"epoch": 4.327717211760819,
"grad_norm": 0.9198097586631775,
"learning_rate": 9.508252884533031e-06,
"loss": 0.2093,
"step": 26200
},
{
"epoch": 4.344235216385861,
"grad_norm": 1.2914992570877075,
"learning_rate": 9.502135105042274e-06,
"loss": 0.2117,
"step": 26300
},
{
"epoch": 4.360753221010902,
"grad_norm": 1.1176624298095703,
"learning_rate": 9.496017325551518e-06,
"loss": 0.2213,
"step": 26400
},
{
"epoch": 4.377271225635943,
"grad_norm": 0.799958348274231,
"learning_rate": 9.489899546060763e-06,
"loss": 0.2175,
"step": 26500
},
{
"epoch": 4.3937892302609844,
"grad_norm": 0.8993442058563232,
"learning_rate": 9.483781766570006e-06,
"loss": 0.2126,
"step": 26600
},
{
"epoch": 4.410307234886026,
"grad_norm": 1.000191330909729,
"learning_rate": 9.477663987079252e-06,
"loss": 0.2044,
"step": 26700
},
{
"epoch": 4.426825239511067,
"grad_norm": 1.1478374004364014,
"learning_rate": 9.471546207588495e-06,
"loss": 0.2325,
"step": 26800
},
{
"epoch": 4.443343244136108,
"grad_norm": 0.7666307091712952,
"learning_rate": 9.465428428097739e-06,
"loss": 0.2086,
"step": 26900
},
{
"epoch": 4.4598612487611495,
"grad_norm": 0.9440354108810425,
"learning_rate": 9.459310648606982e-06,
"loss": 0.2054,
"step": 27000
},
{
"epoch": 4.4598612487611495,
"eval_cer": 0.05489870645404148,
"eval_loss": 0.17803701758384705,
"eval_runtime": 48.3147,
"eval_samples_per_second": 35.0,
"eval_steps_per_second": 8.755,
"eval_wer": 0.30181727346530945,
"step": 27000
},
{
"epoch": 4.476379253386191,
"grad_norm": 0.975606381893158,
"learning_rate": 9.453192869116227e-06,
"loss": 0.2188,
"step": 27100
},
{
"epoch": 4.492897258011232,
"grad_norm": 1.0825639963150024,
"learning_rate": 9.44707508962547e-06,
"loss": 0.1951,
"step": 27200
},
{
"epoch": 4.509415262636273,
"grad_norm": 0.858279824256897,
"learning_rate": 9.440957310134714e-06,
"loss": 0.2202,
"step": 27300
},
{
"epoch": 4.525933267261315,
"grad_norm": 0.8295080661773682,
"learning_rate": 9.434839530643958e-06,
"loss": 0.2143,
"step": 27400
},
{
"epoch": 4.542451271886356,
"grad_norm": 1.0606642961502075,
"learning_rate": 9.428721751153203e-06,
"loss": 0.2116,
"step": 27500
},
{
"epoch": 4.558969276511397,
"grad_norm": 0.8650080561637878,
"learning_rate": 9.422603971662446e-06,
"loss": 0.202,
"step": 27600
},
{
"epoch": 4.575487281136438,
"grad_norm": 0.7315616011619568,
"learning_rate": 9.41648619217169e-06,
"loss": 0.2088,
"step": 27700
},
{
"epoch": 4.59200528576148,
"grad_norm": 0.6952201724052429,
"learning_rate": 9.410368412680933e-06,
"loss": 0.2007,
"step": 27800
},
{
"epoch": 4.608523290386521,
"grad_norm": 1.3911277055740356,
"learning_rate": 9.404250633190178e-06,
"loss": 0.2242,
"step": 27900
},
{
"epoch": 4.625041295011562,
"grad_norm": 0.9672855734825134,
"learning_rate": 9.398132853699422e-06,
"loss": 0.2021,
"step": 28000
},
{
"epoch": 4.625041295011562,
"eval_cer": 0.05435117377318459,
"eval_loss": 0.17107851803302765,
"eval_runtime": 48.3911,
"eval_samples_per_second": 34.944,
"eval_steps_per_second": 8.741,
"eval_wer": 0.2986323612065197,
"step": 28000
},
{
"epoch": 4.6415592996366035,
"grad_norm": 0.869714617729187,
"learning_rate": 9.392015074208665e-06,
"loss": 0.2054,
"step": 28100
},
{
"epoch": 4.658077304261646,
"grad_norm": 0.7512599229812622,
"learning_rate": 9.385897294717909e-06,
"loss": 0.2449,
"step": 28200
},
{
"epoch": 4.674595308886687,
"grad_norm": 0.8734112977981567,
"learning_rate": 9.379779515227154e-06,
"loss": 0.2067,
"step": 28300
},
{
"epoch": 4.691113313511728,
"grad_norm": 1.0213971138000488,
"learning_rate": 9.373661735736397e-06,
"loss": 0.1941,
"step": 28400
},
{
"epoch": 4.7076313181367695,
"grad_norm": 0.8136150240898132,
"learning_rate": 9.367543956245642e-06,
"loss": 0.197,
"step": 28500
},
{
"epoch": 4.724149322761811,
"grad_norm": 0.889690637588501,
"learning_rate": 9.361426176754886e-06,
"loss": 0.1887,
"step": 28600
},
{
"epoch": 4.740667327386852,
"grad_norm": 0.8246539235115051,
"learning_rate": 9.35530839726413e-06,
"loss": 0.1963,
"step": 28700
},
{
"epoch": 4.757185332011893,
"grad_norm": 1.07891845703125,
"learning_rate": 9.349190617773374e-06,
"loss": 0.2013,
"step": 28800
},
{
"epoch": 4.773703336636935,
"grad_norm": 0.8867871165275574,
"learning_rate": 9.343072838282618e-06,
"loss": 0.1969,
"step": 28900
},
{
"epoch": 4.790221341261976,
"grad_norm": 1.0651185512542725,
"learning_rate": 9.336955058791861e-06,
"loss": 0.1944,
"step": 29000
},
{
"epoch": 4.790221341261976,
"eval_cer": 0.05322188761891725,
"eval_loss": 0.16940154135227203,
"eval_runtime": 48.8775,
"eval_samples_per_second": 34.597,
"eval_steps_per_second": 8.654,
"eval_wer": 0.29307437706863176,
"step": 29000
},
{
"epoch": 4.806739345887017,
"grad_norm": 0.8285331726074219,
"learning_rate": 9.330837279301107e-06,
"loss": 0.2038,
"step": 29100
},
{
"epoch": 4.823257350512058,
"grad_norm": 0.8632585406303406,
"learning_rate": 9.32471949981035e-06,
"loss": 0.1947,
"step": 29200
},
{
"epoch": 4.8397753551371,
"grad_norm": 0.7332074046134949,
"learning_rate": 9.318601720319593e-06,
"loss": 0.1929,
"step": 29300
},
{
"epoch": 4.856293359762141,
"grad_norm": 0.8279902935028076,
"learning_rate": 9.312483940828837e-06,
"loss": 0.188,
"step": 29400
},
{
"epoch": 4.872811364387182,
"grad_norm": 0.887501060962677,
"learning_rate": 9.306366161338082e-06,
"loss": 0.2087,
"step": 29500
},
{
"epoch": 4.8893293690122235,
"grad_norm": 0.8915200233459473,
"learning_rate": 9.300248381847326e-06,
"loss": 0.1861,
"step": 29600
},
{
"epoch": 4.905847373637265,
"grad_norm": 0.8219689130783081,
"learning_rate": 9.294130602356569e-06,
"loss": 0.1884,
"step": 29700
},
{
"epoch": 4.922365378262306,
"grad_norm": 0.8272607326507568,
"learning_rate": 9.288012822865812e-06,
"loss": 0.1991,
"step": 29800
},
{
"epoch": 4.938883382887347,
"grad_norm": 2.43432354927063,
"learning_rate": 9.281895043375058e-06,
"loss": 0.196,
"step": 29900
},
{
"epoch": 4.955401387512389,
"grad_norm": 0.922092616558075,
"learning_rate": 9.275777263884301e-06,
"loss": 0.1933,
"step": 30000
},
{
"epoch": 4.955401387512389,
"eval_cer": 0.0528796796933817,
"eval_loss": 0.1640305370092392,
"eval_runtime": 50.5291,
"eval_samples_per_second": 33.466,
"eval_steps_per_second": 8.371,
"eval_wer": 0.29063885592955724,
"step": 30000
},
{
"epoch": 4.97191939213743,
"grad_norm": 0.6855641603469849,
"learning_rate": 9.269659484393545e-06,
"loss": 0.1918,
"step": 30100
},
{
"epoch": 4.988437396762471,
"grad_norm": 0.7922428846359253,
"learning_rate": 9.263541704902788e-06,
"loss": 0.2464,
"step": 30200
},
{
"epoch": 5.004955401387512,
"grad_norm": 0.9734669923782349,
"learning_rate": 9.257423925412033e-06,
"loss": 0.1909,
"step": 30300
},
{
"epoch": 5.021473406012554,
"grad_norm": 0.9491944313049316,
"learning_rate": 9.251306145921277e-06,
"loss": 0.1824,
"step": 30400
},
{
"epoch": 5.037991410637595,
"grad_norm": 1.4745386838912964,
"learning_rate": 9.24518836643052e-06,
"loss": 0.1934,
"step": 30500
},
{
"epoch": 5.054509415262636,
"grad_norm": 0.8815566897392273,
"learning_rate": 9.239070586939765e-06,
"loss": 0.1844,
"step": 30600
},
{
"epoch": 5.0710274198876775,
"grad_norm": 0.741477906703949,
"learning_rate": 9.232952807449009e-06,
"loss": 0.2308,
"step": 30700
},
{
"epoch": 5.087545424512719,
"grad_norm": 0.784695029258728,
"learning_rate": 9.226835027958252e-06,
"loss": 0.1852,
"step": 30800
},
{
"epoch": 5.10406342913776,
"grad_norm": 0.7334086298942566,
"learning_rate": 9.220717248467497e-06,
"loss": 0.1885,
"step": 30900
},
{
"epoch": 5.120581433762801,
"grad_norm": 1.0273959636688232,
"learning_rate": 9.21459946897674e-06,
"loss": 0.1885,
"step": 31000
},
{
"epoch": 5.120581433762801,
"eval_cer": 0.05156217918006981,
"eval_loss": 0.16276109218597412,
"eval_runtime": 48.3633,
"eval_samples_per_second": 34.965,
"eval_steps_per_second": 8.746,
"eval_wer": 0.28289514769249985,
"step": 31000
},
{
"epoch": 5.1370994383878426,
"grad_norm": 0.8171842694282532,
"learning_rate": 9.208481689485986e-06,
"loss": 0.186,
"step": 31100
},
{
"epoch": 5.153617443012884,
"grad_norm": 0.9652734994888306,
"learning_rate": 9.20236390999523e-06,
"loss": 0.1813,
"step": 31200
},
{
"epoch": 5.170135447637925,
"grad_norm": 1.062037467956543,
"learning_rate": 9.196246130504473e-06,
"loss": 0.1744,
"step": 31300
},
{
"epoch": 5.186653452262966,
"grad_norm": 0.8532341718673706,
"learning_rate": 9.190128351013718e-06,
"loss": 0.1833,
"step": 31400
},
{
"epoch": 5.203171456888008,
"grad_norm": 0.7094704508781433,
"learning_rate": 9.184010571522962e-06,
"loss": 0.1827,
"step": 31500
},
{
"epoch": 5.219689461513049,
"grad_norm": 0.8176188468933105,
"learning_rate": 9.177892792032205e-06,
"loss": 0.1896,
"step": 31600
},
{
"epoch": 5.23620746613809,
"grad_norm": 0.9988218545913696,
"learning_rate": 9.171775012541448e-06,
"loss": 0.1953,
"step": 31700
},
{
"epoch": 5.2527254707631315,
"grad_norm": 1.0254257917404175,
"learning_rate": 9.165657233050694e-06,
"loss": 0.1858,
"step": 31800
},
{
"epoch": 5.269243475388173,
"grad_norm": 0.7182506918907166,
"learning_rate": 9.159539453559937e-06,
"loss": 0.2522,
"step": 31900
},
{
"epoch": 5.285761480013214,
"grad_norm": 0.8318942189216614,
"learning_rate": 9.15342167406918e-06,
"loss": 0.1871,
"step": 32000
},
{
"epoch": 5.285761480013214,
"eval_cer": 0.05092909451782903,
"eval_loss": 0.15823741257190704,
"eval_runtime": 48.5699,
"eval_samples_per_second": 34.816,
"eval_steps_per_second": 8.709,
"eval_wer": 0.28108411915318804,
"step": 32000
},
{
"epoch": 5.302279484638255,
"grad_norm": 0.6533938050270081,
"learning_rate": 9.147303894578424e-06,
"loss": 0.1801,
"step": 32100
},
{
"epoch": 5.3187974892632965,
"grad_norm": 0.8273053169250488,
"learning_rate": 9.141186115087669e-06,
"loss": 0.1815,
"step": 32200
},
{
"epoch": 5.335315493888339,
"grad_norm": 0.8062841892242432,
"learning_rate": 9.135068335596913e-06,
"loss": 0.2024,
"step": 32300
},
{
"epoch": 5.35183349851338,
"grad_norm": 0.9883460402488708,
"learning_rate": 9.128950556106156e-06,
"loss": 0.176,
"step": 32400
},
{
"epoch": 5.368351503138421,
"grad_norm": 1.0027878284454346,
"learning_rate": 9.1228327766154e-06,
"loss": 0.2513,
"step": 32500
},
{
"epoch": 5.3848695077634625,
"grad_norm": 0.6766846776008606,
"learning_rate": 9.116714997124645e-06,
"loss": 0.2155,
"step": 32600
},
{
"epoch": 5.401387512388504,
"grad_norm": 0.7808175086975098,
"learning_rate": 9.110597217633888e-06,
"loss": 0.188,
"step": 32700
},
{
"epoch": 5.417905517013545,
"grad_norm": 0.7467240691184998,
"learning_rate": 9.104479438143132e-06,
"loss": 0.182,
"step": 32800
},
{
"epoch": 5.434423521638586,
"grad_norm": 0.6580876708030701,
"learning_rate": 9.098361658652377e-06,
"loss": 0.1771,
"step": 32900
},
{
"epoch": 5.450941526263628,
"grad_norm": 0.7481684684753418,
"learning_rate": 9.09224387916162e-06,
"loss": 0.1811,
"step": 33000
},
{
"epoch": 5.450941526263628,
"eval_cer": 0.05002224351515981,
"eval_loss": 0.15250813961029053,
"eval_runtime": 49.105,
"eval_samples_per_second": 34.436,
"eval_steps_per_second": 8.614,
"eval_wer": 0.2747142946356086,
"step": 33000
},
{
"epoch": 5.467459530888669,
"grad_norm": 0.9323834180831909,
"learning_rate": 9.086126099670864e-06,
"loss": 0.2123,
"step": 33100
},
{
"epoch": 5.48397753551371,
"grad_norm": 0.779329776763916,
"learning_rate": 9.080008320180109e-06,
"loss": 0.1807,
"step": 33200
},
{
"epoch": 5.500495540138751,
"grad_norm": 1.0125453472137451,
"learning_rate": 9.073890540689352e-06,
"loss": 0.1746,
"step": 33300
},
{
"epoch": 5.517013544763793,
"grad_norm": 0.8062576055526733,
"learning_rate": 9.067772761198596e-06,
"loss": 0.1747,
"step": 33400
},
{
"epoch": 5.533531549388834,
"grad_norm": 0.817570686340332,
"learning_rate": 9.061654981707841e-06,
"loss": 0.1707,
"step": 33500
},
{
"epoch": 5.550049554013875,
"grad_norm": 0.7053462266921997,
"learning_rate": 9.055537202217084e-06,
"loss": 0.1777,
"step": 33600
},
{
"epoch": 5.5665675586389165,
"grad_norm": 0.8066178560256958,
"learning_rate": 9.049419422726328e-06,
"loss": 0.171,
"step": 33700
},
{
"epoch": 5.583085563263958,
"grad_norm": 0.8742169141769409,
"learning_rate": 9.043301643235573e-06,
"loss": 0.1721,
"step": 33800
},
{
"epoch": 5.599603567888999,
"grad_norm": 0.7562609314918518,
"learning_rate": 9.037183863744816e-06,
"loss": 0.2377,
"step": 33900
},
{
"epoch": 5.61612157251404,
"grad_norm": 1.084651231765747,
"learning_rate": 9.03106608425406e-06,
"loss": 0.2145,
"step": 34000
},
{
"epoch": 5.61612157251404,
"eval_cer": 0.04938060365478064,
"eval_loss": 0.14984501898288727,
"eval_runtime": 48.5125,
"eval_samples_per_second": 34.857,
"eval_steps_per_second": 8.719,
"eval_wer": 0.2715918316367951,
"step": 34000
},
{
"epoch": 5.632639577139082,
"grad_norm": 0.8906255960464478,
"learning_rate": 9.024948304763303e-06,
"loss": 0.188,
"step": 34100
},
{
"epoch": 5.649157581764123,
"grad_norm": 0.8091058135032654,
"learning_rate": 9.018830525272549e-06,
"loss": 0.1696,
"step": 34200
},
{
"epoch": 5.665675586389164,
"grad_norm": 0.9051063656806946,
"learning_rate": 9.012712745781792e-06,
"loss": 0.1744,
"step": 34300
},
{
"epoch": 5.682193591014205,
"grad_norm": 1.0009392499923706,
"learning_rate": 9.006594966291035e-06,
"loss": 0.1738,
"step": 34400
},
{
"epoch": 5.698711595639247,
"grad_norm": 0.8981117010116577,
"learning_rate": 9.000477186800279e-06,
"loss": 0.1855,
"step": 34500
},
{
"epoch": 5.715229600264288,
"grad_norm": 0.9005815386772156,
"learning_rate": 8.994359407309524e-06,
"loss": 0.1784,
"step": 34600
},
{
"epoch": 5.731747604889329,
"grad_norm": 0.9097332954406738,
"learning_rate": 8.988241627818768e-06,
"loss": 0.1833,
"step": 34700
},
{
"epoch": 5.7482656095143705,
"grad_norm": 0.7952153086662292,
"learning_rate": 8.982123848328011e-06,
"loss": 0.1676,
"step": 34800
},
{
"epoch": 5.764783614139412,
"grad_norm": 0.840761661529541,
"learning_rate": 8.976006068837254e-06,
"loss": 0.1744,
"step": 34900
},
{
"epoch": 5.781301618764453,
"grad_norm": 0.972186267375946,
"learning_rate": 8.9698882893465e-06,
"loss": 0.1713,
"step": 35000
},
{
"epoch": 5.781301618764453,
"eval_cer": 0.049508931626856476,
"eval_loss": 0.14971515536308289,
"eval_runtime": 48.1943,
"eval_samples_per_second": 35.087,
"eval_steps_per_second": 8.777,
"eval_wer": 0.2710297882970087,
"step": 35000
},
{
"epoch": 5.797819623389494,
"grad_norm": 0.7469506859779358,
"learning_rate": 8.963770509855743e-06,
"loss": 0.165,
"step": 35100
},
{
"epoch": 5.814337628014536,
"grad_norm": 0.8786357045173645,
"learning_rate": 8.957652730364987e-06,
"loss": 0.1715,
"step": 35200
},
{
"epoch": 5.830855632639577,
"grad_norm": 0.8044286370277405,
"learning_rate": 8.951534950874232e-06,
"loss": 0.1687,
"step": 35300
},
{
"epoch": 5.847373637264618,
"grad_norm": 0.859174370765686,
"learning_rate": 8.945417171383475e-06,
"loss": 0.1658,
"step": 35400
},
{
"epoch": 5.863891641889659,
"grad_norm": 0.8090763092041016,
"learning_rate": 8.93929939189272e-06,
"loss": 0.1836,
"step": 35500
},
{
"epoch": 5.880409646514701,
"grad_norm": 0.8392448425292969,
"learning_rate": 8.933181612401964e-06,
"loss": 0.1796,
"step": 35600
},
{
"epoch": 5.896927651139742,
"grad_norm": 0.8285984396934509,
"learning_rate": 8.927063832911207e-06,
"loss": 0.1703,
"step": 35700
},
{
"epoch": 5.913445655764784,
"grad_norm": 0.9240791201591492,
"learning_rate": 8.920946053420452e-06,
"loss": 0.2069,
"step": 35800
},
{
"epoch": 5.929963660389825,
"grad_norm": 0.9001137018203735,
"learning_rate": 8.914828273929696e-06,
"loss": 0.1724,
"step": 35900
},
{
"epoch": 5.946481665014867,
"grad_norm": 0.8891072273254395,
"learning_rate": 8.90871049443894e-06,
"loss": 0.1766,
"step": 36000
},
{
"epoch": 5.946481665014867,
"eval_cer": 0.04906406132366026,
"eval_loss": 0.14797988533973694,
"eval_runtime": 48.6094,
"eval_samples_per_second": 34.787,
"eval_steps_per_second": 8.702,
"eval_wer": 0.2692187597576969,
"step": 36000
},
{
"epoch": 5.962999669639908,
"grad_norm": 1.0912429094314575,
"learning_rate": 8.902592714948183e-06,
"loss": 0.1653,
"step": 36100
},
{
"epoch": 5.979517674264949,
"grad_norm": 1.203782320022583,
"learning_rate": 8.896474935457428e-06,
"loss": 0.1675,
"step": 36200
},
{
"epoch": 5.9960356788899905,
"grad_norm": 0.7841401696205139,
"learning_rate": 8.890357155966671e-06,
"loss": 0.1652,
"step": 36300
},
{
"epoch": 6.012553683515032,
"grad_norm": 0.841820478439331,
"learning_rate": 8.884239376475915e-06,
"loss": 0.1677,
"step": 36400
},
{
"epoch": 6.029071688140073,
"grad_norm": 0.8913053870201111,
"learning_rate": 8.878121596985158e-06,
"loss": 0.1613,
"step": 36500
},
{
"epoch": 6.045589692765114,
"grad_norm": 1.3167953491210938,
"learning_rate": 8.872003817494404e-06,
"loss": 0.1639,
"step": 36600
},
{
"epoch": 6.0621076973901555,
"grad_norm": 0.7834457159042358,
"learning_rate": 8.865886038003647e-06,
"loss": 0.1743,
"step": 36700
},
{
"epoch": 6.078625702015197,
"grad_norm": 0.790767252445221,
"learning_rate": 8.85976825851289e-06,
"loss": 0.1736,
"step": 36800
},
{
"epoch": 6.095143706640238,
"grad_norm": 0.8585237860679626,
"learning_rate": 8.853650479022134e-06,
"loss": 0.1764,
"step": 36900
},
{
"epoch": 6.111661711265279,
"grad_norm": 0.9544495344161987,
"learning_rate": 8.847532699531379e-06,
"loss": 0.1662,
"step": 37000
},
{
"epoch": 6.111661711265279,
"eval_cer": 0.04808021353774553,
"eval_loss": 0.1443062424659729,
"eval_runtime": 48.3927,
"eval_samples_per_second": 34.943,
"eval_steps_per_second": 8.741,
"eval_wer": 0.2633485293199276,
"step": 37000
},
{
"epoch": 6.128179715890321,
"grad_norm": 0.6950782537460327,
"learning_rate": 8.841414920040623e-06,
"loss": 0.1574,
"step": 37100
},
{
"epoch": 6.144697720515362,
"grad_norm": 0.7609145641326904,
"learning_rate": 8.835297140549866e-06,
"loss": 0.1699,
"step": 37200
},
{
"epoch": 6.161215725140403,
"grad_norm": 0.6571762561798096,
"learning_rate": 8.829179361059111e-06,
"loss": 0.1767,
"step": 37300
},
{
"epoch": 6.177733729765444,
"grad_norm": 1.0556763410568237,
"learning_rate": 8.823061581568355e-06,
"loss": 0.1562,
"step": 37400
},
{
"epoch": 6.194251734390486,
"grad_norm": 2.556347131729126,
"learning_rate": 8.816943802077598e-06,
"loss": 0.1628,
"step": 37500
},
{
"epoch": 6.210769739015527,
"grad_norm": 0.7034619450569153,
"learning_rate": 8.810826022586843e-06,
"loss": 0.1953,
"step": 37600
},
{
"epoch": 6.227287743640568,
"grad_norm": 0.7905510067939758,
"learning_rate": 8.804708243096087e-06,
"loss": 0.163,
"step": 37700
},
{
"epoch": 6.2438057482656095,
"grad_norm": 0.5802010893821716,
"learning_rate": 8.79859046360533e-06,
"loss": 0.1669,
"step": 37800
},
{
"epoch": 6.260323752890651,
"grad_norm": 0.6908354163169861,
"learning_rate": 8.792472684114575e-06,
"loss": 0.1594,
"step": 37900
},
{
"epoch": 6.276841757515692,
"grad_norm": 0.5971213579177856,
"learning_rate": 8.786354904623819e-06,
"loss": 0.1629,
"step": 38000
},
{
"epoch": 6.276841757515692,
"eval_cer": 0.04748134966805831,
"eval_loss": 0.14269790053367615,
"eval_runtime": 48.0871,
"eval_samples_per_second": 35.165,
"eval_steps_per_second": 8.797,
"eval_wer": 0.2614126022606632,
"step": 38000
},
{
"epoch": 6.293359762140733,
"grad_norm": 0.6142451763153076,
"learning_rate": 8.780237125133062e-06,
"loss": 0.1564,
"step": 38100
},
{
"epoch": 6.309877766765775,
"grad_norm": 0.6337829232215881,
"learning_rate": 8.774119345642307e-06,
"loss": 0.1605,
"step": 38200
},
{
"epoch": 6.326395771390816,
"grad_norm": 0.9899505972862244,
"learning_rate": 8.76800156615155e-06,
"loss": 0.1568,
"step": 38300
},
{
"epoch": 6.342913776015857,
"grad_norm": 0.8140648007392883,
"learning_rate": 8.761883786660794e-06,
"loss": 0.1671,
"step": 38400
},
{
"epoch": 6.359431780640898,
"grad_norm": 0.9894407987594604,
"learning_rate": 8.755766007170038e-06,
"loss": 0.1635,
"step": 38500
},
{
"epoch": 6.37594978526594,
"grad_norm": 0.6572480797767639,
"learning_rate": 8.749648227679283e-06,
"loss": 0.1673,
"step": 38600
},
{
"epoch": 6.392467789890981,
"grad_norm": 0.6784759759902954,
"learning_rate": 8.743530448188526e-06,
"loss": 0.1602,
"step": 38700
},
{
"epoch": 6.408985794516022,
"grad_norm": 1.053363561630249,
"learning_rate": 8.73741266869777e-06,
"loss": 0.1649,
"step": 38800
},
{
"epoch": 6.4255037991410635,
"grad_norm": 0.8504717946052551,
"learning_rate": 8.731294889207013e-06,
"loss": 0.1679,
"step": 38900
},
{
"epoch": 6.442021803766105,
"grad_norm": 0.8182229399681091,
"learning_rate": 8.725177109716258e-06,
"loss": 0.1607,
"step": 39000
},
{
"epoch": 6.442021803766105,
"eval_cer": 0.047164807336937925,
"eval_loss": 0.139786496758461,
"eval_runtime": 48.4561,
"eval_samples_per_second": 34.898,
"eval_steps_per_second": 8.73,
"eval_wer": 0.2596015737213514,
"step": 39000
},
{
"epoch": 6.458539808391146,
"grad_norm": 0.7049907445907593,
"learning_rate": 8.719059330225502e-06,
"loss": 0.1582,
"step": 39100
},
{
"epoch": 6.475057813016187,
"grad_norm": 0.928734540939331,
"learning_rate": 8.712941550734745e-06,
"loss": 0.1542,
"step": 39200
},
{
"epoch": 6.491575817641229,
"grad_norm": 0.7158243656158447,
"learning_rate": 8.706823771243989e-06,
"loss": 0.2327,
"step": 39300
},
{
"epoch": 6.50809382226627,
"grad_norm": 0.7434096336364746,
"learning_rate": 8.700705991753234e-06,
"loss": 0.1557,
"step": 39400
},
{
"epoch": 6.524611826891311,
"grad_norm": 0.9645175933837891,
"learning_rate": 8.694588212262477e-06,
"loss": 0.1658,
"step": 39500
},
{
"epoch": 6.541129831516352,
"grad_norm": 0.7772352695465088,
"learning_rate": 8.688470432771721e-06,
"loss": 0.1707,
"step": 39600
},
{
"epoch": 6.557647836141394,
"grad_norm": 0.7710452675819397,
"learning_rate": 8.682352653280966e-06,
"loss": 0.1546,
"step": 39700
},
{
"epoch": 6.574165840766435,
"grad_norm": 0.6807363033294678,
"learning_rate": 8.67623487379021e-06,
"loss": 0.1524,
"step": 39800
},
{
"epoch": 6.590683845391476,
"grad_norm": 0.6985335350036621,
"learning_rate": 8.670117094299455e-06,
"loss": 0.2057,
"step": 39900
},
{
"epoch": 6.607201850016518,
"grad_norm": 0.6793562173843384,
"learning_rate": 8.663999314808698e-06,
"loss": 0.1616,
"step": 40000
},
{
"epoch": 6.607201850016518,
"eval_cer": 0.04697659297789337,
"eval_loss": 0.14157184958457947,
"eval_runtime": 48.5024,
"eval_samples_per_second": 34.864,
"eval_steps_per_second": 8.721,
"eval_wer": 0.25810279148192095,
"step": 40000
},
{
"epoch": 6.62371985464156,
"grad_norm": 0.9306012988090515,
"learning_rate": 8.657881535317942e-06,
"loss": 0.1916,
"step": 40100
},
{
"epoch": 6.640237859266601,
"grad_norm": 0.7695144414901733,
"learning_rate": 8.651763755827187e-06,
"loss": 0.1581,
"step": 40200
},
{
"epoch": 6.656755863891642,
"grad_norm": 0.9468954205513,
"learning_rate": 8.64564597633643e-06,
"loss": 0.1543,
"step": 40300
},
{
"epoch": 6.6732738685166835,
"grad_norm": 0.9969133138656616,
"learning_rate": 8.639528196845674e-06,
"loss": 0.2421,
"step": 40400
},
{
"epoch": 6.689791873141725,
"grad_norm": 0.7657153606414795,
"learning_rate": 8.633410417354917e-06,
"loss": 0.1498,
"step": 40500
},
{
"epoch": 6.706309877766766,
"grad_norm": 0.6543861031532288,
"learning_rate": 8.627292637864162e-06,
"loss": 0.1592,
"step": 40600
},
{
"epoch": 6.722827882391807,
"grad_norm": 0.7110750675201416,
"learning_rate": 8.621174858373406e-06,
"loss": 0.1871,
"step": 40700
},
{
"epoch": 6.739345887016849,
"grad_norm": 0.6737387776374817,
"learning_rate": 8.61505707888265e-06,
"loss": 0.1634,
"step": 40800
},
{
"epoch": 6.75586389164189,
"grad_norm": 0.9051392078399658,
"learning_rate": 8.608939299391893e-06,
"loss": 0.1535,
"step": 40900
},
{
"epoch": 6.772381896266931,
"grad_norm": 0.7674338221549988,
"learning_rate": 8.602821519901138e-06,
"loss": 0.1512,
"step": 41000
},
{
"epoch": 6.772381896266931,
"eval_cer": 0.046300732324960646,
"eval_loss": 0.13961651921272278,
"eval_runtime": 48.6061,
"eval_samples_per_second": 34.79,
"eval_steps_per_second": 8.703,
"eval_wer": 0.2526697058639855,
"step": 41000
},
{
"epoch": 6.788899900891972,
"grad_norm": 0.9694798588752747,
"learning_rate": 8.596703740410381e-06,
"loss": 0.1555,
"step": 41100
},
{
"epoch": 6.805417905517014,
"grad_norm": 0.7667569518089294,
"learning_rate": 8.590585960919625e-06,
"loss": 0.1791,
"step": 41200
},
{
"epoch": 6.821935910142055,
"grad_norm": 0.8261126279830933,
"learning_rate": 8.584468181428868e-06,
"loss": 0.1554,
"step": 41300
},
{
"epoch": 6.838453914767096,
"grad_norm": 0.8309662342071533,
"learning_rate": 8.578350401938113e-06,
"loss": 0.1863,
"step": 41400
},
{
"epoch": 6.8549719193921375,
"grad_norm": 0.8523270487785339,
"learning_rate": 8.572232622447357e-06,
"loss": 0.1847,
"step": 41500
},
{
"epoch": 6.871489924017179,
"grad_norm": 0.7406333684921265,
"learning_rate": 8.5661148429566e-06,
"loss": 0.1486,
"step": 41600
},
{
"epoch": 6.88800792864222,
"grad_norm": 0.6426775455474854,
"learning_rate": 8.559997063465845e-06,
"loss": 0.1485,
"step": 41700
},
{
"epoch": 6.904525933267261,
"grad_norm": 0.705653727054596,
"learning_rate": 8.553879283975089e-06,
"loss": 0.1645,
"step": 41800
},
{
"epoch": 6.9210439378923025,
"grad_norm": 0.7143226265907288,
"learning_rate": 8.547761504484332e-06,
"loss": 0.1483,
"step": 41900
},
{
"epoch": 6.937561942517344,
"grad_norm": 0.6951993107795715,
"learning_rate": 8.541643724993578e-06,
"loss": 0.1497,
"step": 42000
},
{
"epoch": 6.937561942517344,
"eval_cer": 0.046266511532407094,
"eval_loss": 0.13769099116325378,
"eval_runtime": 48.8917,
"eval_samples_per_second": 34.587,
"eval_steps_per_second": 8.652,
"eval_wer": 0.2522950103041279,
"step": 42000
},
{
"epoch": 6.954079947142385,
"grad_norm": 0.781966507434845,
"learning_rate": 8.535525945502821e-06,
"loss": 0.1469,
"step": 42100
},
{
"epoch": 6.970597951767426,
"grad_norm": 0.7758731842041016,
"learning_rate": 8.529408166012065e-06,
"loss": 0.1523,
"step": 42200
},
{
"epoch": 6.987115956392468,
"grad_norm": 0.9695360064506531,
"learning_rate": 8.52329038652131e-06,
"loss": 0.1475,
"step": 42300
},
{
"epoch": 7.003633961017509,
"grad_norm": 0.8952251672744751,
"learning_rate": 8.517172607030553e-06,
"loss": 0.1509,
"step": 42400
},
{
"epoch": 7.02015196564255,
"grad_norm": 0.649591326713562,
"learning_rate": 8.511054827539797e-06,
"loss": 0.1454,
"step": 42500
},
{
"epoch": 7.0366699702675914,
"grad_norm": 0.6509200930595398,
"learning_rate": 8.504937048049042e-06,
"loss": 0.1546,
"step": 42600
},
{
"epoch": 7.053187974892633,
"grad_norm": 0.7854897975921631,
"learning_rate": 8.498819268558285e-06,
"loss": 0.1519,
"step": 42700
},
{
"epoch": 7.069705979517674,
"grad_norm": 0.7244015336036682,
"learning_rate": 8.492701489067529e-06,
"loss": 0.1485,
"step": 42800
},
{
"epoch": 7.086223984142715,
"grad_norm": 0.8875409960746765,
"learning_rate": 8.486583709576772e-06,
"loss": 0.1577,
"step": 42900
},
{
"epoch": 7.1027419887677565,
"grad_norm": 1.101585030555725,
"learning_rate": 8.480465930086017e-06,
"loss": 0.158,
"step": 43000
},
{
"epoch": 7.1027419887677565,
"eval_cer": 0.045624871672027924,
"eval_loss": 0.13763436675071716,
"eval_runtime": 48.2628,
"eval_samples_per_second": 35.037,
"eval_steps_per_second": 8.765,
"eval_wer": 0.24960969212514833,
"step": 43000
},
{
"epoch": 7.119259993392798,
"grad_norm": 0.7627003788948059,
"learning_rate": 8.47434815059526e-06,
"loss": 0.1601,
"step": 43100
},
{
"epoch": 7.135777998017839,
"grad_norm": 0.9609680771827698,
"learning_rate": 8.468230371104504e-06,
"loss": 0.1512,
"step": 43200
},
{
"epoch": 7.15229600264288,
"grad_norm": 0.9379695057868958,
"learning_rate": 8.462112591613748e-06,
"loss": 0.153,
"step": 43300
},
{
"epoch": 7.168814007267922,
"grad_norm": 0.6494946479797363,
"learning_rate": 8.455994812122993e-06,
"loss": 0.1518,
"step": 43400
},
{
"epoch": 7.185332011892964,
"grad_norm": 1.0411746501922607,
"learning_rate": 8.449877032632236e-06,
"loss": 0.1513,
"step": 43500
},
{
"epoch": 7.201850016518005,
"grad_norm": 0.6832746863365173,
"learning_rate": 8.44375925314148e-06,
"loss": 0.146,
"step": 43600
},
{
"epoch": 7.218368021143046,
"grad_norm": 0.7576162219047546,
"learning_rate": 8.437641473650723e-06,
"loss": 0.153,
"step": 43700
},
{
"epoch": 7.234886025768088,
"grad_norm": 0.7100921273231506,
"learning_rate": 8.431523694159968e-06,
"loss": 0.1504,
"step": 43800
},
{
"epoch": 7.251404030393129,
"grad_norm": 1.0959173440933228,
"learning_rate": 8.425405914669212e-06,
"loss": 0.1456,
"step": 43900
},
{
"epoch": 7.26792203501817,
"grad_norm": 0.6423998475074768,
"learning_rate": 8.419288135178455e-06,
"loss": 0.1498,
"step": 44000
},
{
"epoch": 7.26792203501817,
"eval_cer": 0.04530832934090753,
"eval_loss": 0.13714681565761566,
"eval_runtime": 48.9228,
"eval_samples_per_second": 34.565,
"eval_steps_per_second": 8.646,
"eval_wer": 0.24786111284581278,
"step": 44000
},
{
"epoch": 7.284440039643211,
"grad_norm": 0.8662620782852173,
"learning_rate": 8.4131703556877e-06,
"loss": 0.152,
"step": 44100
},
{
"epoch": 7.300958044268253,
"grad_norm": 0.6693345308303833,
"learning_rate": 8.407052576196944e-06,
"loss": 0.1499,
"step": 44200
},
{
"epoch": 7.317476048893294,
"grad_norm": 1.1424570083618164,
"learning_rate": 8.400934796706189e-06,
"loss": 0.1524,
"step": 44300
},
{
"epoch": 7.333994053518335,
"grad_norm": 0.9028821587562561,
"learning_rate": 8.394817017215433e-06,
"loss": 0.1433,
"step": 44400
},
{
"epoch": 7.3505120581433765,
"grad_norm": 0.7449358701705933,
"learning_rate": 8.388699237724676e-06,
"loss": 0.1497,
"step": 44500
},
{
"epoch": 7.367030062768418,
"grad_norm": 0.8447193503379822,
"learning_rate": 8.382581458233921e-06,
"loss": 0.1531,
"step": 44600
},
{
"epoch": 7.383548067393459,
"grad_norm": 0.661593496799469,
"learning_rate": 8.376463678743165e-06,
"loss": 0.1463,
"step": 44700
},
{
"epoch": 7.4000660720185,
"grad_norm": 0.9252416491508484,
"learning_rate": 8.370345899252408e-06,
"loss": 0.1459,
"step": 44800
},
{
"epoch": 7.416584076643542,
"grad_norm": 0.8766170740127563,
"learning_rate": 8.364228119761652e-06,
"loss": 0.1447,
"step": 44900
},
{
"epoch": 7.433102081268583,
"grad_norm": 0.7817343473434448,
"learning_rate": 8.358110340270897e-06,
"loss": 0.1477,
"step": 45000
},
{
"epoch": 7.433102081268583,
"eval_cer": 0.04499178700978715,
"eval_loss": 0.13690289855003357,
"eval_runtime": 48.2135,
"eval_samples_per_second": 35.073,
"eval_steps_per_second": 8.773,
"eval_wer": 0.24592518578654843,
"step": 45000
},
{
"epoch": 7.449620085893624,
"grad_norm": 0.891497790813446,
"learning_rate": 8.35199256078014e-06,
"loss": 0.1461,
"step": 45100
},
{
"epoch": 7.466138090518665,
"grad_norm": 0.7994738221168518,
"learning_rate": 8.345874781289384e-06,
"loss": 0.1498,
"step": 45200
},
{
"epoch": 7.482656095143707,
"grad_norm": 0.9430075287818909,
"learning_rate": 8.339757001798627e-06,
"loss": 0.1939,
"step": 45300
},
{
"epoch": 7.499174099768748,
"grad_norm": 0.7117358446121216,
"learning_rate": 8.333639222307872e-06,
"loss": 0.149,
"step": 45400
},
{
"epoch": 7.515692104393789,
"grad_norm": 0.6447555422782898,
"learning_rate": 8.327521442817116e-06,
"loss": 0.1509,
"step": 45500
},
{
"epoch": 7.5322101090188305,
"grad_norm": 0.6948108077049255,
"learning_rate": 8.32140366332636e-06,
"loss": 0.1488,
"step": 45600
},
{
"epoch": 7.548728113643872,
"grad_norm": 1.0623235702514648,
"learning_rate": 8.315285883835603e-06,
"loss": 0.135,
"step": 45700
},
{
"epoch": 7.565246118268913,
"grad_norm": 0.7654304504394531,
"learning_rate": 8.309168104344848e-06,
"loss": 0.1352,
"step": 45800
},
{
"epoch": 7.581764122893954,
"grad_norm": 0.8501843810081482,
"learning_rate": 8.303050324854091e-06,
"loss": 0.1487,
"step": 45900
},
{
"epoch": 7.598282127518996,
"grad_norm": 0.7539622783660889,
"learning_rate": 8.296932545363335e-06,
"loss": 0.1449,
"step": 46000
},
{
"epoch": 7.598282127518996,
"eval_cer": 0.04526555335021559,
"eval_loss": 0.13489525020122528,
"eval_runtime": 48.4075,
"eval_samples_per_second": 34.933,
"eval_steps_per_second": 8.738,
"eval_wer": 0.24605008430650097,
"step": 46000
},
{
"epoch": 7.614800132144037,
"grad_norm": 0.679431140422821,
"learning_rate": 8.29081476587258e-06,
"loss": 0.1418,
"step": 46100
},
{
"epoch": 7.631318136769078,
"grad_norm": 0.9577202796936035,
"learning_rate": 8.284696986381823e-06,
"loss": 0.1444,
"step": 46200
},
{
"epoch": 7.647836141394119,
"grad_norm": 0.7873533964157104,
"learning_rate": 8.278579206891067e-06,
"loss": 0.1443,
"step": 46300
},
{
"epoch": 7.664354146019161,
"grad_norm": 0.9830496907234192,
"learning_rate": 8.272461427400312e-06,
"loss": 0.1447,
"step": 46400
},
{
"epoch": 7.680872150644202,
"grad_norm": 0.7039462327957153,
"learning_rate": 8.266343647909555e-06,
"loss": 0.1546,
"step": 46500
},
{
"epoch": 7.697390155269243,
"grad_norm": 1.1326159238815308,
"learning_rate": 8.260225868418799e-06,
"loss": 0.1681,
"step": 46600
},
{
"epoch": 7.7139081598942845,
"grad_norm": 0.9435326457023621,
"learning_rate": 8.254108088928044e-06,
"loss": 0.1441,
"step": 46700
},
{
"epoch": 7.730426164519326,
"grad_norm": 1.0461517572402954,
"learning_rate": 8.247990309437287e-06,
"loss": 0.1438,
"step": 46800
},
{
"epoch": 7.746944169144367,
"grad_norm": 0.6983148455619812,
"learning_rate": 8.241872529946531e-06,
"loss": 0.1461,
"step": 46900
},
{
"epoch": 7.763462173769408,
"grad_norm": 0.6941544413566589,
"learning_rate": 8.235754750455776e-06,
"loss": 0.1414,
"step": 47000
},
{
"epoch": 7.763462173769408,
"eval_cer": 0.044256039969885703,
"eval_loss": 0.13219207525253296,
"eval_runtime": 48.7223,
"eval_samples_per_second": 34.707,
"eval_steps_per_second": 8.682,
"eval_wer": 0.24261537500780617,
"step": 47000
},
{
"epoch": 7.7799801783944496,
"grad_norm": 0.7406185865402222,
"learning_rate": 8.22963697096502e-06,
"loss": 0.1462,
"step": 47100
},
{
"epoch": 7.796498183019491,
"grad_norm": 0.7421987652778625,
"learning_rate": 8.223519191474263e-06,
"loss": 0.1377,
"step": 47200
},
{
"epoch": 7.813016187644532,
"grad_norm": 0.9080411195755005,
"learning_rate": 8.217401411983506e-06,
"loss": 0.1447,
"step": 47300
},
{
"epoch": 7.829534192269573,
"grad_norm": 0.9030922651290894,
"learning_rate": 8.211283632492752e-06,
"loss": 0.1488,
"step": 47400
},
{
"epoch": 7.846052196894615,
"grad_norm": 0.8082269430160522,
"learning_rate": 8.205165853001995e-06,
"loss": 0.1695,
"step": 47500
},
{
"epoch": 7.862570201519657,
"grad_norm": 0.8809986114501953,
"learning_rate": 8.199048073511239e-06,
"loss": 0.144,
"step": 47600
},
{
"epoch": 7.879088206144698,
"grad_norm": 0.6915613412857056,
"learning_rate": 8.192930294020482e-06,
"loss": 0.172,
"step": 47700
},
{
"epoch": 7.895606210769739,
"grad_norm": 0.8618057370185852,
"learning_rate": 8.186812514529727e-06,
"loss": 0.1362,
"step": 47800
},
{
"epoch": 7.912124215394781,
"grad_norm": 0.8087924122810364,
"learning_rate": 8.18069473503897e-06,
"loss": 0.1714,
"step": 47900
},
{
"epoch": 7.928642220019822,
"grad_norm": 1.0039881467819214,
"learning_rate": 8.174576955548214e-06,
"loss": 0.148,
"step": 48000
},
{
"epoch": 7.928642220019822,
"eval_cer": 0.04386250085551981,
"eval_loss": 0.13279926776885986,
"eval_runtime": 48.5969,
"eval_samples_per_second": 34.796,
"eval_steps_per_second": 8.704,
"eval_wer": 0.24136638980828076,
"step": 48000
},
{
"epoch": 7.945160224644863,
"grad_norm": 0.8676062822341919,
"learning_rate": 8.168459176057458e-06,
"loss": 0.1426,
"step": 48100
},
{
"epoch": 7.961678229269904,
"grad_norm": 0.7867946028709412,
"learning_rate": 8.162341396566703e-06,
"loss": 0.149,
"step": 48200
},
{
"epoch": 7.978196233894946,
"grad_norm": 0.7121617794036865,
"learning_rate": 8.156223617075946e-06,
"loss": 0.1447,
"step": 48300
},
{
"epoch": 7.994714238519987,
"grad_norm": 0.7431650757789612,
"learning_rate": 8.15010583758519e-06,
"loss": 0.1352,
"step": 48400
},
{
"epoch": 8.011232243145027,
"grad_norm": 1.1219011545181274,
"learning_rate": 8.143988058094435e-06,
"loss": 0.1427,
"step": 48500
},
{
"epoch": 8.027750247770069,
"grad_norm": 0.6559448838233948,
"learning_rate": 8.137870278603678e-06,
"loss": 0.1434,
"step": 48600
},
{
"epoch": 8.04426825239511,
"grad_norm": 0.7558749318122864,
"learning_rate": 8.131752499112923e-06,
"loss": 0.1396,
"step": 48700
},
{
"epoch": 8.060786257020151,
"grad_norm": 0.7132017016410828,
"learning_rate": 8.125634719622167e-06,
"loss": 0.1384,
"step": 48800
},
{
"epoch": 8.077304261645192,
"grad_norm": 0.7818734645843506,
"learning_rate": 8.11951694013141e-06,
"loss": 0.1386,
"step": 48900
},
{
"epoch": 8.093822266270234,
"grad_norm": 1.4067357778549194,
"learning_rate": 8.113399160640656e-06,
"loss": 0.1356,
"step": 49000
},
{
"epoch": 8.093822266270234,
"eval_cer": 0.04489767983026487,
"eval_loss": 0.1310672014951706,
"eval_runtime": 48.4665,
"eval_samples_per_second": 34.89,
"eval_steps_per_second": 8.728,
"eval_wer": 0.2447386498469993,
"step": 49000
},
{
"epoch": 8.110340270895275,
"grad_norm": 0.6177778840065002,
"learning_rate": 8.107281381149899e-06,
"loss": 0.1602,
"step": 49100
},
{
"epoch": 8.126858275520316,
"grad_norm": 0.6477259993553162,
"learning_rate": 8.101163601659142e-06,
"loss": 0.1356,
"step": 49200
},
{
"epoch": 8.14337628014536,
"grad_norm": 0.7413625717163086,
"learning_rate": 8.095045822168388e-06,
"loss": 0.1564,
"step": 49300
},
{
"epoch": 8.1598942847704,
"grad_norm": 0.6410078406333923,
"learning_rate": 8.088928042677631e-06,
"loss": 0.1344,
"step": 49400
},
{
"epoch": 8.176412289395442,
"grad_norm": 0.7430760860443115,
"learning_rate": 8.082810263186875e-06,
"loss": 0.1423,
"step": 49500
},
{
"epoch": 8.192930294020483,
"grad_norm": 0.7136946320533752,
"learning_rate": 8.076692483696118e-06,
"loss": 0.1328,
"step": 49600
},
{
"epoch": 8.209448298645524,
"grad_norm": 0.6934331059455872,
"learning_rate": 8.070574704205363e-06,
"loss": 0.1372,
"step": 49700
},
{
"epoch": 8.225966303270566,
"grad_norm": 0.9232444167137146,
"learning_rate": 8.064456924714607e-06,
"loss": 0.1333,
"step": 49800
},
{
"epoch": 8.242484307895607,
"grad_norm": 0.9037547707557678,
"learning_rate": 8.05833914522385e-06,
"loss": 0.1304,
"step": 49900
},
{
"epoch": 8.259002312520648,
"grad_norm": 0.7326195240020752,
"learning_rate": 8.052221365733094e-06,
"loss": 0.1426,
"step": 50000
},
{
"epoch": 8.259002312520648,
"eval_cer": 0.043101088221203204,
"eval_loss": 0.1275395154953003,
"eval_runtime": 48.7537,
"eval_samples_per_second": 34.685,
"eval_steps_per_second": 8.676,
"eval_wer": 0.23793168050958596,
"step": 50000
},
{
"epoch": 8.27552031714569,
"grad_norm": 0.8698177933692932,
"learning_rate": 8.046103586242339e-06,
"loss": 0.1436,
"step": 50100
},
{
"epoch": 8.29203832177073,
"grad_norm": 0.6039656400680542,
"learning_rate": 8.039985806751582e-06,
"loss": 0.1381,
"step": 50200
},
{
"epoch": 8.308556326395772,
"grad_norm": 0.6586022973060608,
"learning_rate": 8.033868027260826e-06,
"loss": 0.1419,
"step": 50300
},
{
"epoch": 8.325074331020813,
"grad_norm": 0.7226503491401672,
"learning_rate": 8.027750247770069e-06,
"loss": 0.1383,
"step": 50400
},
{
"epoch": 8.341592335645855,
"grad_norm": 0.6569647192955017,
"learning_rate": 8.021632468279314e-06,
"loss": 0.14,
"step": 50500
},
{
"epoch": 8.358110340270896,
"grad_norm": 0.7345595955848694,
"learning_rate": 8.015514688788558e-06,
"loss": 0.1335,
"step": 50600
},
{
"epoch": 8.374628344895937,
"grad_norm": 0.9025945067405701,
"learning_rate": 8.009396909297801e-06,
"loss": 0.1327,
"step": 50700
},
{
"epoch": 8.391146349520978,
"grad_norm": 0.693868100643158,
"learning_rate": 8.003279129807046e-06,
"loss": 0.1405,
"step": 50800
},
{
"epoch": 8.40766435414602,
"grad_norm": 0.726399302482605,
"learning_rate": 7.99716135031629e-06,
"loss": 0.1412,
"step": 50900
},
{
"epoch": 8.424182358771061,
"grad_norm": 0.8612838983535767,
"learning_rate": 7.991043570825533e-06,
"loss": 0.1605,
"step": 51000
},
{
"epoch": 8.424182358771061,
"eval_cer": 0.04358873451509137,
"eval_loss": 0.12762609124183655,
"eval_runtime": 48.4653,
"eval_samples_per_second": 34.891,
"eval_steps_per_second": 8.728,
"eval_wer": 0.2394929120089927,
"step": 51000
},
{
"epoch": 8.440700363396102,
"grad_norm": 0.7443174719810486,
"learning_rate": 7.984925791334778e-06,
"loss": 0.1586,
"step": 51100
},
{
"epoch": 8.457218368021143,
"grad_norm": 0.8640626668930054,
"learning_rate": 7.978808011844022e-06,
"loss": 0.1315,
"step": 51200
},
{
"epoch": 8.473736372646185,
"grad_norm": 0.8193531632423401,
"learning_rate": 7.972690232353267e-06,
"loss": 0.1345,
"step": 51300
},
{
"epoch": 8.490254377271226,
"grad_norm": 0.922201931476593,
"learning_rate": 7.96657245286251e-06,
"loss": 0.1406,
"step": 51400
},
{
"epoch": 8.506772381896267,
"grad_norm": 0.6063607931137085,
"learning_rate": 7.960454673371754e-06,
"loss": 0.1311,
"step": 51500
},
{
"epoch": 8.523290386521309,
"grad_norm": 0.6972509622573853,
"learning_rate": 7.954336893880997e-06,
"loss": 0.1313,
"step": 51600
},
{
"epoch": 8.53980839114635,
"grad_norm": 0.8324514627456665,
"learning_rate": 7.948219114390243e-06,
"loss": 0.1357,
"step": 51700
},
{
"epoch": 8.556326395771391,
"grad_norm": 0.8537706136703491,
"learning_rate": 7.942101334899486e-06,
"loss": 0.1326,
"step": 51800
},
{
"epoch": 8.572844400396432,
"grad_norm": 0.7509739398956299,
"learning_rate": 7.93598355540873e-06,
"loss": 0.141,
"step": 51900
},
{
"epoch": 8.589362405021474,
"grad_norm": 0.8359591364860535,
"learning_rate": 7.929865775917973e-06,
"loss": 0.1619,
"step": 52000
},
{
"epoch": 8.589362405021474,
"eval_cer": 0.04345185134487715,
"eval_loss": 0.128912091255188,
"eval_runtime": 48.8649,
"eval_samples_per_second": 34.606,
"eval_steps_per_second": 8.657,
"eval_wer": 0.23874352088927747,
"step": 52000
},
{
"epoch": 8.605880409646515,
"grad_norm": 0.9481146335601807,
"learning_rate": 7.923747996427218e-06,
"loss": 0.1305,
"step": 52100
},
{
"epoch": 8.622398414271556,
"grad_norm": 0.7615697979927063,
"learning_rate": 7.917630216936462e-06,
"loss": 0.1575,
"step": 52200
},
{
"epoch": 8.638916418896597,
"grad_norm": 0.8259508609771729,
"learning_rate": 7.911512437445705e-06,
"loss": 0.1378,
"step": 52300
},
{
"epoch": 8.655434423521639,
"grad_norm": 2.3674380779266357,
"learning_rate": 7.905394657954948e-06,
"loss": 0.1279,
"step": 52400
},
{
"epoch": 8.67195242814668,
"grad_norm": 0.6892039179801941,
"learning_rate": 7.899276878464194e-06,
"loss": 0.1387,
"step": 52500
},
{
"epoch": 8.688470432771721,
"grad_norm": 0.7626641988754272,
"learning_rate": 7.893159098973437e-06,
"loss": 0.1354,
"step": 52600
},
{
"epoch": 8.704988437396763,
"grad_norm": 0.9490280747413635,
"learning_rate": 7.88704131948268e-06,
"loss": 0.1273,
"step": 52700
},
{
"epoch": 8.721506442021804,
"grad_norm": 0.7699686884880066,
"learning_rate": 7.880923539991924e-06,
"loss": 0.1277,
"step": 52800
},
{
"epoch": 8.738024446646845,
"grad_norm": 0.819313108921051,
"learning_rate": 7.87480576050117e-06,
"loss": 0.1337,
"step": 52900
},
{
"epoch": 8.754542451271886,
"grad_norm": 0.7003619074821472,
"learning_rate": 7.868687981010413e-06,
"loss": 0.1594,
"step": 53000
},
{
"epoch": 8.754542451271886,
"eval_cer": 0.042955649852850594,
"eval_loss": 0.12719017267227173,
"eval_runtime": 48.6494,
"eval_samples_per_second": 34.759,
"eval_steps_per_second": 8.695,
"eval_wer": 0.237244738649847,
"step": 53000
},
{
"epoch": 8.771060455896928,
"grad_norm": 0.6663370728492737,
"learning_rate": 7.862570201519658e-06,
"loss": 0.1376,
"step": 53100
},
{
"epoch": 8.787578460521969,
"grad_norm": 0.5805346369743347,
"learning_rate": 7.856452422028901e-06,
"loss": 0.1379,
"step": 53200
},
{
"epoch": 8.80409646514701,
"grad_norm": 0.7736044526100159,
"learning_rate": 7.850334642538145e-06,
"loss": 0.1323,
"step": 53300
},
{
"epoch": 8.820614469772051,
"grad_norm": 0.9529663324356079,
"learning_rate": 7.84421686304739e-06,
"loss": 0.1289,
"step": 53400
},
{
"epoch": 8.837132474397093,
"grad_norm": 0.583885908126831,
"learning_rate": 7.838099083556633e-06,
"loss": 0.1322,
"step": 53500
},
{
"epoch": 8.853650479022134,
"grad_norm": 0.7250145673751831,
"learning_rate": 7.831981304065877e-06,
"loss": 0.133,
"step": 53600
},
{
"epoch": 8.870168483647175,
"grad_norm": 0.6262508034706116,
"learning_rate": 7.825863524575122e-06,
"loss": 0.1599,
"step": 53700
},
{
"epoch": 8.886686488272217,
"grad_norm": 0.7456868290901184,
"learning_rate": 7.819745745084365e-06,
"loss": 0.1389,
"step": 53800
},
{
"epoch": 8.903204492897258,
"grad_norm": 0.944975733757019,
"learning_rate": 7.813627965593609e-06,
"loss": 0.1339,
"step": 53900
},
{
"epoch": 8.919722497522299,
"grad_norm": 0.7597010731697083,
"learning_rate": 7.807510186102852e-06,
"loss": 0.1247,
"step": 54000
},
{
"epoch": 8.919722497522299,
"eval_cer": 0.042852987475189924,
"eval_loss": 0.12474211305379868,
"eval_runtime": 48.5952,
"eval_samples_per_second": 34.798,
"eval_steps_per_second": 8.705,
"eval_wer": 0.23530881159058264,
"step": 54000
},
{
"epoch": 8.93624050214734,
"grad_norm": 0.8667477965354919,
"learning_rate": 7.801392406612098e-06,
"loss": 0.1305,
"step": 54100
},
{
"epoch": 8.952758506772382,
"grad_norm": 0.7182130813598633,
"learning_rate": 7.795274627121341e-06,
"loss": 0.1315,
"step": 54200
},
{
"epoch": 8.969276511397423,
"grad_norm": 0.7797411680221558,
"learning_rate": 7.789156847630584e-06,
"loss": 0.1342,
"step": 54300
},
{
"epoch": 8.985794516022464,
"grad_norm": 0.7155345678329468,
"learning_rate": 7.783039068139828e-06,
"loss": 0.128,
"step": 54400
},
{
"epoch": 9.002312520647505,
"grad_norm": 0.8948495388031006,
"learning_rate": 7.776921288649073e-06,
"loss": 0.1352,
"step": 54500
},
{
"epoch": 9.018830525272547,
"grad_norm": 1.397560954093933,
"learning_rate": 7.770803509158317e-06,
"loss": 0.1362,
"step": 54600
},
{
"epoch": 9.035348529897588,
"grad_norm": 0.7347814440727234,
"learning_rate": 7.76468572966756e-06,
"loss": 0.1331,
"step": 54700
},
{
"epoch": 9.05186653452263,
"grad_norm": 0.9538244009017944,
"learning_rate": 7.758567950176803e-06,
"loss": 0.1339,
"step": 54800
},
{
"epoch": 9.06838453914767,
"grad_norm": 0.7012799382209778,
"learning_rate": 7.752450170686049e-06,
"loss": 0.1572,
"step": 54900
},
{
"epoch": 9.084902543772712,
"grad_norm": 0.6902897357940674,
"learning_rate": 7.746332391195292e-06,
"loss": 0.1382,
"step": 55000
},
{
"epoch": 9.084902543772712,
"eval_cer": 0.04332352337280131,
"eval_loss": 0.1268453598022461,
"eval_runtime": 48.6908,
"eval_samples_per_second": 34.729,
"eval_steps_per_second": 8.687,
"eval_wer": 0.23674514457003684,
"step": 55000
},
{
"epoch": 9.101420548397753,
"grad_norm": 0.785682737827301,
"learning_rate": 7.740214611704536e-06,
"loss": 0.1246,
"step": 55100
},
{
"epoch": 9.117938553022794,
"grad_norm": 0.6153995990753174,
"learning_rate": 7.73409683221378e-06,
"loss": 0.1308,
"step": 55200
},
{
"epoch": 9.134456557647836,
"grad_norm": 0.9429919719696045,
"learning_rate": 7.727979052723024e-06,
"loss": 0.1297,
"step": 55300
},
{
"epoch": 9.150974562272877,
"grad_norm": 0.509573221206665,
"learning_rate": 7.721861273232268e-06,
"loss": 0.1536,
"step": 55400
},
{
"epoch": 9.167492566897918,
"grad_norm": 0.8129322528839111,
"learning_rate": 7.715743493741513e-06,
"loss": 0.1335,
"step": 55500
},
{
"epoch": 9.18401057152296,
"grad_norm": 0.6652195453643799,
"learning_rate": 7.709625714250756e-06,
"loss": 0.1227,
"step": 55600
},
{
"epoch": 9.200528576148,
"grad_norm": 0.9443718791007996,
"learning_rate": 7.703507934760001e-06,
"loss": 0.1327,
"step": 55700
},
{
"epoch": 9.217046580773042,
"grad_norm": 1.0480358600616455,
"learning_rate": 7.697390155269245e-06,
"loss": 0.1333,
"step": 55800
},
{
"epoch": 9.233564585398083,
"grad_norm": 0.8347544074058533,
"learning_rate": 7.691272375778488e-06,
"loss": 0.1265,
"step": 55900
},
{
"epoch": 9.250082590023124,
"grad_norm": 0.9064726233482361,
"learning_rate": 7.685154596287732e-06,
"loss": 0.1263,
"step": 56000
},
{
"epoch": 9.250082590023124,
"eval_cer": 0.043246526589555814,
"eval_loss": 0.12526705861091614,
"eval_runtime": 48.6324,
"eval_samples_per_second": 34.771,
"eval_steps_per_second": 8.698,
"eval_wer": 0.23574595641041654,
"step": 56000
},
{
"epoch": 9.266600594648166,
"grad_norm": 0.6867343783378601,
"learning_rate": 7.679036816796977e-06,
"loss": 0.1634,
"step": 56100
},
{
"epoch": 9.283118599273207,
"grad_norm": 0.5822398662567139,
"learning_rate": 7.67291903730622e-06,
"loss": 0.1232,
"step": 56200
},
{
"epoch": 9.299636603898248,
"grad_norm": 0.70078444480896,
"learning_rate": 7.666801257815464e-06,
"loss": 0.1334,
"step": 56300
},
{
"epoch": 9.31615460852329,
"grad_norm": 0.7892748117446899,
"learning_rate": 7.660683478324707e-06,
"loss": 0.1364,
"step": 56400
},
{
"epoch": 9.33267261314833,
"grad_norm": 0.6174075603485107,
"learning_rate": 7.654565698833952e-06,
"loss": 0.1279,
"step": 56500
},
{
"epoch": 9.349190617773372,
"grad_norm": 0.5943549275398254,
"learning_rate": 7.648447919343196e-06,
"loss": 0.1211,
"step": 56600
},
{
"epoch": 9.365708622398415,
"grad_norm": 0.7577424645423889,
"learning_rate": 7.64233013985244e-06,
"loss": 0.1282,
"step": 56700
},
{
"epoch": 9.382226627023456,
"grad_norm": 0.6361554861068726,
"learning_rate": 7.636212360361683e-06,
"loss": 0.146,
"step": 56800
},
{
"epoch": 9.398744631648498,
"grad_norm": 0.7385000586509705,
"learning_rate": 7.630094580870928e-06,
"loss": 0.1218,
"step": 56900
},
{
"epoch": 9.415262636273539,
"grad_norm": 0.8645774126052856,
"learning_rate": 7.6239768013801715e-06,
"loss": 0.1296,
"step": 57000
},
{
"epoch": 9.415262636273539,
"eval_cer": 0.041988912463212645,
"eval_loss": 0.12412171810865402,
"eval_runtime": 48.7466,
"eval_samples_per_second": 34.69,
"eval_steps_per_second": 8.678,
"eval_wer": 0.23099981265222008,
"step": 57000
},
{
"epoch": 9.43178064089858,
"grad_norm": 0.8043680787086487,
"learning_rate": 7.617859021889416e-06,
"loss": 0.1263,
"step": 57100
},
{
"epoch": 9.448298645523622,
"grad_norm": 0.6487779021263123,
"learning_rate": 7.611741242398659e-06,
"loss": 0.1409,
"step": 57200
},
{
"epoch": 9.464816650148663,
"grad_norm": 0.8494729399681091,
"learning_rate": 7.605623462907904e-06,
"loss": 0.1292,
"step": 57300
},
{
"epoch": 9.481334654773704,
"grad_norm": 0.6968827247619629,
"learning_rate": 7.599505683417148e-06,
"loss": 0.1237,
"step": 57400
},
{
"epoch": 9.497852659398745,
"grad_norm": 0.7528768181800842,
"learning_rate": 7.593387903926391e-06,
"loss": 0.121,
"step": 57500
},
{
"epoch": 9.514370664023787,
"grad_norm": 0.8891571164131165,
"learning_rate": 7.587270124435635e-06,
"loss": 0.1271,
"step": 57600
},
{
"epoch": 9.530888668648828,
"grad_norm": 0.60162353515625,
"learning_rate": 7.58115234494488e-06,
"loss": 0.1321,
"step": 57700
},
{
"epoch": 9.54740667327387,
"grad_norm": 0.9320480823516846,
"learning_rate": 7.575034565454123e-06,
"loss": 0.136,
"step": 57800
},
{
"epoch": 9.56392467789891,
"grad_norm": 0.7406982779502869,
"learning_rate": 7.568916785963367e-06,
"loss": 0.128,
"step": 57900
},
{
"epoch": 9.580442682523952,
"grad_norm": 0.753324568271637,
"learning_rate": 7.562799006472611e-06,
"loss": 0.1263,
"step": 58000
},
{
"epoch": 9.580442682523952,
"eval_cer": 0.042305454794333036,
"eval_loss": 0.12526877224445343,
"eval_runtime": 48.8065,
"eval_samples_per_second": 34.647,
"eval_steps_per_second": 8.667,
"eval_wer": 0.23212389933179292,
"step": 58000
},
{
"epoch": 9.596960687148993,
"grad_norm": 0.6665119528770447,
"learning_rate": 7.5566812269818555e-06,
"loss": 0.1285,
"step": 58100
},
{
"epoch": 9.613478691774034,
"grad_norm": 0.6425819993019104,
"learning_rate": 7.5505634474911e-06,
"loss": 0.1303,
"step": 58200
},
{
"epoch": 9.629996696399076,
"grad_norm": 0.7088574767112732,
"learning_rate": 7.544445668000343e-06,
"loss": 0.1246,
"step": 58300
},
{
"epoch": 9.646514701024117,
"grad_norm": 0.6304578185081482,
"learning_rate": 7.538327888509587e-06,
"loss": 0.1357,
"step": 58400
},
{
"epoch": 9.663032705649158,
"grad_norm": 0.9591554403305054,
"learning_rate": 7.532210109018832e-06,
"loss": 0.1352,
"step": 58500
},
{
"epoch": 9.6795507102742,
"grad_norm": 0.8646364808082581,
"learning_rate": 7.526092329528075e-06,
"loss": 0.1225,
"step": 58600
},
{
"epoch": 9.69606871489924,
"grad_norm": 0.729404866695404,
"learning_rate": 7.519974550037319e-06,
"loss": 0.1241,
"step": 58700
},
{
"epoch": 9.712586719524282,
"grad_norm": 0.719990611076355,
"learning_rate": 7.513856770546562e-06,
"loss": 0.1184,
"step": 58800
},
{
"epoch": 9.729104724149323,
"grad_norm": 0.7622655630111694,
"learning_rate": 7.507738991055807e-06,
"loss": 0.1281,
"step": 58900
},
{
"epoch": 9.745622728774364,
"grad_norm": 0.8316338658332825,
"learning_rate": 7.501621211565051e-06,
"loss": 0.1327,
"step": 59000
},
{
"epoch": 9.745622728774364,
"eval_cer": 0.042014578057627816,
"eval_loss": 0.12281199544668198,
"eval_runtime": 48.6883,
"eval_samples_per_second": 34.731,
"eval_steps_per_second": 8.688,
"eval_wer": 0.23018797227252857,
"step": 59000
},
{
"epoch": 9.762140733399406,
"grad_norm": 0.6978473663330078,
"learning_rate": 7.495503432074294e-06,
"loss": 0.1218,
"step": 59100
},
{
"epoch": 9.778658738024447,
"grad_norm": 0.7994058728218079,
"learning_rate": 7.489385652583539e-06,
"loss": 0.1217,
"step": 59200
},
{
"epoch": 9.795176742649488,
"grad_norm": 0.6791940927505493,
"learning_rate": 7.483267873092783e-06,
"loss": 0.1598,
"step": 59300
},
{
"epoch": 9.81169474727453,
"grad_norm": 0.7519752383232117,
"learning_rate": 7.477150093602027e-06,
"loss": 0.1267,
"step": 59400
},
{
"epoch": 9.82821275189957,
"grad_norm": 0.6616401672363281,
"learning_rate": 7.471032314111271e-06,
"loss": 0.126,
"step": 59500
},
{
"epoch": 9.844730756524612,
"grad_norm": 0.8174837231636047,
"learning_rate": 7.464914534620514e-06,
"loss": 0.1287,
"step": 59600
},
{
"epoch": 9.861248761149653,
"grad_norm": 0.746444046497345,
"learning_rate": 7.458796755129759e-06,
"loss": 0.1576,
"step": 59700
},
{
"epoch": 9.877766765774695,
"grad_norm": 0.9220054745674133,
"learning_rate": 7.452678975639003e-06,
"loss": 0.1396,
"step": 59800
},
{
"epoch": 9.894284770399736,
"grad_norm": 0.8045241832733154,
"learning_rate": 7.446561196148246e-06,
"loss": 0.1217,
"step": 59900
},
{
"epoch": 9.910802775024777,
"grad_norm": 0.759081244468689,
"learning_rate": 7.44044341665749e-06,
"loss": 0.1262,
"step": 60000
},
{
"epoch": 9.910802775024777,
"eval_cer": 0.041911915679967146,
"eval_loss": 0.12326313555240631,
"eval_runtime": 48.7409,
"eval_samples_per_second": 34.694,
"eval_steps_per_second": 8.679,
"eval_wer": 0.23018797227252857,
"step": 60000
},
{
"epoch": 9.927320779649818,
"grad_norm": 0.8618572354316711,
"learning_rate": 7.434325637166735e-06,
"loss": 0.1251,
"step": 60100
},
{
"epoch": 9.94383878427486,
"grad_norm": 0.6436170935630798,
"learning_rate": 7.428207857675978e-06,
"loss": 0.1201,
"step": 60200
},
{
"epoch": 9.960356788899901,
"grad_norm": 0.8590137958526611,
"learning_rate": 7.422090078185223e-06,
"loss": 0.129,
"step": 60300
},
{
"epoch": 9.976874793524942,
"grad_norm": 0.7668434381484985,
"learning_rate": 7.415972298694466e-06,
"loss": 0.1255,
"step": 60400
},
{
"epoch": 9.993392798149983,
"grad_norm": 0.7597298622131348,
"learning_rate": 7.4098545192037104e-06,
"loss": 0.1231,
"step": 60500
},
{
"epoch": 10.009910802775025,
"grad_norm": 0.8070718050003052,
"learning_rate": 7.403736739712955e-06,
"loss": 0.1642,
"step": 60600
},
{
"epoch": 10.026428807400066,
"grad_norm": 0.7364042401313782,
"learning_rate": 7.397618960222198e-06,
"loss": 0.1243,
"step": 60700
},
{
"epoch": 10.042946812025107,
"grad_norm": 0.8967491984367371,
"learning_rate": 7.391501180731442e-06,
"loss": 0.1321,
"step": 60800
},
{
"epoch": 10.059464816650149,
"grad_norm": 0.8420141339302063,
"learning_rate": 7.385383401240687e-06,
"loss": 0.1274,
"step": 60900
},
{
"epoch": 10.07598282127519,
"grad_norm": 0.7145309448242188,
"learning_rate": 7.37926562174993e-06,
"loss": 0.1207,
"step": 61000
},
{
"epoch": 10.07598282127519,
"eval_cer": 0.041920470878105534,
"eval_loss": 0.12428971379995346,
"eval_runtime": 48.748,
"eval_samples_per_second": 34.689,
"eval_steps_per_second": 8.677,
"eval_wer": 0.22881408855305066,
"step": 61000
},
{
"epoch": 10.092500825900231,
"grad_norm": 0.7484616041183472,
"learning_rate": 7.373147842259174e-06,
"loss": 0.1279,
"step": 61100
},
{
"epoch": 10.109018830525272,
"grad_norm": 0.7732555270195007,
"learning_rate": 7.367030062768418e-06,
"loss": 0.1251,
"step": 61200
},
{
"epoch": 10.125536835150314,
"grad_norm": 0.7943729162216187,
"learning_rate": 7.360912283277662e-06,
"loss": 0.1194,
"step": 61300
},
{
"epoch": 10.142054839775355,
"grad_norm": 0.7555480003356934,
"learning_rate": 7.354794503786906e-06,
"loss": 0.1171,
"step": 61400
},
{
"epoch": 10.158572844400396,
"grad_norm": 0.6439567804336548,
"learning_rate": 7.34867672429615e-06,
"loss": 0.1203,
"step": 61500
},
{
"epoch": 10.175090849025437,
"grad_norm": 0.5505064725875854,
"learning_rate": 7.342558944805394e-06,
"loss": 0.1198,
"step": 61600
},
{
"epoch": 10.191608853650479,
"grad_norm": 0.6508448123931885,
"learning_rate": 7.336441165314639e-06,
"loss": 0.1227,
"step": 61700
},
{
"epoch": 10.20812685827552,
"grad_norm": 0.6717207431793213,
"learning_rate": 7.330323385823882e-06,
"loss": 0.1258,
"step": 61800
},
{
"epoch": 10.224644862900561,
"grad_norm": 0.7035212516784668,
"learning_rate": 7.324205606333126e-06,
"loss": 0.1235,
"step": 61900
},
{
"epoch": 10.241162867525603,
"grad_norm": 0.6881560683250427,
"learning_rate": 7.318087826842369e-06,
"loss": 0.1411,
"step": 62000
},
{
"epoch": 10.241162867525603,
"eval_cer": 0.04166381493395387,
"eval_loss": 0.12054095417261124,
"eval_runtime": 48.5305,
"eval_samples_per_second": 34.844,
"eval_steps_per_second": 8.716,
"eval_wer": 0.22781490039343033,
"step": 62000
},
{
"epoch": 10.257680872150644,
"grad_norm": 0.8385311961174011,
"learning_rate": 7.311970047351614e-06,
"loss": 0.1223,
"step": 62100
},
{
"epoch": 10.274198876775685,
"grad_norm": 0.5517755746841431,
"learning_rate": 7.305852267860858e-06,
"loss": 0.1251,
"step": 62200
},
{
"epoch": 10.290716881400726,
"grad_norm": 0.736827552318573,
"learning_rate": 7.299734488370101e-06,
"loss": 0.1215,
"step": 62300
},
{
"epoch": 10.307234886025768,
"grad_norm": 0.8742785453796387,
"learning_rate": 7.2936167088793455e-06,
"loss": 0.1284,
"step": 62400
},
{
"epoch": 10.323752890650809,
"grad_norm": 0.6363995671272278,
"learning_rate": 7.28749892938859e-06,
"loss": 0.1287,
"step": 62500
},
{
"epoch": 10.34027089527585,
"grad_norm": 0.8067464232444763,
"learning_rate": 7.281381149897834e-06,
"loss": 0.1206,
"step": 62600
},
{
"epoch": 10.356788899900891,
"grad_norm": 0.7671234011650085,
"learning_rate": 7.275263370407078e-06,
"loss": 0.1216,
"step": 62700
},
{
"epoch": 10.373306904525933,
"grad_norm": 1.0161293745040894,
"learning_rate": 7.269145590916321e-06,
"loss": 0.1244,
"step": 62800
},
{
"epoch": 10.389824909150974,
"grad_norm": 0.7212845683097839,
"learning_rate": 7.263027811425566e-06,
"loss": 0.1298,
"step": 62900
},
{
"epoch": 10.406342913776015,
"grad_norm": 0.6176585555076599,
"learning_rate": 7.25691003193481e-06,
"loss": 0.1401,
"step": 63000
},
{
"epoch": 10.406342913776015,
"eval_cer": 0.04089384710149887,
"eval_loss": 0.12006353586912155,
"eval_runtime": 48.5157,
"eval_samples_per_second": 34.855,
"eval_steps_per_second": 8.719,
"eval_wer": 0.22494223443452194,
"step": 63000
},
{
"epoch": 10.422860918401057,
"grad_norm": 0.724654495716095,
"learning_rate": 7.250792252444053e-06,
"loss": 0.1246,
"step": 63100
},
{
"epoch": 10.439378923026098,
"grad_norm": 2.0829966068267822,
"learning_rate": 7.244674472953297e-06,
"loss": 0.1206,
"step": 63200
},
{
"epoch": 10.455896927651139,
"grad_norm": 0.7068758606910706,
"learning_rate": 7.238556693462542e-06,
"loss": 0.1214,
"step": 63300
},
{
"epoch": 10.47241493227618,
"grad_norm": 0.717832624912262,
"learning_rate": 7.232438913971785e-06,
"loss": 0.1233,
"step": 63400
},
{
"epoch": 10.488932936901222,
"grad_norm": 0.7591824531555176,
"learning_rate": 7.226321134481029e-06,
"loss": 0.1223,
"step": 63500
},
{
"epoch": 10.505450941526263,
"grad_norm": 0.8358705639839172,
"learning_rate": 7.220203354990273e-06,
"loss": 0.117,
"step": 63600
},
{
"epoch": 10.521968946151304,
"grad_norm": 0.7193006277084351,
"learning_rate": 7.214085575499517e-06,
"loss": 0.1229,
"step": 63700
},
{
"epoch": 10.538486950776345,
"grad_norm": 0.8279296159744263,
"learning_rate": 7.207967796008762e-06,
"loss": 0.1304,
"step": 63800
},
{
"epoch": 10.555004955401387,
"grad_norm": 0.9237922430038452,
"learning_rate": 7.201850016518005e-06,
"loss": 0.121,
"step": 63900
},
{
"epoch": 10.571522960026428,
"grad_norm": 0.6493191719055176,
"learning_rate": 7.1957322370272486e-06,
"loss": 0.1165,
"step": 64000
},
{
"epoch": 10.571522960026428,
"eval_cer": 0.04139860379166382,
"eval_loss": 0.12036388367414474,
"eval_runtime": 48.7181,
"eval_samples_per_second": 34.71,
"eval_steps_per_second": 8.683,
"eval_wer": 0.2271279585336914,
"step": 64000
},
{
"epoch": 10.58804096465147,
"grad_norm": 0.9154326319694519,
"learning_rate": 7.189614457536494e-06,
"loss": 0.1184,
"step": 64100
},
{
"epoch": 10.60455896927651,
"grad_norm": 0.8205140829086304,
"learning_rate": 7.183496678045737e-06,
"loss": 0.1243,
"step": 64200
},
{
"epoch": 10.621076973901552,
"grad_norm": 0.7464902400970459,
"learning_rate": 7.177378898554981e-06,
"loss": 0.1274,
"step": 64300
},
{
"epoch": 10.637594978526593,
"grad_norm": 0.6660764217376709,
"learning_rate": 7.171261119064224e-06,
"loss": 0.1348,
"step": 64400
},
{
"epoch": 10.654112983151634,
"grad_norm": 0.8469193577766418,
"learning_rate": 7.165143339573469e-06,
"loss": 0.1273,
"step": 64500
},
{
"epoch": 10.670630987776677,
"grad_norm": 0.7771629095077515,
"learning_rate": 7.159025560082713e-06,
"loss": 0.1202,
"step": 64600
},
{
"epoch": 10.687148992401719,
"grad_norm": 0.7291647791862488,
"learning_rate": 7.152907780591957e-06,
"loss": 0.12,
"step": 64700
},
{
"epoch": 10.70366699702676,
"grad_norm": 0.670477032661438,
"learning_rate": 7.1467900011012005e-06,
"loss": 0.1407,
"step": 64800
},
{
"epoch": 10.720185001651801,
"grad_norm": 0.7225449085235596,
"learning_rate": 7.140672221610445e-06,
"loss": 0.1207,
"step": 64900
},
{
"epoch": 10.736703006276842,
"grad_norm": 0.6769737601280212,
"learning_rate": 7.134554442119689e-06,
"loss": 0.1145,
"step": 65000
},
{
"epoch": 10.736703006276842,
"eval_cer": 0.04139004859352543,
"eval_loss": 0.12175419926643372,
"eval_runtime": 48.7387,
"eval_samples_per_second": 34.695,
"eval_steps_per_second": 8.679,
"eval_wer": 0.2271279585336914,
"step": 65000
},
{
"epoch": 10.753221010901884,
"grad_norm": 0.7679712772369385,
"learning_rate": 7.1284366626289326e-06,
"loss": 0.1201,
"step": 65100
},
{
"epoch": 10.769739015526925,
"grad_norm": 0.8149623274803162,
"learning_rate": 7.122318883138176e-06,
"loss": 0.1142,
"step": 65200
},
{
"epoch": 10.786257020151966,
"grad_norm": 0.6343280076980591,
"learning_rate": 7.116201103647421e-06,
"loss": 0.1392,
"step": 65300
},
{
"epoch": 10.802775024777008,
"grad_norm": 0.759663462638855,
"learning_rate": 7.110083324156665e-06,
"loss": 0.1168,
"step": 65400
},
{
"epoch": 10.819293029402049,
"grad_norm": 0.9799485206604004,
"learning_rate": 7.103965544665908e-06,
"loss": 0.122,
"step": 65500
},
{
"epoch": 10.83581103402709,
"grad_norm": 0.6957365274429321,
"learning_rate": 7.097847765175152e-06,
"loss": 0.1152,
"step": 65600
},
{
"epoch": 10.852329038652131,
"grad_norm": 0.8867782950401306,
"learning_rate": 7.091729985684397e-06,
"loss": 0.1175,
"step": 65700
},
{
"epoch": 10.868847043277173,
"grad_norm": 0.8960587382316589,
"learning_rate": 7.08561220619364e-06,
"loss": 0.1217,
"step": 65800
},
{
"epoch": 10.885365047902214,
"grad_norm": 0.6762132048606873,
"learning_rate": 7.0794944267028845e-06,
"loss": 0.1223,
"step": 65900
},
{
"epoch": 10.901883052527255,
"grad_norm": 0.835166335105896,
"learning_rate": 7.073376647212128e-06,
"loss": 0.119,
"step": 66000
},
{
"epoch": 10.901883052527255,
"eval_cer": 0.0415611525562932,
"eval_loss": 0.11837079375982285,
"eval_runtime": 48.8246,
"eval_samples_per_second": 34.634,
"eval_steps_per_second": 8.664,
"eval_wer": 0.22619121963404734,
"step": 66000
},
{
"epoch": 10.918401057152296,
"grad_norm": 0.6607236862182617,
"learning_rate": 7.067258867721373e-06,
"loss": 0.1195,
"step": 66100
},
{
"epoch": 10.934919061777338,
"grad_norm": 0.7014028429985046,
"learning_rate": 7.061141088230617e-06,
"loss": 0.1233,
"step": 66200
},
{
"epoch": 10.951437066402379,
"grad_norm": 0.8678550124168396,
"learning_rate": 7.05502330873986e-06,
"loss": 0.127,
"step": 66300
},
{
"epoch": 10.96795507102742,
"grad_norm": 0.676571786403656,
"learning_rate": 7.048905529249105e-06,
"loss": 0.1171,
"step": 66400
},
{
"epoch": 10.984473075652462,
"grad_norm": 0.8348824381828308,
"learning_rate": 7.042787749758349e-06,
"loss": 0.1209,
"step": 66500
},
{
"epoch": 11.000991080277503,
"grad_norm": 1.0055019855499268,
"learning_rate": 7.036669970267592e-06,
"loss": 0.1217,
"step": 66600
},
{
"epoch": 11.017509084902544,
"grad_norm": 0.8197912573814392,
"learning_rate": 7.030552190776836e-06,
"loss": 0.12,
"step": 66700
},
{
"epoch": 11.034027089527585,
"grad_norm": 0.8832284212112427,
"learning_rate": 7.024434411286081e-06,
"loss": 0.1175,
"step": 66800
},
{
"epoch": 11.050545094152627,
"grad_norm": 0.7222535014152527,
"learning_rate": 7.018316631795324e-06,
"loss": 0.1138,
"step": 66900
},
{
"epoch": 11.067063098777668,
"grad_norm": 0.6838215589523315,
"learning_rate": 7.012198852304568e-06,
"loss": 0.1155,
"step": 67000
},
{
"epoch": 11.067063098777668,
"eval_cer": 0.04099650947915954,
"eval_loss": 0.11776668578386307,
"eval_runtime": 48.4053,
"eval_samples_per_second": 34.934,
"eval_steps_per_second": 8.739,
"eval_wer": 0.22519203147442704,
"step": 67000
},
{
"epoch": 11.08358110340271,
"grad_norm": 0.8637756109237671,
"learning_rate": 7.006081072813812e-06,
"loss": 0.1202,
"step": 67100
},
{
"epoch": 11.10009910802775,
"grad_norm": 0.9718282222747803,
"learning_rate": 6.999963293323056e-06,
"loss": 0.1203,
"step": 67200
},
{
"epoch": 11.116617112652792,
"grad_norm": 0.531356930732727,
"learning_rate": 6.993845513832301e-06,
"loss": 0.1128,
"step": 67300
},
{
"epoch": 11.133135117277833,
"grad_norm": 0.5169577598571777,
"learning_rate": 6.987727734341544e-06,
"loss": 0.1232,
"step": 67400
},
{
"epoch": 11.149653121902874,
"grad_norm": 0.634283185005188,
"learning_rate": 6.9816099548507875e-06,
"loss": 0.1125,
"step": 67500
},
{
"epoch": 11.166171126527916,
"grad_norm": 0.7183799743652344,
"learning_rate": 6.975492175360033e-06,
"loss": 0.1187,
"step": 67600
},
{
"epoch": 11.182689131152957,
"grad_norm": 0.5369941592216492,
"learning_rate": 6.969374395869276e-06,
"loss": 0.1161,
"step": 67700
},
{
"epoch": 11.199207135777998,
"grad_norm": 0.5835019946098328,
"learning_rate": 6.96325661637852e-06,
"loss": 0.1137,
"step": 67800
},
{
"epoch": 11.21572514040304,
"grad_norm": 0.7346104383468628,
"learning_rate": 6.957138836887763e-06,
"loss": 0.1135,
"step": 67900
},
{
"epoch": 11.23224314502808,
"grad_norm": 0.6166725754737854,
"learning_rate": 6.951021057397008e-06,
"loss": 0.1224,
"step": 68000
},
{
"epoch": 11.23224314502808,
"eval_cer": 0.040833960714530146,
"eval_loss": 0.11831438541412354,
"eval_runtime": 48.7233,
"eval_samples_per_second": 34.706,
"eval_steps_per_second": 8.682,
"eval_wer": 0.22494223443452194,
"step": 68000
},
{
"epoch": 11.248761149653122,
"grad_norm": 0.9099162220954895,
"learning_rate": 6.944903277906252e-06,
"loss": 0.1248,
"step": 68100
},
{
"epoch": 11.265279154278163,
"grad_norm": 0.5954209566116333,
"learning_rate": 6.938785498415496e-06,
"loss": 0.1184,
"step": 68200
},
{
"epoch": 11.281797158903204,
"grad_norm": 0.765312910079956,
"learning_rate": 6.9326677189247395e-06,
"loss": 0.1422,
"step": 68300
},
{
"epoch": 11.298315163528246,
"grad_norm": 0.9866732954978943,
"learning_rate": 6.926549939433984e-06,
"loss": 0.1213,
"step": 68400
},
{
"epoch": 11.314833168153287,
"grad_norm": 0.6962186694145203,
"learning_rate": 6.920432159943228e-06,
"loss": 0.1178,
"step": 68500
},
{
"epoch": 11.331351172778328,
"grad_norm": 0.547361433506012,
"learning_rate": 6.9143143804524715e-06,
"loss": 0.1215,
"step": 68600
},
{
"epoch": 11.34786917740337,
"grad_norm": 0.5408198833465576,
"learning_rate": 6.908196600961715e-06,
"loss": 0.122,
"step": 68700
},
{
"epoch": 11.36438718202841,
"grad_norm": 0.7383239269256592,
"learning_rate": 6.90207882147096e-06,
"loss": 0.1232,
"step": 68800
},
{
"epoch": 11.380905186653452,
"grad_norm": 0.7225533127784729,
"learning_rate": 6.895961041980204e-06,
"loss": 0.1773,
"step": 68900
},
{
"epoch": 11.397423191278493,
"grad_norm": 0.7376521825790405,
"learning_rate": 6.889843262489447e-06,
"loss": 0.1146,
"step": 69000
},
{
"epoch": 11.397423191278493,
"eval_cer": 0.04105639586612826,
"eval_loss": 0.11837118864059448,
"eval_runtime": 48.8385,
"eval_samples_per_second": 34.624,
"eval_steps_per_second": 8.661,
"eval_wer": 0.22556672703428465,
"step": 69000
},
{
"epoch": 11.413941195903535,
"grad_norm": 0.6268288493156433,
"learning_rate": 6.883725482998691e-06,
"loss": 0.141,
"step": 69100
},
{
"epoch": 11.430459200528576,
"grad_norm": 0.9457260370254517,
"learning_rate": 6.877607703507936e-06,
"loss": 0.1174,
"step": 69200
},
{
"epoch": 11.446977205153617,
"grad_norm": 0.8935351371765137,
"learning_rate": 6.871489924017179e-06,
"loss": 0.1109,
"step": 69300
},
{
"epoch": 11.463495209778658,
"grad_norm": 0.6600612998008728,
"learning_rate": 6.8653721445264235e-06,
"loss": 0.1285,
"step": 69400
},
{
"epoch": 11.4800132144037,
"grad_norm": 0.6968724727630615,
"learning_rate": 6.859254365035667e-06,
"loss": 0.1327,
"step": 69500
},
{
"epoch": 11.496531219028741,
"grad_norm": 0.738458514213562,
"learning_rate": 6.853136585544912e-06,
"loss": 0.1089,
"step": 69600
},
{
"epoch": 11.513049223653782,
"grad_norm": 0.8320337533950806,
"learning_rate": 6.8470188060541556e-06,
"loss": 0.1131,
"step": 69700
},
{
"epoch": 11.529567228278824,
"grad_norm": 0.6417104005813599,
"learning_rate": 6.840901026563399e-06,
"loss": 0.1219,
"step": 69800
},
{
"epoch": 11.546085232903865,
"grad_norm": 0.7197741866111755,
"learning_rate": 6.8347832470726425e-06,
"loss": 0.1165,
"step": 69900
},
{
"epoch": 11.562603237528906,
"grad_norm": 0.8726572394371033,
"learning_rate": 6.828665467581888e-06,
"loss": 0.1137,
"step": 70000
},
{
"epoch": 11.562603237528906,
"eval_cer": 0.04100506467729793,
"eval_loss": 0.11831272393465042,
"eval_runtime": 48.5609,
"eval_samples_per_second": 34.822,
"eval_steps_per_second": 8.711,
"eval_wer": 0.22487978517454568,
"step": 70000
},
{
"epoch": 11.579121242153947,
"grad_norm": 0.6094586849212646,
"learning_rate": 6.822547688091131e-06,
"loss": 0.1233,
"step": 70100
},
{
"epoch": 11.595639246778989,
"grad_norm": 0.7053238749504089,
"learning_rate": 6.8164299086003746e-06,
"loss": 0.1207,
"step": 70200
},
{
"epoch": 11.61215725140403,
"grad_norm": 0.5145518183708191,
"learning_rate": 6.810312129109619e-06,
"loss": 0.1181,
"step": 70300
},
{
"epoch": 11.628675256029071,
"grad_norm": 1.1360536813735962,
"learning_rate": 6.804194349618863e-06,
"loss": 0.1227,
"step": 70400
},
{
"epoch": 11.645193260654112,
"grad_norm": 0.7354953289031982,
"learning_rate": 6.7980765701281075e-06,
"loss": 0.1151,
"step": 70500
},
{
"epoch": 11.661711265279154,
"grad_norm": 0.6327475309371948,
"learning_rate": 6.791958790637351e-06,
"loss": 0.1132,
"step": 70600
},
{
"epoch": 11.678229269904195,
"grad_norm": 0.7320681214332581,
"learning_rate": 6.785841011146594e-06,
"loss": 0.116,
"step": 70700
},
{
"epoch": 11.694747274529236,
"grad_norm": 0.7258247137069702,
"learning_rate": 6.7797232316558396e-06,
"loss": 0.1236,
"step": 70800
},
{
"epoch": 11.711265279154278,
"grad_norm": 0.7472134232521057,
"learning_rate": 6.773605452165083e-06,
"loss": 0.1148,
"step": 70900
},
{
"epoch": 11.727783283779319,
"grad_norm": 0.9377081394195557,
"learning_rate": 6.7674876726743265e-06,
"loss": 0.1173,
"step": 71000
},
{
"epoch": 11.727783283779319,
"eval_cer": 0.04082540551639176,
"eval_loss": 0.11736804246902466,
"eval_runtime": 48.644,
"eval_samples_per_second": 34.763,
"eval_steps_per_second": 8.696,
"eval_wer": 0.22294385811528133,
"step": 71000
},
{
"epoch": 11.74430128840436,
"grad_norm": 0.638346791267395,
"learning_rate": 6.76136989318357e-06,
"loss": 0.1333,
"step": 71100
},
{
"epoch": 11.760819293029401,
"grad_norm": 0.7357622981071472,
"learning_rate": 6.755252113692815e-06,
"loss": 0.1136,
"step": 71200
},
{
"epoch": 11.777337297654443,
"grad_norm": 0.801539957523346,
"learning_rate": 6.7491343342020586e-06,
"loss": 0.1162,
"step": 71300
},
{
"epoch": 11.793855302279484,
"grad_norm": 0.869429886341095,
"learning_rate": 6.743016554711302e-06,
"loss": 0.112,
"step": 71400
},
{
"epoch": 11.810373306904525,
"grad_norm": 0.649721622467041,
"learning_rate": 6.736898775220546e-06,
"loss": 0.1115,
"step": 71500
},
{
"epoch": 11.826891311529566,
"grad_norm": 0.8566005229949951,
"learning_rate": 6.730780995729791e-06,
"loss": 0.1178,
"step": 71600
},
{
"epoch": 11.84340931615461,
"grad_norm": 0.8232606649398804,
"learning_rate": 6.724663216239035e-06,
"loss": 0.1267,
"step": 71700
},
{
"epoch": 11.85992732077965,
"grad_norm": 0.7500156760215759,
"learning_rate": 6.7185454367482784e-06,
"loss": 0.1163,
"step": 71800
},
{
"epoch": 11.876445325404692,
"grad_norm": 0.635427713394165,
"learning_rate": 6.712427657257522e-06,
"loss": 0.1555,
"step": 71900
},
{
"epoch": 11.892963330029733,
"grad_norm": 0.807422399520874,
"learning_rate": 6.706309877766767e-06,
"loss": 0.1428,
"step": 72000
},
{
"epoch": 11.892963330029733,
"eval_cer": 0.04042331120388748,
"eval_loss": 0.11749948561191559,
"eval_runtime": 48.902,
"eval_samples_per_second": 34.579,
"eval_steps_per_second": 8.65,
"eval_wer": 0.2228189595953288,
"step": 72000
},
{
"epoch": 11.909481334654775,
"grad_norm": 0.6549407839775085,
"learning_rate": 6.7001920982760105e-06,
"loss": 0.1114,
"step": 72100
},
{
"epoch": 11.925999339279816,
"grad_norm": 1.0132852792739868,
"learning_rate": 6.694074318785254e-06,
"loss": 0.1153,
"step": 72200
},
{
"epoch": 11.942517343904857,
"grad_norm": 0.5365763306617737,
"learning_rate": 6.6879565392944974e-06,
"loss": 0.1109,
"step": 72300
},
{
"epoch": 11.959035348529898,
"grad_norm": 0.6037495732307434,
"learning_rate": 6.681838759803743e-06,
"loss": 0.1144,
"step": 72400
},
{
"epoch": 11.97555335315494,
"grad_norm": 0.4775562286376953,
"learning_rate": 6.675720980312986e-06,
"loss": 0.1178,
"step": 72500
},
{
"epoch": 11.992071357779981,
"grad_norm": 0.863073468208313,
"learning_rate": 6.66960320082223e-06,
"loss": 0.1168,
"step": 72600
},
{
"epoch": 12.008589362405022,
"grad_norm": 2.4738621711730957,
"learning_rate": 6.663485421331474e-06,
"loss": 0.1322,
"step": 72700
},
{
"epoch": 12.025107367030063,
"grad_norm": 0.6702748537063599,
"learning_rate": 6.657367641840718e-06,
"loss": 0.1702,
"step": 72800
},
{
"epoch": 12.041625371655105,
"grad_norm": 0.9668029546737671,
"learning_rate": 6.6512498623499624e-06,
"loss": 0.1358,
"step": 72900
},
{
"epoch": 12.058143376280146,
"grad_norm": 0.6446594595909119,
"learning_rate": 6.645132082859206e-06,
"loss": 0.1128,
"step": 73000
},
{
"epoch": 12.058143376280146,
"eval_cer": 0.040833960714530146,
"eval_loss": 0.11493762582540512,
"eval_runtime": 48.8939,
"eval_samples_per_second": 34.585,
"eval_steps_per_second": 8.651,
"eval_wer": 0.22231936551551865,
"step": 73000
},
{
"epoch": 12.074661380905187,
"grad_norm": 0.7115280032157898,
"learning_rate": 6.639014303368449e-06,
"loss": 0.1126,
"step": 73100
},
{
"epoch": 12.091179385530229,
"grad_norm": 0.6623009443283081,
"learning_rate": 6.6328965238776945e-06,
"loss": 0.1187,
"step": 73200
},
{
"epoch": 12.10769739015527,
"grad_norm": 0.7320263981819153,
"learning_rate": 6.626778744386938e-06,
"loss": 0.11,
"step": 73300
},
{
"epoch": 12.124215394780311,
"grad_norm": 0.7504459619522095,
"learning_rate": 6.6206609648961814e-06,
"loss": 0.1128,
"step": 73400
},
{
"epoch": 12.140733399405352,
"grad_norm": 0.6281275749206543,
"learning_rate": 6.614543185405426e-06,
"loss": 0.1169,
"step": 73500
},
{
"epoch": 12.157251404030394,
"grad_norm": 0.6658099889755249,
"learning_rate": 6.60842540591467e-06,
"loss": 0.1093,
"step": 73600
},
{
"epoch": 12.173769408655435,
"grad_norm": 0.8157078623771667,
"learning_rate": 6.6023076264239135e-06,
"loss": 0.1128,
"step": 73700
},
{
"epoch": 12.190287413280476,
"grad_norm": 0.7392610907554626,
"learning_rate": 6.596189846933158e-06,
"loss": 0.1168,
"step": 73800
},
{
"epoch": 12.206805417905517,
"grad_norm": 0.5370469689369202,
"learning_rate": 6.590072067442401e-06,
"loss": 0.115,
"step": 73900
},
{
"epoch": 12.223323422530559,
"grad_norm": 0.7587655782699585,
"learning_rate": 6.5839542879516465e-06,
"loss": 0.1063,
"step": 74000
},
{
"epoch": 12.223323422530559,
"eval_cer": 0.04020087605228937,
"eval_loss": 0.1159936785697937,
"eval_runtime": 48.8724,
"eval_samples_per_second": 34.6,
"eval_steps_per_second": 8.655,
"eval_wer": 0.22044588771623055,
"step": 74000
},
{
"epoch": 12.2398414271556,
"grad_norm": 0.6018242835998535,
"learning_rate": 6.57783650846089e-06,
"loss": 0.1118,
"step": 74100
},
{
"epoch": 12.256359431780641,
"grad_norm": 0.6748114228248596,
"learning_rate": 6.571718728970133e-06,
"loss": 0.1094,
"step": 74200
},
{
"epoch": 12.272877436405683,
"grad_norm": 0.6757166981697083,
"learning_rate": 6.565600949479377e-06,
"loss": 0.1097,
"step": 74300
},
{
"epoch": 12.289395441030724,
"grad_norm": 0.948271632194519,
"learning_rate": 6.559483169988622e-06,
"loss": 0.1186,
"step": 74400
},
{
"epoch": 12.305913445655765,
"grad_norm": 0.6468844413757324,
"learning_rate": 6.5533653904978655e-06,
"loss": 0.1206,
"step": 74500
},
{
"epoch": 12.322431450280806,
"grad_norm": 0.6049870848655701,
"learning_rate": 6.547247611007109e-06,
"loss": 0.1672,
"step": 74600
},
{
"epoch": 12.338949454905848,
"grad_norm": 1.080959677696228,
"learning_rate": 6.541129831516353e-06,
"loss": 0.136,
"step": 74700
},
{
"epoch": 12.355467459530889,
"grad_norm": 0.7225471138954163,
"learning_rate": 6.5350120520255975e-06,
"loss": 0.1095,
"step": 74800
},
{
"epoch": 12.37198546415593,
"grad_norm": 0.6947051286697388,
"learning_rate": 6.528894272534841e-06,
"loss": 0.1354,
"step": 74900
},
{
"epoch": 12.388503468780971,
"grad_norm": 0.6471466422080994,
"learning_rate": 6.522776493044085e-06,
"loss": 0.1051,
"step": 75000
},
{
"epoch": 12.388503468780971,
"eval_cer": 0.04020943125042776,
"eval_loss": 0.11492911726236343,
"eval_runtime": 48.7391,
"eval_samples_per_second": 34.695,
"eval_steps_per_second": 8.679,
"eval_wer": 0.2200087428963967,
"step": 75000
},
{
"epoch": 12.405021473406013,
"grad_norm": 0.6231672763824463,
"learning_rate": 6.516658713553329e-06,
"loss": 0.1331,
"step": 75100
},
{
"epoch": 12.421539478031054,
"grad_norm": 0.49103644490242004,
"learning_rate": 6.510540934062574e-06,
"loss": 0.11,
"step": 75200
},
{
"epoch": 12.438057482656095,
"grad_norm": 0.7189831733703613,
"learning_rate": 6.504423154571817e-06,
"loss": 0.115,
"step": 75300
},
{
"epoch": 12.454575487281137,
"grad_norm": 0.5822007060050964,
"learning_rate": 6.498305375081061e-06,
"loss": 0.112,
"step": 75400
},
{
"epoch": 12.471093491906178,
"grad_norm": 0.6000872254371643,
"learning_rate": 6.492187595590304e-06,
"loss": 0.1088,
"step": 75500
},
{
"epoch": 12.487611496531219,
"grad_norm": 0.6508600115776062,
"learning_rate": 6.4860698160995495e-06,
"loss": 0.1111,
"step": 75600
},
{
"epoch": 12.50412950115626,
"grad_norm": 0.6574178338050842,
"learning_rate": 6.479952036608793e-06,
"loss": 0.1095,
"step": 75700
},
{
"epoch": 12.520647505781302,
"grad_norm": 0.845613956451416,
"learning_rate": 6.473834257118036e-06,
"loss": 0.1139,
"step": 75800
},
{
"epoch": 12.537165510406343,
"grad_norm": 0.5848095417022705,
"learning_rate": 6.467716477627281e-06,
"loss": 0.1147,
"step": 75900
},
{
"epoch": 12.553683515031384,
"grad_norm": 0.9496851563453674,
"learning_rate": 6.461598698136525e-06,
"loss": 0.1128,
"step": 76000
},
{
"epoch": 12.553683515031384,
"eval_cer": 0.040106768872767096,
"eval_loss": 0.112928107380867,
"eval_runtime": 48.6721,
"eval_samples_per_second": 34.743,
"eval_steps_per_second": 8.691,
"eval_wer": 0.22025853993630176,
"step": 76000
},
{
"epoch": 12.570201519656425,
"grad_norm": 0.7724167108535767,
"learning_rate": 6.455480918645769e-06,
"loss": 0.1156,
"step": 76100
},
{
"epoch": 12.586719524281467,
"grad_norm": 0.753487765789032,
"learning_rate": 6.449363139155013e-06,
"loss": 0.1216,
"step": 76200
},
{
"epoch": 12.603237528906508,
"grad_norm": 0.7323099970817566,
"learning_rate": 6.443245359664256e-06,
"loss": 0.1163,
"step": 76300
},
{
"epoch": 12.61975553353155,
"grad_norm": 0.5276266932487488,
"learning_rate": 6.437127580173501e-06,
"loss": 0.1261,
"step": 76400
},
{
"epoch": 12.63627353815659,
"grad_norm": 0.7041454315185547,
"learning_rate": 6.431009800682745e-06,
"loss": 0.1097,
"step": 76500
},
{
"epoch": 12.652791542781632,
"grad_norm": 0.5830830931663513,
"learning_rate": 6.424892021191988e-06,
"loss": 0.1053,
"step": 76600
},
{
"epoch": 12.669309547406673,
"grad_norm": 0.8507035970687866,
"learning_rate": 6.418774241701232e-06,
"loss": 0.1157,
"step": 76700
},
{
"epoch": 12.685827552031714,
"grad_norm": 0.7934384942054749,
"learning_rate": 6.412656462210477e-06,
"loss": 0.1139,
"step": 76800
},
{
"epoch": 12.702345556656756,
"grad_norm": 0.8126075863838196,
"learning_rate": 6.40653868271972e-06,
"loss": 0.1382,
"step": 76900
},
{
"epoch": 12.718863561281797,
"grad_norm": 0.7506862282752991,
"learning_rate": 6.400420903228965e-06,
"loss": 0.1108,
"step": 77000
},
{
"epoch": 12.718863561281797,
"eval_cer": 0.04023509684484293,
"eval_loss": 0.11516769230365753,
"eval_runtime": 48.6282,
"eval_samples_per_second": 34.774,
"eval_steps_per_second": 8.699,
"eval_wer": 0.2200087428963967,
"step": 77000
},
{
"epoch": 12.735381565906838,
"grad_norm": 0.6928473114967346,
"learning_rate": 6.394303123738208e-06,
"loss": 0.1118,
"step": 77100
},
{
"epoch": 12.75189957053188,
"grad_norm": 0.7494087815284729,
"learning_rate": 6.3881853442474525e-06,
"loss": 0.1152,
"step": 77200
},
{
"epoch": 12.76841757515692,
"grad_norm": 0.7207498550415039,
"learning_rate": 6.382067564756697e-06,
"loss": 0.1074,
"step": 77300
},
{
"epoch": 12.784935579781962,
"grad_norm": 0.6607386469841003,
"learning_rate": 6.37594978526594e-06,
"loss": 0.1102,
"step": 77400
},
{
"epoch": 12.801453584407003,
"grad_norm": 0.5259993076324463,
"learning_rate": 6.369832005775184e-06,
"loss": 0.1067,
"step": 77500
},
{
"epoch": 12.817971589032044,
"grad_norm": 0.7667635679244995,
"learning_rate": 6.363714226284429e-06,
"loss": 0.1079,
"step": 77600
},
{
"epoch": 12.834489593657086,
"grad_norm": 0.676259458065033,
"learning_rate": 6.357596446793672e-06,
"loss": 0.1323,
"step": 77700
},
{
"epoch": 12.851007598282127,
"grad_norm": 0.6613221168518066,
"learning_rate": 6.351478667302916e-06,
"loss": 0.1104,
"step": 77800
},
{
"epoch": 12.867525602907168,
"grad_norm": 0.8658110499382019,
"learning_rate": 6.34536088781216e-06,
"loss": 0.1087,
"step": 77900
},
{
"epoch": 12.88404360753221,
"grad_norm": 0.5932702422142029,
"learning_rate": 6.3392431083214044e-06,
"loss": 0.1184,
"step": 78000
},
{
"epoch": 12.88404360753221,
"eval_cer": 0.04030353842995004,
"eval_loss": 0.11411629617214203,
"eval_runtime": 48.5234,
"eval_samples_per_second": 34.849,
"eval_steps_per_second": 8.717,
"eval_wer": 0.22019609067632548,
"step": 78000
},
{
"epoch": 12.90056161215725,
"grad_norm": 0.7417730689048767,
"learning_rate": 6.333125328830648e-06,
"loss": 0.1262,
"step": 78100
},
{
"epoch": 12.917079616782292,
"grad_norm": 0.625182032585144,
"learning_rate": 6.327007549339892e-06,
"loss": 0.1114,
"step": 78200
},
{
"epoch": 12.933597621407333,
"grad_norm": 0.9503306746482849,
"learning_rate": 6.320889769849136e-06,
"loss": 0.109,
"step": 78300
},
{
"epoch": 12.950115626032375,
"grad_norm": 0.4723988473415375,
"learning_rate": 6.314771990358381e-06,
"loss": 0.1346,
"step": 78400
},
{
"epoch": 12.966633630657416,
"grad_norm": 0.5400856137275696,
"learning_rate": 6.308654210867624e-06,
"loss": 0.1162,
"step": 78500
},
{
"epoch": 12.983151635282457,
"grad_norm": 0.9495701789855957,
"learning_rate": 6.302536431376868e-06,
"loss": 0.1116,
"step": 78600
},
{
"epoch": 12.999669639907498,
"grad_norm": 0.5586131811141968,
"learning_rate": 6.296418651886111e-06,
"loss": 0.1093,
"step": 78700
},
{
"epoch": 13.01618764453254,
"grad_norm": 0.7302865386009216,
"learning_rate": 6.290300872395356e-06,
"loss": 0.1095,
"step": 78800
},
{
"epoch": 13.032705649157581,
"grad_norm": 0.726801872253418,
"learning_rate": 6.2841830929046e-06,
"loss": 0.1144,
"step": 78900
},
{
"epoch": 13.049223653782622,
"grad_norm": 0.6335176825523376,
"learning_rate": 6.278065313413843e-06,
"loss": 0.1099,
"step": 79000
},
{
"epoch": 13.049223653782622,
"eval_cer": 0.04023509684484293,
"eval_loss": 0.11533664911985397,
"eval_runtime": 48.8975,
"eval_samples_per_second": 34.583,
"eval_steps_per_second": 8.651,
"eval_wer": 0.2199462936364204,
"step": 79000
},
{
"epoch": 13.065741658407664,
"grad_norm": 0.7183253765106201,
"learning_rate": 6.271947533923088e-06,
"loss": 0.1015,
"step": 79100
},
{
"epoch": 13.082259663032705,
"grad_norm": 0.8460133075714111,
"learning_rate": 6.265829754432332e-06,
"loss": 0.1147,
"step": 79200
},
{
"epoch": 13.098777667657746,
"grad_norm": 0.9035709500312805,
"learning_rate": 6.259711974941575e-06,
"loss": 0.105,
"step": 79300
},
{
"epoch": 13.115295672282787,
"grad_norm": 1.1149568557739258,
"learning_rate": 6.25359419545082e-06,
"loss": 0.1173,
"step": 79400
},
{
"epoch": 13.131813676907829,
"grad_norm": 0.746825635433197,
"learning_rate": 6.247476415960063e-06,
"loss": 0.1103,
"step": 79500
},
{
"epoch": 13.148331681532872,
"grad_norm": 0.5890305638313293,
"learning_rate": 6.241358636469308e-06,
"loss": 0.1075,
"step": 79600
},
{
"epoch": 13.164849686157913,
"grad_norm": 0.6706238985061646,
"learning_rate": 6.235240856978552e-06,
"loss": 0.1043,
"step": 79700
},
{
"epoch": 13.181367690782954,
"grad_norm": 0.7864231467247009,
"learning_rate": 6.229123077487795e-06,
"loss": 0.1105,
"step": 79800
},
{
"epoch": 13.197885695407995,
"grad_norm": 0.7406273484230042,
"learning_rate": 6.223005297997039e-06,
"loss": 0.1046,
"step": 79900
},
{
"epoch": 13.214403700033037,
"grad_norm": 0.7028843760490417,
"learning_rate": 6.216887518506284e-06,
"loss": 0.1119,
"step": 80000
},
{
"epoch": 13.214403700033037,
"eval_cer": 0.039867223324892204,
"eval_loss": 0.11367151141166687,
"eval_runtime": 48.9472,
"eval_samples_per_second": 34.547,
"eval_steps_per_second": 8.642,
"eval_wer": 0.21919690251670518,
"step": 80000
},
{
"epoch": 13.230921704658078,
"grad_norm": 0.6652178168296814,
"learning_rate": 6.210769739015527e-06,
"loss": 0.1118,
"step": 80100
},
{
"epoch": 13.24743970928312,
"grad_norm": 0.9752405285835266,
"learning_rate": 6.204651959524771e-06,
"loss": 0.1305,
"step": 80200
},
{
"epoch": 13.26395771390816,
"grad_norm": 0.6729234457015991,
"learning_rate": 6.198534180034015e-06,
"loss": 0.1102,
"step": 80300
},
{
"epoch": 13.280475718533202,
"grad_norm": 0.7551404237747192,
"learning_rate": 6.192416400543259e-06,
"loss": 0.1131,
"step": 80400
},
{
"epoch": 13.296993723158243,
"grad_norm": 0.5141217112541199,
"learning_rate": 6.186298621052504e-06,
"loss": 0.1041,
"step": 80500
},
{
"epoch": 13.313511727783284,
"grad_norm": 0.7362185716629028,
"learning_rate": 6.180180841561747e-06,
"loss": 0.1054,
"step": 80600
},
{
"epoch": 13.330029732408326,
"grad_norm": 0.6110237240791321,
"learning_rate": 6.174063062070991e-06,
"loss": 0.1067,
"step": 80700
},
{
"epoch": 13.346547737033367,
"grad_norm": 0.5987915992736816,
"learning_rate": 6.167945282580236e-06,
"loss": 0.1518,
"step": 80800
},
{
"epoch": 13.363065741658408,
"grad_norm": 0.7611739635467529,
"learning_rate": 6.161827503089479e-06,
"loss": 0.1036,
"step": 80900
},
{
"epoch": 13.37958374628345,
"grad_norm": 0.5500743389129639,
"learning_rate": 6.155709723598723e-06,
"loss": 0.1102,
"step": 81000
},
{
"epoch": 13.37958374628345,
"eval_cer": 0.039858668126753816,
"eval_loss": 0.11399171501398087,
"eval_runtime": 48.7532,
"eval_samples_per_second": 34.685,
"eval_steps_per_second": 8.676,
"eval_wer": 0.21869730843689503,
"step": 81000
},
{
"epoch": 13.39610175090849,
"grad_norm": 0.7734577059745789,
"learning_rate": 6.149591944107966e-06,
"loss": 0.109,
"step": 81100
},
{
"epoch": 13.412619755533532,
"grad_norm": 0.6689289808273315,
"learning_rate": 6.143474164617211e-06,
"loss": 0.1097,
"step": 81200
},
{
"epoch": 13.429137760158573,
"grad_norm": 0.7353644371032715,
"learning_rate": 6.137356385126455e-06,
"loss": 0.1084,
"step": 81300
},
{
"epoch": 13.445655764783615,
"grad_norm": 0.6356621384620667,
"learning_rate": 6.131238605635699e-06,
"loss": 0.115,
"step": 81400
},
{
"epoch": 13.462173769408656,
"grad_norm": 0.6484361290931702,
"learning_rate": 6.1251208261449426e-06,
"loss": 0.1125,
"step": 81500
},
{
"epoch": 13.478691774033697,
"grad_norm": 0.9929621815681458,
"learning_rate": 6.119003046654187e-06,
"loss": 0.1033,
"step": 81600
},
{
"epoch": 13.495209778658738,
"grad_norm": 0.7411353588104248,
"learning_rate": 6.112885267163431e-06,
"loss": 0.115,
"step": 81700
},
{
"epoch": 13.51172778328378,
"grad_norm": 0.7139526009559631,
"learning_rate": 6.106767487672675e-06,
"loss": 0.1023,
"step": 81800
},
{
"epoch": 13.528245787908821,
"grad_norm": 0.6597611904144287,
"learning_rate": 6.100649708181918e-06,
"loss": 0.1063,
"step": 81900
},
{
"epoch": 13.544763792533862,
"grad_norm": 0.8007270097732544,
"learning_rate": 6.094531928691163e-06,
"loss": 0.1086,
"step": 82000
},
{
"epoch": 13.544763792533862,
"eval_cer": 0.03977311614536993,
"eval_loss": 0.11309035122394562,
"eval_runtime": 48.6041,
"eval_samples_per_second": 34.791,
"eval_steps_per_second": 8.703,
"eval_wer": 0.2185724099169425,
"step": 82000
},
{
"epoch": 13.561281797158903,
"grad_norm": 0.8659864068031311,
"learning_rate": 6.088414149200407e-06,
"loss": 0.1084,
"step": 82100
},
{
"epoch": 13.577799801783945,
"grad_norm": 0.6871950030326843,
"learning_rate": 6.08229636970965e-06,
"loss": 0.1072,
"step": 82200
},
{
"epoch": 13.594317806408986,
"grad_norm": 0.5756420493125916,
"learning_rate": 6.0761785902188945e-06,
"loss": 0.1124,
"step": 82300
},
{
"epoch": 13.610835811034027,
"grad_norm": 1.0295737981796265,
"learning_rate": 6.070060810728139e-06,
"loss": 0.1162,
"step": 82400
},
{
"epoch": 13.627353815659069,
"grad_norm": 0.5129362940788269,
"learning_rate": 6.063943031237382e-06,
"loss": 0.1103,
"step": 82500
},
{
"epoch": 13.64387182028411,
"grad_norm": 0.7439867258071899,
"learning_rate": 6.0578252517466266e-06,
"loss": 0.1061,
"step": 82600
},
{
"epoch": 13.660389824909151,
"grad_norm": 0.4660612940788269,
"learning_rate": 6.05170747225587e-06,
"loss": 0.1143,
"step": 82700
},
{
"epoch": 13.676907829534192,
"grad_norm": 0.7765456438064575,
"learning_rate": 6.045589692765114e-06,
"loss": 0.1858,
"step": 82800
},
{
"epoch": 13.693425834159234,
"grad_norm": 0.793312132358551,
"learning_rate": 6.039471913274359e-06,
"loss": 0.1098,
"step": 82900
},
{
"epoch": 13.709943838784275,
"grad_norm": 0.6621662378311157,
"learning_rate": 6.033354133783602e-06,
"loss": 0.1151,
"step": 83000
},
{
"epoch": 13.709943838784275,
"eval_cer": 0.03952501539935665,
"eval_loss": 0.11215273290872574,
"eval_runtime": 49.1873,
"eval_samples_per_second": 34.379,
"eval_steps_per_second": 8.6,
"eval_wer": 0.21701117841753575,
"step": 83000
},
{
"epoch": 13.726461843409316,
"grad_norm": 0.6841396689414978,
"learning_rate": 6.0272363542928456e-06,
"loss": 0.1094,
"step": 83100
},
{
"epoch": 13.742979848034357,
"grad_norm": 0.7111786007881165,
"learning_rate": 6.021118574802091e-06,
"loss": 0.1072,
"step": 83200
},
{
"epoch": 13.759497852659399,
"grad_norm": 0.7815682291984558,
"learning_rate": 6.015000795311334e-06,
"loss": 0.1102,
"step": 83300
},
{
"epoch": 13.77601585728444,
"grad_norm": 0.8677568435668945,
"learning_rate": 6.008883015820578e-06,
"loss": 0.1062,
"step": 83400
},
{
"epoch": 13.792533861909481,
"grad_norm": 0.5680195689201355,
"learning_rate": 6.002765236329822e-06,
"loss": 0.106,
"step": 83500
},
{
"epoch": 13.809051866534523,
"grad_norm": 0.9129924178123474,
"learning_rate": 5.996647456839066e-06,
"loss": 0.0995,
"step": 83600
},
{
"epoch": 13.825569871159564,
"grad_norm": 0.662200927734375,
"learning_rate": 5.99052967734831e-06,
"loss": 0.1088,
"step": 83700
},
{
"epoch": 13.842087875784605,
"grad_norm": 0.887140691280365,
"learning_rate": 5.984411897857554e-06,
"loss": 0.1098,
"step": 83800
},
{
"epoch": 13.858605880409646,
"grad_norm": 0.9814369082450867,
"learning_rate": 5.978294118366798e-06,
"loss": 0.1068,
"step": 83900
},
{
"epoch": 13.875123885034688,
"grad_norm": 0.761234700679779,
"learning_rate": 5.972176338876043e-06,
"loss": 0.1033,
"step": 84000
},
{
"epoch": 13.875123885034688,
"eval_cer": 0.03929402504962015,
"eval_loss": 0.112494558095932,
"eval_runtime": 49.3853,
"eval_samples_per_second": 34.241,
"eval_steps_per_second": 8.565,
"eval_wer": 0.21626178729782053,
"step": 84000
},
{
"epoch": 13.891641889659729,
"grad_norm": 0.5570207238197327,
"learning_rate": 5.966058559385286e-06,
"loss": 0.1081,
"step": 84100
},
{
"epoch": 13.90815989428477,
"grad_norm": 0.5992655158042908,
"learning_rate": 5.95994077989453e-06,
"loss": 0.1362,
"step": 84200
},
{
"epoch": 13.924677898909811,
"grad_norm": 0.4389006197452545,
"learning_rate": 5.953823000403775e-06,
"loss": 0.1122,
"step": 84300
},
{
"epoch": 13.941195903534853,
"grad_norm": 0.6106426119804382,
"learning_rate": 5.947705220913018e-06,
"loss": 0.1065,
"step": 84400
},
{
"epoch": 13.957713908159894,
"grad_norm": 0.5008405447006226,
"learning_rate": 5.941587441422262e-06,
"loss": 0.1301,
"step": 84500
},
{
"epoch": 13.974231912784935,
"grad_norm": 20.616357803344727,
"learning_rate": 5.935469661931505e-06,
"loss": 0.1729,
"step": 84600
},
{
"epoch": 13.990749917409977,
"grad_norm": 0.7851992845535278,
"learning_rate": 5.92935188244075e-06,
"loss": 0.1043,
"step": 84700
},
{
"epoch": 14.007267922035018,
"grad_norm": 0.8801394104957581,
"learning_rate": 5.923234102949994e-06,
"loss": 0.1074,
"step": 84800
},
{
"epoch": 14.023785926660059,
"grad_norm": 0.5735670924186707,
"learning_rate": 5.917116323459238e-06,
"loss": 0.105,
"step": 84900
},
{
"epoch": 14.0403039312851,
"grad_norm": 0.6361643671989441,
"learning_rate": 5.9109985439684815e-06,
"loss": 0.1078,
"step": 85000
},
{
"epoch": 14.0403039312851,
"eval_cer": 0.03945657381424954,
"eval_loss": 0.1119338721036911,
"eval_runtime": 49.079,
"eval_samples_per_second": 34.455,
"eval_steps_per_second": 8.619,
"eval_wer": 0.2175107724973459,
"step": 85000
},
{
"epoch": 14.056821935910142,
"grad_norm": 0.6829052567481995,
"learning_rate": 5.904880764477726e-06,
"loss": 0.1007,
"step": 85100
},
{
"epoch": 14.073339940535183,
"grad_norm": 0.5998505353927612,
"learning_rate": 5.89876298498697e-06,
"loss": 0.1058,
"step": 85200
},
{
"epoch": 14.089857945160224,
"grad_norm": 0.7161391973495483,
"learning_rate": 5.892645205496214e-06,
"loss": 0.1096,
"step": 85300
},
{
"epoch": 14.106375949785265,
"grad_norm": 0.5567154288291931,
"learning_rate": 5.886527426005457e-06,
"loss": 0.1078,
"step": 85400
},
{
"epoch": 14.122893954410307,
"grad_norm": 0.9288133978843689,
"learning_rate": 5.880409646514702e-06,
"loss": 0.1075,
"step": 85500
},
{
"epoch": 14.139411959035348,
"grad_norm": 0.7576249837875366,
"learning_rate": 5.874291867023946e-06,
"loss": 0.1135,
"step": 85600
},
{
"epoch": 14.15592996366039,
"grad_norm": 0.7857004404067993,
"learning_rate": 5.868174087533189e-06,
"loss": 0.1045,
"step": 85700
},
{
"epoch": 14.17244796828543,
"grad_norm": 0.962145984172821,
"learning_rate": 5.8620563080424335e-06,
"loss": 0.1,
"step": 85800
},
{
"epoch": 14.188965972910472,
"grad_norm": 0.7464323043823242,
"learning_rate": 5.855938528551678e-06,
"loss": 0.1034,
"step": 85900
},
{
"epoch": 14.205483977535513,
"grad_norm": 0.8271916508674622,
"learning_rate": 5.849820749060921e-06,
"loss": 0.1082,
"step": 86000
},
{
"epoch": 14.205483977535513,
"eval_cer": 0.039174252275682706,
"eval_loss": 0.1129402220249176,
"eval_runtime": 48.9184,
"eval_samples_per_second": 34.568,
"eval_steps_per_second": 8.647,
"eval_wer": 0.21513770061824766,
"step": 86000
},
{
"epoch": 14.222001982160554,
"grad_norm": 0.5619252324104309,
"learning_rate": 5.8437029695701655e-06,
"loss": 0.1072,
"step": 86100
},
{
"epoch": 14.238519986785596,
"grad_norm": 0.5619592070579529,
"learning_rate": 5.837585190079409e-06,
"loss": 0.1038,
"step": 86200
},
{
"epoch": 14.255037991410637,
"grad_norm": 1.2644349336624146,
"learning_rate": 5.831467410588653e-06,
"loss": 0.102,
"step": 86300
},
{
"epoch": 14.271555996035678,
"grad_norm": 0.7374313473701477,
"learning_rate": 5.825349631097898e-06,
"loss": 0.1008,
"step": 86400
},
{
"epoch": 14.28807400066072,
"grad_norm": 0.8285679221153259,
"learning_rate": 5.819231851607141e-06,
"loss": 0.1024,
"step": 86500
},
{
"epoch": 14.30459200528576,
"grad_norm": 0.5749133825302124,
"learning_rate": 5.8131140721163845e-06,
"loss": 0.1078,
"step": 86600
},
{
"epoch": 14.321110009910802,
"grad_norm": 0.6757526397705078,
"learning_rate": 5.80699629262563e-06,
"loss": 0.1298,
"step": 86700
},
{
"epoch": 14.337628014535843,
"grad_norm": 0.4636983275413513,
"learning_rate": 5.800878513134873e-06,
"loss": 0.1045,
"step": 86800
},
{
"epoch": 14.354146019160885,
"grad_norm": 0.6189342737197876,
"learning_rate": 5.794760733644117e-06,
"loss": 0.1335,
"step": 86900
},
{
"epoch": 14.370664023785928,
"grad_norm": 0.7641118764877319,
"learning_rate": 5.788642954153361e-06,
"loss": 0.102,
"step": 87000
},
{
"epoch": 14.370664023785928,
"eval_cer": 0.038857709944562314,
"eval_loss": 0.11137784272432327,
"eval_runtime": 49.1649,
"eval_samples_per_second": 34.394,
"eval_steps_per_second": 8.604,
"eval_wer": 0.21457565727846126,
"step": 87000
},
{
"epoch": 14.387182028410969,
"grad_norm": 0.8745734095573425,
"learning_rate": 5.782525174662605e-06,
"loss": 0.1056,
"step": 87100
},
{
"epoch": 14.40370003303601,
"grad_norm": 0.4426126182079315,
"learning_rate": 5.776407395171849e-06,
"loss": 0.1294,
"step": 87200
},
{
"epoch": 14.420218037661051,
"grad_norm": 0.7525532841682434,
"learning_rate": 5.770289615681093e-06,
"loss": 0.1139,
"step": 87300
},
{
"epoch": 14.436736042286093,
"grad_norm": 0.6336373686790466,
"learning_rate": 5.7641718361903365e-06,
"loss": 0.1023,
"step": 87400
},
{
"epoch": 14.453254046911134,
"grad_norm": 0.6930210590362549,
"learning_rate": 5.758054056699582e-06,
"loss": 0.1336,
"step": 87500
},
{
"epoch": 14.469772051536175,
"grad_norm": 0.7454831004142761,
"learning_rate": 5.751936277208825e-06,
"loss": 0.1032,
"step": 87600
},
{
"epoch": 14.486290056161216,
"grad_norm": 0.7100419998168945,
"learning_rate": 5.7458184977180686e-06,
"loss": 0.1034,
"step": 87700
},
{
"epoch": 14.502808060786258,
"grad_norm": 0.6206198334693909,
"learning_rate": 5.739700718227312e-06,
"loss": 0.1073,
"step": 87800
},
{
"epoch": 14.519326065411299,
"grad_norm": 0.5653363466262817,
"learning_rate": 5.733582938736557e-06,
"loss": 0.1034,
"step": 87900
},
{
"epoch": 14.53584407003634,
"grad_norm": 0.6938855051994324,
"learning_rate": 5.727465159245801e-06,
"loss": 0.1065,
"step": 88000
},
{
"epoch": 14.53584407003634,
"eval_cer": 0.03898603791663815,
"eval_loss": 0.11170890182256699,
"eval_runtime": 62.1432,
"eval_samples_per_second": 27.211,
"eval_steps_per_second": 6.807,
"eval_wer": 0.21582464247798663,
"step": 88000
},
{
"epoch": 14.552362074661382,
"grad_norm": 0.9062691330909729,
"learning_rate": 5.721347379755044e-06,
"loss": 0.0999,
"step": 88100
},
{
"epoch": 14.568880079286423,
"grad_norm": 0.6869949102401733,
"learning_rate": 5.715229600264288e-06,
"loss": 0.1033,
"step": 88200
},
{
"epoch": 14.585398083911464,
"grad_norm": 0.6004628539085388,
"learning_rate": 5.709111820773533e-06,
"loss": 0.108,
"step": 88300
},
{
"epoch": 14.601916088536505,
"grad_norm": 0.6582931876182556,
"learning_rate": 5.702994041282777e-06,
"loss": 0.1034,
"step": 88400
},
{
"epoch": 14.618434093161547,
"grad_norm": 0.5958510637283325,
"learning_rate": 5.6968762617920205e-06,
"loss": 0.1082,
"step": 88500
},
{
"epoch": 14.634952097786588,
"grad_norm": 0.8877278566360474,
"learning_rate": 5.690758482301264e-06,
"loss": 0.1051,
"step": 88600
},
{
"epoch": 14.65147010241163,
"grad_norm": 0.46800002455711365,
"learning_rate": 5.684640702810509e-06,
"loss": 0.1032,
"step": 88700
},
{
"epoch": 14.66798810703667,
"grad_norm": 0.6601079106330872,
"learning_rate": 5.6785229233197526e-06,
"loss": 0.108,
"step": 88800
},
{
"epoch": 14.684506111661712,
"grad_norm": 0.6476488709449768,
"learning_rate": 5.672405143828996e-06,
"loss": 0.1049,
"step": 88900
},
{
"epoch": 14.701024116286753,
"grad_norm": 0.7255818843841553,
"learning_rate": 5.6662873643382395e-06,
"loss": 0.1322,
"step": 89000
},
{
"epoch": 14.701024116286753,
"eval_cer": 0.039174252275682706,
"eval_loss": 0.11011859029531479,
"eval_runtime": 52.7786,
"eval_samples_per_second": 32.04,
"eval_steps_per_second": 8.015,
"eval_wer": 0.21588709173796292,
"step": 89000
},
{
"epoch": 14.717542120911794,
"grad_norm": 0.7102627754211426,
"learning_rate": 5.660169584847485e-06,
"loss": 0.1046,
"step": 89100
},
{
"epoch": 14.734060125536836,
"grad_norm": 0.6122440099716187,
"learning_rate": 5.654051805356728e-06,
"loss": 0.1012,
"step": 89200
},
{
"epoch": 14.750578130161877,
"grad_norm": 0.6586080193519592,
"learning_rate": 5.6479340258659724e-06,
"loss": 0.1021,
"step": 89300
},
{
"epoch": 14.767096134786918,
"grad_norm": 0.9857539534568787,
"learning_rate": 5.641816246375216e-06,
"loss": 0.158,
"step": 89400
},
{
"epoch": 14.78361413941196,
"grad_norm": 0.8294028043746948,
"learning_rate": 5.63569846688446e-06,
"loss": 0.1115,
"step": 89500
},
{
"epoch": 14.800132144037,
"grad_norm": 0.6861185431480408,
"learning_rate": 5.6295806873937045e-06,
"loss": 0.1043,
"step": 89600
},
{
"epoch": 14.816650148662042,
"grad_norm": 0.6036092042922974,
"learning_rate": 5.623462907902948e-06,
"loss": 0.1105,
"step": 89700
},
{
"epoch": 14.833168153287083,
"grad_norm": 0.778626561164856,
"learning_rate": 5.6173451284121914e-06,
"loss": 0.1032,
"step": 89800
},
{
"epoch": 14.849686157912124,
"grad_norm": 0.5784227252006531,
"learning_rate": 5.611227348921437e-06,
"loss": 0.1122,
"step": 89900
},
{
"epoch": 14.866204162537166,
"grad_norm": 0.6248123645782471,
"learning_rate": 5.60510956943068e-06,
"loss": 0.1027,
"step": 90000
},
{
"epoch": 14.866204162537166,
"eval_cer": 0.0388662651427007,
"eval_loss": 0.11089600622653961,
"eval_runtime": 53.1033,
"eval_samples_per_second": 31.844,
"eval_steps_per_second": 7.966,
"eval_wer": 0.2147005557984138,
"step": 90000
},
{
"epoch": 14.882722167162207,
"grad_norm": 0.5955941081047058,
"learning_rate": 5.5989917899399235e-06,
"loss": 0.117,
"step": 90100
},
{
"epoch": 14.899240171787248,
"grad_norm": 0.7445477247238159,
"learning_rate": 5.592874010449168e-06,
"loss": 0.1035,
"step": 90200
},
{
"epoch": 14.91575817641229,
"grad_norm": 0.4745796024799347,
"learning_rate": 5.586756230958412e-06,
"loss": 0.1042,
"step": 90300
},
{
"epoch": 14.93227618103733,
"grad_norm": 0.7581929564476013,
"learning_rate": 5.580638451467656e-06,
"loss": 0.1047,
"step": 90400
},
{
"epoch": 14.948794185662372,
"grad_norm": 1.0136349201202393,
"learning_rate": 5.5745206719769e-06,
"loss": 0.105,
"step": 90500
},
{
"epoch": 14.965312190287413,
"grad_norm": 0.7604655623435974,
"learning_rate": 5.568402892486143e-06,
"loss": 0.1091,
"step": 90600
},
{
"epoch": 14.981830194912455,
"grad_norm": 0.7419881224632263,
"learning_rate": 5.562285112995388e-06,
"loss": 0.1009,
"step": 90700
},
{
"epoch": 14.998348199537496,
"grad_norm": 0.571348249912262,
"learning_rate": 5.556167333504632e-06,
"loss": 0.099,
"step": 90800
},
{
"epoch": 15.014866204162537,
"grad_norm": 0.7786069512367249,
"learning_rate": 5.5500495540138754e-06,
"loss": 0.1141,
"step": 90900
},
{
"epoch": 15.031384208787578,
"grad_norm": 0.6933959722518921,
"learning_rate": 5.543931774523119e-06,
"loss": 0.1029,
"step": 91000
},
{
"epoch": 15.031384208787578,
"eval_cer": 0.03914003148312915,
"eval_loss": 0.110771544277668,
"eval_runtime": 55.563,
"eval_samples_per_second": 30.434,
"eval_steps_per_second": 7.613,
"eval_wer": 0.21532504839817648,
"step": 91000
},
{
"epoch": 15.04790221341262,
"grad_norm": 0.5616199374198914,
"learning_rate": 5.537813995032364e-06,
"loss": 0.0993,
"step": 91100
},
{
"epoch": 15.064420218037661,
"grad_norm": 0.6372345089912415,
"learning_rate": 5.5316962155416075e-06,
"loss": 0.104,
"step": 91200
},
{
"epoch": 15.080938222662702,
"grad_norm": 0.7811592817306519,
"learning_rate": 5.525578436050851e-06,
"loss": 0.1002,
"step": 91300
},
{
"epoch": 15.097456227287744,
"grad_norm": 0.974866509437561,
"learning_rate": 5.519460656560095e-06,
"loss": 0.1252,
"step": 91400
},
{
"epoch": 15.113974231912785,
"grad_norm": 0.705301821231842,
"learning_rate": 5.51334287706934e-06,
"loss": 0.1028,
"step": 91500
},
{
"epoch": 15.130492236537826,
"grad_norm": 0.6617374420166016,
"learning_rate": 5.507225097578583e-06,
"loss": 0.1029,
"step": 91600
},
{
"epoch": 15.147010241162867,
"grad_norm": 0.6031976342201233,
"learning_rate": 5.501107318087827e-06,
"loss": 0.0994,
"step": 91700
},
{
"epoch": 15.163528245787909,
"grad_norm": 0.8132845163345337,
"learning_rate": 5.494989538597071e-06,
"loss": 0.1013,
"step": 91800
},
{
"epoch": 15.18004625041295,
"grad_norm": 0.4518735110759735,
"learning_rate": 5.488871759106316e-06,
"loss": 0.1086,
"step": 91900
},
{
"epoch": 15.196564255037991,
"grad_norm": 0.6119063496589661,
"learning_rate": 5.4827539796155595e-06,
"loss": 0.0978,
"step": 92000
},
{
"epoch": 15.196564255037991,
"eval_cer": 0.03872082677434809,
"eval_loss": 0.11010745912790298,
"eval_runtime": 56.6751,
"eval_samples_per_second": 29.837,
"eval_steps_per_second": 7.464,
"eval_wer": 0.21351401985886467,
"step": 92000
},
{
"epoch": 15.213082259663032,
"grad_norm": 1.2135928869247437,
"learning_rate": 5.476636200124803e-06,
"loss": 0.126,
"step": 92100
},
{
"epoch": 15.229600264288074,
"grad_norm": 0.7273651361465454,
"learning_rate": 5.470518420634046e-06,
"loss": 0.1026,
"step": 92200
},
{
"epoch": 15.246118268913115,
"grad_norm": 0.5206860303878784,
"learning_rate": 5.4644006411432915e-06,
"loss": 0.1061,
"step": 92300
},
{
"epoch": 15.262636273538156,
"grad_norm": 0.5830551981925964,
"learning_rate": 5.458282861652535e-06,
"loss": 0.1049,
"step": 92400
},
{
"epoch": 15.279154278163197,
"grad_norm": 0.5370995402336121,
"learning_rate": 5.4521650821617785e-06,
"loss": 0.1022,
"step": 92500
},
{
"epoch": 15.295672282788239,
"grad_norm": 0.6254607439041138,
"learning_rate": 5.446047302671023e-06,
"loss": 0.1076,
"step": 92600
},
{
"epoch": 15.31219028741328,
"grad_norm": 0.7650060057640076,
"learning_rate": 5.439929523180267e-06,
"loss": 0.1061,
"step": 92700
},
{
"epoch": 15.328708292038321,
"grad_norm": 0.786281168460846,
"learning_rate": 5.433811743689511e-06,
"loss": 0.1059,
"step": 92800
},
{
"epoch": 15.345226296663363,
"grad_norm": 0.7369528412818909,
"learning_rate": 5.427693964198755e-06,
"loss": 0.1025,
"step": 92900
},
{
"epoch": 15.361744301288404,
"grad_norm": 0.8376733660697937,
"learning_rate": 5.421576184707998e-06,
"loss": 0.1048,
"step": 93000
},
{
"epoch": 15.361744301288404,
"eval_cer": 0.038275956471151874,
"eval_loss": 0.1115972101688385,
"eval_runtime": 60.1921,
"eval_samples_per_second": 28.093,
"eval_steps_per_second": 7.028,
"eval_wer": 0.21095360019983764,
"step": 93000
},
{
"epoch": 15.378262305913445,
"grad_norm": 0.6252397298812866,
"learning_rate": 5.4154584052172435e-06,
"loss": 0.1,
"step": 93100
},
{
"epoch": 15.394780310538486,
"grad_norm": 0.7326919436454773,
"learning_rate": 5.409340625726487e-06,
"loss": 0.11,
"step": 93200
},
{
"epoch": 15.411298315163528,
"grad_norm": 0.6019201874732971,
"learning_rate": 5.40322284623573e-06,
"loss": 0.1065,
"step": 93300
},
{
"epoch": 15.427816319788569,
"grad_norm": 0.7445711493492126,
"learning_rate": 5.397105066744974e-06,
"loss": 0.105,
"step": 93400
},
{
"epoch": 15.44433432441361,
"grad_norm": 1.068389892578125,
"learning_rate": 5.390987287254219e-06,
"loss": 0.1038,
"step": 93500
},
{
"epoch": 15.460852329038651,
"grad_norm": 0.672622561454773,
"learning_rate": 5.3848695077634625e-06,
"loss": 0.104,
"step": 93600
},
{
"epoch": 15.477370333663693,
"grad_norm": 0.6717888712882996,
"learning_rate": 5.378751728272707e-06,
"loss": 0.0985,
"step": 93700
},
{
"epoch": 15.493888338288734,
"grad_norm": 1.2381566762924194,
"learning_rate": 5.37263394878195e-06,
"loss": 0.1078,
"step": 93800
},
{
"epoch": 15.510406342913775,
"grad_norm": 0.6967211365699768,
"learning_rate": 5.3665161692911946e-06,
"loss": 0.1017,
"step": 93900
},
{
"epoch": 15.526924347538817,
"grad_norm": 0.7272515892982483,
"learning_rate": 5.360398389800439e-06,
"loss": 0.1027,
"step": 94000
},
{
"epoch": 15.526924347538817,
"eval_cer": 0.038635274792964205,
"eval_loss": 0.10984691232442856,
"eval_runtime": 58.0727,
"eval_samples_per_second": 29.119,
"eval_steps_per_second": 7.284,
"eval_wer": 0.21320177355898332,
"step": 94000
},
{
"epoch": 15.543442352163858,
"grad_norm": 0.7049939036369324,
"learning_rate": 5.354280610309682e-06,
"loss": 0.1035,
"step": 94100
},
{
"epoch": 15.559960356788899,
"grad_norm": 1.3722845315933228,
"learning_rate": 5.348162830818926e-06,
"loss": 0.1002,
"step": 94200
},
{
"epoch": 15.57647836141394,
"grad_norm": 0.7943611145019531,
"learning_rate": 5.342045051328171e-06,
"loss": 0.0976,
"step": 94300
},
{
"epoch": 15.592996366038982,
"grad_norm": 0.6992027163505554,
"learning_rate": 5.335927271837414e-06,
"loss": 0.1038,
"step": 94400
},
{
"epoch": 15.609514370664023,
"grad_norm": 0.9091536998748779,
"learning_rate": 5.329809492346658e-06,
"loss": 0.1059,
"step": 94500
},
{
"epoch": 15.626032375289064,
"grad_norm": 0.6817540526390076,
"learning_rate": 5.323691712855902e-06,
"loss": 0.1099,
"step": 94600
},
{
"epoch": 15.642550379914105,
"grad_norm": 0.6020603775978088,
"learning_rate": 5.3175739333651465e-06,
"loss": 0.0986,
"step": 94700
},
{
"epoch": 15.659068384539147,
"grad_norm": 0.6270213723182678,
"learning_rate": 5.31145615387439e-06,
"loss": 0.1034,
"step": 94800
},
{
"epoch": 15.67558638916419,
"grad_norm": 0.7782559990882874,
"learning_rate": 5.305338374383634e-06,
"loss": 0.0969,
"step": 94900
},
{
"epoch": 15.692104393789231,
"grad_norm": 0.6843757629394531,
"learning_rate": 5.299220594892878e-06,
"loss": 0.0996,
"step": 95000
},
{
"epoch": 15.692104393789231,
"eval_cer": 0.03852405721716515,
"eval_loss": 0.11085934937000275,
"eval_runtime": 60.5345,
"eval_samples_per_second": 27.934,
"eval_steps_per_second": 6.988,
"eval_wer": 0.21270217947917316,
"step": 95000
},
{
"epoch": 15.708622398414272,
"grad_norm": 0.5856791138648987,
"learning_rate": 5.293102815402122e-06,
"loss": 0.1043,
"step": 95100
},
{
"epoch": 15.725140403039314,
"grad_norm": 1.0118597745895386,
"learning_rate": 5.286985035911366e-06,
"loss": 0.1207,
"step": 95200
},
{
"epoch": 15.741658407664355,
"grad_norm": 0.4559677541255951,
"learning_rate": 5.28086725642061e-06,
"loss": 0.0953,
"step": 95300
},
{
"epoch": 15.758176412289396,
"grad_norm": 0.7937479615211487,
"learning_rate": 5.274749476929853e-06,
"loss": 0.1018,
"step": 95400
},
{
"epoch": 15.774694416914437,
"grad_norm": 0.7912297248840332,
"learning_rate": 5.2686316974390984e-06,
"loss": 0.1013,
"step": 95500
},
{
"epoch": 15.791212421539479,
"grad_norm": 0.9011877775192261,
"learning_rate": 5.262513917948342e-06,
"loss": 0.1085,
"step": 95600
},
{
"epoch": 15.80773042616452,
"grad_norm": 0.7926939129829407,
"learning_rate": 5.256396138457585e-06,
"loss": 0.0993,
"step": 95700
},
{
"epoch": 15.824248430789561,
"grad_norm": 0.9147284626960754,
"learning_rate": 5.25027835896683e-06,
"loss": 0.106,
"step": 95800
},
{
"epoch": 15.840766435414602,
"grad_norm": 0.6496513485908508,
"learning_rate": 5.244160579476074e-06,
"loss": 0.101,
"step": 95900
},
{
"epoch": 15.857284440039644,
"grad_norm": 0.5024608969688416,
"learning_rate": 5.2380427999853174e-06,
"loss": 0.0959,
"step": 96000
},
{
"epoch": 15.857284440039644,
"eval_cer": 0.03859249880227226,
"eval_loss": 0.1082993894815445,
"eval_runtime": 61.2314,
"eval_samples_per_second": 27.617,
"eval_steps_per_second": 6.908,
"eval_wer": 0.21288952725910198,
"step": 96000
},
{
"epoch": 15.873802444664685,
"grad_norm": 0.9131097197532654,
"learning_rate": 5.231925020494562e-06,
"loss": 0.1023,
"step": 96100
},
{
"epoch": 15.890320449289726,
"grad_norm": 0.7231972813606262,
"learning_rate": 5.225807241003805e-06,
"loss": 0.1057,
"step": 96200
},
{
"epoch": 15.906838453914768,
"grad_norm": 0.7179155349731445,
"learning_rate": 5.21968946151305e-06,
"loss": 0.1054,
"step": 96300
},
{
"epoch": 15.923356458539809,
"grad_norm": 0.6966670751571655,
"learning_rate": 5.213571682022294e-06,
"loss": 0.0992,
"step": 96400
},
{
"epoch": 15.93987446316485,
"grad_norm": 0.7580718398094177,
"learning_rate": 5.207453902531537e-06,
"loss": 0.0978,
"step": 96500
},
{
"epoch": 15.956392467789891,
"grad_norm": 0.6020950675010681,
"learning_rate": 5.201336123040781e-06,
"loss": 0.1,
"step": 96600
},
{
"epoch": 15.972910472414933,
"grad_norm": 0.7800185680389404,
"learning_rate": 5.195218343550026e-06,
"loss": 0.1041,
"step": 96700
},
{
"epoch": 15.989428477039974,
"grad_norm": 0.6527479290962219,
"learning_rate": 5.189100564059269e-06,
"loss": 0.1504,
"step": 96800
},
{
"epoch": 16.005946481665013,
"grad_norm": 2.855896472930908,
"learning_rate": 5.182982784568513e-06,
"loss": 0.1247,
"step": 96900
},
{
"epoch": 16.022464486290055,
"grad_norm": 0.6793861985206604,
"learning_rate": 5.176865005077757e-06,
"loss": 0.1015,
"step": 97000
},
{
"epoch": 16.022464486290055,
"eval_cer": 0.038601054000410646,
"eval_loss": 0.10963103175163269,
"eval_runtime": 57.4338,
"eval_samples_per_second": 29.443,
"eval_steps_per_second": 7.365,
"eval_wer": 0.21257728095922063,
"step": 97000
},
{
"epoch": 16.038982490915096,
"grad_norm": 0.5223618149757385,
"learning_rate": 5.1707472255870015e-06,
"loss": 0.0952,
"step": 97100
},
{
"epoch": 16.055500495540137,
"grad_norm": 0.6774017810821533,
"learning_rate": 5.164629446096246e-06,
"loss": 0.101,
"step": 97200
},
{
"epoch": 16.07201850016518,
"grad_norm": 0.6046952605247498,
"learning_rate": 5.158511666605489e-06,
"loss": 0.0986,
"step": 97300
},
{
"epoch": 16.08853650479022,
"grad_norm": 0.5968722701072693,
"learning_rate": 5.152393887114733e-06,
"loss": 0.1172,
"step": 97400
},
{
"epoch": 16.10505450941526,
"grad_norm": 0.49890223145484924,
"learning_rate": 5.146276107623978e-06,
"loss": 0.095,
"step": 97500
},
{
"epoch": 16.121572514040302,
"grad_norm": 0.5506992936134338,
"learning_rate": 5.140158328133221e-06,
"loss": 0.0983,
"step": 97600
},
{
"epoch": 16.138090518665344,
"grad_norm": 0.8042443990707397,
"learning_rate": 5.134040548642465e-06,
"loss": 0.2112,
"step": 97700
},
{
"epoch": 16.154608523290385,
"grad_norm": 0.8264985680580139,
"learning_rate": 5.127922769151708e-06,
"loss": 0.1013,
"step": 97800
},
{
"epoch": 16.171126527915426,
"grad_norm": 0.5965238809585571,
"learning_rate": 5.121804989660953e-06,
"loss": 0.1457,
"step": 97900
},
{
"epoch": 16.187644532540467,
"grad_norm": 0.8089606761932373,
"learning_rate": 5.115687210170197e-06,
"loss": 0.1058,
"step": 98000
},
{
"epoch": 16.187644532540467,
"eval_cer": 0.03804496612141537,
"eval_loss": 0.10823166370391846,
"eval_runtime": 58.9445,
"eval_samples_per_second": 28.688,
"eval_steps_per_second": 7.176,
"eval_wer": 0.21132829575969525,
"step": 98000
},
{
"epoch": 16.20416253716551,
"grad_norm": 0.5970620512962341,
"learning_rate": 5.109569430679441e-06,
"loss": 0.1046,
"step": 98100
},
{
"epoch": 16.22068054179055,
"grad_norm": 0.45412981510162354,
"learning_rate": 5.103451651188685e-06,
"loss": 0.1035,
"step": 98200
},
{
"epoch": 16.23719854641559,
"grad_norm": 0.5827893018722534,
"learning_rate": 5.097333871697929e-06,
"loss": 0.0992,
"step": 98300
},
{
"epoch": 16.253716551040633,
"grad_norm": 0.6516451239585876,
"learning_rate": 5.091216092207173e-06,
"loss": 0.1023,
"step": 98400
},
{
"epoch": 16.270234555665677,
"grad_norm": 0.731946587562561,
"learning_rate": 5.085098312716417e-06,
"loss": 0.0969,
"step": 98500
},
{
"epoch": 16.28675256029072,
"grad_norm": 0.70552659034729,
"learning_rate": 5.07898053322566e-06,
"loss": 0.1478,
"step": 98600
},
{
"epoch": 16.30327056491576,
"grad_norm": 0.7130141258239746,
"learning_rate": 5.072862753734905e-06,
"loss": 0.1049,
"step": 98700
},
{
"epoch": 16.3197885695408,
"grad_norm": 0.9122040867805481,
"learning_rate": 5.066744974244149e-06,
"loss": 0.0976,
"step": 98800
},
{
"epoch": 16.336306574165842,
"grad_norm": 0.7150751948356628,
"learning_rate": 5.060627194753392e-06,
"loss": 0.0938,
"step": 98900
},
{
"epoch": 16.352824578790884,
"grad_norm": 0.571891188621521,
"learning_rate": 5.0545094152626366e-06,
"loss": 0.0966,
"step": 99000
},
{
"epoch": 16.352824578790884,
"eval_cer": 0.03804496612141537,
"eval_loss": 0.1098564937710762,
"eval_runtime": 59.3833,
"eval_samples_per_second": 28.476,
"eval_steps_per_second": 7.123,
"eval_wer": 0.21089115093986135,
"step": 99000
},
{
"epoch": 16.369342583415925,
"grad_norm": 0.5009652376174927,
"learning_rate": 5.048391635771881e-06,
"loss": 0.0956,
"step": 99100
},
{
"epoch": 16.385860588040966,
"grad_norm": 0.6411744952201843,
"learning_rate": 5.042273856281124e-06,
"loss": 0.1109,
"step": 99200
},
{
"epoch": 16.402378592666008,
"grad_norm": 0.7724633812904358,
"learning_rate": 5.036156076790369e-06,
"loss": 0.0996,
"step": 99300
},
{
"epoch": 16.41889659729105,
"grad_norm": 0.5513240694999695,
"learning_rate": 5.030038297299612e-06,
"loss": 0.0972,
"step": 99400
},
{
"epoch": 16.43541460191609,
"grad_norm": 0.685674786567688,
"learning_rate": 5.023920517808856e-06,
"loss": 0.1259,
"step": 99500
},
{
"epoch": 16.45193260654113,
"grad_norm": 0.7051562070846558,
"learning_rate": 5.017802738318101e-06,
"loss": 0.1076,
"step": 99600
},
{
"epoch": 16.468450611166173,
"grad_norm": 0.6196284890174866,
"learning_rate": 5.011684958827344e-06,
"loss": 0.1033,
"step": 99700
},
{
"epoch": 16.484968615791214,
"grad_norm": 0.6664172410964966,
"learning_rate": 5.005567179336588e-06,
"loss": 0.0988,
"step": 99800
},
{
"epoch": 16.501486620416255,
"grad_norm": 0.8101247549057007,
"learning_rate": 4.999449399845832e-06,
"loss": 0.1005,
"step": 99900
},
{
"epoch": 16.518004625041296,
"grad_norm": 0.5494738817214966,
"learning_rate": 4.993331620355076e-06,
"loss": 0.0988,
"step": 100000
},
{
"epoch": 16.518004625041296,
"eval_cer": 0.03866949558551776,
"eval_loss": 0.10886727273464203,
"eval_runtime": 57.6452,
"eval_samples_per_second": 29.335,
"eval_steps_per_second": 7.338,
"eval_wer": 0.21388871541872229,
"step": 100000
},
{
"epoch": 16.534522629666338,
"grad_norm": 0.752741813659668,
"learning_rate": 4.98721384086432e-06,
"loss": 0.0958,
"step": 100100
},
{
"epoch": 16.55104063429138,
"grad_norm": 0.6410621404647827,
"learning_rate": 4.981096061373564e-06,
"loss": 0.1042,
"step": 100200
},
{
"epoch": 16.56755863891642,
"grad_norm": 0.5088207125663757,
"learning_rate": 4.974978281882808e-06,
"loss": 0.0953,
"step": 100300
},
{
"epoch": 16.58407664354146,
"grad_norm": 0.7134143114089966,
"learning_rate": 4.968860502392052e-06,
"loss": 0.1021,
"step": 100400
},
{
"epoch": 16.600594648166503,
"grad_norm": 0.9732416272163391,
"learning_rate": 4.962742722901296e-06,
"loss": 0.1004,
"step": 100500
},
{
"epoch": 16.617112652791544,
"grad_norm": 0.5259725451469421,
"learning_rate": 4.95662494341054e-06,
"loss": 0.1008,
"step": 100600
},
{
"epoch": 16.633630657416585,
"grad_norm": 0.7298964262008667,
"learning_rate": 4.950507163919784e-06,
"loss": 0.1216,
"step": 100700
},
{
"epoch": 16.650148662041627,
"grad_norm": 0.6519650816917419,
"learning_rate": 4.944389384429028e-06,
"loss": 0.0989,
"step": 100800
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.5744999647140503,
"learning_rate": 4.938271604938272e-06,
"loss": 0.1034,
"step": 100900
},
{
"epoch": 16.68318467129171,
"grad_norm": 0.8439196944236755,
"learning_rate": 4.932153825447516e-06,
"loss": 0.0983,
"step": 101000
},
{
"epoch": 16.68318467129171,
"eval_cer": 0.03823318048045993,
"eval_loss": 0.10998154431581497,
"eval_runtime": 57.0437,
"eval_samples_per_second": 29.644,
"eval_steps_per_second": 7.415,
"eval_wer": 0.21026665834009867,
"step": 101000
},
{
"epoch": 16.69970267591675,
"grad_norm": 0.7444531917572021,
"learning_rate": 4.9260360459567594e-06,
"loss": 0.0986,
"step": 101100
},
{
"epoch": 16.71622068054179,
"grad_norm": 0.64404296875,
"learning_rate": 4.919918266466004e-06,
"loss": 0.0977,
"step": 101200
},
{
"epoch": 16.732738685166833,
"grad_norm": 0.5869942903518677,
"learning_rate": 4.913800486975248e-06,
"loss": 0.0943,
"step": 101300
},
{
"epoch": 16.749256689791874,
"grad_norm": 0.7566863894462585,
"learning_rate": 4.9076827074844915e-06,
"loss": 0.0932,
"step": 101400
},
{
"epoch": 16.765774694416915,
"grad_norm": 0.6613245010375977,
"learning_rate": 4.901564927993736e-06,
"loss": 0.094,
"step": 101500
},
{
"epoch": 16.782292699041957,
"grad_norm": 0.5942184925079346,
"learning_rate": 4.89544714850298e-06,
"loss": 0.1035,
"step": 101600
},
{
"epoch": 16.798810703666998,
"grad_norm": 0.6501281261444092,
"learning_rate": 4.889329369012224e-06,
"loss": 0.0991,
"step": 101700
},
{
"epoch": 16.81532870829204,
"grad_norm": 0.5310657024383545,
"learning_rate": 4.883211589521468e-06,
"loss": 0.1031,
"step": 101800
},
{
"epoch": 16.83184671291708,
"grad_norm": 0.7567028403282166,
"learning_rate": 4.877093810030712e-06,
"loss": 0.1248,
"step": 101900
},
{
"epoch": 16.848364717542122,
"grad_norm": 0.5749494433403015,
"learning_rate": 4.870976030539956e-06,
"loss": 0.1394,
"step": 102000
},
{
"epoch": 16.848364717542122,
"eval_cer": 0.03819040448976798,
"eval_loss": 0.10844554007053375,
"eval_runtime": 55.7352,
"eval_samples_per_second": 30.34,
"eval_steps_per_second": 7.589,
"eval_wer": 0.21051645538000374,
"step": 102000
},
{
"epoch": 16.864882722167163,
"grad_norm": 0.6915029883384705,
"learning_rate": 4.8648582510492e-06,
"loss": 0.0999,
"step": 102100
},
{
"epoch": 16.881400726792204,
"grad_norm": 0.6687765121459961,
"learning_rate": 4.8587404715584434e-06,
"loss": 0.147,
"step": 102200
},
{
"epoch": 16.897918731417246,
"grad_norm": 0.6900584697723389,
"learning_rate": 4.852622692067688e-06,
"loss": 0.1014,
"step": 102300
},
{
"epoch": 16.914436736042287,
"grad_norm": 0.5827245712280273,
"learning_rate": 4.846504912576931e-06,
"loss": 0.1018,
"step": 102400
},
{
"epoch": 16.930954740667328,
"grad_norm": 0.6611018180847168,
"learning_rate": 4.8403871330861755e-06,
"loss": 0.0986,
"step": 102500
},
{
"epoch": 16.94747274529237,
"grad_norm": 0.7171707153320312,
"learning_rate": 4.834269353595419e-06,
"loss": 0.1365,
"step": 102600
},
{
"epoch": 16.96399074991741,
"grad_norm": 0.7955174446105957,
"learning_rate": 4.828151574104663e-06,
"loss": 0.1115,
"step": 102700
},
{
"epoch": 16.980508754542452,
"grad_norm": 0.6726603507995605,
"learning_rate": 4.822033794613908e-06,
"loss": 0.0998,
"step": 102800
},
{
"epoch": 16.997026759167493,
"grad_norm": 0.6470670104026794,
"learning_rate": 4.815916015123152e-06,
"loss": 0.1008,
"step": 102900
},
{
"epoch": 17.013544763792535,
"grad_norm": 0.8556126952171326,
"learning_rate": 4.809798235632395e-06,
"loss": 0.1134,
"step": 103000
},
{
"epoch": 17.013544763792535,
"eval_cer": 0.03776264458284854,
"eval_loss": 0.1081945076584816,
"eval_runtime": 57.2963,
"eval_samples_per_second": 29.513,
"eval_steps_per_second": 7.383,
"eval_wer": 0.20945481796040716,
"step": 103000
},
{
"epoch": 17.030062768417576,
"grad_norm": 0.8898919224739075,
"learning_rate": 4.80368045614164e-06,
"loss": 0.1023,
"step": 103100
},
{
"epoch": 17.046580773042617,
"grad_norm": 0.4918752908706665,
"learning_rate": 4.797562676650883e-06,
"loss": 0.1012,
"step": 103200
},
{
"epoch": 17.06309877766766,
"grad_norm": 0.5613105297088623,
"learning_rate": 4.7914448971601275e-06,
"loss": 0.1064,
"step": 103300
},
{
"epoch": 17.0796167822927,
"grad_norm": 0.6109268665313721,
"learning_rate": 4.785327117669371e-06,
"loss": 0.1048,
"step": 103400
},
{
"epoch": 17.09613478691774,
"grad_norm": 0.6237761974334717,
"learning_rate": 4.779209338178615e-06,
"loss": 0.1044,
"step": 103500
},
{
"epoch": 17.112652791542782,
"grad_norm": 0.6789395213127136,
"learning_rate": 4.773091558687859e-06,
"loss": 0.0973,
"step": 103600
},
{
"epoch": 17.129170796167823,
"grad_norm": 0.7461550831794739,
"learning_rate": 4.766973779197103e-06,
"loss": 0.0965,
"step": 103700
},
{
"epoch": 17.145688800792865,
"grad_norm": 0.655927836894989,
"learning_rate": 4.760855999706347e-06,
"loss": 0.0962,
"step": 103800
},
{
"epoch": 17.162206805417906,
"grad_norm": 0.6233280897140503,
"learning_rate": 4.754738220215591e-06,
"loss": 0.0949,
"step": 103900
},
{
"epoch": 17.178724810042947,
"grad_norm": 0.6471518874168396,
"learning_rate": 4.748620440724835e-06,
"loss": 0.0916,
"step": 104000
},
{
"epoch": 17.178724810042947,
"eval_cer": 0.038019300527000206,
"eval_loss": 0.10819939523935318,
"eval_runtime": 61.0456,
"eval_samples_per_second": 27.701,
"eval_steps_per_second": 6.929,
"eval_wer": 0.21051645538000374,
"step": 104000
},
{
"epoch": 17.19524281466799,
"grad_norm": 0.6647598147392273,
"learning_rate": 4.742502661234079e-06,
"loss": 0.0941,
"step": 104100
},
{
"epoch": 17.21176081929303,
"grad_norm": 0.6160650849342346,
"learning_rate": 4.736384881743323e-06,
"loss": 0.0958,
"step": 104200
},
{
"epoch": 17.22827882391807,
"grad_norm": 0.7830073833465576,
"learning_rate": 4.730267102252567e-06,
"loss": 0.0988,
"step": 104300
},
{
"epoch": 17.244796828543112,
"grad_norm": 0.5620446801185608,
"learning_rate": 4.724149322761811e-06,
"loss": 0.0981,
"step": 104400
},
{
"epoch": 17.261314833168154,
"grad_norm": 0.5912889838218689,
"learning_rate": 4.718031543271055e-06,
"loss": 0.1015,
"step": 104500
},
{
"epoch": 17.277832837793195,
"grad_norm": 0.8370086550712585,
"learning_rate": 4.711913763780298e-06,
"loss": 0.0953,
"step": 104600
},
{
"epoch": 17.294350842418236,
"grad_norm": 0.5910390615463257,
"learning_rate": 4.705795984289543e-06,
"loss": 0.1,
"step": 104700
},
{
"epoch": 17.310868847043277,
"grad_norm": 0.6430118083953857,
"learning_rate": 4.699678204798786e-06,
"loss": 0.1022,
"step": 104800
},
{
"epoch": 17.32738685166832,
"grad_norm": 0.6246772408485413,
"learning_rate": 4.6935604253080305e-06,
"loss": 0.0989,
"step": 104900
},
{
"epoch": 17.34390485629336,
"grad_norm": 0.598013699054718,
"learning_rate": 4.687442645817275e-06,
"loss": 0.1074,
"step": 105000
},
{
"epoch": 17.34390485629336,
"eval_cer": 0.038062076517692146,
"eval_loss": 0.11045213788747787,
"eval_runtime": 57.9821,
"eval_samples_per_second": 29.164,
"eval_steps_per_second": 7.295,
"eval_wer": 0.20883032536064447,
"step": 105000
},
{
"epoch": 17.3604228609184,
"grad_norm": 1.371077060699463,
"learning_rate": 4.681324866326519e-06,
"loss": 0.0984,
"step": 105100
},
{
"epoch": 17.376940865543443,
"grad_norm": 0.9299737811088562,
"learning_rate": 4.6752070868357626e-06,
"loss": 0.1229,
"step": 105200
},
{
"epoch": 17.393458870168484,
"grad_norm": 0.7221639156341553,
"learning_rate": 4.669089307345007e-06,
"loss": 0.104,
"step": 105300
},
{
"epoch": 17.409976874793525,
"grad_norm": 1.0506755113601685,
"learning_rate": 4.66297152785425e-06,
"loss": 0.0978,
"step": 105400
},
{
"epoch": 17.426494879418566,
"grad_norm": 0.6425598859786987,
"learning_rate": 4.656853748363495e-06,
"loss": 0.0983,
"step": 105500
},
{
"epoch": 17.443012884043608,
"grad_norm": 0.7174783945083618,
"learning_rate": 4.650735968872738e-06,
"loss": 0.0997,
"step": 105600
},
{
"epoch": 17.45953088866865,
"grad_norm": 0.5940792560577393,
"learning_rate": 4.644618189381982e-06,
"loss": 0.1225,
"step": 105700
},
{
"epoch": 17.47604889329369,
"grad_norm": 0.47687768936157227,
"learning_rate": 4.638500409891226e-06,
"loss": 0.0972,
"step": 105800
},
{
"epoch": 17.49256689791873,
"grad_norm": 0.7543063759803772,
"learning_rate": 4.63238263040047e-06,
"loss": 0.095,
"step": 105900
},
{
"epoch": 17.509084902543773,
"grad_norm": 0.6112996339797974,
"learning_rate": 4.6262648509097145e-06,
"loss": 0.095,
"step": 106000
},
{
"epoch": 17.509084902543773,
"eval_cer": 0.038027855725138594,
"eval_loss": 0.10872569680213928,
"eval_runtime": 58.3098,
"eval_samples_per_second": 29.0,
"eval_steps_per_second": 7.254,
"eval_wer": 0.20964216574033598,
"step": 106000
},
{
"epoch": 17.525602907168814,
"grad_norm": 0.7269030809402466,
"learning_rate": 4.620147071418958e-06,
"loss": 0.1022,
"step": 106100
},
{
"epoch": 17.542120911793855,
"grad_norm": 0.6887866258621216,
"learning_rate": 4.614029291928202e-06,
"loss": 0.1003,
"step": 106200
},
{
"epoch": 17.558638916418897,
"grad_norm": 0.7257598638534546,
"learning_rate": 4.6079115124374466e-06,
"loss": 0.096,
"step": 106300
},
{
"epoch": 17.575156921043938,
"grad_norm": 0.8789656162261963,
"learning_rate": 4.60179373294669e-06,
"loss": 0.0986,
"step": 106400
},
{
"epoch": 17.59167492566898,
"grad_norm": 0.6779934167861938,
"learning_rate": 4.595675953455934e-06,
"loss": 0.1376,
"step": 106500
},
{
"epoch": 17.60819293029402,
"grad_norm": 0.63017737865448,
"learning_rate": 4.589558173965178e-06,
"loss": 0.1013,
"step": 106600
},
{
"epoch": 17.62471093491906,
"grad_norm": 0.6014170050621033,
"learning_rate": 4.583440394474422e-06,
"loss": 0.093,
"step": 106700
},
{
"epoch": 17.641228939544103,
"grad_norm": 0.7778664231300354,
"learning_rate": 4.577322614983666e-06,
"loss": 0.1045,
"step": 106800
},
{
"epoch": 17.657746944169144,
"grad_norm": 0.6275627017021179,
"learning_rate": 4.57120483549291e-06,
"loss": 0.0915,
"step": 106900
},
{
"epoch": 17.674264948794185,
"grad_norm": 0.5987668633460999,
"learning_rate": 4.565087056002153e-06,
"loss": 0.0973,
"step": 107000
},
{
"epoch": 17.674264948794185,
"eval_cer": 0.03785675176237081,
"eval_loss": 0.10704370588064194,
"eval_runtime": 57.1479,
"eval_samples_per_second": 29.59,
"eval_steps_per_second": 7.402,
"eval_wer": 0.20826828202085806,
"step": 107000
},
{
"epoch": 17.690782953419227,
"grad_norm": 0.6655980944633484,
"learning_rate": 4.558969276511398e-06,
"loss": 0.097,
"step": 107100
},
{
"epoch": 17.707300958044268,
"grad_norm": 0.6261619925498962,
"learning_rate": 4.552851497020642e-06,
"loss": 0.0994,
"step": 107200
},
{
"epoch": 17.72381896266931,
"grad_norm": 0.6233117580413818,
"learning_rate": 4.546733717529886e-06,
"loss": 0.0967,
"step": 107300
},
{
"epoch": 17.74033696729435,
"grad_norm": 0.5294709205627441,
"learning_rate": 4.54061593803913e-06,
"loss": 0.0941,
"step": 107400
},
{
"epoch": 17.75685497191939,
"grad_norm": 0.6875845193862915,
"learning_rate": 4.534498158548374e-06,
"loss": 0.1,
"step": 107500
},
{
"epoch": 17.773372976544433,
"grad_norm": 0.7154032588005066,
"learning_rate": 4.5283803790576175e-06,
"loss": 0.094,
"step": 107600
},
{
"epoch": 17.789890981169474,
"grad_norm": 0.672591507434845,
"learning_rate": 4.522262599566862e-06,
"loss": 0.0971,
"step": 107700
},
{
"epoch": 17.806408985794516,
"grad_norm": 0.744452178478241,
"learning_rate": 4.516144820076105e-06,
"loss": 0.092,
"step": 107800
},
{
"epoch": 17.822926990419557,
"grad_norm": 0.8155786991119385,
"learning_rate": 4.51002704058535e-06,
"loss": 0.0919,
"step": 107900
},
{
"epoch": 17.839444995044598,
"grad_norm": 0.7288583517074585,
"learning_rate": 4.503909261094593e-06,
"loss": 0.0984,
"step": 108000
},
{
"epoch": 17.839444995044598,
"eval_cer": 0.03769420299774143,
"eval_loss": 0.10798373073339462,
"eval_runtime": 57.3331,
"eval_samples_per_second": 29.494,
"eval_steps_per_second": 7.378,
"eval_wer": 0.20833073128083432,
"step": 108000
},
{
"epoch": 17.85596299966964,
"grad_norm": 0.6306447386741638,
"learning_rate": 4.497791481603837e-06,
"loss": 0.0945,
"step": 108100
},
{
"epoch": 17.87248100429468,
"grad_norm": 0.7502180933952332,
"learning_rate": 4.491673702113082e-06,
"loss": 0.0991,
"step": 108200
},
{
"epoch": 17.888999008919722,
"grad_norm": 0.6571519374847412,
"learning_rate": 4.485555922622325e-06,
"loss": 0.1046,
"step": 108300
},
{
"epoch": 17.905517013544763,
"grad_norm": 0.6141965389251709,
"learning_rate": 4.4794381431315694e-06,
"loss": 0.0982,
"step": 108400
},
{
"epoch": 17.922035018169804,
"grad_norm": 0.4874700903892517,
"learning_rate": 4.473320363640814e-06,
"loss": 0.0984,
"step": 108500
},
{
"epoch": 17.938553022794846,
"grad_norm": 0.8021253347396851,
"learning_rate": 4.467202584150057e-06,
"loss": 0.0955,
"step": 108600
},
{
"epoch": 17.955071027419887,
"grad_norm": 0.6977095603942871,
"learning_rate": 4.4610848046593015e-06,
"loss": 0.1019,
"step": 108700
},
{
"epoch": 17.97158903204493,
"grad_norm": 0.649456262588501,
"learning_rate": 4.454967025168545e-06,
"loss": 0.0992,
"step": 108800
},
{
"epoch": 17.98810703666997,
"grad_norm": 0.6027668714523315,
"learning_rate": 4.448849245677789e-06,
"loss": 0.0958,
"step": 108900
},
{
"epoch": 18.00462504129501,
"grad_norm": 0.7994409203529358,
"learning_rate": 4.442731466187033e-06,
"loss": 0.0972,
"step": 109000
},
{
"epoch": 18.00462504129501,
"eval_cer": 0.03761720621449593,
"eval_loss": 0.10664419084787369,
"eval_runtime": 61.6933,
"eval_samples_per_second": 27.41,
"eval_steps_per_second": 6.856,
"eval_wer": 0.20845562980078686,
"step": 109000
},
{
"epoch": 18.021143045920052,
"grad_norm": 0.5892287492752075,
"learning_rate": 4.436613686696277e-06,
"loss": 0.1006,
"step": 109100
},
{
"epoch": 18.037661050545093,
"grad_norm": 0.5561397075653076,
"learning_rate": 4.4304959072055205e-06,
"loss": 0.1027,
"step": 109200
},
{
"epoch": 18.054179055170135,
"grad_norm": 0.6827639937400818,
"learning_rate": 4.424378127714765e-06,
"loss": 0.0924,
"step": 109300
},
{
"epoch": 18.070697059795176,
"grad_norm": 0.4659579396247864,
"learning_rate": 4.418260348224009e-06,
"loss": 0.0955,
"step": 109400
},
{
"epoch": 18.087215064420217,
"grad_norm": 0.5949708223342896,
"learning_rate": 4.4121425687332535e-06,
"loss": 0.1026,
"step": 109500
},
{
"epoch": 18.10373306904526,
"grad_norm": 0.7214411497116089,
"learning_rate": 4.406024789242497e-06,
"loss": 0.0951,
"step": 109600
},
{
"epoch": 18.1202510736703,
"grad_norm": 0.7198790311813354,
"learning_rate": 4.399907009751741e-06,
"loss": 0.0958,
"step": 109700
},
{
"epoch": 18.13676907829534,
"grad_norm": 0.6852260828018188,
"learning_rate": 4.393789230260985e-06,
"loss": 0.0946,
"step": 109800
},
{
"epoch": 18.153287082920382,
"grad_norm": 0.6294082403182983,
"learning_rate": 4.387671450770229e-06,
"loss": 0.0956,
"step": 109900
},
{
"epoch": 18.169805087545424,
"grad_norm": 0.6702645421028137,
"learning_rate": 4.3815536712794725e-06,
"loss": 0.1124,
"step": 110000
},
{
"epoch": 18.169805087545424,
"eval_cer": 0.037873862158647596,
"eval_loss": 0.10725517570972443,
"eval_runtime": 57.4138,
"eval_samples_per_second": 29.453,
"eval_steps_per_second": 7.368,
"eval_wer": 0.20920502092050208,
"step": 110000
},
{
"epoch": 18.186323092170465,
"grad_norm": 0.8165464997291565,
"learning_rate": 4.375435891788717e-06,
"loss": 0.1,
"step": 110100
},
{
"epoch": 18.202841096795506,
"grad_norm": 0.5550252199172974,
"learning_rate": 4.36931811229796e-06,
"loss": 0.1161,
"step": 110200
},
{
"epoch": 18.219359101420547,
"grad_norm": 0.6268564462661743,
"learning_rate": 4.3632003328072045e-06,
"loss": 0.0868,
"step": 110300
},
{
"epoch": 18.23587710604559,
"grad_norm": 0.9998523592948914,
"learning_rate": 4.357082553316449e-06,
"loss": 0.0888,
"step": 110400
},
{
"epoch": 18.25239511067063,
"grad_norm": 0.7963108420372009,
"learning_rate": 4.350964773825692e-06,
"loss": 0.0976,
"step": 110500
},
{
"epoch": 18.26891311529567,
"grad_norm": 0.6764956712722778,
"learning_rate": 4.344846994334937e-06,
"loss": 0.0945,
"step": 110600
},
{
"epoch": 18.285431119920712,
"grad_norm": 0.845342218875885,
"learning_rate": 4.338729214844181e-06,
"loss": 0.1018,
"step": 110700
},
{
"epoch": 18.301949124545754,
"grad_norm": 0.519926130771637,
"learning_rate": 4.332611435353424e-06,
"loss": 0.1012,
"step": 110800
},
{
"epoch": 18.318467129170795,
"grad_norm": 0.5771397352218628,
"learning_rate": 4.326493655862669e-06,
"loss": 0.0963,
"step": 110900
},
{
"epoch": 18.334985133795836,
"grad_norm": 0.6836599707603455,
"learning_rate": 4.320375876371912e-06,
"loss": 0.0961,
"step": 111000
},
{
"epoch": 18.334985133795836,
"eval_cer": 0.03749743344055848,
"eval_loss": 0.10766017436981201,
"eval_runtime": 57.0005,
"eval_samples_per_second": 29.666,
"eval_steps_per_second": 7.421,
"eval_wer": 0.20683194904140387,
"step": 111000
},
{
"epoch": 18.351503138420878,
"grad_norm": 0.655540943145752,
"learning_rate": 4.3142580968811565e-06,
"loss": 0.0937,
"step": 111100
},
{
"epoch": 18.36802114304592,
"grad_norm": 0.7102084755897522,
"learning_rate": 4.3081403173904e-06,
"loss": 0.0982,
"step": 111200
},
{
"epoch": 18.38453914767096,
"grad_norm": 0.5947690010070801,
"learning_rate": 4.302022537899644e-06,
"loss": 0.0984,
"step": 111300
},
{
"epoch": 18.401057152296,
"grad_norm": 0.5899379253387451,
"learning_rate": 4.295904758408888e-06,
"loss": 0.0897,
"step": 111400
},
{
"epoch": 18.417575156921043,
"grad_norm": 0.48331010341644287,
"learning_rate": 4.289786978918132e-06,
"loss": 0.0971,
"step": 111500
},
{
"epoch": 18.434093161546084,
"grad_norm": 0.6152350306510925,
"learning_rate": 4.283669199427376e-06,
"loss": 0.1001,
"step": 111600
},
{
"epoch": 18.450611166171125,
"grad_norm": 0.5093644857406616,
"learning_rate": 4.277551419936621e-06,
"loss": 0.099,
"step": 111700
},
{
"epoch": 18.467129170796166,
"grad_norm": 0.6065688133239746,
"learning_rate": 4.271433640445864e-06,
"loss": 0.1007,
"step": 111800
},
{
"epoch": 18.483647175421208,
"grad_norm": 0.7295845746994019,
"learning_rate": 4.265315860955108e-06,
"loss": 0.1,
"step": 111900
},
{
"epoch": 18.50016518004625,
"grad_norm": 1.8832347393035889,
"learning_rate": 4.259198081464352e-06,
"loss": 0.0975,
"step": 112000
},
{
"epoch": 18.50016518004625,
"eval_cer": 0.03751454383683526,
"eval_loss": 0.10801618546247482,
"eval_runtime": 57.4664,
"eval_samples_per_second": 29.426,
"eval_steps_per_second": 7.361,
"eval_wer": 0.2083931805408106,
"step": 112000
},
{
"epoch": 18.51668318467129,
"grad_norm": 0.7237940430641174,
"learning_rate": 4.253080301973596e-06,
"loss": 0.0929,
"step": 112100
},
{
"epoch": 18.53320118929633,
"grad_norm": 0.8659229278564453,
"learning_rate": 4.24696252248284e-06,
"loss": 0.0956,
"step": 112200
},
{
"epoch": 18.549719193921373,
"grad_norm": 0.6070505380630493,
"learning_rate": 4.240844742992084e-06,
"loss": 0.0942,
"step": 112300
},
{
"epoch": 18.566237198546414,
"grad_norm": 0.5244882702827454,
"learning_rate": 4.234726963501327e-06,
"loss": 0.0917,
"step": 112400
},
{
"epoch": 18.582755203171455,
"grad_norm": 0.7137103080749512,
"learning_rate": 4.228609184010572e-06,
"loss": 0.0949,
"step": 112500
},
{
"epoch": 18.599273207796497,
"grad_norm": 0.5891650319099426,
"learning_rate": 4.222491404519816e-06,
"loss": 0.0953,
"step": 112600
},
{
"epoch": 18.615791212421538,
"grad_norm": 0.5612820386886597,
"learning_rate": 4.2163736250290595e-06,
"loss": 0.0966,
"step": 112700
},
{
"epoch": 18.63230921704658,
"grad_norm": 0.7165923714637756,
"learning_rate": 4.210255845538304e-06,
"loss": 0.0908,
"step": 112800
},
{
"epoch": 18.64882722167162,
"grad_norm": 0.6711476445198059,
"learning_rate": 4.204138066047548e-06,
"loss": 0.1005,
"step": 112900
},
{
"epoch": 18.66534522629666,
"grad_norm": 0.5342008471488953,
"learning_rate": 4.198020286556792e-06,
"loss": 0.089,
"step": 113000
},
{
"epoch": 18.66534522629666,
"eval_cer": 0.03749743344055848,
"eval_loss": 0.10710610449314117,
"eval_runtime": 62.8493,
"eval_samples_per_second": 26.906,
"eval_steps_per_second": 6.73,
"eval_wer": 0.20783113720102417,
"step": 113000
},
{
"epoch": 18.681863230921703,
"grad_norm": 0.49494898319244385,
"learning_rate": 4.191902507066036e-06,
"loss": 0.098,
"step": 113100
},
{
"epoch": 18.698381235546744,
"grad_norm": 0.8655831217765808,
"learning_rate": 4.185784727575279e-06,
"loss": 0.1006,
"step": 113200
},
{
"epoch": 18.71489924017179,
"grad_norm": 0.7897951006889343,
"learning_rate": 4.179666948084524e-06,
"loss": 0.0914,
"step": 113300
},
{
"epoch": 18.73141724479683,
"grad_norm": 0.5988522171974182,
"learning_rate": 4.173549168593767e-06,
"loss": 0.1146,
"step": 113400
},
{
"epoch": 18.74793524942187,
"grad_norm": 0.690118134021759,
"learning_rate": 4.1674313891030114e-06,
"loss": 0.096,
"step": 113500
},
{
"epoch": 18.764453254046913,
"grad_norm": 0.7711309790611267,
"learning_rate": 4.161313609612255e-06,
"loss": 0.0935,
"step": 113600
},
{
"epoch": 18.780971258671954,
"grad_norm": 0.596615195274353,
"learning_rate": 4.155195830121499e-06,
"loss": 0.0973,
"step": 113700
},
{
"epoch": 18.797489263296995,
"grad_norm": 0.7073595523834229,
"learning_rate": 4.1490780506307435e-06,
"loss": 0.0941,
"step": 113800
},
{
"epoch": 18.814007267922037,
"grad_norm": 0.7061730027198792,
"learning_rate": 4.142960271139988e-06,
"loss": 0.0935,
"step": 113900
},
{
"epoch": 18.830525272547078,
"grad_norm": 0.7775730490684509,
"learning_rate": 4.136842491649231e-06,
"loss": 0.0902,
"step": 114000
},
{
"epoch": 18.830525272547078,
"eval_cer": 0.03754876462938882,
"eval_loss": 0.10710606724023819,
"eval_runtime": 57.0219,
"eval_samples_per_second": 29.655,
"eval_steps_per_second": 7.418,
"eval_wer": 0.20683194904140387,
"step": 114000
},
{
"epoch": 18.84704327717212,
"grad_norm": 0.6550432443618774,
"learning_rate": 4.130724712158476e-06,
"loss": 0.0954,
"step": 114100
},
{
"epoch": 18.86356128179716,
"grad_norm": 0.5847755670547485,
"learning_rate": 4.124606932667719e-06,
"loss": 0.0996,
"step": 114200
},
{
"epoch": 18.880079286422202,
"grad_norm": 0.6805459260940552,
"learning_rate": 4.118489153176963e-06,
"loss": 0.0948,
"step": 114300
},
{
"epoch": 18.896597291047243,
"grad_norm": 0.6957184076309204,
"learning_rate": 4.112371373686207e-06,
"loss": 0.0981,
"step": 114400
},
{
"epoch": 18.913115295672284,
"grad_norm": 0.6642732620239258,
"learning_rate": 4.106253594195451e-06,
"loss": 0.0929,
"step": 114500
},
{
"epoch": 18.929633300297326,
"grad_norm": 0.6945010423660278,
"learning_rate": 4.100135814704695e-06,
"loss": 0.1019,
"step": 114600
},
{
"epoch": 18.946151304922367,
"grad_norm": 0.7043463587760925,
"learning_rate": 4.094018035213939e-06,
"loss": 0.0999,
"step": 114700
},
{
"epoch": 18.962669309547408,
"grad_norm": 0.5579137206077576,
"learning_rate": 4.087900255723183e-06,
"loss": 0.0944,
"step": 114800
},
{
"epoch": 18.97918731417245,
"grad_norm": 0.6382579803466797,
"learning_rate": 4.081782476232427e-06,
"loss": 0.0975,
"step": 114900
},
{
"epoch": 18.99570531879749,
"grad_norm": 0.6321828365325928,
"learning_rate": 4.075664696741671e-06,
"loss": 0.101,
"step": 115000
},
{
"epoch": 18.99570531879749,
"eval_cer": 0.03747176784614332,
"eval_loss": 0.10826370120048523,
"eval_runtime": 56.9853,
"eval_samples_per_second": 29.674,
"eval_steps_per_second": 7.423,
"eval_wer": 0.20770623868107163,
"step": 115000
},
{
"epoch": 19.012223323422532,
"grad_norm": 0.6674085855484009,
"learning_rate": 4.069546917250915e-06,
"loss": 0.0945,
"step": 115100
},
{
"epoch": 19.028741328047573,
"grad_norm": 0.7357644438743591,
"learning_rate": 4.063429137760159e-06,
"loss": 0.0907,
"step": 115200
},
{
"epoch": 19.045259332672615,
"grad_norm": 0.6607419848442078,
"learning_rate": 4.057311358269403e-06,
"loss": 0.1015,
"step": 115300
},
{
"epoch": 19.061777337297656,
"grad_norm": 0.7660622000694275,
"learning_rate": 4.0511935787786465e-06,
"loss": 0.0989,
"step": 115400
},
{
"epoch": 19.078295341922697,
"grad_norm": 0.5479562282562256,
"learning_rate": 4.045075799287891e-06,
"loss": 0.0942,
"step": 115500
},
{
"epoch": 19.09481334654774,
"grad_norm": 0.9880116581916809,
"learning_rate": 4.038958019797134e-06,
"loss": 0.0926,
"step": 115600
},
{
"epoch": 19.11133135117278,
"grad_norm": 0.9058769941329956,
"learning_rate": 4.032840240306379e-06,
"loss": 0.0956,
"step": 115700
},
{
"epoch": 19.12784935579782,
"grad_norm": 0.6099743247032166,
"learning_rate": 4.026722460815622e-06,
"loss": 0.0999,
"step": 115800
},
{
"epoch": 19.144367360422862,
"grad_norm": 0.7211606502532959,
"learning_rate": 4.020604681324866e-06,
"loss": 0.093,
"step": 115900
},
{
"epoch": 19.160885365047903,
"grad_norm": 0.7449648380279541,
"learning_rate": 4.014486901834111e-06,
"loss": 0.0957,
"step": 116000
},
{
"epoch": 19.160885365047903,
"eval_cer": 0.03714667031688454,
"eval_loss": 0.10729096084833145,
"eval_runtime": 57.487,
"eval_samples_per_second": 29.415,
"eval_steps_per_second": 7.358,
"eval_wer": 0.20583276088178354,
"step": 116000
},
{
"epoch": 19.177403369672945,
"grad_norm": 0.7683896422386169,
"learning_rate": 4.008369122343355e-06,
"loss": 0.0959,
"step": 116100
},
{
"epoch": 19.193921374297986,
"grad_norm": 0.6918036341667175,
"learning_rate": 4.0022513428525985e-06,
"loss": 0.0922,
"step": 116200
},
{
"epoch": 19.210439378923027,
"grad_norm": 0.9067769050598145,
"learning_rate": 3.996133563361843e-06,
"loss": 0.1088,
"step": 116300
},
{
"epoch": 19.22695738354807,
"grad_norm": 0.5687412023544312,
"learning_rate": 3.990015783871086e-06,
"loss": 0.0955,
"step": 116400
},
{
"epoch": 19.24347538817311,
"grad_norm": 0.6453192830085754,
"learning_rate": 3.9838980043803305e-06,
"loss": 0.0942,
"step": 116500
},
{
"epoch": 19.25999339279815,
"grad_norm": 0.6212557554244995,
"learning_rate": 3.977780224889574e-06,
"loss": 0.0972,
"step": 116600
},
{
"epoch": 19.276511397423192,
"grad_norm": 0.7683126926422119,
"learning_rate": 3.971662445398818e-06,
"loss": 0.0943,
"step": 116700
},
{
"epoch": 19.293029402048234,
"grad_norm": 0.9485934972763062,
"learning_rate": 3.965544665908062e-06,
"loss": 0.0954,
"step": 116800
},
{
"epoch": 19.309547406673275,
"grad_norm": 0.5345008373260498,
"learning_rate": 3.959426886417306e-06,
"loss": 0.0951,
"step": 116900
},
{
"epoch": 19.326065411298316,
"grad_norm": 0.5996564626693726,
"learning_rate": 3.95330910692655e-06,
"loss": 0.094,
"step": 117000
},
{
"epoch": 19.326065411298316,
"eval_cer": 0.03736910546848265,
"eval_loss": 0.10748081654310226,
"eval_runtime": 57.0717,
"eval_samples_per_second": 29.629,
"eval_steps_per_second": 7.412,
"eval_wer": 0.20670705052145133,
"step": 117000
},
{
"epoch": 19.342583415923357,
"grad_norm": 0.7661871910095215,
"learning_rate": 3.947191327435794e-06,
"loss": 0.092,
"step": 117100
},
{
"epoch": 19.3591014205484,
"grad_norm": 0.5788329839706421,
"learning_rate": 3.941073547945038e-06,
"loss": 0.0949,
"step": 117200
},
{
"epoch": 19.37561942517344,
"grad_norm": 0.6844035983085632,
"learning_rate": 3.9349557684542825e-06,
"loss": 0.0948,
"step": 117300
},
{
"epoch": 19.39213742979848,
"grad_norm": 0.5855575203895569,
"learning_rate": 3.928837988963526e-06,
"loss": 0.0932,
"step": 117400
},
{
"epoch": 19.408655434423522,
"grad_norm": 0.500566840171814,
"learning_rate": 3.92272020947277e-06,
"loss": 0.0984,
"step": 117500
},
{
"epoch": 19.425173439048564,
"grad_norm": 0.7234964370727539,
"learning_rate": 3.916602429982014e-06,
"loss": 0.1,
"step": 117600
},
{
"epoch": 19.441691443673605,
"grad_norm": 0.6670413613319397,
"learning_rate": 3.910484650491258e-06,
"loss": 0.0903,
"step": 117700
},
{
"epoch": 19.458209448298646,
"grad_norm": 1.1672645807266235,
"learning_rate": 3.9043668710005015e-06,
"loss": 0.0941,
"step": 117800
},
{
"epoch": 19.474727452923688,
"grad_norm": 0.8242597579956055,
"learning_rate": 3.898249091509746e-06,
"loss": 0.1007,
"step": 117900
},
{
"epoch": 19.49124545754873,
"grad_norm": 0.6898741722106934,
"learning_rate": 3.892131312018989e-06,
"loss": 0.094,
"step": 118000
},
{
"epoch": 19.49124545754873,
"eval_cer": 0.03757443022380398,
"eval_loss": 0.10757853835821152,
"eval_runtime": 59.6316,
"eval_samples_per_second": 28.357,
"eval_steps_per_second": 7.094,
"eval_wer": 0.2075188909011428,
"step": 118000
},
{
"epoch": 19.50776346217377,
"grad_norm": 1.0092437267303467,
"learning_rate": 3.8860135325282336e-06,
"loss": 0.0977,
"step": 118100
},
{
"epoch": 19.52428146679881,
"grad_norm": 0.5592585802078247,
"learning_rate": 3.879895753037478e-06,
"loss": 0.0908,
"step": 118200
},
{
"epoch": 19.540799471423853,
"grad_norm": 0.7661089301109314,
"learning_rate": 3.873777973546722e-06,
"loss": 0.0894,
"step": 118300
},
{
"epoch": 19.557317476048894,
"grad_norm": 0.6303833723068237,
"learning_rate": 3.867660194055966e-06,
"loss": 0.0938,
"step": 118400
},
{
"epoch": 19.573835480673935,
"grad_norm": 0.6270598769187927,
"learning_rate": 3.86154241456521e-06,
"loss": 0.0963,
"step": 118500
},
{
"epoch": 19.590353485298976,
"grad_norm": 0.7540850639343262,
"learning_rate": 3.855424635074453e-06,
"loss": 0.0941,
"step": 118600
},
{
"epoch": 19.606871489924018,
"grad_norm": 0.6837806701660156,
"learning_rate": 3.849306855583698e-06,
"loss": 0.0965,
"step": 118700
},
{
"epoch": 19.62338949454906,
"grad_norm": 0.5979442000389099,
"learning_rate": 3.843189076092942e-06,
"loss": 0.0934,
"step": 118800
},
{
"epoch": 19.6399074991741,
"grad_norm": 0.5547999143600464,
"learning_rate": 3.8370712966021855e-06,
"loss": 0.0954,
"step": 118900
},
{
"epoch": 19.65642550379914,
"grad_norm": 0.7073753476142883,
"learning_rate": 3.83095351711143e-06,
"loss": 0.0912,
"step": 119000
},
{
"epoch": 19.65642550379914,
"eval_cer": 0.037488878242420094,
"eval_loss": 0.10558834671974182,
"eval_runtime": 57.5545,
"eval_samples_per_second": 29.381,
"eval_steps_per_second": 7.35,
"eval_wer": 0.20639480422156997,
"step": 119000
},
{
"epoch": 19.672943508424183,
"grad_norm": 0.6621033549308777,
"learning_rate": 3.824835737620673e-06,
"loss": 0.096,
"step": 119100
},
{
"epoch": 19.689461513049224,
"grad_norm": 0.6240584254264832,
"learning_rate": 3.818717958129918e-06,
"loss": 0.0925,
"step": 119200
},
{
"epoch": 19.705979517674265,
"grad_norm": 0.834823489189148,
"learning_rate": 3.8126001786391615e-06,
"loss": 0.0904,
"step": 119300
},
{
"epoch": 19.722497522299307,
"grad_norm": 0.5534527897834778,
"learning_rate": 3.8064823991484058e-06,
"loss": 0.0864,
"step": 119400
},
{
"epoch": 19.739015526924348,
"grad_norm": 0.5191404819488525,
"learning_rate": 3.8003646196576492e-06,
"loss": 0.0943,
"step": 119500
},
{
"epoch": 19.75553353154939,
"grad_norm": 0.7800885438919067,
"learning_rate": 3.7942468401668936e-06,
"loss": 0.096,
"step": 119600
},
{
"epoch": 19.77205153617443,
"grad_norm": 0.624622106552124,
"learning_rate": 3.7881290606761374e-06,
"loss": 0.1021,
"step": 119700
},
{
"epoch": 19.78856954079947,
"grad_norm": 0.448397159576416,
"learning_rate": 3.7820112811853813e-06,
"loss": 0.0934,
"step": 119800
},
{
"epoch": 19.805087545424513,
"grad_norm": 0.6804871559143066,
"learning_rate": 3.775893501694625e-06,
"loss": 0.0907,
"step": 119900
},
{
"epoch": 19.821605550049554,
"grad_norm": 0.8749545216560364,
"learning_rate": 3.7697757222038695e-06,
"loss": 0.0914,
"step": 120000
},
{
"epoch": 19.821605550049554,
"eval_cer": 0.03736910546848265,
"eval_loss": 0.1068761870265007,
"eval_runtime": 61.4436,
"eval_samples_per_second": 27.521,
"eval_steps_per_second": 6.884,
"eval_wer": 0.20639480422156997,
"step": 120000
},
{
"epoch": 19.838123554674596,
"grad_norm": 0.6852620244026184,
"learning_rate": 3.763657942713113e-06,
"loss": 0.0893,
"step": 120100
},
{
"epoch": 19.854641559299637,
"grad_norm": 0.48279762268066406,
"learning_rate": 3.7575401632223573e-06,
"loss": 0.0929,
"step": 120200
},
{
"epoch": 19.871159563924678,
"grad_norm": 0.9051792025566101,
"learning_rate": 3.751422383731601e-06,
"loss": 0.1167,
"step": 120300
},
{
"epoch": 19.88767756854972,
"grad_norm": 0.5648931264877319,
"learning_rate": 3.7453046042408455e-06,
"loss": 0.0946,
"step": 120400
},
{
"epoch": 19.90419557317476,
"grad_norm": 0.6829082369804382,
"learning_rate": 3.739186824750089e-06,
"loss": 0.0914,
"step": 120500
},
{
"epoch": 19.920713577799802,
"grad_norm": 0.8707834482192993,
"learning_rate": 3.7330690452593333e-06,
"loss": 0.0919,
"step": 120600
},
{
"epoch": 19.937231582424843,
"grad_norm": 0.6333926916122437,
"learning_rate": 3.7269512657685767e-06,
"loss": 0.0991,
"step": 120700
},
{
"epoch": 19.953749587049884,
"grad_norm": 0.5039823055267334,
"learning_rate": 3.720833486277821e-06,
"loss": 0.0899,
"step": 120800
},
{
"epoch": 19.970267591674926,
"grad_norm": 0.5696250200271606,
"learning_rate": 3.714715706787065e-06,
"loss": 0.0934,
"step": 120900
},
{
"epoch": 19.986785596299967,
"grad_norm": 0.6956700682640076,
"learning_rate": 3.7085979272963092e-06,
"loss": 0.0933,
"step": 121000
},
{
"epoch": 19.986785596299967,
"eval_cer": 0.03744610225172815,
"eval_loss": 0.10595033317804337,
"eval_runtime": 57.0726,
"eval_samples_per_second": 29.629,
"eval_steps_per_second": 7.412,
"eval_wer": 0.20664460126147505,
"step": 121000
},
{
"epoch": 20.00330360092501,
"grad_norm": 0.615598201751709,
"learning_rate": 3.7024801478055527e-06,
"loss": 0.0954,
"step": 121100
},
{
"epoch": 20.01982160555005,
"grad_norm": 0.4726826250553131,
"learning_rate": 3.696362368314797e-06,
"loss": 0.1088,
"step": 121200
},
{
"epoch": 20.03633961017509,
"grad_norm": 0.554155170917511,
"learning_rate": 3.690244588824041e-06,
"loss": 0.0934,
"step": 121300
},
{
"epoch": 20.052857614800132,
"grad_norm": 0.8273882269859314,
"learning_rate": 3.6841268093332848e-06,
"loss": 0.0982,
"step": 121400
},
{
"epoch": 20.069375619425173,
"grad_norm": 0.6084610819816589,
"learning_rate": 3.6780090298425287e-06,
"loss": 0.096,
"step": 121500
},
{
"epoch": 20.085893624050215,
"grad_norm": 0.42655861377716064,
"learning_rate": 3.671891250351773e-06,
"loss": 0.0929,
"step": 121600
},
{
"epoch": 20.102411628675256,
"grad_norm": 0.7716320753097534,
"learning_rate": 3.6657734708610164e-06,
"loss": 0.0925,
"step": 121700
},
{
"epoch": 20.118929633300297,
"grad_norm": 0.5255216360092163,
"learning_rate": 3.6596556913702607e-06,
"loss": 0.0929,
"step": 121800
},
{
"epoch": 20.13544763792534,
"grad_norm": 0.8503526449203491,
"learning_rate": 3.6535379118795046e-06,
"loss": 0.0963,
"step": 121900
},
{
"epoch": 20.15196564255038,
"grad_norm": 0.5264951586723328,
"learning_rate": 3.6474201323887485e-06,
"loss": 0.1132,
"step": 122000
},
{
"epoch": 20.15196564255038,
"eval_cer": 0.03729210868523715,
"eval_loss": 0.10676946491003036,
"eval_runtime": 57.951,
"eval_samples_per_second": 29.18,
"eval_steps_per_second": 7.299,
"eval_wer": 0.20701929682133266,
"step": 122000
},
{
"epoch": 20.16848364717542,
"grad_norm": 0.6232183575630188,
"learning_rate": 3.6413023528979924e-06,
"loss": 0.0964,
"step": 122100
},
{
"epoch": 20.185001651800462,
"grad_norm": 0.6945717334747314,
"learning_rate": 3.6351845734072367e-06,
"loss": 0.0967,
"step": 122200
},
{
"epoch": 20.201519656425504,
"grad_norm": 0.7937995195388794,
"learning_rate": 3.62906679391648e-06,
"loss": 0.0896,
"step": 122300
},
{
"epoch": 20.218037661050545,
"grad_norm": 0.7246369123458862,
"learning_rate": 3.6229490144257245e-06,
"loss": 0.0867,
"step": 122400
},
{
"epoch": 20.234555665675586,
"grad_norm": 0.5831708908081055,
"learning_rate": 3.6168312349349684e-06,
"loss": 0.091,
"step": 122500
},
{
"epoch": 20.251073670300627,
"grad_norm": 0.7024865746498108,
"learning_rate": 3.6107134554442127e-06,
"loss": 0.0935,
"step": 122600
},
{
"epoch": 20.26759167492567,
"grad_norm": 0.8907782435417175,
"learning_rate": 3.604595675953456e-06,
"loss": 0.0956,
"step": 122700
},
{
"epoch": 20.28410967955071,
"grad_norm": 0.6609148383140564,
"learning_rate": 3.5984778964627004e-06,
"loss": 0.0933,
"step": 122800
},
{
"epoch": 20.30062768417575,
"grad_norm": 0.8460065722465515,
"learning_rate": 3.592360116971944e-06,
"loss": 0.0891,
"step": 122900
},
{
"epoch": 20.317145688800792,
"grad_norm": 0.6879094839096069,
"learning_rate": 3.586242337481188e-06,
"loss": 0.0888,
"step": 123000
},
{
"epoch": 20.317145688800792,
"eval_cer": 0.03731777427965232,
"eval_loss": 0.10576903820037842,
"eval_runtime": 55.0332,
"eval_samples_per_second": 30.727,
"eval_steps_per_second": 7.686,
"eval_wer": 0.20670705052145133,
"step": 123000
},
{
"epoch": 20.333663693425834,
"grad_norm": 0.7055560946464539,
"learning_rate": 3.580124557990432e-06,
"loss": 0.098,
"step": 123100
},
{
"epoch": 20.350181698050875,
"grad_norm": 0.5633586049079895,
"learning_rate": 3.5740067784996764e-06,
"loss": 0.0926,
"step": 123200
},
{
"epoch": 20.366699702675916,
"grad_norm": 0.6035296320915222,
"learning_rate": 3.56788899900892e-06,
"loss": 0.0939,
"step": 123300
},
{
"epoch": 20.383217707300957,
"grad_norm": 0.6581436395645142,
"learning_rate": 3.561771219518164e-06,
"loss": 0.094,
"step": 123400
},
{
"epoch": 20.399735711926,
"grad_norm": 0.6265963912010193,
"learning_rate": 3.555653440027408e-06,
"loss": 0.0901,
"step": 123500
},
{
"epoch": 20.41625371655104,
"grad_norm": 0.6421579718589783,
"learning_rate": 3.549535660536652e-06,
"loss": 0.0916,
"step": 123600
},
{
"epoch": 20.43277172117608,
"grad_norm": 0.5874105095863342,
"learning_rate": 3.543417881045896e-06,
"loss": 0.0899,
"step": 123700
},
{
"epoch": 20.449289725801123,
"grad_norm": 0.7892938256263733,
"learning_rate": 3.53730010155514e-06,
"loss": 0.0943,
"step": 123800
},
{
"epoch": 20.465807730426164,
"grad_norm": 0.5755423903465271,
"learning_rate": 3.5311823220643836e-06,
"loss": 0.0895,
"step": 123900
},
{
"epoch": 20.482325735051205,
"grad_norm": 0.5386433005332947,
"learning_rate": 3.525064542573628e-06,
"loss": 0.0942,
"step": 124000
},
{
"epoch": 20.482325735051205,
"eval_cer": 0.03739477106289782,
"eval_loss": 0.10733035951852798,
"eval_runtime": 53.425,
"eval_samples_per_second": 31.652,
"eval_steps_per_second": 7.918,
"eval_wer": 0.205707862361831,
"step": 124000
},
{
"epoch": 20.498843739676246,
"grad_norm": 0.6741786003112793,
"learning_rate": 3.518946763082872e-06,
"loss": 0.1323,
"step": 124100
},
{
"epoch": 20.515361744301288,
"grad_norm": 0.5971143245697021,
"learning_rate": 3.5128289835921157e-06,
"loss": 0.1021,
"step": 124200
},
{
"epoch": 20.53187974892633,
"grad_norm": 0.6608400344848633,
"learning_rate": 3.5067112041013596e-06,
"loss": 0.0947,
"step": 124300
},
{
"epoch": 20.54839775355137,
"grad_norm": 0.7231072187423706,
"learning_rate": 3.500593424610604e-06,
"loss": 0.1009,
"step": 124400
},
{
"epoch": 20.56491575817641,
"grad_norm": 0.6929687857627869,
"learning_rate": 3.4944756451198473e-06,
"loss": 0.0947,
"step": 124500
},
{
"epoch": 20.581433762801453,
"grad_norm": 0.7624922394752502,
"learning_rate": 3.4883578656290917e-06,
"loss": 0.1048,
"step": 124600
},
{
"epoch": 20.597951767426494,
"grad_norm": 0.6456249356269836,
"learning_rate": 3.4822400861383355e-06,
"loss": 0.0898,
"step": 124700
},
{
"epoch": 20.614469772051535,
"grad_norm": 0.5414004921913147,
"learning_rate": 3.47612230664758e-06,
"loss": 0.0919,
"step": 124800
},
{
"epoch": 20.630987776676577,
"grad_norm": 0.6581851840019226,
"learning_rate": 3.4700045271568233e-06,
"loss": 0.0933,
"step": 124900
},
{
"epoch": 20.647505781301618,
"grad_norm": 0.7606977820396423,
"learning_rate": 3.4638867476660676e-06,
"loss": 0.09,
"step": 125000
},
{
"epoch": 20.647505781301618,
"eval_cer": 0.037086783929915816,
"eval_loss": 0.10901934653520584,
"eval_runtime": 53.6092,
"eval_samples_per_second": 31.543,
"eval_steps_per_second": 7.89,
"eval_wer": 0.2050833697620683,
"step": 125000
},
{
"epoch": 20.66402378592666,
"grad_norm": 0.8043733239173889,
"learning_rate": 3.457768968175311e-06,
"loss": 0.0946,
"step": 125100
},
{
"epoch": 20.6805417905517,
"grad_norm": 0.5810668468475342,
"learning_rate": 3.4516511886845554e-06,
"loss": 0.1174,
"step": 125200
},
{
"epoch": 20.69705979517674,
"grad_norm": 0.5117190480232239,
"learning_rate": 3.4455334091937993e-06,
"loss": 0.087,
"step": 125300
},
{
"epoch": 20.713577799801783,
"grad_norm": 0.6740157604217529,
"learning_rate": 3.4394156297030436e-06,
"loss": 0.0974,
"step": 125400
},
{
"epoch": 20.730095804426824,
"grad_norm": 0.7044069170951843,
"learning_rate": 3.433297850212287e-06,
"loss": 0.0983,
"step": 125500
},
{
"epoch": 20.746613809051865,
"grad_norm": 0.7274563908576965,
"learning_rate": 3.4271800707215314e-06,
"loss": 0.0893,
"step": 125600
},
{
"epoch": 20.763131813676907,
"grad_norm": 0.6939309239387512,
"learning_rate": 3.4210622912307752e-06,
"loss": 0.0905,
"step": 125700
},
{
"epoch": 20.779649818301948,
"grad_norm": 0.841923713684082,
"learning_rate": 3.414944511740019e-06,
"loss": 0.0919,
"step": 125800
},
{
"epoch": 20.79616782292699,
"grad_norm": 0.6695510149002075,
"learning_rate": 3.408826732249263e-06,
"loss": 0.0996,
"step": 125900
},
{
"epoch": 20.81268582755203,
"grad_norm": 0.5963577628135681,
"learning_rate": 3.4027089527585073e-06,
"loss": 0.1104,
"step": 126000
},
{
"epoch": 20.81268582755203,
"eval_cer": 0.03736910546848265,
"eval_loss": 0.10715510696172714,
"eval_runtime": 53.3765,
"eval_samples_per_second": 31.681,
"eval_steps_per_second": 7.925,
"eval_wer": 0.20608255792168864,
"step": 126000
},
{
"epoch": 20.829203832177072,
"grad_norm": 0.5884077548980713,
"learning_rate": 3.396591173267751e-06,
"loss": 0.0941,
"step": 126100
},
{
"epoch": 20.845721836802113,
"grad_norm": 0.7187851071357727,
"learning_rate": 3.390473393776995e-06,
"loss": 0.0951,
"step": 126200
},
{
"epoch": 20.862239841427154,
"grad_norm": 0.8274132609367371,
"learning_rate": 3.384355614286239e-06,
"loss": 0.0926,
"step": 126300
},
{
"epoch": 20.878757846052196,
"grad_norm": 0.7908247113227844,
"learning_rate": 3.378237834795483e-06,
"loss": 0.0929,
"step": 126400
},
{
"epoch": 20.895275850677237,
"grad_norm": 0.7135681509971619,
"learning_rate": 3.3721200553047268e-06,
"loss": 0.0896,
"step": 126500
},
{
"epoch": 20.911793855302278,
"grad_norm": 0.5181264877319336,
"learning_rate": 3.366002275813971e-06,
"loss": 0.0903,
"step": 126600
},
{
"epoch": 20.92831185992732,
"grad_norm": 0.6559743285179138,
"learning_rate": 3.3598844963232145e-06,
"loss": 0.091,
"step": 126700
},
{
"epoch": 20.94482986455236,
"grad_norm": 0.498858243227005,
"learning_rate": 3.353766716832459e-06,
"loss": 0.0873,
"step": 126800
},
{
"epoch": 20.961347869177402,
"grad_norm": 0.6528595089912415,
"learning_rate": 3.3476489373417027e-06,
"loss": 0.0944,
"step": 126900
},
{
"epoch": 20.977865873802443,
"grad_norm": 0.4162144064903259,
"learning_rate": 3.341531157850947e-06,
"loss": 0.0865,
"step": 127000
},
{
"epoch": 20.977865873802443,
"eval_cer": 0.037052563137362264,
"eval_loss": 0.10633628815412521,
"eval_runtime": 53.5789,
"eval_samples_per_second": 31.561,
"eval_steps_per_second": 7.895,
"eval_wer": 0.2053331668019734,
"step": 127000
},
{
"epoch": 20.994383878427485,
"grad_norm": 0.8712024092674255,
"learning_rate": 3.3354133783601905e-06,
"loss": 0.0937,
"step": 127100
},
{
"epoch": 21.010901883052526,
"grad_norm": 0.8483607172966003,
"learning_rate": 3.329295598869435e-06,
"loss": 0.0879,
"step": 127200
},
{
"epoch": 21.027419887677567,
"grad_norm": 0.9952839016914368,
"learning_rate": 3.3231778193786783e-06,
"loss": 0.095,
"step": 127300
},
{
"epoch": 21.04393789230261,
"grad_norm": 0.4832421541213989,
"learning_rate": 3.3170600398879226e-06,
"loss": 0.109,
"step": 127400
},
{
"epoch": 21.06045589692765,
"grad_norm": 0.6822460889816284,
"learning_rate": 3.3109422603971665e-06,
"loss": 0.0897,
"step": 127500
},
{
"epoch": 21.07697390155269,
"grad_norm": 0.6260835528373718,
"learning_rate": 3.3048244809064108e-06,
"loss": 0.0892,
"step": 127600
},
{
"epoch": 21.093491906177732,
"grad_norm": 0.6604743003845215,
"learning_rate": 3.2987067014156542e-06,
"loss": 0.0957,
"step": 127700
},
{
"epoch": 21.110009910802773,
"grad_norm": 0.5889437794685364,
"learning_rate": 3.2925889219248985e-06,
"loss": 0.0923,
"step": 127800
},
{
"epoch": 21.126527915427815,
"grad_norm": 0.6197744011878967,
"learning_rate": 3.2864711424341424e-06,
"loss": 0.0936,
"step": 127900
},
{
"epoch": 21.143045920052856,
"grad_norm": 0.6159409880638123,
"learning_rate": 3.2803533629433863e-06,
"loss": 0.0901,
"step": 128000
},
{
"epoch": 21.143045920052856,
"eval_cer": 0.03699267675039354,
"eval_loss": 0.10548759251832962,
"eval_runtime": 53.1647,
"eval_samples_per_second": 31.807,
"eval_steps_per_second": 7.956,
"eval_wer": 0.20320989196278025,
"step": 128000
},
{
"epoch": 21.159563924677897,
"grad_norm": 0.4305579960346222,
"learning_rate": 3.27423558345263e-06,
"loss": 0.0962,
"step": 128100
},
{
"epoch": 21.17608192930294,
"grad_norm": 0.8560436367988586,
"learning_rate": 3.2681178039618745e-06,
"loss": 0.1129,
"step": 128200
},
{
"epoch": 21.19259993392798,
"grad_norm": 0.824738085269928,
"learning_rate": 3.262000024471118e-06,
"loss": 0.095,
"step": 128300
},
{
"epoch": 21.20911793855302,
"grad_norm": 0.7285254597663879,
"learning_rate": 3.2558822449803623e-06,
"loss": 0.0967,
"step": 128400
},
{
"epoch": 21.225635943178062,
"grad_norm": 0.8130694627761841,
"learning_rate": 3.249764465489606e-06,
"loss": 0.0952,
"step": 128500
},
{
"epoch": 21.242153947803104,
"grad_norm": 0.640154242515564,
"learning_rate": 3.24364668599885e-06,
"loss": 0.0911,
"step": 128600
},
{
"epoch": 21.258671952428145,
"grad_norm": 0.5193492770195007,
"learning_rate": 3.237528906508094e-06,
"loss": 0.085,
"step": 128700
},
{
"epoch": 21.27518995705319,
"grad_norm": 0.7556692957878113,
"learning_rate": 3.2314111270173382e-06,
"loss": 0.0904,
"step": 128800
},
{
"epoch": 21.29170796167823,
"grad_norm": 0.6025686860084534,
"learning_rate": 3.2252933475265817e-06,
"loss": 0.0969,
"step": 128900
},
{
"epoch": 21.308225966303272,
"grad_norm": 0.6533762812614441,
"learning_rate": 3.219175568035826e-06,
"loss": 0.0962,
"step": 129000
},
{
"epoch": 21.308225966303272,
"eval_cer": 0.037095339128054204,
"eval_loss": 0.10510192811489105,
"eval_runtime": 53.7455,
"eval_samples_per_second": 31.463,
"eval_steps_per_second": 7.87,
"eval_wer": 0.20539561606194967,
"step": 129000
},
{
"epoch": 21.324743970928314,
"grad_norm": 0.7926602363586426,
"learning_rate": 3.21305778854507e-06,
"loss": 0.0847,
"step": 129100
},
{
"epoch": 21.341261975553355,
"grad_norm": 0.5735198855400085,
"learning_rate": 3.2069400090543142e-06,
"loss": 0.0949,
"step": 129200
},
{
"epoch": 21.357779980178396,
"grad_norm": 0.4958854615688324,
"learning_rate": 3.2008222295635577e-06,
"loss": 0.0916,
"step": 129300
},
{
"epoch": 21.374297984803437,
"grad_norm": 0.9454948306083679,
"learning_rate": 3.194704450072802e-06,
"loss": 0.1177,
"step": 129400
},
{
"epoch": 21.39081598942848,
"grad_norm": 0.6658288240432739,
"learning_rate": 3.1885866705820454e-06,
"loss": 0.0931,
"step": 129500
},
{
"epoch": 21.40733399405352,
"grad_norm": 0.6774040460586548,
"learning_rate": 3.1824688910912898e-06,
"loss": 0.0971,
"step": 129600
},
{
"epoch": 21.42385199867856,
"grad_norm": 0.5878866910934448,
"learning_rate": 3.1763511116005336e-06,
"loss": 0.1129,
"step": 129700
},
{
"epoch": 21.440370003303602,
"grad_norm": 0.6971415281295776,
"learning_rate": 3.170233332109778e-06,
"loss": 0.0937,
"step": 129800
},
{
"epoch": 21.456888007928644,
"grad_norm": 0.7083709239959717,
"learning_rate": 3.1641155526190214e-06,
"loss": 0.088,
"step": 129900
},
{
"epoch": 21.473406012553685,
"grad_norm": 0.7533379197120667,
"learning_rate": 3.1579977731282657e-06,
"loss": 0.0979,
"step": 130000
},
{
"epoch": 21.473406012553685,
"eval_cer": 0.036872903976456095,
"eval_loss": 0.10504589229822159,
"eval_runtime": 53.1803,
"eval_samples_per_second": 31.797,
"eval_steps_per_second": 7.954,
"eval_wer": 0.20427152938237683,
"step": 130000
},
{
"epoch": 21.489924017178726,
"grad_norm": 0.7101976275444031,
"learning_rate": 3.1518799936375096e-06,
"loss": 0.0909,
"step": 130100
},
{
"epoch": 21.506442021803768,
"grad_norm": 0.6949977874755859,
"learning_rate": 3.1457622141467535e-06,
"loss": 0.0878,
"step": 130200
},
{
"epoch": 21.52296002642881,
"grad_norm": 0.584557831287384,
"learning_rate": 3.1396444346559974e-06,
"loss": 0.101,
"step": 130300
},
{
"epoch": 21.53947803105385,
"grad_norm": 1.0865356922149658,
"learning_rate": 3.1335266551652417e-06,
"loss": 0.0925,
"step": 130400
},
{
"epoch": 21.55599603567889,
"grad_norm": 0.6468126177787781,
"learning_rate": 3.127408875674485e-06,
"loss": 0.0855,
"step": 130500
},
{
"epoch": 21.572514040303933,
"grad_norm": 0.6762518286705017,
"learning_rate": 3.1212910961837295e-06,
"loss": 0.1107,
"step": 130600
},
{
"epoch": 21.589032044928974,
"grad_norm": 0.7978318333625793,
"learning_rate": 3.1151733166929734e-06,
"loss": 0.0897,
"step": 130700
},
{
"epoch": 21.605550049554015,
"grad_norm": 0.8376022577285767,
"learning_rate": 3.1090555372022172e-06,
"loss": 0.0902,
"step": 130800
},
{
"epoch": 21.622068054179056,
"grad_norm": 0.702617347240448,
"learning_rate": 3.102937757711461e-06,
"loss": 0.0937,
"step": 130900
},
{
"epoch": 21.638586058804098,
"grad_norm": 0.5407445430755615,
"learning_rate": 3.0968199782207054e-06,
"loss": 0.0912,
"step": 131000
},
{
"epoch": 21.638586058804098,
"eval_cer": 0.03681301758948737,
"eval_loss": 0.10601798444986343,
"eval_runtime": 52.6946,
"eval_samples_per_second": 32.091,
"eval_steps_per_second": 8.027,
"eval_wer": 0.20352213826266158,
"step": 131000
},
{
"epoch": 21.65510406342914,
"grad_norm": 0.48049646615982056,
"learning_rate": 3.090702198729949e-06,
"loss": 0.1007,
"step": 131100
},
{
"epoch": 21.67162206805418,
"grad_norm": 0.8997465372085571,
"learning_rate": 3.084584419239193e-06,
"loss": 0.0901,
"step": 131200
},
{
"epoch": 21.68814007267922,
"grad_norm": 0.6192366480827332,
"learning_rate": 3.078466639748437e-06,
"loss": 0.0892,
"step": 131300
},
{
"epoch": 21.704658077304263,
"grad_norm": 0.7299876809120178,
"learning_rate": 3.0723488602576814e-06,
"loss": 0.0929,
"step": 131400
},
{
"epoch": 21.721176081929304,
"grad_norm": 0.6832283735275269,
"learning_rate": 3.066231080766925e-06,
"loss": 0.0936,
"step": 131500
},
{
"epoch": 21.737694086554345,
"grad_norm": 0.5136446952819824,
"learning_rate": 3.060113301276169e-06,
"loss": 0.0911,
"step": 131600
},
{
"epoch": 21.754212091179387,
"grad_norm": 0.6710427403450012,
"learning_rate": 3.0539955217854126e-06,
"loss": 0.0833,
"step": 131700
},
{
"epoch": 21.770730095804428,
"grad_norm": 0.6596719026565552,
"learning_rate": 3.047877742294657e-06,
"loss": 0.0921,
"step": 131800
},
{
"epoch": 21.78724810042947,
"grad_norm": 0.5548281669616699,
"learning_rate": 3.041759962803901e-06,
"loss": 0.0903,
"step": 131900
},
{
"epoch": 21.80376610505451,
"grad_norm": 0.5049402713775635,
"learning_rate": 3.035642183313145e-06,
"loss": 0.1321,
"step": 132000
},
{
"epoch": 21.80376610505451,
"eval_cer": 0.03675313120251865,
"eval_loss": 0.10501556098461151,
"eval_runtime": 52.4304,
"eval_samples_per_second": 32.252,
"eval_steps_per_second": 8.068,
"eval_wer": 0.20283519640292264,
"step": 132000
},
{
"epoch": 21.82028410967955,
"grad_norm": 0.7295696139335632,
"learning_rate": 3.0295244038223886e-06,
"loss": 0.0888,
"step": 132100
},
{
"epoch": 21.836802114304593,
"grad_norm": 0.5694403648376465,
"learning_rate": 3.023406624331633e-06,
"loss": 0.1097,
"step": 132200
},
{
"epoch": 21.853320118929634,
"grad_norm": 0.5931413769721985,
"learning_rate": 3.017288844840877e-06,
"loss": 0.0895,
"step": 132300
},
{
"epoch": 21.869838123554675,
"grad_norm": 0.715791642665863,
"learning_rate": 3.0111710653501207e-06,
"loss": 0.0926,
"step": 132400
},
{
"epoch": 21.886356128179717,
"grad_norm": 0.7110834717750549,
"learning_rate": 3.0050532858593646e-06,
"loss": 0.0892,
"step": 132500
},
{
"epoch": 21.902874132804758,
"grad_norm": 0.8973935842514038,
"learning_rate": 2.998935506368609e-06,
"loss": 0.0917,
"step": 132600
},
{
"epoch": 21.9193921374298,
"grad_norm": 0.6259893178939819,
"learning_rate": 2.9928177268778523e-06,
"loss": 0.0919,
"step": 132700
},
{
"epoch": 21.93591014205484,
"grad_norm": 0.6321418881416321,
"learning_rate": 2.9866999473870966e-06,
"loss": 0.0985,
"step": 132800
},
{
"epoch": 21.952428146679882,
"grad_norm": 0.690564751625061,
"learning_rate": 2.9805821678963405e-06,
"loss": 0.0857,
"step": 132900
},
{
"epoch": 21.968946151304923,
"grad_norm": 0.5872605443000793,
"learning_rate": 2.9744643884055844e-06,
"loss": 0.0954,
"step": 133000
},
{
"epoch": 21.968946151304923,
"eval_cer": 0.03696701115597837,
"eval_loss": 0.10601279884576797,
"eval_runtime": 52.5874,
"eval_samples_per_second": 32.156,
"eval_steps_per_second": 8.044,
"eval_wer": 0.20445887716230562,
"step": 133000
},
{
"epoch": 21.985464155929964,
"grad_norm": 0.5473292469978333,
"learning_rate": 2.9683466089148283e-06,
"loss": 0.0858,
"step": 133100
},
{
"epoch": 22.001982160555006,
"grad_norm": 0.7367832660675049,
"learning_rate": 2.9622288294240726e-06,
"loss": 0.0909,
"step": 133200
},
{
"epoch": 22.018500165180047,
"grad_norm": 1.0184003114700317,
"learning_rate": 2.956111049933316e-06,
"loss": 0.0901,
"step": 133300
},
{
"epoch": 22.035018169805088,
"grad_norm": 0.7270667552947998,
"learning_rate": 2.9499932704425604e-06,
"loss": 0.0942,
"step": 133400
},
{
"epoch": 22.05153617443013,
"grad_norm": 0.6220849752426147,
"learning_rate": 2.9438754909518043e-06,
"loss": 0.0892,
"step": 133500
},
{
"epoch": 22.06805417905517,
"grad_norm": 0.6055799126625061,
"learning_rate": 2.9377577114610486e-06,
"loss": 0.0895,
"step": 133600
},
{
"epoch": 22.084572183680212,
"grad_norm": 0.5487551689147949,
"learning_rate": 2.931639931970292e-06,
"loss": 0.0894,
"step": 133700
},
{
"epoch": 22.101090188305253,
"grad_norm": 0.6704040765762329,
"learning_rate": 2.9255221524795364e-06,
"loss": 0.0945,
"step": 133800
},
{
"epoch": 22.117608192930295,
"grad_norm": 0.5721579194068909,
"learning_rate": 2.91940437298878e-06,
"loss": 0.0959,
"step": 133900
},
{
"epoch": 22.134126197555336,
"grad_norm": 0.6543858051300049,
"learning_rate": 2.913286593498024e-06,
"loss": 0.1333,
"step": 134000
},
{
"epoch": 22.134126197555336,
"eval_cer": 0.03689001437273287,
"eval_loss": 0.10688560456037521,
"eval_runtime": 52.1166,
"eval_samples_per_second": 32.446,
"eval_steps_per_second": 8.116,
"eval_wer": 0.2038968338225192,
"step": 134000
},
{
"epoch": 22.150644202180377,
"grad_norm": 0.6130584478378296,
"learning_rate": 2.907168814007268e-06,
"loss": 0.0962,
"step": 134100
},
{
"epoch": 22.16716220680542,
"grad_norm": 0.7324750423431396,
"learning_rate": 2.9010510345165123e-06,
"loss": 0.0903,
"step": 134200
},
{
"epoch": 22.18368021143046,
"grad_norm": 0.6277410984039307,
"learning_rate": 2.8949332550257558e-06,
"loss": 0.0818,
"step": 134300
},
{
"epoch": 22.2001982160555,
"grad_norm": 0.5178551077842712,
"learning_rate": 2.888815475535e-06,
"loss": 0.1053,
"step": 134400
},
{
"epoch": 22.216716220680542,
"grad_norm": 0.6540612578392029,
"learning_rate": 2.8826976960442436e-06,
"loss": 0.0866,
"step": 134500
},
{
"epoch": 22.233234225305583,
"grad_norm": 0.5932282209396362,
"learning_rate": 2.876579916553488e-06,
"loss": 0.0927,
"step": 134600
},
{
"epoch": 22.249752229930625,
"grad_norm": 0.6185062527656555,
"learning_rate": 2.8704621370627317e-06,
"loss": 0.089,
"step": 134700
},
{
"epoch": 22.266270234555666,
"grad_norm": 0.8983421921730042,
"learning_rate": 2.864344357571976e-06,
"loss": 0.0861,
"step": 134800
},
{
"epoch": 22.282788239180707,
"grad_norm": 0.3891274034976959,
"learning_rate": 2.8582265780812195e-06,
"loss": 0.0944,
"step": 134900
},
{
"epoch": 22.29930624380575,
"grad_norm": 0.7119171023368835,
"learning_rate": 2.852108798590464e-06,
"loss": 0.089,
"step": 135000
},
{
"epoch": 22.29930624380575,
"eval_cer": 0.03690712476900965,
"eval_loss": 0.10521671921014786,
"eval_runtime": 52.7579,
"eval_samples_per_second": 32.052,
"eval_steps_per_second": 8.018,
"eval_wer": 0.20402173234247173,
"step": 135000
},
{
"epoch": 22.31582424843079,
"grad_norm": 0.5368226766586304,
"learning_rate": 2.8459910190997077e-06,
"loss": 0.0905,
"step": 135100
},
{
"epoch": 22.33234225305583,
"grad_norm": 0.6488823890686035,
"learning_rate": 2.8398732396089516e-06,
"loss": 0.1179,
"step": 135200
},
{
"epoch": 22.348860257680872,
"grad_norm": 0.6369620561599731,
"learning_rate": 2.8337554601181955e-06,
"loss": 0.0939,
"step": 135300
},
{
"epoch": 22.365378262305914,
"grad_norm": 0.6993893384933472,
"learning_rate": 2.82763768062744e-06,
"loss": 0.0944,
"step": 135400
},
{
"epoch": 22.381896266930955,
"grad_norm": 0.8022906184196472,
"learning_rate": 2.8215199011366833e-06,
"loss": 0.0832,
"step": 135500
},
{
"epoch": 22.398414271555996,
"grad_norm": 0.5833423733711243,
"learning_rate": 2.8154021216459276e-06,
"loss": 0.0944,
"step": 135600
},
{
"epoch": 22.414932276181037,
"grad_norm": 0.72309410572052,
"learning_rate": 2.8092843421551715e-06,
"loss": 0.093,
"step": 135700
},
{
"epoch": 22.43145028080608,
"grad_norm": 0.5882470011711121,
"learning_rate": 2.8031665626644158e-06,
"loss": 0.1,
"step": 135800
},
{
"epoch": 22.44796828543112,
"grad_norm": 0.6774691343307495,
"learning_rate": 2.7970487831736592e-06,
"loss": 0.0954,
"step": 135900
},
{
"epoch": 22.46448629005616,
"grad_norm": 0.9647297263145447,
"learning_rate": 2.7909310036829035e-06,
"loss": 0.094,
"step": 136000
},
{
"epoch": 22.46448629005616,
"eval_cer": 0.03667613441927315,
"eval_loss": 0.10520410537719727,
"eval_runtime": 52.5832,
"eval_samples_per_second": 32.159,
"eval_steps_per_second": 8.044,
"eval_wer": 0.20314744270280397,
"step": 136000
},
{
"epoch": 22.481004294681203,
"grad_norm": 0.6736404895782471,
"learning_rate": 2.784813224192147e-06,
"loss": 0.0912,
"step": 136100
},
{
"epoch": 22.497522299306244,
"grad_norm": 0.4658312499523163,
"learning_rate": 2.7786954447013913e-06,
"loss": 0.0874,
"step": 136200
},
{
"epoch": 22.514040303931285,
"grad_norm": 0.8794094920158386,
"learning_rate": 2.7725776652106356e-06,
"loss": 0.1005,
"step": 136300
},
{
"epoch": 22.530558308556326,
"grad_norm": 0.6956797242164612,
"learning_rate": 2.7664598857198795e-06,
"loss": 0.0895,
"step": 136400
},
{
"epoch": 22.547076313181368,
"grad_norm": 0.4646037220954895,
"learning_rate": 2.7603421062291234e-06,
"loss": 0.0869,
"step": 136500
},
{
"epoch": 22.56359431780641,
"grad_norm": 0.8446247577667236,
"learning_rate": 2.7542243267383673e-06,
"loss": 0.0958,
"step": 136600
},
{
"epoch": 22.58011232243145,
"grad_norm": 0.47825750708580017,
"learning_rate": 2.7481065472476116e-06,
"loss": 0.0918,
"step": 136700
},
{
"epoch": 22.59663032705649,
"grad_norm": 0.8411787152290344,
"learning_rate": 2.741988767756855e-06,
"loss": 0.0886,
"step": 136800
},
{
"epoch": 22.613148331681533,
"grad_norm": 0.5080142021179199,
"learning_rate": 2.7358709882660994e-06,
"loss": 0.0982,
"step": 136900
},
{
"epoch": 22.629666336306574,
"grad_norm": 0.7875675559043884,
"learning_rate": 2.7297532087753432e-06,
"loss": 0.0909,
"step": 137000
},
{
"epoch": 22.629666336306574,
"eval_cer": 0.03681301758948737,
"eval_loss": 0.10519874840974808,
"eval_runtime": 52.8842,
"eval_samples_per_second": 31.976,
"eval_steps_per_second": 7.999,
"eval_wer": 0.20264784862299381,
"step": 137000
},
{
"epoch": 22.646184340931615,
"grad_norm": 0.7804688215255737,
"learning_rate": 2.7236354292845875e-06,
"loss": 0.1158,
"step": 137100
},
{
"epoch": 22.662702345556657,
"grad_norm": 0.49170786142349243,
"learning_rate": 2.717517649793831e-06,
"loss": 0.0981,
"step": 137200
},
{
"epoch": 22.679220350181698,
"grad_norm": 0.649940550327301,
"learning_rate": 2.7113998703030753e-06,
"loss": 0.0892,
"step": 137300
},
{
"epoch": 22.69573835480674,
"grad_norm": 0.7027512192726135,
"learning_rate": 2.7052820908123188e-06,
"loss": 0.1086,
"step": 137400
},
{
"epoch": 22.71225635943178,
"grad_norm": 0.7389455437660217,
"learning_rate": 2.699164311321563e-06,
"loss": 0.0899,
"step": 137500
},
{
"epoch": 22.72877436405682,
"grad_norm": 0.7065523862838745,
"learning_rate": 2.693046531830807e-06,
"loss": 0.0851,
"step": 137600
},
{
"epoch": 22.745292368681863,
"grad_norm": 0.768282949924469,
"learning_rate": 2.6869287523400513e-06,
"loss": 0.0869,
"step": 137700
},
{
"epoch": 22.761810373306904,
"grad_norm": 0.6381931900978088,
"learning_rate": 2.6808109728492948e-06,
"loss": 0.0894,
"step": 137800
},
{
"epoch": 22.778328377931945,
"grad_norm": 0.6711616516113281,
"learning_rate": 2.674693193358539e-06,
"loss": 0.0932,
"step": 137900
},
{
"epoch": 22.794846382556987,
"grad_norm": 0.8620249629020691,
"learning_rate": 2.668575413867783e-06,
"loss": 0.0946,
"step": 138000
},
{
"epoch": 22.794846382556987,
"eval_cer": 0.036624803230442815,
"eval_loss": 0.1052507609128952,
"eval_runtime": 52.4513,
"eval_samples_per_second": 32.239,
"eval_steps_per_second": 8.065,
"eval_wer": 0.20314744270280397,
"step": 138000
},
{
"epoch": 22.811364387182028,
"grad_norm": 0.4814409911632538,
"learning_rate": 2.662457634377027e-06,
"loss": 0.0948,
"step": 138100
},
{
"epoch": 22.82788239180707,
"grad_norm": 1.151419997215271,
"learning_rate": 2.6563398548862707e-06,
"loss": 0.1138,
"step": 138200
},
{
"epoch": 22.84440039643211,
"grad_norm": 0.6814967393875122,
"learning_rate": 2.650222075395515e-06,
"loss": 0.114,
"step": 138300
},
{
"epoch": 22.860918401057152,
"grad_norm": 0.8873021602630615,
"learning_rate": 2.6441042959047585e-06,
"loss": 0.0866,
"step": 138400
},
{
"epoch": 22.877436405682193,
"grad_norm": 0.6129996180534363,
"learning_rate": 2.637986516414003e-06,
"loss": 0.0902,
"step": 138500
},
{
"epoch": 22.893954410307234,
"grad_norm": 0.8606892228126526,
"learning_rate": 2.6318687369232467e-06,
"loss": 0.0953,
"step": 138600
},
{
"epoch": 22.910472414932276,
"grad_norm": 0.6854122281074524,
"learning_rate": 2.6257509574324906e-06,
"loss": 0.0963,
"step": 138700
},
{
"epoch": 22.926990419557317,
"grad_norm": 0.7230859398841858,
"learning_rate": 2.6196331779417345e-06,
"loss": 0.0866,
"step": 138800
},
{
"epoch": 22.943508424182358,
"grad_norm": 0.4967285692691803,
"learning_rate": 2.6135153984509788e-06,
"loss": 0.0875,
"step": 138900
},
{
"epoch": 22.9600264288074,
"grad_norm": 0.6331928372383118,
"learning_rate": 2.6073976189602222e-06,
"loss": 0.0897,
"step": 139000
},
{
"epoch": 22.9600264288074,
"eval_cer": 0.03681301758948737,
"eval_loss": 0.10469213128089905,
"eval_runtime": 52.8869,
"eval_samples_per_second": 31.974,
"eval_steps_per_second": 7.998,
"eval_wer": 0.20383438456254294,
"step": 139000
},
{
"epoch": 22.97654443343244,
"grad_norm": 0.68625807762146,
"learning_rate": 2.6012798394694665e-06,
"loss": 0.103,
"step": 139100
},
{
"epoch": 22.993062438057482,
"grad_norm": 0.7166194915771484,
"learning_rate": 2.5951620599787104e-06,
"loss": 0.0937,
"step": 139200
},
{
"epoch": 23.009580442682523,
"grad_norm": 0.7146703600883484,
"learning_rate": 2.5890442804879547e-06,
"loss": 0.0936,
"step": 139300
},
{
"epoch": 23.026098447307564,
"grad_norm": 0.5112409591674805,
"learning_rate": 2.582926500997198e-06,
"loss": 0.0863,
"step": 139400
},
{
"epoch": 23.042616451932606,
"grad_norm": 0.5813011527061462,
"learning_rate": 2.5768087215064425e-06,
"loss": 0.0827,
"step": 139500
},
{
"epoch": 23.059134456557647,
"grad_norm": 0.6480150818824768,
"learning_rate": 2.570690942015686e-06,
"loss": 0.0866,
"step": 139600
},
{
"epoch": 23.07565246118269,
"grad_norm": 1.00325608253479,
"learning_rate": 2.5645731625249303e-06,
"loss": 0.0927,
"step": 139700
},
{
"epoch": 23.09217046580773,
"grad_norm": 0.5901710391044617,
"learning_rate": 2.558455383034174e-06,
"loss": 0.0978,
"step": 139800
},
{
"epoch": 23.10868847043277,
"grad_norm": 0.6397861242294312,
"learning_rate": 2.5523376035434185e-06,
"loss": 0.0869,
"step": 139900
},
{
"epoch": 23.125206475057812,
"grad_norm": 0.4879724085330963,
"learning_rate": 2.546219824052662e-06,
"loss": 0.0876,
"step": 140000
},
{
"epoch": 23.125206475057812,
"eval_cer": 0.03693279036342482,
"eval_loss": 0.10442952066659927,
"eval_runtime": 52.9301,
"eval_samples_per_second": 31.948,
"eval_steps_per_second": 7.992,
"eval_wer": 0.2038968338225192,
"step": 140000
},
{
"epoch": 23.141724479682853,
"grad_norm": 0.7894465327262878,
"learning_rate": 2.5401020445619062e-06,
"loss": 0.1086,
"step": 140100
},
{
"epoch": 23.158242484307895,
"grad_norm": 0.7804042100906372,
"learning_rate": 2.53398426507115e-06,
"loss": 0.0881,
"step": 140200
},
{
"epoch": 23.174760488932936,
"grad_norm": 0.5835601091384888,
"learning_rate": 2.527866485580394e-06,
"loss": 0.0909,
"step": 140300
},
{
"epoch": 23.191278493557977,
"grad_norm": 0.7063116431236267,
"learning_rate": 2.521748706089638e-06,
"loss": 0.0875,
"step": 140400
},
{
"epoch": 23.20779649818302,
"grad_norm": 0.66155606508255,
"learning_rate": 2.515630926598882e-06,
"loss": 0.0859,
"step": 140500
},
{
"epoch": 23.22431450280806,
"grad_norm": 0.5779556035995483,
"learning_rate": 2.5095131471081257e-06,
"loss": 0.0879,
"step": 140600
},
{
"epoch": 23.2408325074331,
"grad_norm": 0.5715177655220032,
"learning_rate": 2.50339536761737e-06,
"loss": 0.0914,
"step": 140700
},
{
"epoch": 23.257350512058142,
"grad_norm": 0.5225812792778015,
"learning_rate": 2.497277588126614e-06,
"loss": 0.0902,
"step": 140800
},
{
"epoch": 23.273868516683184,
"grad_norm": 0.8125872015953064,
"learning_rate": 2.4911598086358578e-06,
"loss": 0.1079,
"step": 140900
},
{
"epoch": 23.290386521308225,
"grad_norm": 0.7094987034797668,
"learning_rate": 2.4850420291451016e-06,
"loss": 0.0863,
"step": 141000
},
{
"epoch": 23.290386521308225,
"eval_cer": 0.03659913763602765,
"eval_loss": 0.10513997077941895,
"eval_runtime": 52.6735,
"eval_samples_per_second": 32.103,
"eval_steps_per_second": 8.031,
"eval_wer": 0.20246050084306502,
"step": 141000
},
{
"epoch": 23.306904525933266,
"grad_norm": 0.5953539609909058,
"learning_rate": 2.4789242496543455e-06,
"loss": 0.0941,
"step": 141100
},
{
"epoch": 23.323422530558307,
"grad_norm": 0.6508031487464905,
"learning_rate": 2.4728064701635894e-06,
"loss": 0.0951,
"step": 141200
},
{
"epoch": 23.33994053518335,
"grad_norm": 0.6292299032211304,
"learning_rate": 2.4666886906728333e-06,
"loss": 0.0838,
"step": 141300
},
{
"epoch": 23.35645853980839,
"grad_norm": 0.7818630337715149,
"learning_rate": 2.4605709111820776e-06,
"loss": 0.0869,
"step": 141400
},
{
"epoch": 23.37297654443343,
"grad_norm": 0.8426799774169922,
"learning_rate": 2.4544531316913215e-06,
"loss": 0.0852,
"step": 141500
},
{
"epoch": 23.389494549058472,
"grad_norm": 0.5545341968536377,
"learning_rate": 2.4483353522005654e-06,
"loss": 0.0827,
"step": 141600
},
{
"epoch": 23.406012553683514,
"grad_norm": 1.2653288841247559,
"learning_rate": 2.4422175727098093e-06,
"loss": 0.0841,
"step": 141700
},
{
"epoch": 23.422530558308555,
"grad_norm": 0.7402147650718689,
"learning_rate": 2.436099793219053e-06,
"loss": 0.0836,
"step": 141800
},
{
"epoch": 23.439048562933596,
"grad_norm": 0.5832458734512329,
"learning_rate": 2.4299820137282975e-06,
"loss": 0.1247,
"step": 141900
},
{
"epoch": 23.455566567558638,
"grad_norm": 0.6517156958580017,
"learning_rate": 2.4238642342375413e-06,
"loss": 0.0871,
"step": 142000
},
{
"epoch": 23.455566567558638,
"eval_cer": 0.03671035521182671,
"eval_loss": 0.10508172959089279,
"eval_runtime": 52.7407,
"eval_samples_per_second": 32.063,
"eval_steps_per_second": 8.02,
"eval_wer": 0.20246050084306502,
"step": 142000
},
{
"epoch": 23.47208457218368,
"grad_norm": 0.6041878461837769,
"learning_rate": 2.4177464547467852e-06,
"loss": 0.0912,
"step": 142100
},
{
"epoch": 23.48860257680872,
"grad_norm": 0.5178912878036499,
"learning_rate": 2.411628675256029e-06,
"loss": 0.0925,
"step": 142200
},
{
"epoch": 23.50512058143376,
"grad_norm": 0.6299303770065308,
"learning_rate": 2.405510895765273e-06,
"loss": 0.088,
"step": 142300
},
{
"epoch": 23.521638586058803,
"grad_norm": 0.6988112926483154,
"learning_rate": 2.399393116274517e-06,
"loss": 0.1043,
"step": 142400
},
{
"epoch": 23.538156590683844,
"grad_norm": 0.5607922077178955,
"learning_rate": 2.393275336783761e-06,
"loss": 0.089,
"step": 142500
},
{
"epoch": 23.554674595308885,
"grad_norm": 0.4817243218421936,
"learning_rate": 2.387157557293005e-06,
"loss": 0.0874,
"step": 142600
},
{
"epoch": 23.571192599933926,
"grad_norm": 0.6620100140571594,
"learning_rate": 2.381039777802249e-06,
"loss": 0.0878,
"step": 142700
},
{
"epoch": 23.587710604558968,
"grad_norm": 0.9131438732147217,
"learning_rate": 2.374921998311493e-06,
"loss": 0.0879,
"step": 142800
},
{
"epoch": 23.60422860918401,
"grad_norm": 0.5091140270233154,
"learning_rate": 2.3688042188207367e-06,
"loss": 0.0941,
"step": 142900
},
{
"epoch": 23.62074661380905,
"grad_norm": 0.6191192865371704,
"learning_rate": 2.362686439329981e-06,
"loss": 0.0932,
"step": 143000
},
{
"epoch": 23.62074661380905,
"eval_cer": 0.036659024022996374,
"eval_loss": 0.10472416132688522,
"eval_runtime": 52.4751,
"eval_samples_per_second": 32.225,
"eval_steps_per_second": 8.061,
"eval_wer": 0.2030849934428277,
"step": 143000
},
{
"epoch": 23.63726461843409,
"grad_norm": 0.5774977803230286,
"learning_rate": 2.356568659839225e-06,
"loss": 0.1072,
"step": 143100
},
{
"epoch": 23.653782623059133,
"grad_norm": 0.6127384901046753,
"learning_rate": 2.350450880348469e-06,
"loss": 0.0856,
"step": 143200
},
{
"epoch": 23.670300627684174,
"grad_norm": 0.5244102478027344,
"learning_rate": 2.3443331008577127e-06,
"loss": 0.0845,
"step": 143300
},
{
"epoch": 23.686818632309215,
"grad_norm": 0.8045458197593689,
"learning_rate": 2.3382153213669566e-06,
"loss": 0.0887,
"step": 143400
},
{
"epoch": 23.703336636934257,
"grad_norm": 0.5768733024597168,
"learning_rate": 2.3320975418762005e-06,
"loss": 0.0997,
"step": 143500
},
{
"epoch": 23.7198546415593,
"grad_norm": 0.7417640089988708,
"learning_rate": 2.3259797623854448e-06,
"loss": 0.0952,
"step": 143600
},
{
"epoch": 23.736372646184343,
"grad_norm": 0.6068658232688904,
"learning_rate": 2.3198619828946887e-06,
"loss": 0.0928,
"step": 143700
},
{
"epoch": 23.752890650809384,
"grad_norm": 0.8562188148498535,
"learning_rate": 2.3137442034039326e-06,
"loss": 0.0868,
"step": 143800
},
{
"epoch": 23.769408655434425,
"grad_norm": 0.6002250909805298,
"learning_rate": 2.3076264239131764e-06,
"loss": 0.0883,
"step": 143900
},
{
"epoch": 23.785926660059467,
"grad_norm": 0.6457869410514832,
"learning_rate": 2.3015086444224203e-06,
"loss": 0.0871,
"step": 144000
},
{
"epoch": 23.785926660059467,
"eval_cer": 0.03666757922113476,
"eval_loss": 0.10333551466464996,
"eval_runtime": 53.1345,
"eval_samples_per_second": 31.825,
"eval_steps_per_second": 7.961,
"eval_wer": 0.20296009492287517,
"step": 144000
},
{
"epoch": 23.802444664684508,
"grad_norm": 0.6609480381011963,
"learning_rate": 2.2953908649316646e-06,
"loss": 0.0935,
"step": 144100
},
{
"epoch": 23.81896266930955,
"grad_norm": 0.5107303261756897,
"learning_rate": 2.2892730854409085e-06,
"loss": 0.0887,
"step": 144200
},
{
"epoch": 23.83548067393459,
"grad_norm": 0.6314355134963989,
"learning_rate": 2.2831553059501524e-06,
"loss": 0.0903,
"step": 144300
},
{
"epoch": 23.85199867855963,
"grad_norm": 0.49561741948127747,
"learning_rate": 2.2770375264593963e-06,
"loss": 0.0859,
"step": 144400
},
{
"epoch": 23.868516683184673,
"grad_norm": 0.7324890494346619,
"learning_rate": 2.27091974696864e-06,
"loss": 0.0924,
"step": 144500
},
{
"epoch": 23.885034687809714,
"grad_norm": 0.5809805393218994,
"learning_rate": 2.264801967477884e-06,
"loss": 0.0917,
"step": 144600
},
{
"epoch": 23.901552692434755,
"grad_norm": 0.6561674475669861,
"learning_rate": 2.2586841879871284e-06,
"loss": 0.0921,
"step": 144700
},
{
"epoch": 23.918070697059797,
"grad_norm": 0.618030846118927,
"learning_rate": 2.2525664084963723e-06,
"loss": 0.0954,
"step": 144800
},
{
"epoch": 23.934588701684838,
"grad_norm": 0.6414436101913452,
"learning_rate": 2.2464486290056166e-06,
"loss": 0.0881,
"step": 144900
},
{
"epoch": 23.95110670630988,
"grad_norm": 0.9026370644569397,
"learning_rate": 2.2403308495148605e-06,
"loss": 0.091,
"step": 145000
},
{
"epoch": 23.95110670630988,
"eval_cer": 0.036701800013688314,
"eval_loss": 0.1043509840965271,
"eval_runtime": 52.6333,
"eval_samples_per_second": 32.128,
"eval_steps_per_second": 8.037,
"eval_wer": 0.20320989196278025,
"step": 145000
},
{
"epoch": 23.96762471093492,
"grad_norm": 0.4682947099208832,
"learning_rate": 2.2342130700241043e-06,
"loss": 0.0892,
"step": 145100
},
{
"epoch": 23.984142715559962,
"grad_norm": 0.6425852179527283,
"learning_rate": 2.2280952905333482e-06,
"loss": 0.0877,
"step": 145200
},
{
"epoch": 24.000660720185003,
"grad_norm": 0.4977991282939911,
"learning_rate": 2.221977511042592e-06,
"loss": 0.0855,
"step": 145300
},
{
"epoch": 24.017178724810044,
"grad_norm": 0.68033766746521,
"learning_rate": 2.2158597315518364e-06,
"loss": 0.0887,
"step": 145400
},
{
"epoch": 24.033696729435086,
"grad_norm": 0.8233726024627686,
"learning_rate": 2.2097419520610803e-06,
"loss": 0.0926,
"step": 145500
},
{
"epoch": 24.050214734060127,
"grad_norm": 0.6886569261550903,
"learning_rate": 2.203624172570324e-06,
"loss": 0.0959,
"step": 145600
},
{
"epoch": 24.066732738685168,
"grad_norm": 0.5320963263511658,
"learning_rate": 2.197506393079568e-06,
"loss": 0.0928,
"step": 145700
},
{
"epoch": 24.08325074331021,
"grad_norm": 0.6369372010231018,
"learning_rate": 2.191388613588812e-06,
"loss": 0.0952,
"step": 145800
},
{
"epoch": 24.09976874793525,
"grad_norm": 0.6117287874221802,
"learning_rate": 2.1852708340980563e-06,
"loss": 0.1055,
"step": 145900
},
{
"epoch": 24.116286752560292,
"grad_norm": 0.7260856032371521,
"learning_rate": 2.1791530546073e-06,
"loss": 0.0978,
"step": 146000
},
{
"epoch": 24.116286752560292,
"eval_cer": 0.03682157278762576,
"eval_loss": 0.10561419278383255,
"eval_runtime": 52.1637,
"eval_samples_per_second": 32.417,
"eval_steps_per_second": 8.109,
"eval_wer": 0.20333479048273279,
"step": 146000
}
],
"logging_steps": 100,
"max_steps": 181620,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.861432368096291e+20,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}